Skip to content

Commit

Permalink
Merge db76ab4 into f4720da
Browse files Browse the repository at this point in the history
  • Loading branch information
JosephMontoya-TRI committed Jul 26, 2022
2 parents f4720da + db76ab4 commit f4995b3
Show file tree
Hide file tree
Showing 65 changed files with 7,870 additions and 2,694 deletions.
2 changes: 1 addition & 1 deletion .github/workflows/camd-docker.yml
Original file line number Diff line number Diff line change
Expand Up @@ -14,7 +14,7 @@ jobs:
os: [
ubuntu-latest,
]
python-version: [3.7]
python-version: [3.8]
runs-on: ${{ matrix.os }}
steps:
- uses: actions/checkout@v2
Expand Down
4 changes: 2 additions & 2 deletions .github/workflows/camd-test-main.yml
Original file line number Diff line number Diff line change
Expand Up @@ -13,7 +13,7 @@ jobs:
macos-latest,
# windows-latest
]
python-version: [3.7]
python-version: [3.9]

runs-on: ${{ matrix.os }}

Expand All @@ -27,7 +27,7 @@ jobs:
run: |
python -m pip install --upgrade pip
pip install -r requirements.txt
pip install -e .[tests]
pip install -e .[tests,m3gnet,atomate,proto_dft]
- name: pytest
env:
MPLBACKEND: "Agg"
Expand Down
4 changes: 2 additions & 2 deletions Dockerfile
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,7 @@ SHELL ["/bin/bash", "-c"]
ENV PATH="/opt/conda/bin/:$PATH"

RUN mkdir -p /home/camd && \
conda create -n camd python=3.7 && \
conda create -n camd python=3.8 && \
apt-get update && \
apt-get -y install gcc g++

Expand All @@ -22,4 +22,4 @@ RUN source /opt/conda/bin/activate camd && \
pip install -r requirements.txt

COPY camd /home/camd/camd
RUN pip install -e .
RUN pip install -e .[proto_dft,m3gnet,atomate]
1 change: 1 addition & 0 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,7 @@
![Testing - main](https://github.com/TRI-AMDD/CAMD/workflows/Testing%20-%20main/badge.svg)
![Linting](https://github.com/TRI-AMDD/CAMD/workflows/Linting/badge.svg)
[![Coverage Status](https://coveralls.io/repos/github/TRI-AMDD/CAMD/badge.svg)](https://coveralls.io/github/TRI-AMDD/CAMD)
[![Open In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/TRI-AMDD/camd/blob/master/examples/main_tutorial.ipynb)

CAMD provides a flexible software framework for sequential / Bayesian optimization type campaigns for materials discovery. Its key features include:
* **Agents**: Decision making entities which select experiments to run from pre-determined candidate sets. Agents can combine machine learning with physical or chemical constructs, logic, heuristics, exploration-exploitation strategies and so on. CAMD comes with several generic and structure-discovery focused agents, which can be used by the users as templates to derive new ones.
Expand Down
117 changes: 90 additions & 27 deletions camd/agent/generic.py
Original file line number Diff line number Diff line change
Expand Up @@ -13,6 +13,8 @@
from sklearn.gaussian_process import GaussianProcessRegressor
from sklearn.gaussian_process.kernels import RBF, ConstantKernel
from sklearn.linear_model import LinearRegression
from sklearn.ensemble import RandomForestRegressor
from sklearn.neural_network import MLPRegressor

from camd.agent.base import HypothesisAgent

Expand Down Expand Up @@ -61,7 +63,7 @@ def get_hypotheses(self, candidate_data, seed_data=None):

self.candidate_data = candidate_data.drop(columns=["target"], axis=1)
self.seed_data = seed_data
X_seed = seed_data.drop(columns=["target"], axis=1, errors='ignore')
X_seed = seed_data.drop(columns=["target"], axis=1, errors="ignore")
y_seed = seed_data["target"]
steps = [
("scaler", StandardScaler()),
Expand Down Expand Up @@ -203,8 +205,8 @@ def get_hypotheses(self, candidate_data, seed_data=None):
2
* np.log(
len(self.candidate_data)
* _t ** 2
* np.pi ** 2
* _t**2
* np.pi**2
/ 6
/ self.kwargs.get("delta", 0.1)
)
Expand Down Expand Up @@ -247,36 +249,37 @@ def get_hypotheses(self, candidate_data, seed_data=None):
return self.candidate_data.loc[batch]


class LinearAgent(HypothesisAgent):
class RegressorAgent(HypothesisAgent):
"""
Linear regression based agent that tries to maximize a target.
Best for simple checks and benchmarks.
"""

def __init__(
self,
model,
features=None,
target="target",
candidate_data=None,
seed_data=None,
n_query: int = None,
fit_intercept: bool = True,
positive: bool = False,
):

"""
Args:
model (sklearn.RegressorMixin): some regressor with "fit" method
candidate_data (pandas.DataFrame): data about the candidates to search over. Must have a "target" column,
and at least one additional column that can be used as descriptors.
seed_data (pandas.DataFrame): data to fit the Agent to.
n_query (int): number of queries allowed. Defaults to 1.
fit_intercept (bool): if the intercept is fit for the linear regression
positive (bool): if true, constraint coefficients to be positive for the linear regression
"""
self.model = model
self.features = features
self.target = target
self.candidate_data = candidate_data
self.seed_data = seed_data
self.n_query = n_query if n_query else 1
self.fit_intercept = fit_intercept
self.positive = positive
super(LinearAgent).__init__()
super(RegressorAgent).__init__()

def get_hypotheses(self, candidate_data, seed_data=None):
"""
Expand All @@ -291,31 +294,91 @@ def get_hypotheses(self, candidate_data, seed_data=None):
"""
# Fit on known data
self.candidate_data = candidate_data.drop(
columns=["target"], axis=1, errors="ignore"
)
self.candidate_data = candidate_data

if seed_data is not None:
self.seed_data = seed_data
else:
raise ValueError(
"Linear Agent requires a finite seed as input. "
"RegressorAgent requires a finite seed as input. "
"If you are using this as part of a Campaign, consider "
"the create_seed option."
)

X_seed = seed_data.drop(columns=["target"], axis=1)
y_seed = seed_data["target"]
steps = [
("scaler", StandardScaler()),
(
"linear",
LinearRegression(),
),
]
self.pipeline = Pipeline(steps)
self.pipeline.fit(X_seed, y_seed)
output = self.pipeline.predict(self.candidate_data)
if self.features is not None:
X_seed = seed_data[self.features]
X_cand = candidate_data[self.features]
else:
X_seed = seed_data.drop(columns=[self.target], axis=1)
X_cand = candidate_data.drop(
columns=[self.target], axis=1, errors="ignore"
)
y_seed = seed_data[self.target]
self.model.fit(X_seed, y_seed)
output = self.model.predict(X_cand)
sorted_output = np.argsort(output)[::-1]
selected = sorted_output[: self.n_query]
return candidate_data.iloc[selected]

@classmethod
def from_linear(
    cls,
    features=None,
    target="target",
    candidate_data=None,
    seed_data=None,
    n_query: int = None,
    **kwargs
):
    """
    Alternate constructor: build an agent backed by a LinearRegression model.

    Args:
        features: columns used as model inputs; when None, every column
            except the target column is used.
        target: name of the column holding the quantity to maximize.
        candidate_data: data about the candidates to search over.
        seed_data: data to fit the agent to.
        n_query (int): number of queries allowed; the constructor falls
            back to 1 when this is not set.
        **kwargs: forwarded unchanged to LinearRegression.

    Returns:
        An instance of this class wrapping the new LinearRegression.
    """
    # Everything except the model is handed through to the constructor as-is.
    agent_args = dict(
        features=features,
        target=target,
        candidate_data=candidate_data,
        seed_data=seed_data,
        n_query=n_query,
    )
    return cls(model=LinearRegression(**kwargs), **agent_args)

@classmethod
def from_random_forest(
    cls,
    features=None,
    target="target",
    candidate_data=None,
    seed_data=None,
    n_query: int = None,
    **kwargs
):
    """
    Alternate constructor: build an agent backed by a RandomForestRegressor.

    Args:
        features: columns used as model inputs; when None, every column
            except the target column is used.
        target: name of the column holding the quantity to maximize.
        candidate_data: data about the candidates to search over.
        seed_data: data to fit the agent to.
        n_query (int): number of queries allowed; the constructor falls
            back to 1 when this is not set.
        **kwargs: forwarded unchanged to RandomForestRegressor.

    Returns:
        An instance of this class wrapping the new RandomForestRegressor.
    """
    # Everything except the model is handed through to the constructor as-is.
    agent_args = dict(
        features=features,
        target=target,
        candidate_data=candidate_data,
        seed_data=seed_data,
        n_query=n_query,
    )
    return cls(model=RandomForestRegressor(**kwargs), **agent_args)

@classmethod
def from_mlp(
    cls,
    features=None,
    target="target",
    candidate_data=None,
    seed_data=None,
    n_query: int = None,
    **kwargs
):
    """
    Alternate constructor: build an agent backed by an MLPRegressor.

    Args:
        features: columns used as model inputs; when None, every column
            except the target column is used.
        target: name of the column holding the quantity to maximize.
        candidate_data: data about the candidates to search over.
        seed_data: data to fit the agent to.
        n_query (int): number of queries allowed; the constructor falls
            back to 1 when this is not set.
        **kwargs: forwarded unchanged to MLPRegressor.

    Returns:
        An instance of this class wrapping the new MLPRegressor.
    """
    # Everything except the model is handed through to the constructor as-is.
    agent_args = dict(
        features=features,
        target=target,
        candidate_data=candidate_data,
        seed_data=seed_data,
        n_query=n_query,
    )
    return cls(model=MLPRegressor(**kwargs), **agent_args)
Loading

0 comments on commit f4995b3

Please sign in to comment.