### Importing packages

In [1]:
import numpy as np
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestClassifier, RandomForestRegressor
from sklearn.metrics import mean_squared_error, r2_score
from pymatgen.core import Structure, Composition
from matminer.featurizers.structure import DensityFeatures, StructuralComplexity, MaximumPackingEfficiency
from matminer.featurizers.composition import ElementProperty
from matminer.featurizers.conversions import CompositionToOxidComposition
from matminer.featurizers.composition.ion import ElectronegativityDiff
from matminer.featurizers.structure.matrix import OrbitalFieldMatrix

  from .autonotebook import tqdm as notebook_tqdm


### Training models from CSV files and testing them

In [2]:
# Load the stability dataset; the "stable" column is later used as the classification label.
stablility_data = pd.read_csv("stability_data.csv")

In [3]:
# Separate the feature columns from the "stable" label column.
X = stablility_data.drop(columns="stable")
y = stablility_data["stable"].values

# Hold out a test set, stratified on the label; the seed is fixed for reproducibility.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    stratify=y,
    random_state=0,
)

# Fit a random forest classifier on the training portion only.
stability_classifier = RandomForestClassifier(random_state=0)
stability_classifier.fit(X_train, y_train)

In [5]:
# score() is scaled by 100 so the accuracies print as percentages.
training_accuracy = 100 * stability_classifier.score(X_train, y_train)
testing_accuracy = 100 * stability_classifier.score(X_test, y_test)

print(f'Accuracy on training data: {training_accuracy:.1f} %')
print(f'Accuracy on testing data: {testing_accuracy:.1f} %')

Accuracy on training data: 100.0 %
Accuracy on testing data: 83.1 %


In [6]:
# Load the band gap dataset; the "band gaps" column is later used as the regression target.
bandgap_data = pd.read_csv("bandgap_data.csv")

In [7]:
# Separate the feature columns from the "band gaps" target column.
X = bandgap_data.drop(columns="band gaps")
y = bandgap_data["band gaps"].values

# Keep 20 % of the rows for testing; the seed is fixed for reproducibility.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=0,
)

# Fit a random forest regressor on the training portion only.
bandgap_regressor = RandomForestRegressor(random_state=0)
bandgap_regressor.fit(X_train, y_train)

In [10]:
# Evaluate the regressor on the held-out rows.
y_predict = bandgap_regressor.predict(X_test)

bandgap_mse = mean_squared_error(y_test, y_predict)
bandgap_r2 = r2_score(y_test, y_predict)

print(f"MSE: {bandgap_mse}")
print(f"r2: {bandgap_r2}")

MSE: 0.5476220879924832
r2: 0.7577680685419896


### Making a function for converting inputs into pymatgen objects

In [11]:
def pymatgen_converter(element_1, element_2, element_3, abc, angles, volume, matrix, pbc, frac_coords_1, frac_coords_2, frac_coords_3, frac_coords_4, frac_coords_5):
    """
    Build a pymatgen Composition and Structure for a five-site cell.

    The cell holds one site of ``element_1``, one site of ``element_2`` and
    three sites of ``element_3``. The composition is counted from those
    sites, so it stays consistent with the structure even when the same
    element symbol is passed for more than one argument.

    Args:
        element_1      (string): element symbol placed at frac_coords_1
        element_2      (string): element symbol placed at frac_coords_2
        element_3      (string): element symbol placed at frac_coords_3,
                                 frac_coords_4 and frac_coords_5
        abc            (array of floats): stored under the lattice "abc" key
        angles         (array of floats): stored under the lattice "angles" key
        volume         (float): stored under the lattice "volume" key
        matrix         (array of arrays of floats): stored under the lattice
                                 "matrix" key
        pbc            (array of Booleans): stored under the lattice "pbc" key
                                 (presumably the periodic-boundary flags, one
                                 per lattice direction - confirm)
        frac_coords_1  (list of floats): coordinates of the element_1 site
        frac_coords_2  (list of floats): coordinates of the element_2 site
        frac_coords_3  (list of floats): coordinates of the first element_3 site
        frac_coords_4  (list of floats): coordinates of the second element_3 site
        frac_coords_5  (list of floats): coordinates of the third element_3 site
    Returns:
        composition (pymatgen composition)
        structure (pymatgen structure)
    """
    # One (element, coordinates) pair per site, in the order the sites are emitted.
    site_specs = [
        (element_1, frac_coords_1),
        (element_2, frac_coords_2),
        (element_3, frac_coords_3),
        (element_3, frac_coords_4),
        (element_3, frac_coords_5),
    ]

    # Accumulate the atom count per element. A dict literal keyed by the three
    # symbols would silently drop counts whenever two symbols coincide.
    composition_dict = {}
    for element, _ in site_specs:
        composition_dict[element] = composition_dict.get(element, 0) + 1
    composition = Composition(composition_dict)

    structure_dict = {
        "lattice": {
            "abc": abc,
            "angles": angles,
            "volume": volume,
            "matrix": matrix,
            "pbc": pbc
        },
        "sites": [
            {"species": [{"element": element, "occu": 1}], "abc": frac_coords}
            for element, frac_coords in site_specs
        ]
    }
    structure = Structure.from_dict(structure_dict)

    return composition, structure

### Using the function to build pymatgen objects, generating features from them, and feeding those features to the appropriate models

In [12]:
# Example input: a cubic cell (equal edge lengths, 90 degree angles) with one Ac site,
# one Al site and three O sites.
material = pymatgen_converter(
    element_1="Ac",
    element_2="Al",
    element_3="O",
    abc=[3.85863387, 3.85863387, 3.85863387],
    angles=[90.0, 90.0, 90.0],
    volume=57.4514132376898,
    matrix=[
        [3.85863387, -0.0, 0.0],
        [-0.0, 3.85863387, 0.0],
        [-0.0, -0.0, 3.85863387],
    ],
    pbc=[True, True, True],
    frac_coords_1=(0.0, 0.0, 0.0),
    frac_coords_2=(0.5, 0.5, 0.5),
    frac_coords_3=(0.5, 0.5, 0.0),
    frac_coords_4=(0.5, 0.0, 0.5),
    frac_coords_5=(0.0, 0.5, 0.5),
)

# pymatgen_converter returns (composition, structure); each goes into its own
# column of a single-row frame so the featurizers can read them by column name.
material_df = pd.DataFrame(
    {
        "composition": [material[0]],
        "structure": [material[1]],
    }
)

#### Stability model

In [13]:
# NOTE(review): stability_feats starts out as an alias of material_df, which the
# band gap cell reuses. This assumes featurize_dataframe returns a new frame
# instead of mutating its input - confirm for the installed matminer version.
stability_feats = material_df

# Apply each structure-based featurizer in turn, feeding the output of one into the next.
for featurizer in (
    DensityFeatures(),
    StructuralComplexity(),
    MaximumPackingEfficiency(),
):
    stability_feats = featurizer.featurize_dataframe(stability_feats, "structure")

DensityFeatures: 100%|██████████| 1/1 [00:00<00:00,  2.98it/s]
StructuralComplexity: 100%|██████████| 1/1 [00:00<00:00,  3.88it/s]
MaximumPackingEfficiency: 100%|██████████| 1/1 [00:00<00:00,  3.83it/s]


In [15]:
# Drop the pymatgen object columns so only the generated features reach the model.
X = stability_feats.drop(columns=["composition", "structure"])

# The frame has a single row, so the first prediction is the answer.
is_stable = stability_classifier.predict(X)[0]
print(f"Stable: {is_stable}")

Stable: True


#### Band gap model

In [16]:
ep_feat = ElementProperty.from_preset(preset_name="magpie")

# Each step is (featurizer, input column, extra keyword arguments), applied in order.
# ElectronegativityDiff reads the "composition_oxid" column (presumably added by the
# CompositionToOxidComposition step - confirm) and is the only step run with
# ignore_errors=True.
featurization_steps = [
    (ep_feat, "composition", {}),
    (CompositionToOxidComposition(), "composition", {}),
    (ElectronegativityDiff(), "composition_oxid", {"ignore_errors": True}),
    (OrbitalFieldMatrix(), "structure", {}),
]

bandgap_feats = material_df
for featurizer, column, options in featurization_steps:
    bandgap_feats = featurizer.featurize_dataframe(bandgap_feats, column, **options)

ElementProperty: 100%|██████████| 1/1 [00:00<00:00,  4.42it/s]
CompositionToOxidComposition: 100%|██████████| 1/1 [00:00<00:00,  5.93it/s]
ElectronegativityDiff: 100%|██████████| 1/1 [00:00<00:00,  5.45it/s]
OrbitalFieldMatrix: 100%|██████████| 1/1 [00:00<00:00,  3.41it/s]


In [17]:
# Drop the pymatgen object columns so only the generated features reach the model.
X = bandgap_feats.drop(columns=["composition", "structure", "composition_oxid"])

# The frame has a single row, so the first prediction is the answer.
predicted_band_gap = bandgap_regressor.predict(X)[0]
print(f"Band Gap: {predicted_band_gap}")

Band Gap: 3.5935729999999957
