This notebook trains a message-passing neural network using a 10-fold cross-validation split.

In [None]:
from polyid.preprocessors import PolymerPreprocessor
from polyid import MultiModel, Parameters
from polyid.models import global100

from nfp.preprocessing.features import atom_features_v1, bond_features_v1
from model_utils import bond_featurizer

import pandas as pd
import numpy as np

In [None]:
# Build the model parameters.
# Parameters comes with default values; the assignments below override some of them.
# For optimized hyperparameters see forthcoming publication.
params = Parameters()
params.prediction_columns = [
    "Glass_Transition",
    "Melt_Temp",
    "Density",
    "log10_Permeability_CO2",
    "log10_Permeability_N2",
    "log10_Permeability_O2",
    "YoungMod",
]
params.epochs = 500  # recommended 500 - 1000
params.kfolds = 10  # recommended 10

# Display every parameter as a single-column table, followed by an extra newline.
param_series = pd.Series(params.to_dict())
param_table = pd.DataFrame(param_series, columns=["parameter"])
print(param_table, "\n")

# MultiModel is the class that manages multiple SingleModels.
mm = MultiModel()

# Read the training data and declare which columns are the prediction targets.
mm.load_dataset(
    "../data/dftrain.csv",
    prediction_columns=params.prediction_columns,
)

# Partition the data into k folds and generate the model classes.
mm.split_data(kfolds=params.kfolds)

# Scale the data: the scaler is built from the entire data set and then
# applied to each individual model.
mm.generate_data_scaler()

# Build one preprocessor per model.
# Here we use a preprocessor that uses just SMILES.
preprocessor_options = {
    "preprocessor": PolymerPreprocessor,
    "atom_features": atom_features_v1,
    "bond_features": bond_features_v1,
}
mm.generate_preprocessors(**preprocessor_options)

# Train the models, passing the parameter dict through to the model builder;
# "save_examples" is handed over as the save folder with save_training enabled.
training_options = {
    "modelbuilder": global100,
    "model_params": params.to_dict(),
    "save_folder": "save_examples",
    "save_training": True,
}
mm.train_models(**training_options)