This notebook trains a message-passing neural network using a 10-fold cross-validation split.

In [1]:
from polyid.preprocessors import PolymerPreprocessor
from polyid import MultiModel, Parameters
from polyid.models import global100

from nfp.preprocessing.features import atom_features_v1, bond_features_v1

import pandas as pd
import numpy as np

2023-09-23 14:09:33.013211: I tensorflow/core/platform/cpu_feature_guard.cc:193] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations:  AVX2 AVX_VNNI FMA
To enable them in other operations, rebuild TensorFlow with the appropriate compiler flags.
2023-09-23 14:09:33.378772: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer.so.7'; dlerror: libnvinfer.so.7: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /usr/local/cuda-11.2/lib64::/home/wilsoa6/miniconda3/envs/stonks/lib/
2023-09-23 14:09:33.378840: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer_plugin.so.7'; dlerror: libnvinfer_plugin.so.7: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /usr/local/cuda-11.2/lib64::/home/wilsoa6/miniconda

In [None]:
# Build the parameter container used by the rest of this cell
params = Parameters()

# Optimized hyperparameters from the publication, applied in order
published_hyperparameters = {
    'batch_size': 1,
    'learning_rate': 1E-4,
    'decay': 1E-5,
    'atom_features': 128,
    'bond_features': 128,
    'num_messages': 12,
}
for hp_name, hp_value in published_hyperparameters.items():
    setattr(params, hp_name, hp_value)

# Settings specific to this training example
target_columns = [
    'Glass_Transition',
    'Melt_Temp',
    'Density',
    'log10_Permeability_CO2',
    'log10_Permeability_N2',
    'log10_Permeability_O2',
    'YoungMod',
]
params.prediction_columns = target_columns
params.epochs = 500  # recommended 500 - 1000
params.kfolds = 10

# Print the full configuration as a single-column table, then a trailing newline
param_series = pd.Series(params.to_dict())
param_table = pd.DataFrame(param_series, columns=['parameter'])
print(param_table, '\n')

# MultiModel is the manager object for the multiple SingleModels
mm = MultiModel()

# Read the training data and declare which columns are prediction targets
mm.load_dataset(
    '../data/dftrain.csv',
    prediction_columns=params.prediction_columns,
)

# Divide the data into k folds and generate the model classes
mm.split_data(kfolds=params.kfolds)

# Scale the data: the scaler is built from the entire data set and then
# applied to each individual model
mm.generate_data_scaler()

# Build one preprocessor per model; this preprocessor works from SMILES only
preprocessor_options = {
    'preprocessor': PolymerPreprocessor,
    'atom_features': atom_features_v1,
    'bond_features': bond_features_v1,
    'batch_size': params.batch_size,
}
mm.generate_preprocessors(**preprocessor_options)

# Train the models, saving results under the "save_examples" folder
training_options = {
    'modelbuilder': global100,
    'model_params': params.to_dict(),
    'save_folder': "save_examples",
    'save_training': True,
}
mm.train_models(**training_options)