# Practical Tips for Neural Networks

In [None]:
!pip install matminer

In [None]:
!pip install skorch

In [None]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import torch
import torch.nn as nn
import torch.optim as optim
from skorch import NeuralNetBinaryClassifier
from matminer.datasets.convenience_loaders import load_mp
from matminer.featurizers.conversions import StrToComposition
from matminer.featurizers.composition import ElementProperty

## Load Materials Project dataset

In [None]:
# Fetch the dataset through matminer's convenience loader; the result is a
# pandas DataFrame.
df = load_mp()

# Peek at the first rows, then summarise the numeric columns.
df.head()
df.describe()

## Featurize dataset (~3 minutes)

In [None]:
# Step 1: turn the text in the "formula" column into composition objects.
# Rows that fail to convert are tolerated (ignore_errors=True) instead of
# aborting the whole run.
formula_to_composition = StrToComposition()
df = formula_to_composition.featurize_dataframe(df, "formula", ignore_errors=True)

# Step 2: build the "magpie" preset of element-property descriptors and
# apply it to the "composition" column, again tolerating per-row failures.
ep_feat = ElementProperty.from_preset(preset_name="magpie")
df = ep_feat.featurize_dataframe(df, col_id="composition", ignore_errors=True)

## Construct dataset (heat of formation)

In [None]:
# Columns that are neither model inputs nor the regression target:
# identifiers, the raw formula/composition, and the other properties.
excluded = [
    'gap pbe', 'formula', 'composition', 'mpid', 'e_hull',
    'elastic anisotropy', 'bulk modulus', 'shear modulus', 'mu_b',
]

# Remove those columns, shuffle the rows, renumber the index, and finally
# discard every row that still contains a missing value.
df_ = (
    df.drop(columns=excluded)
    .sample(frac=1)
    .reset_index(drop=True)
    .dropna(axis=0)
)

# Regression target: the 'e_form' column as an (n_samples, 1) column vector.
y = df_['e_form'].values.reshape(-1, 1)

# Everything left after removing the target column is the feature matrix.
excluded = ['e_form']
df_ = df_.drop(columns=excluded)
X = df_.values

## Split and Standardize dataset 

In [None]:
# Split data: 90% of the rows go to training, the remainder is held out.
from sklearn.model_selection import train_test_split
X_train, X_test, y_train, y_test = train_test_split(X, y, train_size=0.9)

# Standardize input data.
# The statistics are computed on the training split only and then reused for
# the test split, so the held-out data does not influence the preprocessing.
X_mean = X_train.mean(axis=0, keepdims=True)
X_std = X_train.std(axis=0, keepdims=True)
# A feature that is constant over the training split has zero standard
# deviation; dividing by it would turn that column into NaN/inf and poison
# training. Use a unit scale for such columns instead (the training values
# then simply become 0 after mean subtraction).
X_std = np.where(X_std == 0, 1.0, X_std)
y_mean = y_train.mean(keepdims=True)
y_std = y_train.std(keepdims=True)

X_train = (X_train-X_mean)/X_std
X_test = (X_test-X_mean)/X_std
y_train = (y_train-y_mean)/y_std
y_test = (y_test-y_mean)/y_std

# Convert numpy array to pytorch (float32 tensors)
X_train = torch.tensor(X_train).float()
X_test = torch.tensor(X_test).float()
y_train = torch.tensor(y_train).float()
y_test = torch.tensor(y_test).float()
print(X_train.shape, y_train.shape)

## Construct model

In [None]:
# Input width is the number of feature columns; `dim` is the hidden width.
num_features = X_train.shape[-1]
dim = 1024

# Build the hidden blocks with fresh layer objects for every repetition.
# Replicating a list of modules with `* 2` would insert the *same* Linear and
# BatchNorm1d instances twice, so both hidden blocks would silently share one
# set of weights and running statistics.
hidden_layers = []
for _ in range(2):
    hidden_layers += [nn.Linear(dim, dim),
                      nn.BatchNorm1d(dim),
                      nn.ReLU()]

# MLP: input projection -> 2 x (Linear, BatchNorm, ReLU) -> scalar output.
model = nn.Sequential(nn.Linear(num_features, dim),
                      nn.ReLU(),
                      *hidden_layers,
                      nn.Linear(dim, 1))

## Optimize model

In [None]:
from skorch import NeuralNetRegressor

# Train on the GPU when one is available; otherwise fall back to the CPU so
# the cell still runs on machines without CUDA.
device = 'cuda' if torch.cuda.is_available() else 'cpu'

# Wrap the PyTorch model in skorch's scikit-learn style regressor and fit it
# on the standardized training data.
net = NeuralNetRegressor(model, batch_size=64, max_epochs=20, lr=2e-4, device=device)
net.fit(X_train, y_train)

## Evaluate performance

In [None]:
from sklearn.metrics import mean_absolute_error, r2_score

# Training-set performance. Predictions and targets are both mapped back from
# standardized units to the original target scale before scoring.
preds = net.predict(X_train) * y_std + y_mean
targets = y_train.numpy() * y_std + y_mean

print(mean_absolute_error(targets, preds))
print(r2_score(targets, preds))

In [None]:
# Held-out (test) performance, again reported on the original target scale.
preds = net.predict(X_test) * y_std + y_mean
targets = y_test.numpy() * y_std + y_mean

print(mean_absolute_error(targets, preds))
print(r2_score(targets, preds))

## Optimizers: Learning Rate Scheduler and Callbacks

In [None]:
# Build five independent (Linear, ReLU, Dropout) hidden blocks. Each block
# gets its own layer objects: replicating a list of modules with `* 5` would
# reuse the same Linear instance five times, tying all hidden weights together.
hidden_layers = []
for _ in range(5):
    hidden_layers += [nn.Linear(dim, dim),
                      nn.ReLU(),
                      nn.Dropout(p=0.2)]

model = nn.Sequential(nn.Linear(num_features, dim),
                      nn.ReLU(),
                      *hidden_layers,
                      nn.Linear(dim, 1))

from skorch.callbacks import LRScheduler

# Learning-rate schedule: ReduceLROnPlateau policy, with the learning rate
# never lowered below 1e-5.
callbacks=[
        ('lr_scheduler',
         LRScheduler(policy='ReduceLROnPlateau',
                     min_lr=1e-5)),
    ]

# Use the GPU when available, otherwise fall back to the CPU so the cell
# still runs on machines without CUDA.
device = 'cuda' if torch.cuda.is_available() else 'cpu'
net = NeuralNetRegressor(model, batch_size=64, max_epochs=20, lr=2e-3, device=device, callbacks=callbacks)
net.fit(X_train, y_train)

In [None]:
# Report MAE and R^2 for the training split first, then for the test split.
# Predictions and targets are mapped back to the original target scale
# before scoring.
for X_split, y_split in ((X_train, y_train), (X_test, y_test)):
    preds = net.predict(X_split) * y_std + y_mean
    targets = y_split.numpy() * y_std + y_mean
    print(mean_absolute_error(targets, preds))
    print(r2_score(targets, preds))