# Evaluating a parsimonious neural network (PNN) model - melting #

<i>Saaketh Desai</i> and <i>Alejandro Strachan</i>, School of Materials Engineering, Purdue University <br>

This notebook evaluates a parsimonious neural network generated by the optimization algorithm in the [previous notebook](discover_melting.ipynb). The outline of this notebook is:

1. Read training and testing data
2. Create model and set weights to values obtained from optimization 
3. Express network weights as interpretable equations
4. Evaluate model metrics on datasets and the objective function as per the genetic algorithm

In [None]:
import sys
import os

import numpy as np
import pandas as pd
from sklearn.model_selection import train_test_split


import tensorflow as tf
import keras
from keras import backend as K
from keras import initializers
from keras.layers import Dense, Input, Activation, multiply
from keras.models import Sequential, Model, load_model
from keras.layers.merge import add, concatenate

from matplotlib import pyplot as plt

## Step 1: Read training and testing data ##
We read in a CSV file containing fundamental quantities such as bulk modulus, shear modulus, and density, along with the experimental melting temperature. We then compute derived quantities such as the effective sound speed ($v_m$), which are used to compute the effective temperatures $\theta_0, \theta_1, \theta_2, \theta_3$ and the normalized inputs $\theta_1', \theta_2', \theta_3'$. Finally, we use the `train_test_split()` function from scikit-learn to split the data into training and testing sets.

In [None]:
# Load the combined dataset and report its (rows, columns) shape.
data_file = "../data/Combined_data_v3.csv"
df = pd.read_csv(data_file)
print(df.shape)

In [None]:
# Physical constants. The numeric values match the SI values of the Planck
# constant (h), Boltzmann constant (k), Avogadro number (Na) and reduced
# Planck constant (hbar). Na and hbar are not referenced again by name in the
# visible cells (later formulas inline their mantissas instead).
h = 6.62607015*1e-34
k = 1.380649*1e-23
Na = 6.0221407*1e23
pi = np.pi
hbar = 1.054571817*1e-34

# Sound speeds derived from the elastic moduli and the density columns.
vs = np.sqrt(df['G_VRH']/df['density']) #from Zack; sqrt(G/density)
vp = np.sqrt((df['K_VRH'] + (4/3)*df['G_VRH'])/df['density']) #from Zack; sqrt((K + 4G/3)/density)
vm = ( 3/( (1/vp)**3 + 2*(1/vs)**3 ) )**(1/3) #from JP Poirier paper; effective (mean) sound speed

# Debye temperature stored as a new column.
# NOTE(review): the 10**13 prefactor is presumably a unit-conversion factor
# for the units used in the CSV -- confirm against the data source.
df['debye_temp'] = 10**13*(h/k)*(3/(4*pi*df['volume_per_atom']))**(1/3)*vm

# Length scale: cube root of the volume per atom.
df['a'] = (df['volume_per_atom'])**(1/3)

# Short aliases used by the effective-temperature formulas in the next cell.
# NOTE: `K` below rebinds the name `K` that the import cell bound to the Keras
# backend (`from keras import backend as K`); after this cell `K` is the
# bulk-modulus column, not the backend module.
a = df['a']
m = df['mean_mass']
G = df['G_VRH']
K = df['K_VRH']

In [None]:
# Numeric prefactors built from the mantissas of hbar, k and Na.
# NOTE(review): the trailing 100 / 10 factors presumably absorb the powers of
# ten and unit conversions -- confirm against the dataset's units.
hbar_over_k = (1.054571817/1.380649)*100
hbar2_na_over_k = (1.054571817**2*6.0221407/1.380649)*10
inv_k = (1/1.380649)*100

# Effective temperatures
theta0 = hbar_over_k*vm/a                #hcross*vm/(k*a)
theta1 = hbar2_na_over_k*(1/(m*a**2))    #hcross**2/(m*a**2*k)
theta2 = inv_k*(a**3*G)                  #a**3*G/k
theta3 = inv_k*(a**3*K)                  #a**3*K/k

# Inputs and target normalized by theta0
theta1_prime, theta2_prime, theta3_prime = (
    theta/theta0 for theta in (theta1, theta2, theta3)
)
ones = np.ones(len(theta1_prime))
Tm_prime = df['Tm']/theta0

# One row per material: three normalized inputs plus a constant column of ones
inputs = np.array([theta1_prime, theta2_prime, theta3_prime, ones], dtype='float').T
outputs = np.array(Tm_prime).reshape(-1, 1)
print(inputs.shape, outputs.shape)

# Fixed-seed 80/20 train/test split
train_inputs, test_inputs, train_outputs, test_outputs = train_test_split(
    inputs, outputs, test_size=0.2, random_state=0
)
for features, targets in ((train_inputs, train_outputs), (test_inputs, test_outputs)):
    print(features.shape, targets.shape)

## Step 2: Create model and set weights to values obtained from optimization ##
We now create the generic neural network (used as a starting point during the equation discovery process) and set the activations and weights to be the values obtained from the optimization

In [None]:
# Activation gene value -> activation name
act_dict = dict(enumerate(['linear', 'squared', 'inverse', 'multiply', 'tanh']))

# Weight gene value -> weight: fixed 0, fixed 1, or one seeded uniform draw in [-1, 1)
np.random.seed(100000)
random_weight = np.random.uniform(-1, 1, 1)[0]
weight_dict = {0: 0, 1: 1, 2: random_weight}

# Number of activation genes and number of weight genes in an individual
nact_terms, nweight_terms = 4, 13

In [None]:
#Genome of the individual: 4 activation genes followed by 13 weight genes
individual = [0, 2, 0, 0, 0, 0, 2, 1, 0, 0, 0, 0, 0, 2, 2, 0, 2]

#Weight values for that individual, one float per weight gene
weights_list = [
    0.0, 0.0, 0.03156352, 1.0, 0.0, 0.0,
    0.0, 0.0, 0.0, 0.03156353, 0.0120849, 0.0,
    0.91340095,
]

In [None]:
def squared_act(x):
    """Square activation: return the input multiplied by itself."""
    squared = x * x
    return squared

def inverse_act(x):
    """Reciprocal activation: return ``1 / x``."""
    reciprocal = 1 / x
    return reciprocal

In [None]:
def create_node(input1, input2, input3, name, trainable1, trainable2, trainable3, act, x, idx):
    """Build one node that combines three single-weight branches.

    Each input is passed through its own bias-free ``Dense(1)`` layer named
    ``name + "1"``, ``name + "2"`` and ``name + "3"``; the three branch outputs
    are then combined according to ``act``.

    Parameters
    ----------
    input1, input2, input3 : Keras tensors feeding the three branches.
    name : str
        Prefix for the three Dense layer names.
    trainable1, trainable2, trainable3 : bool
        ``trainable`` flag of the corresponding Dense layer.
    act : str
        ``"multiply"``, ``"squared"``, ``"inverse"``, or any activation name
        accepted by ``keras.layers.Activation`` (e.g. ``"linear"``, ``"tanh"``).
    x : sequence of int
        Genome of the individual.
    idx : int
        Position in ``x`` of the gene belonging to the first branch; the genes
        of the three branches are ``x[idx]``, ``x[idx+1]``, ``x[idx+2]``.

    Returns
    -------
    The Keras tensor produced by the node.
    """
    branch_inputs = (input1, input2, input3)
    branch_trainable = (trainable1, trainable2, trainable3)
    node_list = [
        Dense(1, activation='linear', use_bias=False, name=name + str(pos),
              trainable=flag)(tensor)
        for pos, (tensor, flag) in enumerate(zip(branch_inputs, branch_trainable), start=1)
    ]

    if act != "multiply":
        # Sum the branches, then apply the activation.
        an = add(node_list)
        if act == "squared":
            return Activation(squared_act)(an)
        if act == "inverse":
            return Activation(inverse_act)(an)
        return Activation(act)(an)

    # Multiply node: only branches whose gene is 1 or 2 take part in the
    # product, so that a zero-gene branch does not zero out the whole node.
    non_zero_list = [t for i, t in enumerate(node_list) if x[idx + i] in (1, 2)]
    zero_list = [t for i, t in enumerate(node_list) if x[idx + i] not in (1, 2)]

    if len(non_zero_list) == 0:
        # No active branch: multiply all three branches. This case is now
        # exclusive (as in create_model); previously it fell through and built
        # a second, redundant Multiply layer.
        return multiply(node_list)
    if len(non_zero_list) == 1:
        # A product needs at least two factors: add the single active branch
        # to the two inactive ones instead.
        return add([non_zero_list[0], zero_list[0], zero_list[1]])
    return multiply(non_zero_list)

In [None]:
def create_model(x):
    """Build the Keras network encoded by genome ``x`` and load ``weights_list`` into it.

    ``x`` holds ``nact_terms`` activation genes (nodes a1, a2, a3 and the
    output node) followed by ``nweight_terms`` weight genes: three per hidden
    node, then four for the output node (one per hidden node plus one for the
    fourth input). A weight is trainable only when its gene equals 2.

    After compiling, the i-th weight-carrying layer in ``model.layers`` order
    receives ``weights_list[i]`` (module-level list).

    Parameters
    ----------
    x : sequence of int
        Genome of the individual.

    Returns
    -------
    (model, trainable_list)
        The compiled Keras model and the list of per-weight trainable flags.
    """
    trainable_list = [bool(x[i + nact_terms] == 2) for i in range(nweight_terms)]

    input1 = Input(shape=(1,))
    input2 = Input(shape=(1,))
    input3 = Input(shape=(1,))
    input4 = Input(shape=(1,))

    a1 = create_node(input1, input2, input3, "a1", trainable_list[0], trainable_list[1],
                     trainable_list[2], act_dict[x[0]], x, 0 + nact_terms)
    a2 = create_node(input1, input2, input3, "a2", trainable_list[3], trainable_list[4],
                     trainable_list[5], act_dict[x[1]], x, 3 + nact_terms)
    a3 = create_node(input1, input2, input3, "a3", trainable_list[6], trainable_list[7],
                     trainable_list[8], act_dict[x[2]], x, 6 + nact_terms)

    # Output branches: one weight per hidden node plus one on the fourth input.
    node_list = [
        Dense(1, activation='linear', use_bias=False, name='output' + str(pos + 1),
              trainable=trainable_list[9 + pos])(tensor)
        for pos, tensor in enumerate((a1, a2, a3, input4))
    ]

    act = act_dict[x[3]]
    if act == "multiply":
        # Genes of the four output weights. The nact_terms offset is required
        # because the weight genes start after the activation genes (the same
        # offset is used for trainable_list and in the create_node calls).
        out_idx = nact_terms + 9
        non_zero_list = [t for i, t in enumerate(node_list) if x[out_idx + i] in (1, 2)]
        zero_list = [t for i, t in enumerate(node_list) if x[out_idx + i] not in (1, 2)]
        if len(non_zero_list) == 1:
            # A product needs at least two factors: add instead.
            an = add([non_zero_list[0], zero_list[0], zero_list[1], zero_list[2]])
        else:
            # No active branch -> multiply everything; otherwise only the active ones.
            an = multiply(non_zero_list if non_zero_list else node_list)
    else:
        an = add(node_list)
        if act == "squared":
            an = Activation(squared_act)(an)
        elif act == "inverse":
            an = Activation(inverse_act)(an)
        else:
            an = Activation(act)(an)
    output = an

    model = Model(inputs=[input1, input2, input3, input4], outputs=[output])
    optimizer = tf.train.AdamOptimizer(learning_rate=1e-3)
    model.compile(loss='mse', optimizer=optimizer)

    # Indices of the layers that carry a weight: every layer whose name does
    # not mark it as an activation, input, add or multiply layer.
    structural_tags = ("activation", "input", "add", "multiply")
    layer_list = [
        i for i, layer in enumerate(model.layers)
        if not any(tag in layer.name for tag in structural_tags)
    ]

    # Each weight-carrying layer holds a single 1x1 kernel.
    for i, layer_idx in enumerate(layer_list):
        model.layers[layer_idx].set_weights([np.array([[weights_list[i]]])])

    return model, trainable_list

In [None]:
def f3(w):
    """Return ``w`` unchanged (identity mapping applied to each weight gene)."""
    result = w
    return result

In [None]:
# Build the network encoded by `individual`; create_model also loads the
# values in `weights_list` into the model. `trainable` is the list of
# per-weight trainable flags returned alongside the model.
new_model, trainable = create_model(individual)
#new_model.summary()

In [None]:
# Flatten the model weights: take element [0][0] of every weight array.
weights = new_model.get_weights()
weight_list = np.array([w[0][0] for w in weights])

## Step 3: Express network weights as interpretable equations ##
We use the sympy library to decode the weights and activations of the network into equations.

In [None]:
#Names of the weight-carrying layers, in model order; activation, input,
#add and multiply layers are skipped
structural_tags = ("activation", "input", "add", "multiply")
name_list = [
    layer.name
    for layer in new_model.layers
    if not any(tag in layer.name for tag in structural_tags)
]
print(name_list)

In [None]:
#Activation of each hidden node and of the output node, read from the first
#four genes of the individual
act_nodes = {
    node: act_dict[individual[pos]]
    for pos, node in enumerate(("a1", "a2", "a3", "output"))
}

#Layer name -> scalar weight, pairing weight_list with name_list by position
weight_dict = {name_list[pos]: value for pos, value in enumerate(weight_list)}

In [None]:
#Define symbolic variables

# Import only the sympy names the notebook uses. A star import would also
# rebind names already defined above (for example `pi`, set to np.pi earlier).
from sympy import symbols, tanh

# NOTE: from here on theta1_prime, theta2_prime and theta3_prime are sympy
# symbols; they no longer hold the numeric columns computed in Step 1 (the
# numeric values remain available through train_inputs / test_inputs).
theta1_prime, theta2_prime, theta3_prime = symbols('theta1_prime theta2_prime theta3_prime')
#Symbolically evaluate each node
def return_value(i1, i2, i3, act, name):
    """Return the symbolic expression computed by one hidden node.

    Parameters
    ----------
    i1, i2, i3 : node inputs (sympy symbols or expressions).
    act : str
        One of 'linear', 'squared', 'multiply', 'inverse', 'tanh'.
    name : str
        Node name; the weights are read from the module-level ``weight_dict``
        under the keys ``name + "1"``, ``name + "2"`` and ``name + "3"``.

    Returns
    -------
    The expression for the node output.

    Raises
    ------
    ValueError
        If ``act`` is not one of the supported activations.
    KeyError
        If one of the node's weights is missing from ``weight_dict``.
    """
    if act not in ('linear', 'squared', 'multiply', 'inverse', 'tanh'):
        raise ValueError("Unknown activation %r for node %r" % (act, name))

    w1 = weight_dict[name + "1"]
    w2 = weight_dict[name + "2"]
    w3 = weight_dict[name + "3"]

    if act == 'multiply':
        # NOTE(review): this multiplies all three weighted inputs, whereas the
        # Keras node built by create_node multiplies only the branches whose
        # gene is non-zero; the two can differ for multiply nodes with a zero
        # weight -- confirm which behaviour is intended.
        return i1*w1 * i2*w2 * i3*w3

    weighted_sum = i1*w1 + i2*w2 + i3*w3
    if act == 'linear':
        return weighted_sum
    if act == 'squared':
        return weighted_sum**2
    if act == 'inverse':
        return 1/weighted_sum
    return tanh(weighted_sum)

In [None]:
# Symbolic expressions for the hidden nodes a1, a2, a3
a1, a2, a3 = (
    return_value(theta1_prime, theta2_prime, theta3_prime, act_nodes[node], node)
    for node in ("a1", "a2", "a3")
)

# Symbolic expression for the output node: weighted hidden nodes plus the
# weight on the constant input (entered as 1)
name = "output"
n1, n2, n3, n4 = (name + suffix for suffix in ("1", "2", "3", "4"))
act = act_nodes[name]


def _output_sum():
    """Weighted sum of the three hidden nodes and the constant input."""
    return a1*weight_dict[n1] + a2*weight_dict[n2] + a3*weight_dict[n3] + 1*weight_dict[n4]


if act == 'multiply':
    value = a1*weight_dict[n1] * a2*weight_dict[n2] * a3*weight_dict[n3] * 1*weight_dict[n4]
elif act == 'linear':
    value = _output_sum()
elif act == 'squared':
    value = _output_sum()**2
elif act == 'inverse':
    value = 1/_output_sum()
elif act == 'tanh':
    value = tanh(_output_sum())

output = value
print(output)

## Step 4: Evaluate model on dataset and evaluate objective function ##
We now evaluate the model discovered on the testing dataset using the metrics defined during the training process. Lastly, we evaluate the objective function of our model, as seen by the genetic algorithm

In [1]:
#Evaluate model on train/test sets; each input column feeds its own model input
train_columns = [train_inputs[:, col] for col in range(4)]
test_columns = [test_inputs[:, col] for col in range(4)]
mse_train = new_model.evaluate(train_columns, train_outputs, verbose=0)
mse_test = new_model.evaluate(test_columns, test_outputs, verbose=0)

#Flattened model weights
weights = new_model.get_weights()
weight_list = np.array([w[0][0] for w in weights])

#A NaN/inf loss or a NaN weight marks the model as invalid; an invalid model
#is given a very large test error
losses_finite = np.isfinite(mse_train) and np.isfinite(mse_test)
valid_flag = bool(losses_finite and not np.isnan(weight_list).any())
if not valid_flag:
    mse_test = 1e50

#Complexity terms: squared activation genes and f3 of every weight gene
actfunc_term = [gene**2 for gene in individual[:nact_terms]]
weights = individual[nact_terms:]
weight_term = sum(f3(weights[j]) for j in range(nweight_terms))

#Objective: log10 of the test MSE plus the weighted complexity terms
mse_test_term = np.log10(mse_test)
obj = mse_test_term + 0.002*(np.sum(actfunc_term) + weight_term)
print("Objective function: ", mse_test, np.sum(actfunc_term), weight_term, obj, flush=True)

# Parsimonious neural networks learn non-linear interpretable laws

NameError: name 'new_model' is not defined

In [None]:
# Root-mean-square errors on the training and testing sets
rmse_train, rmse_test = np.sqrt(mse_train), np.sqrt(mse_test)
print(rmse_train, rmse_test)

We can plot our predictions vs the ground truth data in a parity plot.

In [None]:
# Model predictions; each input column feeds its own model input.
test_preds = new_model.predict([test_inputs[:, col] for col in range(4)])
train_preds = new_model.predict([train_inputs[:, col] for col in range(4)])

# Parity plot: predictions on the x-axis, ground truth on the y-axis. Both
# are the melting temperature normalized by theta0.
fig, ax = plt.subplots()
ax.plot(train_preds, train_outputs, 'o', label='Train')
ax.plot(test_preds, test_outputs, 'o', label='Test')

# Parity line over the range of the training predictions. np.min / np.max
# reduce the 2-D prediction array to scalars directly.
x = np.linspace(np.min(train_preds), np.max(train_preds), 500)
ax.plot(x, x, label='Parity')

ax.set_xlabel(r"Predicted $T_m / \theta_0$")
ax.set_ylabel(r"Actual $T_m / \theta_0$")
ax.set_title("Parity plot")
ax.legend()
ax.grid()