# Test Neural Network on Different Compositions of CrFeCoNi
<b> nanoHUB tools by: </b>  <i>Mackinzie S. Farnell, Zachary D. McClure</i> and <i>Alejandro Strachan</i>, Materials Engineering, Purdue University <br>

We test the neural network trained on equiatomic CrFeCoNi on different (non-equiatomic) compositions of CrFeCoNi for relaxed vacancy formation energy (VFE), cohesive energy, pressure, and volume. This involves many of the same steps as training the neural network, including loading properties, adding descriptors, and normalizing the properties. We evaluate the model performance by making plots of the predicted versus actual data points and calculating the mean absolute error (MAE) and mean squared error (MSE).

Overview
1. Load Bispectrum Coefficients and Output Properties
2. Add Pymatgen Descriptors
3. Normalize Properties
4. Predict Properties and Evaluate Model

In [None]:
# import libraries we will need
import tensorflow as tf
import keras as ke
from keras.models import load_model

import json as js
import numpy as np
import pymatgen as pymat
import csv
import re

import plotly.offline as p
import plotly.graph_objs as go

import os.path
from os import path

## 1. Load Bispectrum Coefficients and Output Properties
The unrelaxed bispectrum coefficients and relaxed vacancy formation energy, cohesive energy, and local atomic pressures and volumes are stored in a JSON file. The code below extracts this information from the json file and stores each property in a numpy array. These arrays are stored in a dictionary for easy access.
We are testing a model trained on equiatomic CrFeCoNi, so we need to make sure we use the same inputs as the model was trained on (bispectrum coefficients and additional Pymatgen descriptors). If we provide different inputs, the model will not be able to make accurate predictions.

In [None]:
# list of properties
properties = ["Relaxed_VFE", "Cohesive_Energy", "Pressure", "Volume"]
# list of compositions
compositions = ['20_40_20_20', '15_55_15_15']

# lists all possible compositions - uncomment both lines if you want to see results for all 8 compositions
#compositions = ['40_20_20_20', '20_40_20_20', '20_20_40_20', '20_20_20_40', \
#                '55_15_15_15', '15_55_15_15', '15_15_55_15', '15_15_15_55']

input_prop_key = 'Unrelaxed_Bispectrum_Coefficients'

# number of bispectrum coefficients stored per atom (width of the reshaped input rows)
num_bispectrum = 55

# element symbol -> atomic number stored in the 'elements' arrays
atomic_numbers = {'Cr': 24, 'Fe': 26, 'Co': 27, 'Ni': 28, 'Cu': 29}

# per-property settings copied into every dictionary entry:
# 'min'/'max' are used as plot axis limits later in the notebook, and
# 'stats_file'/'model_file' are the default files used when no locally
# trained stats/model file exists
property_settings = {
    'Relaxed_VFE': {
        'min': 0.9,
        'max': 2.3,
        'display': 'Relaxed VFE (eV/atom)',
        'units': '(eV/atom)',
        'stats_file': 'my_models/63/relaxed_vfe_stats_CrFeCoNi.json',
        'model_file': 'my_models/63/relaxed_vfe_model_CrFeCoNi.h5',
    },
    'Cohesive_Energy': {
        'min': -5,
        'max': -4,
        'display': 'Cohesive Energy (eV/atom)',
        'units': '(eV/atom)',
        'stats_file': 'my_models/63/cohesive_energy_stats_CrFeCoNi.json',
        'model_file': 'my_models/63/cohesive_energy_model_CrFeCoNi.h5',
    },
    'Pressure': {
        'min': -7,
        'max': 7,
        'display': 'Pressure (GPa)',
        'units': '(GPa)',
        'stats_file': 'my_models/63/pressure_bs_centr_stats_CrFeCoNi.json',
        'model_file': 'my_models/63/pressure_bs_centr_model_CrFeCoNi.h5',
    },
    'Volume': {
        'min': 10.9,
        'max': 11.3,
        'display': 'Volume (\u212B\u00b3)',
        'units': '(\u212B\u00b3)',
        'stats_file': 'my_models/63/volume_bs_centr_stats_CrFeCoNi.json',
        'model_file': 'my_models/63/volume_bs_centr_model_CrFeCoNi.h5',
    },
}


def _flatten_rows(rows):
    """Concatenate per-atom values (scalars or sequences) into one flat float array."""
    if not rows:
        return np.array([])
    return np.concatenate([np.ravel(row) for row in rows]).astype(float)


# initialize dictionary to store properties and other info
properties_dictionaries = {}

# parsed JSON per composition, so each file is read once instead of once per property
loaded_files = {}

# loop through all the properties and compositions
# (property-major order is kept so later cells iterate the entries in the same order)
for output_prop_key in properties:
    settings = property_settings[output_prop_key]

    for comp in compositions:
        if comp not in loaded_files:
            filename = '../data/{}.json'.format(comp)
            # open file and load data
            with open(filename, 'r') as f:
                loaded_files[comp] = js.load(f)
        data = loaded_files[comp]

        # get relevant information from data variable
        elements = data['element']
        output_properties = data[output_prop_key]
        input_properties = data[input_prop_key]

        # one entry per atom is read from each list; fail early if a list is too short
        num_atoms = len(elements)
        if len(output_properties) < num_atoms or len(input_properties) < num_atoms:
            raise ValueError(
                "{}: '{}' or '{}' has fewer entries than 'element'".format(
                    comp, output_prop_key, input_prop_key))

        # an unknown symbol would otherwise leave the element column shorter
        # than the input rows and only fail later with a shape mismatch
        unknown_symbols = sorted(set(elements) - set(atomic_numbers))
        if unknown_symbols:
            raise ValueError(
                "{}: unsupported element symbol(s) {}".format(comp, unknown_symbols))

        # fresh arrays are built for every (property, composition) entry so that
        # in-place changes made to one entry later never leak into another
        output_properties_array = _flatten_rows(output_properties[:num_atoms])
        input_properties_array = _flatten_rows(input_properties[:num_atoms])

        # column vector of atomic numbers, one row per atom
        elements_array = np.array(
            [atomic_numbers[symbol] for symbol in elements], dtype=float)[:, np.newaxis]

        # reshape the flat coefficient list into one row per atom
        num_rows = input_properties_array.size // num_bispectrum
        input_properties_array = np.reshape(input_properties_array, (num_rows, num_bispectrum))

        # append the atomic number as the last input column
        # (np.float was removed from NumPy; the builtin float is the same dtype)
        input_properties_array = np.append(input_properties_array, elements_array, 1)
        input_properties_array = input_properties_array.astype(float)

        # atom counts, stored in the order Co, Cr, Fe, Ni
        num_elements = np.array(
            [np.count_nonzero(elements_array == atomic_numbers[symbol])
             for symbol in ('Co', 'Cr', 'Fe', 'Ni')])

        # add information to properties dictionary
        properties_dictionaries['{}-{}'.format(output_prop_key, comp)] = {
            'inputs': input_properties_array,
            'outputs': output_properties_array,
            'length': output_properties_array.shape[0],
            'elements': elements_array,
            'num_element': num_elements,
            'min': settings['min'],
            'max': settings['max'],
            'display': settings['display'],
            'units': settings['units'],
            'stats_file': settings['stats_file'],
            'model_file': settings['model_file'],
            'composition': comp,
            'prop': output_prop_key
        }


## 2. Add Pymatgen Descriptors
Properties are queried from Pymatgen to use as model inputs along with the bispectrum coefficients. The properties are queried for just the central atom (i.e. the atom we are predicting on). We add central atom descriptors because the output property values vary based on which atom we are predicting on and the central atom descriptors give the neural network a way to distinguish between different atoms. The queried properties are: 
    - atomic_radius_calculated
    - atomic_radius 
    - atomic_mass
    - poissons_ratio
    - electrical_resistivity
    - thermal_conductivity
    - brinell_hardness

In [None]:
# declare function to query properties from pymatgen
# we declare a function because we will need to do these operations multiple times
def get_property(element, property):
    """Return one attribute of a pymatgen ``Element``.

    Args:
        element: Element symbol accepted by pymatgen's ``Element`` (e.g. 'Cr').
        property: Name of the attribute to read from the element object
            (e.g. 'atomic_mass'). The name shadows the builtin ``property``
            but is kept because callers may pass it by keyword.

    Returns:
        Whatever ``getattr`` returns for that attribute on the element object.

    Raises:
        AttributeError: If the element object has no attribute named ``property``.
    """
    # Element is imported from its defining module: the root-level
    # `pymatgen.Element` shortcut is not available in newer pymatgen releases.
    from pymatgen.core.periodic_table import Element

    element_object = Element(element)
    return getattr(element_object, property)

# declare a function to get composition of 12 nearest neighbors for an atom
def get_composition(c):
    """Count the element types found in entries 3 through 14 of ``c``.

    Each entry in that slice is compared against the string type codes
    '2.0' (Cr), '4.0' (Fe), '1.0' (Co), '5.0' (Ni) and '3.0' (Cu); entries
    matching none of them are ignored.

    Args:
        c: Indexable sequence; only ``c[3:15]`` is inspected.

    Returns:
        dict mapping 'Cr', 'Fe', 'Co', 'Ni' and 'Cu' to their counts.
    """
    # (type code, element symbol), checked in this order for every entry
    code_symbol_pairs = (
        ('2.0', 'Cr'),
        ('4.0', 'Fe'),
        ('1.0', 'Co'),
        ('5.0', 'Ni'),
        ('3.0', 'Cu'),
    )
    tally = {'Cr': 0, 'Fe': 0, 'Co': 0, 'Ni': 0, 'Cu': 0}

    for neighbor in c[3:15]:
        for code, symbol in code_symbol_pairs:
            if neighbor == code:
                tally[symbol] += 1
                break

    return tally

In [None]:
# list properties to query from pymatgen
properties_add = ["atomic_radius_calculated", "atomic_radius", "atomic_mass", 
              "poissons_ratio", "electrical_resistivity", "thermal_conductivity", 
              "brinell_hardness", ]

# atomic number -> element symbol for the values stored in the 'elements' arrays
symbol_by_atomic_number = {24: 'Cr', 26: 'Fe', 27: 'Co', 28: 'Ni', 29: 'Cu'}

# (symbol, property name) -> float value, so pymatgen is queried once per
# element/property pair instead of once per atom
descriptor_cache = {}

# iterate through every property/composition entry (all four output
# properties are processed; none are skipped)
for key in properties_dictionaries:
    # 'elements' is a column vector; flatten it to iterate plain atomic numbers
    entry_atomic_numbers = np.ravel(properties_dictionaries[key]['elements'])

    # iterate through all properties to query
    for add_property in properties_add:
        descriptor_values = []
        for atomic_number in entry_atomic_numbers:
            try:
                ele = symbol_by_atomic_number[int(atomic_number)]
            except KeyError:
                # fail loudly rather than silently reusing the previous atom's element
                raise ValueError(
                    "{}: unsupported atomic number {}".format(key, atomic_number)) from None

            cache_key = (ele, add_property)
            if cache_key not in descriptor_cache:
                descriptor_cache[cache_key] = float(get_property(ele, add_property))
            descriptor_values.append(descriptor_cache[cache_key])

        # add property to input array as one extra column
        descriptor_column = np.asarray(descriptor_values, dtype=float)[:, np.newaxis]
        properties_dictionaries[key]['inputs'] = np.append(
            properties_dictionaries[key]['inputs'], descriptor_column, 1)


## 3. Normalize Properties
We normalize the inputs and outputs to the model using mean and standard deviation. Each data point (x) is normalized using the mean ($ \mu $) and standard deviation ($ \sigma $) of the set of values. We normalize the data using the mean and standard deviation of the training data for equiatomic CrFeCoNi so that the model predicts on points from the distribution it was trained on. 

$$ x_{new} = \frac{x - \mu}{\sigma} $$

In [None]:
# function we call to normalize properties
def normalize(test_train_properties, key, means, stdevs):
    """Standardize the array stored under ``key`` using ``means`` and ``stdevs``.

    Every row of ``test_train_properties[key]`` is overwritten in place with
    ``(row - means) / stdevs``. The entry is then replaced by the result of
    ``np.nan_to_num`` on that array, which removes NaN/inf values such as
    those produced by a zero standard deviation.

    Args:
        test_train_properties: dict holding the array to normalize.
        key: Key of the array inside ``test_train_properties``.
        means: Value(s) subtracted from each row.
        stdevs: Value(s) each row is divided by.

    Returns:
        The same ``test_train_properties`` dict, with ``key`` updated.
    """
    values = test_train_properties[key]

    # write each normalized row back into the existing array
    for row_index, row in enumerate(values):
        values[row_index] = (row - means) / stdevs

    test_train_properties[key] = np.nan_to_num(values)
    return test_train_properties

In [None]:
# NOTE: normalize() writes the normalized values back into 'inputs', so
# running this cell a second time normalizes already-normalized inputs.
for key in properties_dictionaries:
    entry = properties_dictionaries[key]

    # prefer a stats file in the working directory; otherwise use the default one
    stats_file_name = '{}_stats.json'.format(entry['prop'])
    if not path.exists(stats_file_name):
        stats_file_name = entry['stats_file']

    # load the stats dictionary (the context manager closes the file even if parsing fails)
    with open(stats_file_name, "r") as f:
        stats_data = js.load(f)

    # convert every stored statistic to a numpy array
    stats_data = {name: np.asarray(values) for name, values in stats_data.items()}

    entry['stats'] = stats_data

    # normalize the input properties
    properties_dictionaries[key] = normalize(entry, "inputs",
                                             stats_data["means_ins"], stats_data["stdevs_ins"])
  

## 4. Predict Properties and Evaluate Model
We use the model trained from running the [Test Neural Network on Different Compositions of CrFeCoNi:](test_other_comps_CrFeCoNi.ipynb) notebook. If you have not run this notebook, we use a default model trained on equiatomic CoCrFeNi to predict properties for different compositions of CoCrFeNi. 

In [None]:
# models already loaded, keyed by file name, so a model shared by several
# compositions of the same property is read from disk only once
loaded_models = {}

for key in properties_dictionaries:
    entry = properties_dictionaries[key]

    # prefer a model file in the working directory; otherwise use the default one
    model_name = '{}_trained_model.h5'.format(entry['prop'])
    if not path.exists(model_name):
        model_name = entry['model_file']

    if model_name not in loaded_models:
        loaded_models[model_name] = load_model(model_name)
    model = loaded_models[model_name]

    # store the (still normalized) predictions for this entry
    entry['predict'] = model.predict(entry["inputs"])

The model is evaluated by plotting the predicted versus actual data and calculating the mean absolute error (MAE) and mean squared error (MSE) of the test predictions (equations are shown below). We divide the MAE and MSE by the range of the actual values for each property so that we can compare the error for different output properties.

$$ MSE = \frac{\frac{1}{n}\sum\limits _{i=1} ^{n}(Y_{i}-\hat{Y}_{i})^2}{max-min} $$


$$ MAE = \frac{\frac{1}{n}\sum\limits _{i=1} ^{n}|Y_{i}-\hat{Y}_{i}|}{max-min} $$

In [None]:
# plotting settings per element: (atomic number, legend name, legend group, marker color)
# listed in the order the traces are added to each figure
element_traces = [
    (24, "Cr", "2", "red"),
    (26, "Fe", "3", "orange"),
    (27, "Co", "1", "blue"),
    (28, "Ni", "4", "green"),
]

dot_size = 6

# end points of the y = x reference line
x_lin = [-24, 24]

for key in properties_dictionaries:
    entry = properties_dictionaries[key]
    stats_dict = entry['stats']
    predict_data_points = entry['predict']

    # actual values, one per atom
    actual_data_points = np.ravel(entry["outputs"])

    # unnormalize the predicted data points and flatten them to one value per
    # atom; without the flatten, a column of predictions minus a 1-D array of
    # actual values broadcasts to an (N, N) matrix and the error metrics are
    # averaged over every pair of atoms instead of per atom
    predicted_data_points = np.ravel(
        predict_data_points * stats_dict["stdevs_outs"] + stats_dict["means_outs"])

    if predicted_data_points.shape != actual_data_points.shape:
        raise ValueError(
            "{}: {} predictions for {} actual values".format(
                key, predicted_data_points.size, actual_data_points.size))

    # atomic number of each atom, used to separate the data by element
    atom_numbers = np.ravel(entry['elements'])

    # make scatter plot, starting with the y = x reference line
    fig = go.Figure()
    fig.add_trace(go.Scattergl(x=x_lin,
                               y=x_lin,
                               mode='lines',
                               line=dict(color="black"),
                               showlegend=False))

    # add one marker trace per element
    for atomic_number, label, legend_group, color in element_traces:
        is_element = atom_numbers == atomic_number
        fig.add_trace(go.Scattergl(x=actual_data_points[is_element],
                                   y=predicted_data_points[is_element],
                                   legendgroup=legend_group,
                                   name=label,
                                   mode='markers',
                                   marker=dict(color=color, size=dot_size)))

    fig.update_xaxes(range=[entry['min'], entry['max']])
    fig.update_yaxes(range=[entry['min'], entry['max']])

    # add axis labels to plot
    fig.update_layout(
        showlegend=True,
        title=key,
        xaxis_title="Molecular Mechanics {}".format(entry['units']),
        yaxis_title="Neural Network {}".format(entry['units']),
        font=dict(
            family="Times New Roman, monospace",
            size=24,
            color="black"
        )
    )
    fig.show()

    # calculate MSE and MAE
    test_error = predicted_data_points - actual_data_points
    test_mse = np.mean(test_error ** 2)
    test_mae = np.mean(np.abs(test_error))

    # find a normalized mse and mae (divided by the range of the actual values)
    # named actual_max/actual_min so the builtins max() and min() are not overwritten
    actual_max = np.amax(actual_data_points)
    actual_min = np.amin(actual_data_points)
    test_range = np.abs(actual_max - actual_min)
    mse_norm = test_mse / test_range
    mae_norm = test_mae / test_range

    # try to get percent error (computed but not displayed below)
    error = np.abs(test_error)
    percent_error = np.mean(error / np.abs(actual_data_points)) * 100

    # display MSE and MAE
    print(f'Test_MAE/range: {mae_norm:.5f}')
    print(f'Test_MSE/range: {mse_norm:.5f}')

In [None]:
# final cell: print a marker once every cell above has finished running
print("done! :)")