In [1]:
#Library Used in all Code
import numpy as np
import pandas as pd

#Used In Rule of Mixtures
import pymatgen as pymat
import mendeleev as mendel

#Random
np.random.seed(44) # Random Seed1
import random

# PLOTTING (MATPLOTLIB)
%matplotlib inline
import matplotlib.animation as animation
from IPython.display import HTML

# PLOTTING (PLOTLY)
import plotly 
import plotly.graph_objs as go
from plotly.offline import iplot
plotly.offline.init_notebook_mode(connected=True)

# ML
from lolopy.learners import RandomForestRegressor
from lolopy.metrics import root_mean_squared_error
from lolopy.metrics import standard_error
from sklearn.metrics import mean_absolute_error


# Meltheas TOOL
from simtool import findInstalledSimToolNotebooks, searchForSimTool
from simtool import getSimToolInputs, getSimToolOutputs, Run

MeltHEA = searchForSimTool('meltheas')
from IPython.display import clear_output
from IPython.display import display
from hublib.ui import FileUpload
import ipywidgets as widgets
import os
import matplotlib.pyplot as plt
from matplotlib.colors import LinearSegmentedColormap

<IPython.core.display.Javascript object>

# Important Initial Dataset

In [2]:
#Get initial set for testing data
initial_data = pd.read_csv('~/activemeltheas/data/NewSmallCompistionDataSet.csv')
initial_data = initial_data.drop(['Solid Tm'], axis=1)
initial_data = initial_data.drop(['Liquid Tm'], axis=1)
initial_data = initial_data.drop(['Minus'], axis=1)
initial_data = initial_data.drop(['Plus'], axis=1)
initial_data = initial_data.drop(['Unnamed: 0'], axis=1)
compared_df = initial_data.drop(['Simulated Tm'], axis=1)
display(initial_data)

Unnamed: 0,Cr,Co,Cu,Fe,Ni,Simulated Tm
0,0.1,0.1,0.3,0.2,0.3,1950.23001
1,0.1,0.1,0.3,0.3,0.2,2021.213212
2,0.2,0.1,0.1,0.3,0.3,2172.548152
3,0.2,0.1,0.2,0.2,0.3,2048.14567
4,0.2,0.1,0.2,0.3,0.2,2059.882861
5,0.2,0.1,0.3,0.1,0.3,1846.791018
6,0.2,0.1,0.3,0.2,0.2,1955.90749
7,0.3,0.1,0.1,0.2,0.3,2148.061845
8,0.3,0.1,0.1,0.3,0.2,2164.932463
9,0.3,0.1,0.2,0.1,0.3,1976.761266


# All Plausible Composition Creator

In [3]:
#Create lines to get values with .1 step size
nx = 11
x1 = np.linspace(0, 1, nx)
y, y1, y2, y3, y4 = np.meshgrid(x1, x1, x1, x1, x1)
positions = np.vstack([y.ravel(), y1.ravel(), y2.ravel(), y3.ravel(), y4.ravel()]).T

#Make dataframe with limits of all possible compistions combinations
all_permutations = pd.DataFrame(positions)
all_permutations["sum"]= all_permutations.sum(axis = 1)
plausible_compositions = all_permutations[all_permutations["sum"] == 1]

plausible_compositions = plausible_compositions[plausible_compositions[0] <= 0.5] #Cr
plausible_compositions = plausible_compositions[plausible_compositions[1] <= 0.5] #Co
plausible_compositions = plausible_compositions[plausible_compositions[2] <= 0.5] #Cu
plausible_compositions = plausible_compositions[plausible_compositions[3] <= 0.5] #Fe
plausible_compositions = plausible_compositions[plausible_compositions[4] <= 0.5] #Ni

plausible_compositions = plausible_compositions.round(1)

plausible_compositions = plausible_compositions.drop(["sum"], axis = 1)
initial_plausible_compositions = plausible_compositions
display(len(plausible_compositions))

593

# Apply Rule of Mixtures to Compositions

In [4]:
#string to select characteristics
actual_qued_values = ['youngs_modulus', 'atomic_radius', 'electrical_resistivity','CTE','hardness','boiling_point',
                         'atomic_mass','poissons_ratio','density_of_solid','en_gosh','melting_point']

sample = ['Cr', 'Co', 'Cu', 'Fe', 'Ni']

def get_discriptors(input_string,sample):
    #dictionary values to choose from
    string_dict = ['bulk_modulus','youngs_modulus','CTE','thermal_conductivity','boiling_point','critical_temperature',
                  'vdw_radius','average_ionic_radius','atomic_radius','atomic_number','atomic_mass','molar_volume', 
                   'density_of_solid','hardness','poissons_ratio','period','group','en_gosh','electrical_resistivity', 'melting_point']

    #Creation of dictionary
    data_set = {}

    #Make dict of lists
    for length_string in range(len(string_dict)):
        data_set[string_dict[length_string]] = []

    #all plausbile compositions from before
    df2 = plausible_compositions

    #list of all things
    que_values = []

    for item in sample:
        element_object = pymat.Element(item)
        data_set[string_dict[0]].append(element_object.bulk_modulus) #bulk_modulus
        data_set[string_dict[1]].append(element_object.youngs_modulus) #youngs_modulus
        data_set[string_dict[2]].append(element_object.coefficient_of_linear_thermal_expansion) #CTE
        data_set[string_dict[3]].append(element_object.thermal_conductivity) #thermal conductivity
        data_set[string_dict[4]].append(element_object.boiling_point) #boiling_point
        data_set[string_dict[5]].append(element_object.critical_temperature) #critical temperature
        data_set[string_dict[6]].append(element_object.van_der_waals_radius) #vander wall radius
        data_set[string_dict[7]].append(element_object.average_ionic_radius) #average ionic radius
        data_set[string_dict[8]].append(element_object.atomic_radius) #atomic radius
        data_set[string_dict[9]].append(element_object.Z) #atomic number
        data_set[string_dict[10]].append(element_object.atomic_mass) #atomic mass
        data_set[string_dict[11]].append(element_object.molar_volume) #molar volume
        data_set[string_dict[12]].append(element_object.density_of_solid) #density_solid
        data_set[string_dict[13]].append(element_object.brinell_hardness) #brinell_hardness
        data_set[string_dict[14]].append(element_object.poissons_ratio) #poissons_ratio
        data_set[string_dict[15]].append(element_object.row) #period
        data_set[string_dict[16]].append(element_object.group) #group
        data_set[string_dict[17]].append(element_object.X) #en_gosh
        data_set[string_dict[18]].append(element_object.electrical_resistivity) #electrical_resistivity
        data_set[string_dict[19]].append(element_object.melting_point) #melting_point

    for discript in input_string:
        que_values.append(data_set[discript])

    df = pd.DataFrame(data = que_values, columns = sample, index = input_string) 
    display(df)
    return df

discriptors_df = get_discriptors(actual_qued_values,sample)
discriptors_df.to_csv('AdjustedInitialSetData', index = False)

Unnamed: 0,Cr,Co,Cu,Fe,Ni
youngs_modulus,279.0,209.0,130.0,211.0,200.0
atomic_radius,1.4,1.35,1.35,1.4,1.35
electrical_resistivity,1.27e-07,6e-08,1.72e-08,1e-07,7.2e-08
CTE,4.9e-06,1.3e-05,1.65e-05,1.18e-05,1.34e-05
hardness,1120.0,700.0,874.0,490.0,700.0
boiling_point,2944.0,3200.0,3200.0,3134.0,3186.0
atomic_mass,51.9961,58.93319,63.546,55.845,58.6934
poissons_ratio,0.21,0.31,0.34,0.29,0.31
density_of_solid,7140.0,8900.0,8920.0,7874.0,8908.0
en_gosh,1.66,1.88,1.9,1.83,1.91


In [5]:
#Rename df to match other dataframes
plausible_compositions = plausible_compositions.rename(columns = {0:'Cr',1:'Co',2:'Cu',3:'Fe', 4:'Ni'})

#Taking out replicas
df = pd.merge(plausible_compositions, compared_df, how='left', indicator = 'Exsist')
df = df[df['Exsist'] != 'both']
plausible_compositions = df.drop(['Exsist'], axis = 1)

#display(plausible_compositions)

# Applying Rule of Mixture

In [6]:
#Applu Rule of Mixtures
def rule_mixtures(df2,df,sample): 
    #Creator column of first rule mixture discriptor to concat to
    cr = 0
    for comps in sample:
        cr = cr+df2[comps]*df[comps][0]
    cr = cr.to_frame()

    #Concat rest of compistions with rule of mixtures applied
    for x in range(1,len(actual_qued_values)):
        cf = 0
        for comps in sample:
            cf = cf + df2[comps]*df[comps][x]
        cf.to_frame()
        cr = pd.concat([cr,cf], axis=1)

    #Renames columns to discriptors
    cr.columns = actual_qued_values
    return cr

plausible_compositions_rule_mixtures = rule_mixtures(plausible_compositions,discriptors_df,sample)
known_compositions_rule_mixtures = rule_mixtures(compared_df,discriptors_df,sample)
#display(plausible_compositions_rule_mixtures)
#display(known_compositions_rule_mixtures)
#known_compositions_rule_mixtures.to_csv('AdjustedInitialSetData', index = False)

# Data Preperation

In [7]:
#Concatinate values
all_rule_mixtures = pd.concat([known_compositions_rule_mixtures,plausible_compositions_rule_mixtures])
all_compositions = pd.concat([compared_df,plausible_compositions])

#Experimental melting temperatures [K]
T_m_Cr = 2180
T_m_Co = 1768
T_m_Cu = 1358
T_m_Fe = 1811
T_m_Ni = 1728

#display(all_compositions)

In [8]:
#Set testing and training data
all_values = all_rule_mixtures.values.tolist()
all_labels = initial_data['Simulated Tm'].tolist()

# List of lists are turned into Numpy arrays to facilitate calculations in steps to follow (Normalization).
all_values = np.array(all_values, dtype = float)
all_labels = np.array(all_labels, dtype = float)

create_label = np.zeros((plausible_compositions_rule_mixtures.shape[0],), dtype = int)
all_labels = np.concatenate((all_labels,create_label), axis = 0)
#display(all_compositions)

In [9]:
X = all_values.copy() #discriptors
np.random.shuffle(X)
y = all_labels.copy() #all labels  
np.random.shuffle(y)

# assign values
model = RandomForestRegressor(num_trees = 350) #model if iterate through
#print(model.__init__)

entry_number_init = initial_data.shape[0] #starting point of initial data
in_train = np.zeros(len(X), dtype=np.bool) #make all false

in_train[:entry_number_init] = True #turns false to true 

print('Picked {} training entries'.format(in_train.sum()))

nsamples = in_train.sum()
train_fraction = 0.8
train_idx = int(train_fraction*nsamples)

Picked 39 training entries


# Random Forest Verification

In [10]:
from lolopy.learners import RandomForestRegressor
from lolopy.learners import RandomForestMixin
from lolopy.learners import BaseLoloRegressor
from lolopy import metrics as lolometrics
import sklearn
from sklearn import metrics

numberTrees = []
Depth = []
maeTrain = []
maeTest = []
numb = 10
depth = 2**6


model = RandomForestRegressor(num_trees = 350) #model if iterate through
model.fit(X[:train_idx, :], y[:train_idx])

test_pred, test_std = model.predict(X[train_idx:nsamples, :], return_std=True)
train_pred, train_std = model.predict(X[:train_idx, :], return_std=True)

rmse_test = lolometrics.root_mean_squared_error(X[train_idx:nsamples, :], test_pred)
rmse_train = lolometrics.root_mean_squared_error(X[:train_idx, :], train_pred)

mae_test = sklearn.metrics.mean_absolute_error(y[train_idx:nsamples], test_pred)
mae_train = sklearn.metrics.mean_absolute_error(y[:train_idx], train_pred)

In [11]:
iterationTree = 10

testingTrees = 1000
getMeanTree = 0

trees = [20,250,500,750,1000,1250]

for treesRun in range(1,testingTrees):
    tree_list = []

    rmse_testTreeList = []
    rmse_trainTreeList = []

    mae_testTreeList = []
    mae_trainTreeList = []

    min_mae_testTreeList = []
    min_mae_testTreeList = []
    for loops in trees:
        #Create RandomForest with parameters
        model = RandomForestRegressor(num_trees = loops) #model if iterate through
        model.fit(X[:train_idx, :], y[:train_idx])

        #Make Predictions for Dataset
        test_pred, test_std = model.predict(X[train_idx:nsamples, :], return_std=True)
        train_pred, train_std = model.predict(X[:train_idx, :], return_std=True)

        #Get Root Mean Squared Error
        rmse_test = lolometrics.root_mean_squared_error(X[train_idx:nsamples, :], test_pred)
        rmse_train = lolometrics.root_mean_squared_error(X[:train_idx, :], train_pred)

        #Append Root Mean Squared Error to Graph
        rmse_testTreeList.append(rmse_test)
        rmse_trainTreeList.append(rmse_train)

        #Get Mean Squared Error
        mae_test = sklearn.metrics.mean_absolute_error(y[train_idx:nsamples], test_pred)
        mae_train = sklearn.metrics.mean_absolute_error(y[:train_idx], train_pred)

        #Append Mean Squared Error
        mae_testTreeList.append(mae_test)
        mae_trainTreeList.append(mae_train)

        tree_list.append(loops)
        properIndex = mae_testTreeList.index(min(mae_testTreeList), 0, len(mae_testTreeList))
        
    getMeanTree += tree_list[properIndex]
getMeanTree = getMeanTree/testingTrees


In [12]:
print(getMeanTree)

364.48


In [13]:
print(mae_trainTreeList)
#saved information for paper
mae_testTreeList = [18.32866716831839, 18.768734163366076, 17.54252874875999, 16.824204477410802, 17.897590507422905, 17.242132747320284, 17.367143093888302, 16.98295398809759, 18.65371960493499]
mae_testTrainList = [15.082224113382564, 14.912079298945025, 15.261766747363826, 14.721185982513031, 15.027725270711588, 14.785662033802451, 15.244098713193374, 15.401093781582704, 15.479296060226723]

[136.41868449536892, 140.3267380840626, 145.2672573002872, 150.91511861671427, 152.58321286614486, 149.10555138090038]


In [14]:
layout0= go.Layout(title= "Tree MAE Error", hovermode= 'closest',
    xaxis= dict(title= 'Estimators (Tree)',zeroline= False, gridwidth= 2),
    yaxis= dict(title= 'MAE',zeroline= False, gridwidth= 2), height=800, width=800, font=dict(size=36))

test_mae = go.Scatter(x = tree_list, y = mae_testTreeList, mode = 'markers', 
                      marker= dict(size= 25, color='red'), name= "Test Data")

train_mae = go.Scatter(x = tree_list, y = mae_trainTreeList, mode = 'markers', 
                      marker= dict(size= 25, color='green'), name= "Train Data")

traces = [train_mae,test_mae]
fig = go.Figure(traces, layout=layout0)
fig.update_layout(
                        width = 2000,
                        height = 1000,
                        font=dict(size=36,family='Times New Roman'),
                        showlegend = True,
    
                        paper_bgcolor ='rgba(0,0,0,0)',
                        plot_bgcolor='rgba(0,0,0,0)',
    
                        xaxis_gridwidth= 1,
                        xaxis_gridcolor ='gray',
                        xaxis_linewidth=2, 
                        xaxis_linecolor='gray',
                        yaxis_gridwidth= 1,
                        yaxis_gridcolor ='gray',
                        yaxis_linewidth=2, 
                        yaxis_linecolor='gray',
                        xaxis = dict(mirror = True, ticklen = 15),
                        yaxis = dict(mirror = True, ticklen = 15),
    
)
fig.update_yaxes(automargin=True)
iplot(fig)