# Multiple regression models on statistically processed data

Several multiple regression models with Scheffé interaction terms are trained on statistically processed data to assess the performance of each model on each output (elastic modulus E, hardness H, EBSD Confidence Index CI and Image Quality IQ).

For each output, for each multiple regression model:
 - set OLS expressions for each model
 - import dataset as dataframe and compute Scheffé interaction terms for quartic multiple regression
 - iterative k-fold cross-validation on each multiple regression
 - compute R² mean and standard deviation for each model

## Import libraries

In [15]:
import statsmodels.formula.api as smf
import pandas as pd
import numpy as np
from sklearn.model_selection import KFold
import matplotlib.pyplot as plt
import MultipleRegression as MR
import os


## Path to datasets

In [16]:
# Directories are supplied through environment variables; each value is None
# when the corresponding variable is not set.
datasets_dir = os.environ.get("DATASETS_DIR")
run_dir = os.environ.get("RUN_DIR")

## Model assessment on elastic modulus E

### Multiple regression model

In [17]:
# Formula strings for the five regression models of the output E. They are passed
# to MR.fit_outputs further down. In patsy/statsmodels formula syntax `a:b` is a
# product (interaction) term and the trailing `-1` removes the intercept.
# NOTE(review): Zr_Nb, Zr2, Zr_Nb2 and similar names are not raw CSV columns; they are
# presumably created by MR.Scheffe_interactions_terms — confirm their definitions there.

# Linear: the five element columns only.
expression_linear ='E ~ Zr + Nb + Mo + Ti + Cr  -1'
# Quadratic: linear terms plus the 10 two-element products.
expression_quadratic = 'E ~ Zr + Nb + Mo + Ti + Cr +Zr:Nb +Zr:Mo +Zr:Ti +Zr:Cr +Nb:Mo +Nb:Ti +Nb:Cr +Mo:Ti + Mo:Cr + Ti:Cr  -1'
# Special cubic: quadratic terms plus the 10 three-element products.
expression_sp_cubic = 'E ~ Zr + Nb + Mo + Ti + Cr +Zr:Nb +Zr:Mo +Zr:Ti +Zr:Cr +Nb:Mo +Nb:Ti +Nb:Cr +Mo:Ti + Mo:Cr + Ti:Cr + Zr:Nb:Mo +Zr:Nb:Ti +Zr:Nb:Cr +Zr:Mo:Ti +Zr:Mo:Cr +Zr:Ti:Cr + Nb:Mo:Ti + Nb:Mo:Cr + Nb:Ti:Cr + Mo:Ti:Cr -1'
# Cubic: special cubic terms plus 10 terms of the form A:B:A_B.
expression_cubic = 'E ~ Zr + Nb + Mo + Ti + Cr +Zr:Nb +Zr:Mo +Zr:Ti +Zr:Cr +Nb:Mo +Nb:Ti +Nb:Cr +Mo:Ti + Mo:Cr + Ti:Cr + Zr:Nb:Mo +Zr:Nb:Ti +Zr:Nb:Cr +Zr:Mo:Ti +Zr:Mo:Cr +Zr:Ti:Cr + Nb:Mo:Ti + Nb:Mo:Cr + Nb:Ti:Cr + Mo:Ti:Cr + Zr:Nb:Zr_Nb +Zr:Mo:Zr_Mo + Zr:Ti:Zr_Ti+ Zr:Cr:Zr_Cr + Nb:Mo:Nb_Mo + Nb:Ti:Nb_Ti + Nb:Cr:Nb_Cr +Mo:Ti:Mo_Ti + Mo:Cr:Mo_Cr + Ti:Cr:Ti_Cr -1'
# Quartic: linear terms, two-element products, A:B:A_B terms, the 5 four-element
# products, 30 three-column terms containing one squared column (A2:B:C, A:B2:C, A:B:C2)
# and 10 A:B:A_B2 terms. The plain three-element products are not part of this formula.
expression_quartic = 'E ~ Zr + Nb + Mo + Ti + Cr + Zr:Nb +Zr:Mo +Zr:Ti +Zr:Cr +Nb:Mo +Nb:Ti +Nb:Cr +Mo:Ti + Mo:Cr + Ti:Cr + Zr:Nb:Zr_Nb +Zr:Mo:Zr_Mo + Zr:Ti:Zr_Ti+ Zr:Cr:Zr_Cr + Nb:Mo:Nb_Mo +Nb:Ti:Nb_Ti +Nb:Cr:Nb_Cr +Mo:Ti:Mo_Ti + Mo:Cr:Mo_Cr + Ti:Cr:Ti_Cr + Zr:Nb:Mo:Ti + Zr:Nb:Mo:Cr + Zr:Nb:Ti:Cr + Zr:Mo:Ti:Cr + Nb:Mo:Ti:Cr + Zr2:Nb:Mo +Zr2:Nb:Ti +Zr2:Nb:Cr +Zr2:Mo:Ti +Zr2:Mo:Cr +Zr2:Ti:Cr + Zr:Nb2:Mo + Zr:Nb2:Ti +Zr:Nb2:Cr + Zr:Nb:Mo2 + Zr:Nb:Ti2 +Zr:Nb:Cr2 + Zr:Mo2:Ti + Zr:Mo2:Cr + Zr:Mo:Ti2 + Zr:Mo:Cr2 + Zr:Ti2:Cr + Zr:Ti:Cr2 + Nb2:Mo:Ti + Nb2:Mo:Cr + Nb2:Ti:Cr + Nb:Mo2:Ti + Nb:Mo2:Cr +  Nb:Mo:Ti2 + Nb:Mo:Cr2 + Nb:Ti2:Cr + Nb:Ti:Cr2 + Mo2:Ti:Cr +Mo:Ti2:Cr + Mo:Ti:Cr2 + Zr:Nb:Zr_Nb2 +Zr:Mo:Zr_Mo2 + Zr:Ti:Zr_Ti2+ Zr:Cr:Zr_Cr2 + Nb:Mo:Nb_Mo2 +Nb:Ti:Nb_Ti2 +Nb:Cr:Nb_Cr2 +Mo:Ti:Mo_Ti2 + Mo:Cr:Mo_Cr2 + Ti:Cr:Ti_Cr2 -1'


### Import database 

In [18]:
# Read the E dataset (first CSV row is the header), then overwrite the column labels
# positionally with the names used by the regression formulas.
data = pd.read_csv(f"{datasets_dir}/Compo_E_wo_outlier.csv", header=0)
data.columns = (
    ['compo', 'sample']
    + ['Zr', 'Nb', 'Mo', 'Ti', 'Cr']
    + ['Zr_m', 'Nb_m', 'Mo_m', 'Ti_m', 'Cr_m']
    + ['E']
)
# Append the derived Scheffé interaction columns computed by the project helper.
data = MR.Scheffe_interactions_terms(data)

### Define X and y data and train the multiple regressions 

In [19]:
# Predictor columns handed to MR.fit_outputs: element columns followed by the
# derived columns referenced by the formula strings.
feature_columns = [
    'Zr', 'Nb', 'Mo', 'Ti', 'Cr',
    'Zr_Nb', 'Zr_Mo', 'Zr_Ti', 'Zr_Cr', 'Nb_Mo', 'Nb_Ti', 'Nb_Cr', 'Mo_Ti', 'Mo_Cr', 'Ti_Cr',
    'Zr2', 'Nb2', 'Mo2', 'Ti2', 'Cr2',
    'Zr_Nb2', 'Zr_Mo2', 'Zr_Ti2', 'Zr_Cr2', 'Nb_Mo2', 'Nb_Ti2', 'Nb_Cr2', 'Mo_Ti2', 'Mo_Cr2', 'Ti_Cr2',
]
X = data[feature_columns]
y = data['E']

# Settings forwarded to MR.fit_outputs.
# NOTE(review): presumably k = number of folds and nb_it = number of repetitions — confirm in MR.
k = 5
nb_it = 30
output = 'E'

# One call per model, in the same order as the summary table below.
(model_linear, MAE_list_linear, R2_list_linear,
 Y_pred_linear, Y_test_linear) = MR.fit_outputs(expression_linear, k, nb_it, output, X, y)
(model_quadratic, MAE_list_quadratic, R2_list_quadratic,
 Y_pred_quadratic, Y_test_quadratic) = MR.fit_outputs(expression_quadratic, k, nb_it, output, X, y)
(model_sp_cubic, MAE_list_sp_cubic, R2_list_sp_cubic,
 Y_pred_sp_cubic, Y_test_sp_cubic) = MR.fit_outputs(expression_sp_cubic, k, nb_it, output, X, y)
(model_cubic, MAE_list_cubic, R2_list_cubic,
 Y_pred_cubic, Y_test_cubic) = MR.fit_outputs(expression_cubic, k, nb_it, output, X, y)
(model_quartic, MAE_list_quartic, R2_list_quartic,
 Y_pred_quartic, Y_test_quartic) = MR.fit_outputs(expression_quartic, k, nb_it, output, X, y)

# One row of R2 values per model; summarise each row by its mean and standard deviation.
df_all_R2 = pd.DataFrame([
    R2_list_linear,
    R2_list_quadratic,
    R2_list_sp_cubic,
    R2_list_cubic,
    R2_list_quartic,
])
r2_per_model = df_all_R2.T
df_mean_std_R2 = pd.DataFrame(
    {
        'mean R2': r2_per_model.mean().tolist(),
        'std R2': r2_per_model.std().tolist(),
    },
    index=['linear', 'quadratic', 'special cubic', 'cubic', 'quartic'],
)
display(df_mean_std_R2)


Unnamed: 0,mean R2,std R2
linear,0.798645,0.014472
quadratic,0.898061,0.011121
special cubic,0.913233,0.010504
cubic,0.930598,0.009667
quartic,0.950646,0.010382


## Model assessment on H

### Multiple regression model

In [6]:
# Formula strings for the five regression models of the output H. They are passed
# to MR.fit_outputs further down. In patsy/statsmodels formula syntax `a:b` is a
# product (interaction) term and the trailing `-1` removes the intercept.
# NOTE(review): Zr_Nb, Zr2, Zr_Nb2 and similar names are not raw CSV columns; they are
# presumably created by MR.Scheffe_interactions_terms — confirm their definitions there.

# Linear: the five element columns only.
expression_linear ='H ~ Zr + Nb + Mo + Ti + Cr  -1'
# Quadratic: linear terms plus the 10 two-element products.
expression_quadratic = 'H ~ Zr + Nb + Mo + Ti + Cr +Zr:Nb +Zr:Mo +Zr:Ti +Zr:Cr +Nb:Mo +Nb:Ti +Nb:Cr +Mo:Ti + Mo:Cr + Ti:Cr  -1'
# Special cubic: quadratic terms plus the 10 three-element products.
expression_sp_cubic = 'H ~ Zr + Nb + Mo + Ti + Cr +Zr:Nb +Zr:Mo +Zr:Ti +Zr:Cr +Nb:Mo +Nb:Ti +Nb:Cr +Mo:Ti + Mo:Cr + Ti:Cr + Zr:Nb:Mo +Zr:Nb:Ti +Zr:Nb:Cr +Zr:Mo:Ti +Zr:Mo:Cr +Zr:Ti:Cr + Nb:Mo:Ti + Nb:Mo:Cr + Nb:Ti:Cr + Mo:Ti:Cr -1'
# Cubic: special cubic terms plus 10 terms of the form A:B:A_B.
expression_cubic = 'H ~ Zr + Nb + Mo + Ti + Cr +Zr:Nb +Zr:Mo +Zr:Ti +Zr:Cr +Nb:Mo +Nb:Ti +Nb:Cr +Mo:Ti + Mo:Cr + Ti:Cr + Zr:Nb:Mo +Zr:Nb:Ti +Zr:Nb:Cr +Zr:Mo:Ti +Zr:Mo:Cr +Zr:Ti:Cr + Nb:Mo:Ti + Nb:Mo:Cr + Nb:Ti:Cr + Mo:Ti:Cr + Zr:Nb:Zr_Nb +Zr:Mo:Zr_Mo + Zr:Ti:Zr_Ti+ Zr:Cr:Zr_Cr + Nb:Mo:Nb_Mo + Nb:Ti:Nb_Ti + Nb:Cr:Nb_Cr +Mo:Ti:Mo_Ti + Mo:Cr:Mo_Cr + Ti:Cr:Ti_Cr -1'
# Quartic: linear terms, two-element products, A:B:A_B terms, the 5 four-element
# products, 30 three-column terms containing one squared column (A2:B:C, A:B2:C, A:B:C2)
# and 10 A:B:A_B2 terms. The plain three-element products are not part of this formula.
expression_quartic = 'H ~ Zr + Nb + Mo + Ti + Cr + Zr:Nb +Zr:Mo +Zr:Ti +Zr:Cr +Nb:Mo +Nb:Ti +Nb:Cr +Mo:Ti + Mo:Cr + Ti:Cr + Zr:Nb:Zr_Nb +Zr:Mo:Zr_Mo + Zr:Ti:Zr_Ti+ Zr:Cr:Zr_Cr + Nb:Mo:Nb_Mo +Nb:Ti:Nb_Ti +Nb:Cr:Nb_Cr +Mo:Ti:Mo_Ti + Mo:Cr:Mo_Cr + Ti:Cr:Ti_Cr + Zr:Nb:Mo:Ti + Zr:Nb:Mo:Cr + Zr:Nb:Ti:Cr + Zr:Mo:Ti:Cr + Nb:Mo:Ti:Cr + Zr2:Nb:Mo +Zr2:Nb:Ti +Zr2:Nb:Cr +Zr2:Mo:Ti +Zr2:Mo:Cr +Zr2:Ti:Cr + Zr:Nb2:Mo + Zr:Nb2:Ti +Zr:Nb2:Cr + Zr:Nb:Mo2 + Zr:Nb:Ti2 +Zr:Nb:Cr2 + Zr:Mo2:Ti + Zr:Mo2:Cr + Zr:Mo:Ti2 + Zr:Mo:Cr2 + Zr:Ti2:Cr + Zr:Ti:Cr2 + Nb2:Mo:Ti + Nb2:Mo:Cr + Nb2:Ti:Cr + Nb:Mo2:Ti + Nb:Mo2:Cr +  Nb:Mo:Ti2 + Nb:Mo:Cr2 + Nb:Ti2:Cr + Nb:Ti:Cr2 + Mo2:Ti:Cr +Mo:Ti2:Cr + Mo:Ti:Cr2 + Zr:Nb:Zr_Nb2 +Zr:Mo:Zr_Mo2 + Zr:Ti:Zr_Ti2+ Zr:Cr:Zr_Cr2 + Nb:Mo:Nb_Mo2 +Nb:Ti:Nb_Ti2 +Nb:Cr:Nb_Cr2 +Mo:Ti:Mo_Ti2 + Mo:Cr:Mo_Cr2 + Ti:Cr:Ti_Cr2 -1'


### Import database 

In [7]:
# Read the H dataset (first CSV row is the header), then overwrite the column labels
# positionally with the names used by the regression formulas.
data = pd.read_csv(f"{datasets_dir}/Compo_H_wo_outlier.csv", header=0)
data.columns = (
    ['compo', 'sample']
    + ['Zr', 'Nb', 'Mo', 'Ti', 'Cr']
    + ['Zr_m', 'Nb_m', 'Mo_m', 'Ti_m', 'Cr_m']
    + ['H']
)
# Append the derived Scheffé interaction columns computed by the project helper.
data = MR.Scheffe_interactions_terms(data)

### Define X and y data and train the multiple regressions 

In [8]:
# Predictor columns handed to MR.fit_outputs: element columns followed by the
# derived columns referenced by the formula strings.
feature_columns = [
    'Zr', 'Nb', 'Mo', 'Ti', 'Cr',
    'Zr_Nb', 'Zr_Mo', 'Zr_Ti', 'Zr_Cr', 'Nb_Mo', 'Nb_Ti', 'Nb_Cr', 'Mo_Ti', 'Mo_Cr', 'Ti_Cr',
    'Zr2', 'Nb2', 'Mo2', 'Ti2', 'Cr2',
    'Zr_Nb2', 'Zr_Mo2', 'Zr_Ti2', 'Zr_Cr2', 'Nb_Mo2', 'Nb_Ti2', 'Nb_Cr2', 'Mo_Ti2', 'Mo_Cr2', 'Ti_Cr2',
]
X = data[feature_columns]
y = data['H']

# Settings forwarded to MR.fit_outputs.
# NOTE(review): presumably k = number of folds and nb_it = number of repetitions — confirm in MR.
k = 5
nb_it = 30
output = 'H'

# One call per model, in the same order as the summary table below.
(model_linear, MAE_list_linear, R2_list_linear,
 Y_pred_linear, Y_test_linear) = MR.fit_outputs(expression_linear, k, nb_it, output, X, y)
(model_quadratic, MAE_list_quadratic, R2_list_quadratic,
 Y_pred_quadratic, Y_test_quadratic) = MR.fit_outputs(expression_quadratic, k, nb_it, output, X, y)
(model_sp_cubic, MAE_list_sp_cubic, R2_list_sp_cubic,
 Y_pred_sp_cubic, Y_test_sp_cubic) = MR.fit_outputs(expression_sp_cubic, k, nb_it, output, X, y)
(model_cubic, MAE_list_cubic, R2_list_cubic,
 Y_pred_cubic, Y_test_cubic) = MR.fit_outputs(expression_cubic, k, nb_it, output, X, y)
(model_quartic, MAE_list_quartic, R2_list_quartic,
 Y_pred_quartic, Y_test_quartic) = MR.fit_outputs(expression_quartic, k, nb_it, output, X, y)

# One row of R2 values per model; summarise each row by its mean and standard deviation.
df_all_R2 = pd.DataFrame([
    R2_list_linear,
    R2_list_quadratic,
    R2_list_sp_cubic,
    R2_list_cubic,
    R2_list_quartic,
])
r2_per_model = df_all_R2.T
df_mean_std_R2 = pd.DataFrame(
    {
        'mean R2': r2_per_model.mean().tolist(),
        'std R2': r2_per_model.std().tolist(),
    },
    index=['linear', 'quadratic', 'special cubic', 'cubic', 'quartic'],
)
display(df_mean_std_R2)


Unnamed: 0,mean R2,std R2
linear,0.60552,0.026663
quadratic,0.777289,0.017732
special cubic,0.827958,0.016644
cubic,0.862331,0.017009
quartic,0.915855,0.013723


## Model assessment on CI

### Multiple regression model

In [9]:
# Formula strings for the five regression models of the output CI. They are passed
# to MR.fit_outputs further down. In patsy/statsmodels formula syntax `a:b` is a
# product (interaction) term and the trailing `-1` removes the intercept.
# NOTE(review): Zr_Nb, Zr2, Zr_Nb2 and similar names are not raw CSV columns; they are
# presumably created by MR.Scheffe_interactions_terms — confirm their definitions there.

# Linear: the five element columns only.
expression_linear ='CI ~ Zr + Nb + Mo + Ti + Cr  -1'
# Quadratic: linear terms plus the 10 two-element products.
expression_quadratic = ' CI~ Zr + Nb + Mo + Ti + Cr +Zr:Nb +Zr:Mo +Zr:Ti +Zr:Cr +Nb:Mo +Nb:Ti +Nb:Cr +Mo:Ti + Mo:Cr + Ti:Cr  -1'
# Special cubic: quadratic terms plus the 10 three-element products.
expression_sp_cubic = 'CI ~ Zr + Nb + Mo + Ti + Cr +Zr:Nb +Zr:Mo +Zr:Ti +Zr:Cr +Nb:Mo +Nb:Ti +Nb:Cr +Mo:Ti + Mo:Cr + Ti:Cr + Zr:Nb:Mo +Zr:Nb:Ti +Zr:Nb:Cr +Zr:Mo:Ti +Zr:Mo:Cr +Zr:Ti:Cr + Nb:Mo:Ti + Nb:Mo:Cr + Nb:Ti:Cr + Mo:Ti:Cr -1'
# Cubic: special cubic terms plus 10 terms of the form A:B:A_B.
expression_cubic = 'CI ~ Zr + Nb + Mo + Ti + Cr +Zr:Nb +Zr:Mo +Zr:Ti +Zr:Cr +Nb:Mo +Nb:Ti +Nb:Cr +Mo:Ti + Mo:Cr + Ti:Cr + Zr:Nb:Mo +Zr:Nb:Ti +Zr:Nb:Cr +Zr:Mo:Ti +Zr:Mo:Cr +Zr:Ti:Cr + Nb:Mo:Ti + Nb:Mo:Cr + Nb:Ti:Cr + Mo:Ti:Cr + Zr:Nb:Zr_Nb +Zr:Mo:Zr_Mo + Zr:Ti:Zr_Ti+ Zr:Cr:Zr_Cr + Nb:Mo:Nb_Mo + Nb:Ti:Nb_Ti + Nb:Cr:Nb_Cr +Mo:Ti:Mo_Ti + Mo:Cr:Mo_Cr + Ti:Cr:Ti_Cr -1'
# Quartic: linear terms, two-element products, A:B:A_B terms, the 5 four-element
# products, 30 three-column terms containing one squared column (A2:B:C, A:B2:C, A:B:C2)
# and 10 A:B:A_B2 terms. The plain three-element products are not part of this formula.
expression_quartic = 'CI ~ Zr + Nb + Mo + Ti + Cr + Zr:Nb +Zr:Mo +Zr:Ti +Zr:Cr +Nb:Mo +Nb:Ti +Nb:Cr +Mo:Ti + Mo:Cr + Ti:Cr + Zr:Nb:Zr_Nb +Zr:Mo:Zr_Mo + Zr:Ti:Zr_Ti+ Zr:Cr:Zr_Cr + Nb:Mo:Nb_Mo +Nb:Ti:Nb_Ti +Nb:Cr:Nb_Cr +Mo:Ti:Mo_Ti + Mo:Cr:Mo_Cr + Ti:Cr:Ti_Cr + Zr:Nb:Mo:Ti + Zr:Nb:Mo:Cr + Zr:Nb:Ti:Cr + Zr:Mo:Ti:Cr + Nb:Mo:Ti:Cr + Zr2:Nb:Mo +Zr2:Nb:Ti +Zr2:Nb:Cr +Zr2:Mo:Ti +Zr2:Mo:Cr +Zr2:Ti:Cr + Zr:Nb2:Mo + Zr:Nb2:Ti +Zr:Nb2:Cr + Zr:Nb:Mo2 + Zr:Nb:Ti2 +Zr:Nb:Cr2 + Zr:Mo2:Ti + Zr:Mo2:Cr + Zr:Mo:Ti2 + Zr:Mo:Cr2 + Zr:Ti2:Cr + Zr:Ti:Cr2 + Nb2:Mo:Ti + Nb2:Mo:Cr + Nb2:Ti:Cr + Nb:Mo2:Ti + Nb:Mo2:Cr +  Nb:Mo:Ti2 + Nb:Mo:Cr2 + Nb:Ti2:Cr + Nb:Ti:Cr2 + Mo2:Ti:Cr +Mo:Ti2:Cr + Mo:Ti:Cr2 + Zr:Nb:Zr_Nb2 +Zr:Mo:Zr_Mo2 + Zr:Ti:Zr_Ti2+ Zr:Cr:Zr_Cr2 + Nb:Mo:Nb_Mo2 +Nb:Ti:Nb_Ti2 +Nb:Cr:Nb_Cr2 +Mo:Ti:Mo_Ti2 + Mo:Cr:Mo_Cr2 + Ti:Cr:Ti_Cr2 -1'


### Import database 

In [10]:
# Read the averaged dataset (first CSV row is the header), then overwrite the column
# labels positionally with the names used by the regression formulas.
data = pd.read_csv(f"{datasets_dir}/Data_averaged.csv", header=0)
data.columns = (
    ['compo', 'sample']
    + ['Zr', 'Nb', 'Mo', 'Ti', 'Cr']
    + ['Zr_m', 'Nb_m', 'Mo_m', 'Ti_m', 'Cr_m']
    + ['E', 'H', 'stdE', 'stdH', 'CI', 'IQ', 'class']
)
# Append the derived Scheffé interaction columns computed by the project helper.
data = MR.Scheffe_interactions_terms(data)

### Define X and y data and train the multiple regressions 

In [11]:
# Predictor columns handed to MR.fit_outputs: element columns followed by the
# derived columns referenced by the formula strings.
feature_columns = [
    'Zr', 'Nb', 'Mo', 'Ti', 'Cr',
    'Zr_Nb', 'Zr_Mo', 'Zr_Ti', 'Zr_Cr', 'Nb_Mo', 'Nb_Ti', 'Nb_Cr', 'Mo_Ti', 'Mo_Cr', 'Ti_Cr',
    'Zr2', 'Nb2', 'Mo2', 'Ti2', 'Cr2',
    'Zr_Nb2', 'Zr_Mo2', 'Zr_Ti2', 'Zr_Cr2', 'Nb_Mo2', 'Nb_Ti2', 'Nb_Cr2', 'Mo_Ti2', 'Mo_Cr2', 'Ti_Cr2',
]
X = data[feature_columns]
y = data['CI']

# Settings forwarded to MR.fit_outputs.
# NOTE(review): presumably k = number of folds and nb_it = number of repetitions — confirm in MR.
k = 5
nb_it = 30
output = 'CI'

# One call per model, in the same order as the summary table below.
(model_linear, MAE_list_linear, R2_list_linear,
 Y_pred_linear, Y_test_linear) = MR.fit_outputs(expression_linear, k, nb_it, output, X, y)
(model_quadratic, MAE_list_quadratic, R2_list_quadratic,
 Y_pred_quadratic, Y_test_quadratic) = MR.fit_outputs(expression_quadratic, k, nb_it, output, X, y)
(model_sp_cubic, MAE_list_sp_cubic, R2_list_sp_cubic,
 Y_pred_sp_cubic, Y_test_sp_cubic) = MR.fit_outputs(expression_sp_cubic, k, nb_it, output, X, y)
(model_cubic, MAE_list_cubic, R2_list_cubic,
 Y_pred_cubic, Y_test_cubic) = MR.fit_outputs(expression_cubic, k, nb_it, output, X, y)
(model_quartic, MAE_list_quartic, R2_list_quartic,
 Y_pred_quartic, Y_test_quartic) = MR.fit_outputs(expression_quartic, k, nb_it, output, X, y)

# One row of R2 values per model; summarise each row by its mean and standard deviation.
df_all_R2 = pd.DataFrame([
    R2_list_linear,
    R2_list_quadratic,
    R2_list_sp_cubic,
    R2_list_cubic,
    R2_list_quartic,
])
r2_per_model = df_all_R2.T
df_mean_std_R2 = pd.DataFrame(
    {
        'mean R2': r2_per_model.mean().tolist(),
        'std R2': r2_per_model.std().tolist(),
    },
    index=['linear', 'quadratic', 'special cubic', 'cubic', 'quartic'],
)
display(df_mean_std_R2)


Unnamed: 0,mean R2,std R2
linear,0.471658,0.07028
quadratic,0.660545,0.080759
special cubic,0.679949,0.074751
cubic,0.756954,0.071959
quartic,0.832662,0.075821


## Model assessment on IQ

### Multiple regression model

In [12]:
# Formula strings for the five regression models of the output IQ. They are passed
# to MR.fit_outputs further down. In patsy/statsmodels formula syntax `a:b` is a
# product (interaction) term and the trailing `-1` removes the intercept.
# NOTE(review): Zr_Nb, Zr2, Zr_Nb2 and similar names are not raw CSV columns; they are
# presumably created by MR.Scheffe_interactions_terms — confirm their definitions there.

# Linear: the five element columns only.
expression_linear ='IQ ~ Zr + Nb + Mo + Ti + Cr  -1'
# Quadratic: linear terms plus the 10 two-element products.
expression_quadratic = 'IQ ~ Zr + Nb + Mo + Ti + Cr +Zr:Nb +Zr:Mo +Zr:Ti +Zr:Cr +Nb:Mo +Nb:Ti +Nb:Cr +Mo:Ti + Mo:Cr + Ti:Cr  -1'
# Special cubic: quadratic terms plus the 10 three-element products.
expression_sp_cubic = 'IQ ~ Zr + Nb + Mo + Ti + Cr +Zr:Nb +Zr:Mo +Zr:Ti +Zr:Cr +Nb:Mo +Nb:Ti +Nb:Cr +Mo:Ti + Mo:Cr + Ti:Cr + Zr:Nb:Mo +Zr:Nb:Ti +Zr:Nb:Cr +Zr:Mo:Ti +Zr:Mo:Cr +Zr:Ti:Cr + Nb:Mo:Ti + Nb:Mo:Cr + Nb:Ti:Cr + Mo:Ti:Cr -1'
# Cubic: special cubic terms plus 10 terms of the form A:B:A_B.
expression_cubic = 'IQ ~ Zr + Nb + Mo + Ti + Cr +Zr:Nb +Zr:Mo +Zr:Ti +Zr:Cr +Nb:Mo +Nb:Ti +Nb:Cr +Mo:Ti + Mo:Cr + Ti:Cr + Zr:Nb:Mo +Zr:Nb:Ti +Zr:Nb:Cr +Zr:Mo:Ti +Zr:Mo:Cr +Zr:Ti:Cr + Nb:Mo:Ti + Nb:Mo:Cr + Nb:Ti:Cr + Mo:Ti:Cr + Zr:Nb:Zr_Nb +Zr:Mo:Zr_Mo + Zr:Ti:Zr_Ti+ Zr:Cr:Zr_Cr + Nb:Mo:Nb_Mo + Nb:Ti:Nb_Ti + Nb:Cr:Nb_Cr +Mo:Ti:Mo_Ti + Mo:Cr:Mo_Cr + Ti:Cr:Ti_Cr -1'
# Quartic: linear terms, two-element products, A:B:A_B terms, the 5 four-element
# products, 30 three-column terms containing one squared column (A2:B:C, A:B2:C, A:B:C2)
# and 10 A:B:A_B2 terms. The plain three-element products are not part of this formula.
expression_quartic = 'IQ ~ Zr + Nb + Mo + Ti + Cr + Zr:Nb +Zr:Mo +Zr:Ti +Zr:Cr +Nb:Mo +Nb:Ti +Nb:Cr +Mo:Ti + Mo:Cr + Ti:Cr + Zr:Nb:Zr_Nb +Zr:Mo:Zr_Mo + Zr:Ti:Zr_Ti+ Zr:Cr:Zr_Cr + Nb:Mo:Nb_Mo +Nb:Ti:Nb_Ti +Nb:Cr:Nb_Cr +Mo:Ti:Mo_Ti + Mo:Cr:Mo_Cr + Ti:Cr:Ti_Cr + Zr:Nb:Mo:Ti + Zr:Nb:Mo:Cr + Zr:Nb:Ti:Cr + Zr:Mo:Ti:Cr + Nb:Mo:Ti:Cr + Zr2:Nb:Mo +Zr2:Nb:Ti +Zr2:Nb:Cr +Zr2:Mo:Ti +Zr2:Mo:Cr +Zr2:Ti:Cr + Zr:Nb2:Mo + Zr:Nb2:Ti +Zr:Nb2:Cr + Zr:Nb:Mo2 + Zr:Nb:Ti2 +Zr:Nb:Cr2 + Zr:Mo2:Ti + Zr:Mo2:Cr + Zr:Mo:Ti2 + Zr:Mo:Cr2 + Zr:Ti2:Cr + Zr:Ti:Cr2 + Nb2:Mo:Ti + Nb2:Mo:Cr + Nb2:Ti:Cr + Nb:Mo2:Ti + Nb:Mo2:Cr +  Nb:Mo:Ti2 + Nb:Mo:Cr2 + Nb:Ti2:Cr + Nb:Ti:Cr2 + Mo2:Ti:Cr +Mo:Ti2:Cr + Mo:Ti:Cr2 + Zr:Nb:Zr_Nb2 +Zr:Mo:Zr_Mo2 + Zr:Ti:Zr_Ti2+ Zr:Cr:Zr_Cr2 + Nb:Mo:Nb_Mo2 +Nb:Ti:Nb_Ti2 +Nb:Cr:Nb_Cr2 +Mo:Ti:Mo_Ti2 + Mo:Cr:Mo_Cr2 + Ti:Cr:Ti_Cr2 -1'


### Import database 

In [13]:
# Read the averaged dataset (first CSV row is the header), then overwrite the column
# labels positionally with the names used by the regression formulas.
data = pd.read_csv(f"{datasets_dir}/Data_averaged.csv", header=0)
data.columns = (
    ['compo', 'sample']
    + ['Zr', 'Nb', 'Mo', 'Ti', 'Cr']
    + ['Zr_m', 'Nb_m', 'Mo_m', 'Ti_m', 'Cr_m']
    + ['E', 'H', 'stdE', 'stdH', 'CI', 'IQ', 'class']
)
# Append the derived Scheffé interaction columns computed by the project helper.
data = MR.Scheffe_interactions_terms(data)

### Define X and y data and train the multiple regressions 

In [14]:
# Predictor columns handed to MR.fit_outputs: element columns followed by the
# derived columns referenced by the formula strings.
feature_columns = [
    'Zr', 'Nb', 'Mo', 'Ti', 'Cr',
    'Zr_Nb', 'Zr_Mo', 'Zr_Ti', 'Zr_Cr', 'Nb_Mo', 'Nb_Ti', 'Nb_Cr', 'Mo_Ti', 'Mo_Cr', 'Ti_Cr',
    'Zr2', 'Nb2', 'Mo2', 'Ti2', 'Cr2',
    'Zr_Nb2', 'Zr_Mo2', 'Zr_Ti2', 'Zr_Cr2', 'Nb_Mo2', 'Nb_Ti2', 'Nb_Cr2', 'Mo_Ti2', 'Mo_Cr2', 'Ti_Cr2',
]
X = data[feature_columns]
y = data['IQ']

# Settings forwarded to MR.fit_outputs.
# NOTE(review): presumably k = number of folds and nb_it = number of repetitions — confirm in MR.
k = 5
nb_it = 30
output = 'IQ'

# One call per model, in the same order as the summary table below.
(model_linear, MAE_list_linear, R2_list_linear,
 Y_pred_linear, Y_test_linear) = MR.fit_outputs(expression_linear, k, nb_it, output, X, y)
(model_quadratic, MAE_list_quadratic, R2_list_quadratic,
 Y_pred_quadratic, Y_test_quadratic) = MR.fit_outputs(expression_quadratic, k, nb_it, output, X, y)
(model_sp_cubic, MAE_list_sp_cubic, R2_list_sp_cubic,
 Y_pred_sp_cubic, Y_test_sp_cubic) = MR.fit_outputs(expression_sp_cubic, k, nb_it, output, X, y)
(model_cubic, MAE_list_cubic, R2_list_cubic,
 Y_pred_cubic, Y_test_cubic) = MR.fit_outputs(expression_cubic, k, nb_it, output, X, y)
(model_quartic, MAE_list_quartic, R2_list_quartic,
 Y_pred_quartic, Y_test_quartic) = MR.fit_outputs(expression_quartic, k, nb_it, output, X, y)

# One row of R2 values per model; summarise each row by its mean and standard deviation.
df_all_R2 = pd.DataFrame([
    R2_list_linear,
    R2_list_quadratic,
    R2_list_sp_cubic,
    R2_list_cubic,
    R2_list_quartic,
])
r2_per_model = df_all_R2.T
df_mean_std_R2 = pd.DataFrame(
    {
        'mean R2': r2_per_model.mean().tolist(),
        'std R2': r2_per_model.std().tolist(),
    },
    index=['linear', 'quadratic', 'special cubic', 'cubic', 'quartic'],
)
display(df_mean_std_R2)


Unnamed: 0,mean R2,std R2
linear,0.478117,0.070078
quadratic,0.675062,0.069941
special cubic,0.698441,0.066431
cubic,0.766688,0.069973
quartic,0.870797,0.051775
