## This notebook calibrates the Cpx-Liq machine learning barometer used by Thermobar.
- We follow Petrelli, where a different random state is used for pressure and temperature
- Here, we use sklearn pipelines which we save to onnx

In [12]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from sklearn.preprocessing import StandardScaler
from sklearn.ensemble import ExtraTreesRegressor
from sklearn.metrics import mean_squared_error 
from sklearn.metrics import r2_score
import scipy.cluster.hierarchy as shc
from sklearn.cluster import AgglomerativeClustering
from sklearn.model_selection import cross_val_score
from sklearn.model_selection import RepeatedStratifiedKFold
from sklearn.ensemble import ExtraTreesClassifier
from sklearn.ensemble import GradientBoostingRegressor

%matplotlib inline
from sklearn.linear_model import LinearRegression
from sklearn.metrics import r2_score
from statsmodels.stats.multicomp import pairwise_tukeyhsd
from scipy import stats
import warnings as w
import sys
sys.path.append("../../..")
import Thermobar as pt

In [13]:
# Read the training sheet and pull out the three tables used below:
# the raw input, the clinopyroxene compositions and the liquid compositions.
out_train = pt.import_excel(
    'GlobalDataset_Final_rev9_TrainValidation.xlsx',
    sheet_name='Train_Thermobar_Format',
)
out_train_input = out_train['my_input']
Cpx_train = out_train['Cpxs']
Liq_train = out_train['Liqs']

# Regression targets. Pressure is multiplied by 10
# (presumably GPa -> kbar; confirm against the units Thermobar expects).
y_train_T = out_train_input['T_K']
y_train_P = out_train_input['P_GPa'] * 10

# Feature matrix: liquid columns first, then clinopyroxene columns, with the
# sample-ID columns and four liquid columns removed.
Cpx_train_noID_noT = Cpx_train.drop(columns=['Sample_ID_Cpx'])
Liq_train_noID_noT = Liq_train.drop(
    columns=['Sample_ID_Liq', 'Fe3Fet_Liq', 'NiO_Liq', 'CoO_Liq', 'CO2_Liq']
)
Cpx_Liq_Combo_train = pd.concat(
    [Liq_train_noID_noT, Cpx_train_noID_noT], axis=1
)
x_train = Cpx_Liq_Combo_train.values

In [14]:
## Test dataset
# Read the test sheet and pull out the raw input, clinopyroxene and liquid tables.
out_test = pt.import_excel(
    'GlobalDataset_Final_rev9_TrainValidation.xlsx',
    sheet_name='Test_Thermobar_Format',
)
out_test_input = out_test['my_input']
Cpx_test = out_test['Cpxs']
Liq_test = out_test['Liqs']

# Regression targets. Pressure is multiplied by 10
# (presumably GPa -> kbar; confirm against the units Thermobar expects).
y_test_T = out_test_input['T_K']
y_test_P = out_test_input['P_GPa'] * 10

# Feature matrix: liquid columns first, then clinopyroxene columns, with the
# sample-ID columns and four liquid columns removed.
Cpx_test_noID_noT = Cpx_test.drop(columns=['Sample_ID_Cpx'])
Liq_test_noID_noT = Liq_test.drop(
    columns=['Sample_ID_Liq', 'Fe3Fet_Liq', 'NiO_Liq', 'CoO_Liq', 'CO2_Liq']
)
Cpx_Liq_Combo_test = pd.concat(
    [Liq_test_noID_noT, Cpx_test_noID_noT], axis=1
)
x_test = Cpx_Liq_Combo_test.values

## Making a pipeline

In [15]:
from sklearn.preprocessing import StandardScaler
from sklearn.pipeline import Pipeline

# Extra-trees regressor for temperature, with a fixed seed so the fit is repeatable.
regr_T = ExtraTreesRegressor(
    n_estimators=550,
    criterion='squared_error',
    max_features=22,
    random_state=280,
)

# Two-step pipeline: standardize the features, then regress.
# Bundling both steps means the scaler travels with the model when exported.
pipe = Pipeline(steps=[
    ('scalar', StandardScaler()),
    ('regressor', regr_T),
])

# Fit on the training features against temperature.
rf_model = pipe.fit(x_train, y_train_T)


In [16]:
# Maurizio uses convert_sklearn
from skl2onnx import convert_sklearn
from skl2onnx.common.data_types import FloatTensorType

# Declare the ONNX graph input: a float tensor with an unspecified number of
# rows and one column per feature column of x_test.
n_features = x_test.shape[1]
initial_types = [('float_input', FloatTensorType([None, n_features]))]

# Convert the fitted pipeline to an ONNX model.
model_onnx = convert_sklearn(rf_model, initial_types=initial_types)

# Write the serialized model to disk.
with open("Petrelli2020_Cpx_Liq_Temp.onnx", "wb") as f:
    f.write(model_onnx.SerializeToString())
    
import onnxruntime as rt
# Reload the exported model and run it on the test features.
sess = rt.InferenceSession("Petrelli2020_Cpx_Liq_Temp.onnx")

# The graph is addressed through its first input and first output.
input_name = sess.get_inputs()[0].name
label_name = sess.get_outputs()[0].name

# Cast the features to float32 before feeding them to the session.
x_test_f32 = x_test.astype(np.float32)
onnx_outputs = sess.run([label_name], {input_name: x_test_f32})
pred_onx = onnx_outputs[0]

In [17]:
#
import onnxruntime as rt
# Open the saved temperature model and predict on the test features.
sess = rt.InferenceSession("Petrelli2020_Cpx_Liq_Temp.onnx")

# Names of the first graph input and the first graph output.
input_name = sess.get_inputs()[0].name
label_name = sess.get_outputs()[0].name

# Features are cast to float32 before being passed to the session.
x_test_f32 = x_test.astype(np.float32)
onnx_outputs = sess.run([label_name], {input_name: x_test_f32})
pred_onx = onnx_outputs[0]


## Old way - using Pickle

In [18]:
import joblib
# Load the previously saved regressor from its pickle file (the pre-ONNX
# workflow; presumably the temperature model, judging by the file name).
# NOTE(review): joblib.load unpickles the file, and unpickling can execute
# arbitrary code. Only load a .pkl that comes from a trusted source.
with open('ETR_Temp_Petrelli2020_Cpx_Liq.pkl', 'rb') as f:
    ETR_Temp_P2020_Cpx_Liq=joblib.load(f)

In [19]:
# Collect the prediction of every individual tree in the ensemble so the
# spread of votes per sample can be inspected.
#
# Result: a 2-D array with one row per fitted tree in `reg.estimators_` and
# one column per row of `x_test`.
#
# A single pass over the estimators is enough. Nesting this loop inside a
# second, identical loop rebuilds the same matrix once per tree, which costs
# n_trees**2 predictions for the same final result.
reg = ETR_Temp_P2020_Cpx_Liq
voting = np.asarray(
    [tree.predict(x_test).tolist() for tree in reg.estimators_]
)
voting

array([[1373.15, 1373.15, 1333.15, ..., 1548.15, 1656.15, 1656.15],
       [1433.15, 1433.15, 1433.15, ..., 1773.15, 1688.15, 1688.15],
       [1258.15, 1258.15, 1258.15, ..., 1473.15, 1473.15, 1473.15],
       ...,
       [1423.15, 1540.15, 1573.15, ..., 1563.15, 1811.15, 1811.15],
       [1573.15, 1573.15, 1573.15, ..., 1693.15, 1693.15, 1693.15],
       [1394.15, 1394.15, 1394.15, ..., 1448.15, 1448.15, 1448.15]])

In [22]:
import onnxruntime as rt

# Open an ONNX model and predict on the test features.
# NOTE(review): no visible cell in this notebook writes
# "Pipeline_onnx_test.onnx", and this cell's execution count is out of
# sequence. Confirm the file exists, or whether
# "Petrelli2020_Cpx_Liq_Temp.onnx" was intended.
sess = rt.InferenceSession("Pipeline_onnx_test.onnx")

# Names of the first graph input and the first graph output.
input_name = sess.get_inputs()[0].name
label_name = sess.get_outputs()[0].name

# Features are cast to float32 before being passed to the session.
x_test_f32 = x_test.astype(np.float32)
onnx_outputs = sess.run([label_name], {input_name: x_test_f32})
pred_onx = onnx_outputs[0]


In [20]:
# Display the repr of the current `sess` object (whichever cell last assigned it).
sess

<onnxruntime.capi.onnxruntime_inference_collection.InferenceSession at 0x2e33b18e760>