# Load Dependencies

In [1]:
import pandas as pd 
import numpy as np
from script.load_dataset import input_dataset,chemical_space
from script.load_descriptors import get_descriptors
from script.select_feature import feature_selection
from script.predict_yield import get_sorted_pre_yield
from script.ort_select import get_orthogonal_selection
from script.select_model import get_best_model_and_param
from script.utils import format_output

# Load Dataset

In [3]:
# Round selector forwarded to input_dataset (presumably the optimisation
# round whose accumulated results should be loaded -- confirm in
# script.load_dataset).
n_round = 11
Input_dataset = input_dataset(n_round=n_round)

# Pull out the pieces reused further down; yield_std and input_index are fed
# to model selection, feature selection and yield prediction.
yield_std, input_data, input_index = (
    Input_dataset.yield_std,
    Input_dataset.input_data,
    Input_dataset.input_index,
)

# Bare last expression so the notebook renders the conditions/yield table.
Input_dataset.input_data_yield

Unnamed: 0,Entry,Anode/Cathode,Solvent,Electrolyte,Current/Potential,Yield (%)
0,1,Pt/Pt,AcOH,K3PO4,0.3 mA,17
1,2,GF/Pt,AcOH: TFE(1: 1),nBu4NOAc,0.3 mA,0
2,3,BDD/Pt,AcOH: TFE(1: 1),LiClO4,0.9 mA,13
3,4,Fe/Pt,AcOH: MeCN(1: 1),LiOAc,1.2 mA,0
4,5,Pt/GF,TFE,NaO2CAd,1.0 V,0
5,6,GF/GF,TFE: EtOH(1: 1),NaOAc,1.5 V,0
6,7,BDD/GF,TFE: MeCN(1: 1),NaOPiv,0.3 mA,3
7,8,Fe/GF,EtOH,nBu4NOAc,0.6 mA,0
8,9,Pt/BDD,EtOH: MeCN(1: 1),nBu4NPF6,0.9 mA,0
9,10,GF/BDD,MeCN,K3PO4,1.2 mA,0


# Load Descriptors

In [4]:
# Load the descriptor data through the project helper. des_std is passed to
# model selection, feature selection and yield prediction in the cells below.
# NOTE(review): the "_std" suffix suggests the descriptors are standardised --
# confirm in script.load_descriptors.
des_std = get_descriptors()

# Model Selection

In [5]:
# Compare the candidate regressors and keep the best one.
# Score: -MAE (negated mean absolute error, so the value closest to zero wins).
best_model_name, best_model_params, best_params = get_best_model_and_param(
    des_std,
    yield_std,
    input_index,
)

Model:   BG, Best Socre: -0.0756, Best Param:  {'n_estimators': 50}
Model:   DT, Best Socre: -0.0884, Best Param:  {'max_depth': None}
Model:   ET, Best Socre: -0.0719, Best Param:  {'max_depth': 10, 'n_estimators': 300}
Model:   GB, Best Socre: -0.0768, Best Param:  {'max_depth': 3, 'n_estimators': 50}
Model:  KNR, Best Socre: -0.0864, Best Param:  {'n_neighbors': 2}
Model:  KRR, Best Socre: -0.0864, Best Param:  {'gamma': None}
Model: LSVR, Best Socre: -0.0900, Best Param:  {'C': 4, 'epsilon': 0.1}


Model:   RF, Best Socre: -0.0757, Best Param:  {'max_depth': 10, 'n_estimators': 50}
Model: Ridge, Best Socre: -0.0836, Best Param:  {'alpha': 1.5}
Model:  SVR, Best Socre: -0.0803, Best Param:  {'gamma': 'auto', 'kernel': 'rbf'}
Model:  XGB, Best Socre: -0.0889, Best Param:  {'max_depth': None}
Best Model:   ET, Best Param:  {'max_depth': 10, 'n_estimators': 300}


# Feature Selection

In [7]:
# Run feature selection with the winning model and its tuned parameters.
Feature_selection = feature_selection(
    model_name=best_model_name,
    input_index=input_index,
    des_std=des_std,
    yield_std=yield_std,
    best_params=best_params,
)

# Report which descriptor indices were kept and the resulting Pearson R.
print(
    'Model: %5s, Index of feature: %s, Pearson R: %.4f' % (
        best_model_name,
        str(Feature_selection.selected_feature),
        Feature_selection.pear,
    )
)

[19, 10, 22, 23, 33, 7, 6, 32, 35]
Model:    ET, Index of feature: [19, 10, 22, 23, 33, 7, 6, 32, 35], Pearson R: 0.8066


# Yield Prediction

In [60]:
# Predict yields using the selected model and the selected descriptor subset.
# NOTE(review): best_params is not passed here although feature_selection
# receives it -- confirm that get_sorted_pre_yield applies the tuned
# hyper-parameters on its own.
sorted_pre_yield = get_sorted_pre_yield(
    model_name=best_model_name,
    input_index=input_index,
    des_std=des_std,
    yield_std=yield_std,
    selected_feature=Feature_selection.selected_feature,
)

# Display the first 20 entries of the sorted predictions
# (reaction-condition combinations).
format_output(sorted_pre_yield[:20])

Unnamed: 0,rank,Anode/Cathode,Solvent,Electrolyte,Current/Potential
0,1,GF/Pt,AcOH: TFE(1: 1),NaOPiv,0.3 mA
1,2,GF/Pt,AcOH: TFE(1: 1),NaOPiv,1.0 V
2,3,GF/Pt,AcOH: TFE(1: 1),NaOPiv,0.6 mA
3,4,GF/Pt,AcOH: TFE(1: 1),K3PO4,0.3 mA
4,5,GF/Pt,AcOH: TFE(1: 1),K3PO4,1.0 V
5,6,GF/Pt,AcOH: TFE(1: 1),K3PO4,0.6 mA
6,7,GF/Pt,AcOH: TFE(1: 1),KOAc,0.3 mA
7,8,GF/Pt,AcOH: TFE(1: 1),KOAc,0.6 mA
8,9,GF/Pt,AcOH: TFE(1: 1),LiClO4,0.3 mA
9,10,GF/Pt,AcOH: TFE(1: 1),LiClO4,1.0 V
