# Load Dependencies

In [1]:
import pandas as pd 
import numpy as np
from script.load_dataset import input_dataset,chemical_space
from script.load_descriptors import get_descriptors
from script.select_feature import feature_selection
from script.predict_yield import get_sorted_pre_yield
from script.ort_select import get_orthogonal_selection
from script.select_model import get_best_model_and_param
from script.utils import format_output

# Load Dataset

In [3]:
# Round number handed to the dataset loader and, later, to the orthogonal
# selection step.  NOTE(review): presumably the index of the current
# optimisation round -- confirm against script.load_dataset.
n_round = 5

# Load the dataset object for this round and pull out the pieces that the
# downstream cells consume.
Input_dataset = input_dataset(n_round=n_round)
yield_std, input_data, input_index = (
    Input_dataset.yield_std,
    Input_dataset.input_data,
    Input_dataset.input_index,
)

# Bare last expression: display the experimental conditions with their yields.
Input_dataset.input_data_yield

Unnamed: 0,Entry,Anode/Cathode,Solvent,Electrolyte,Current/Potential,Yield (%)
0,1,Pt/Pt,AcOH,K3PO4,0.3 mA,17
1,2,GF/Pt,AcOH: TFE(1: 1),nBu4NOAc,0.3 mA,0
2,3,BDD/Pt,AcOH: TFE(1: 1),LiClO4,0.9 mA,13
3,4,Fe/Pt,AcOH: MeCN(1: 1),LiOAc,1.2 mA,0
4,5,Pt/GF,TFE,NaO2CAd,1.0 V,0
5,6,GF/GF,TFE: EtOH(1: 1),NaOAc,1.5 V,0
6,7,BDD/GF,TFE: MeCN(1: 1),NaOPiv,0.3 mA,3
7,8,Fe/GF,EtOH,nBu4NOAc,0.6 mA,0
8,9,Pt/BDD,EtOH: MeCN(1: 1),nBu4NPF6,0.9 mA,0
9,10,GF/BDD,MeCN,K3PO4,1.2 mA,0


# Load Descriptor

In [4]:
# Load the descriptors used as model inputs; `des_std` is passed to the model
# selection, feature selection and yield prediction steps further down.
# NOTE(review): the `_std` suffix suggests the descriptors are standardized --
# confirm in script.load_descriptors.
des_std = get_descriptors()

# Model Selection

In [5]:
# Evaluate every candidate model and keep the best one.
# The reported score is -MAE (negated mean absolute error), so the value
# closest to zero wins.
best_model_name, best_model_params, best_params = get_best_model_and_param(
    des_std,
    yield_std,
    input_index,
)

Model:   BG, Best Socre: -0.0650, Best Param:  {'n_estimators': 40}
Model:   DT, Best Socre: -0.0857, Best Param:  {'max_depth': None}
Model:   ET, Best Socre: -0.0581, Best Param:  {'max_depth': None, 'n_estimators': 50}
Model:   GB, Best Socre: -0.0565, Best Param:  {'max_depth': 3, 'n_estimators': 400}
Model:  KNR, Best Socre: -0.0582, Best Param:  {'n_neighbors': 2}
Model:  KRR, Best Socre: -0.0586, Best Param:  {'gamma': None}
Model: LSVR, Best Socre: -0.0505, Best Param:  {'C': 3, 'epsilon': 0.05}
Model:   RF, Best Socre: -0.0631, Best Param:  {'max_depth': 20, 'n_estimators': 400}
Model: Ridge, Best Socre: -0.0575, Best Param:  {'alpha': 1.5}
Model:  SVR, Best Socre: -0.0725, Best Param:  {'gamma': 'scale', 'kernel': 'linear'}
Model:  XGB, Best Socre: -0.0561, Best Param:  {'max_depth': None}
Best Model: LSVR, Best Param:  {'C': 3, 'epsilon': 0.05}


# Feature selection

In [6]:
import warnings

# Run feature selection for the best model with warnings silenced.
# The call must live INSIDE the catch_warnings() context: the "ignore" filter
# is only active until the `with` block exits, at which point the previous
# filter state is restored.  (Previously the call sat after the block, so the
# filter had already been discarded and nothing was suppressed.)
with warnings.catch_warnings():
    warnings.filterwarnings("ignore")
    Feature_selection = feature_selection(
        model_name=best_model_name,
        input_index=input_index,
        des_std=des_std,
        yield_std=yield_std,
        best_params=best_params,
    )

# Report the selected feature indices and the associated Pearson R.
print('Model: %5s, Index of feature: %s, Pearson R: %.4f'%(best_model_name,
                str(Feature_selection.selected_feature),Feature_selection.pear))

[7, 21, 18, 31, 14, 1, 2, 0, 29]
Model:  LSVR, Index of feature: [7, 21, 18, 31, 14, 1, 2, 0, 29], Pearson R: 0.8573


# Yield prediction

In [7]:
# Predict yields with the selected model, restricted to the features chosen
# during feature selection.
# NOTE(review): the result is presumably ordered by predicted yield, best
# first -- confirm in script.predict_yield.
sorted_pre_yield = get_sorted_pre_yield(
    model_name=best_model_name,
    input_index=input_index,
    des_std=des_std,
    yield_std=yield_std,
    selected_feature=Feature_selection.selected_feature,
    best_params=best_params,
)

# Display the first 20 reaction-condition combinations of the sorted result.
format_output(sorted_pre_yield[:20])

Unnamed: 0,rank,Anode/Cathode,Solvent,Electrolyte,Current/Potential
0,1,GF/Fe,AcOH,LiOAc,0.6 mA
1,2,GF/Pt,AcOH,LiOAc,0.6 mA
2,3,GF/BDD,AcOH,LiOAc,0.6 mA
3,4,GF/Fe,AcOH,K3PO4,0.6 mA
4,5,GF/BDD,AcOH,K3PO4,0.6 mA
5,6,BDD/Fe,AcOH,LiOAc,0.6 mA
6,7,BDD/Pt,AcOH,LiOAc,0.6 mA
7,8,Pt/Fe,AcOH,LiOAc,0.6 mA
8,9,BDD/BDD,AcOH,LiOAc,0.6 mA
9,10,GF/GF,AcOH,LiOAc,0.6 mA


# Orthogonal selection

In [8]:
# Choose the reaction conditions to run as the next experiments.
# NOTE(review): presumably picks mutually distinct conditions from the ranked
# predictions -- confirm in script.ort_select.
orthogonal_selection = get_orthogonal_selection(
    n_round=n_round,
    input_data=input_data,
    sorted_pre_yield=sorted_pre_yield,
)

# Display the selected conditions.
format_output(orthogonal_selection)

Unnamed: 0,rank,Anode/Cathode,Solvent,Electrolyte,Current/Potential
0,1,GF/Fe,AcOH,LiOAc,0.6 mA
1,31,GF/BDD,AcOH,NaOAc,0.6 mA
2,54,BDD/Fe,AcOH,NaOPiv,0.6 mA
3,65,BDD/Pt,AcOH,nBu4NPF6,0.6 mA
