# Import

In [1]:
import pandas as pd 
import numpy as np
from script.load_dataset import input_dataset,chemical_space
from script.load_descriptors import get_descriptors
from script.select_model import get_selected_model
from script.select_feature import feature_selection
from script.predict_yield import get_sorted_pre_yield
from script.ort_select import get_orthogonal_selection

In [2]:
def format_output(condition):
    """Turn rows of reaction conditions into a labelled DataFrame.

    Parameters
    ----------
    condition : array-like
        Two-dimensional sequence of rows. Only the first five entries of each
        row are read, in this order: rank, Anode/Cathode, Solvent,
        Electrolyte, Current/Potential. Any further entries are ignored.

    Returns
    -------
    pandas.DataFrame
        One column per field listed above and one row per input row. An empty
        input produces an empty frame that still carries the five column
        names.
    """
    column_names = ('rank', 'Anode/Cathode', 'Solvent', 'Electrolyte',
                    'Current/Potential')
    # NOTE: np.array promotes rows that mix numbers and strings to a single
    # dtype, so numeric entries such as the rank can come back as strings.
    condition = np.array(condition)
    # An empty selection becomes an array with no addressable columns, which
    # the column slicing below cannot index; return an empty table instead of
    # raising IndexError.
    if condition.size == 0:
        return pd.DataFrame(columns=list(column_names))
    dic_condition = {name: list(condition[:, position])
                     for position, name in enumerate(column_names)}
    return pd.DataFrame(dic_condition)

# Dataset

In [3]:
# Round number; passed to the dataset loader here and to the orthogonal
# selection step later in the notebook.
n_round = 4
# Build the dataset object for this round (project helper from script.load_dataset).
Input_dataset = input_dataset(n_round=n_round)
# Keep the attributes that the later cells pass to the modelling helpers.
yield_std = Input_dataset.yield_std
input_data = Input_dataset.input_data
input_index = Input_dataset.input_index
# Last expression of the cell: displays the table of tested conditions with
# their measured "Yield (%)" column.
Input_dataset.input_data_yield

Unnamed: 0,Entry,Anode/Cathode,Solvent,Electrolyte,Current/Potential,Yield (%)
0,1,Pt/Pt,AcOH,K3PO4,0.3 mA,17
1,2,GF/Pt,AcOH: TFE(1: 1),nBu4NOAc,0.3 mA,0
2,3,BDD/Pt,AcOH: TFE(1: 1),LiClO4,0.9 mA,13
3,4,Fe/Pt,AcOH: MeCN(1: 1),LiOAc,1.2 mA,0
4,5,Pt/GF,TFE,NaO2CAd,1.0 V,0
5,6,GF/GF,TFE: EtOH(1: 1),NaOAc,1.5 V,0
6,7,BDD/GF,TFE: MeCN(1: 1),NaOPiv,0.3 mA,3
7,8,Fe/GF,EtOH,nBu4NOAc,0.6 mA,0
8,9,Pt/BDD,EtOH: MeCN(1: 1),nBu4NPF6,0.9 mA,0
9,10,GF/BDD,MeCN,K3PO4,1.2 mA,0


# Descriptor

In [4]:
# Descriptors shared by the model-selection, feature-selection and
# yield-prediction calls below (presumably standardized, given the name - TODO confirm).
des_std = get_descriptors()

# Model Selection

In [5]:
# Choose the model used by the following steps; the recorded output lists a
# Pearson R for every candidate model.
selected_model = get_selected_model(
    input_index=input_index,
    des_std=des_std,
    yield_std=yield_std,
)

Model:    BG, Pearson R: 0.4360
Model:    DT, Pearson R: 0.3534
Model:    ET, Pearson R: 0.4353
Model:    GB, Pearson R: 0.4509
Model:   KNR, Pearson R: 0.5172
Model:   KRR, Pearson R: 0.5020
Model:  LSVR, Pearson R: 0.3287
Model:    RF, Pearson R: 0.4686
Model: Ridge, Pearson R: 0.4841
Model:   SVR, Pearson R: 0.5511
Model:   XGB, Pearson R: 0.4223
SVR


# Feature selection

In [6]:
# Run feature selection for the model chosen in the previous step.
Feature_selection = feature_selection(
    model_name=selected_model,
    input_index=input_index,
    des_std=des_std,
    yield_std=yield_std,
)
# Report the model name, the selected feature indices and the resulting Pearson R.
print(
    'Model: %5s, Index of feature: %s, Pearson R: %.4f'
    % (
        selected_model,
        str(Feature_selection.selected_feature),
        Feature_selection.pear,
    )
)

Model:   SVR, Index of feature: [6, 22, 40, 37, 3, 1, 25, 15, 7, 34, 4, 2, 14, 23, 16, 11, 21, 27, 10, 5, 12, 17], Pearson R: 0.8096


# Yield prediction

In [7]:
# Predict yields with the chosen model, restricted to the selected features.
sorted_pre_yield = get_sorted_pre_yield(
    model_name=selected_model,
    input_index=input_index,
    des_std=des_std,
    yield_std=yield_std,
    selected_feature=Feature_selection.selected_feature,
)
# Display the first 20 reaction-condition combinations of the sorted result.
format_output(sorted_pre_yield[:20])

Unnamed: 0,rank,Anode/Cathode,Solvent,Electrolyte,Current/Potential
0,1,Pt/BDD,AcOH: TFE(1: 1),NaOPiv,0.9 mA
1,2,Pt/BDD,AcOH: TFE(1: 1),NaOPiv,1.0 V
2,3,Pt/BDD,AcOH: TFE(1: 1),NaOPiv,1.5 V
3,4,Pt/BDD,AcOH: TFE(1: 1),NaOPiv,0.6 mA
4,5,Pt/BDD,AcOH: TFE(1: 1),K3PO4,0.9 mA
5,6,Pt/BDD,AcOH: TFE(1: 1),K3PO4,1.0 V
6,7,Pt/BDD,AcOH: TFE(1: 1),K3PO4,1.5 V
7,8,Pt/BDD,AcOH: TFE(1: 1),K3PO4,0.6 mA
8,9,Pt/GF,AcOH: TFE(1: 1),NaOPiv,0.9 mA
9,10,Pt/GF,AcOH: TFE(1: 1),NaOPiv,1.0 V


# Orthogonal selection

In [8]:
#Choose to do the experiment
orthogonal_selection = get_orthogonal_selection(n_round=n_round,
                    input_data=input_data,sorted_pre_yield=sorted_pre_yield)
format_output(orthogonal_selection)

Unnamed: 0,rank,Anode/Cathode,Solvent,Electrolyte,Current/Potential
0,2,Pt/BDD,AcOH: TFE(1: 1),NaOPiv,1.0 V
1,7,Pt/BDD,AcOH: TFE(1: 1),K3PO4,1.5 V
2,11,Pt/GF,AcOH: TFE(1: 1),NaOPiv,1.5 V
3,14,Pt/GF,AcOH: TFE(1: 1),K3PO4,1.0 V
