# Import

In [1]:
import pandas as pd 
import numpy as np
from script.load_dataset import input_dataset,chemical_space
from script.load_descriptors import get_descriptors
from script.select_model import get_selected_model
from script.select_feature import feature_selection
from script.predict_yield import get_sorted_pre_yield
from script.ort_select import get_orthogonal_selection

In [2]:
def format_output(condition):
    """Arrange ranked reaction conditions into a labelled DataFrame for display.

    Parameters
    ----------
    condition : sequence of rows or 2-D array
        Each row holds, in this order: rank, anode/cathode, solvent,
        electrolyte and current/potential. Columns beyond the fifth are
        ignored. The rows are passed through ``np.array``, so values are
        coerced to a common dtype (e.g. mixed int/str rows become strings).

    Returns
    -------
    pandas.DataFrame
        One row per input row with the columns 'rank', 'Anode/Cathode',
        'Solvent', 'Electrolyte' and 'Current/Potential'. An empty input
        yields an empty frame with those columns instead of raising
        ``IndexError``.
    """
    column_names = ['rank', 'Anode/Cathode', 'Solvent', 'Electrolyte',
                    'Current/Potential']
    condition = np.array(condition)
    # np.array([]) is one-dimensional, so the column slicing below would fail
    # on it; hand back the empty table up front.
    if condition.size == 0:
        return pd.DataFrame(columns=column_names)
    # Column i of the array feeds the i-th named column; dict order keeps the
    # column order of the resulting frame.
    return pd.DataFrame({name: list(condition[:, position])
                         for position, name in enumerate(column_names)})

# Dataset

In [3]:
# Round number handed to the dataset loader here and to the orthogonal
# selection in the last cell.
# NOTE(review): presumably the index of the current experimental round —
# confirm against script.load_dataset.
n_round = 8
Input_dataset = input_dataset(n_round=n_round)
# Pull out the attributes that the later cells pass to the modelling helpers.
yield_std = Input_dataset.yield_std
input_data = Input_dataset.input_data
input_index = Input_dataset.input_index
# Bare last expression: the notebook renders the table of tested conditions
# together with their 'Yield (%)' column.
Input_dataset.input_data_yield

Unnamed: 0,Entry,Anode/Cathode,Solvent,Electrolyte,Current/Potential,Yield (%)
0,1,Pt/Pt,AcOH,K3PO4,0.3 mA,17
1,2,GF/Pt,AcOH: TFE(1: 1),nBu4NOAc,0.3 mA,0
2,3,BDD/Pt,AcOH: TFE(1: 1),LiClO4,0.9 mA,13
3,4,Fe/Pt,AcOH: MeCN(1: 1),LiOAc,1.2 mA,0
4,5,Pt/GF,TFE,NaO2CAd,1.0 V,0
5,6,GF/GF,TFE: EtOH(1: 1),NaOAc,1.5 V,0
6,7,BDD/GF,TFE: MeCN(1: 1),NaOPiv,0.3 mA,3
7,8,Fe/GF,EtOH,nBu4NOAc,0.6 mA,0
8,9,Pt/BDD,EtOH: MeCN(1: 1),nBu4NPF6,0.9 mA,0
9,10,GF/BDD,MeCN,K3PO4,1.2 mA,0


# Descriptor

In [4]:
# Load the descriptors that are passed to model selection, feature selection
# and yield prediction below.
# NOTE(review): the `_std` suffix suggests standardized values — confirm in
# script.load_descriptors.
des_std = get_descriptors()

# Model Selection

In [5]:
# Choose the model used by the rest of the workflow. Per the recorded output,
# the call reports a Pearson R for every candidate model and then the name of
# the selected one; that name is what the following cells pass as `model_name`.
selected_model = get_selected_model(input_index=input_index,des_std=des_std,
                                    yield_std=yield_std)

Model:    BG, Pearson R: 0.6899
Model:    DT, Pearson R: 0.6034
Model:    ET, Pearson R: 0.7832
Model:    GB, Pearson R: 0.6758
Model:   KNR, Pearson R: 0.7704
Model:   KRR, Pearson R: 0.6438
Model:  LSVR, Pearson R: 0.4972
Model:    RF, Pearson R: 0.7386
Model: Ridge, Pearson R: 0.6303
Model:   SVR, Pearson R: 0.7751
Model:   XGB, Pearson R: 0.6468
ET


# Feature selection

In [6]:
# Run feature selection for the chosen model on the same inputs.
# The result exposes `selected_feature` (descriptor indices, reused by the
# prediction cell) and `pear` (printed below as the Pearson R).
Feature_selection = feature_selection(model_name=selected_model,
                    input_index=input_index,des_std=des_std,yield_std=yield_std)
# One-line summary: model name, selected feature indices, Pearson R to 4 decimals.
print('Model: %5s, Index of feature: %s, Pearson R: %.4f'%(selected_model,
                str(Feature_selection.selected_feature),Feature_selection.pear))

Model:    ET, Index of feature: [23, 14, 1, 0, 10], Pearson R: 0.8194


# Yield prediction

In [7]:
# Predict yields with the selected model restricted to the selected features.
# NOTE(review): the result is presumably ordered best-first (the displayed
# ranks start at 1) — confirm in script.predict_yield.
sorted_pre_yield = get_sorted_pre_yield(model_name=selected_model,
input_index=input_index,des_std=des_std,yield_std=yield_std,
selected_feature=Feature_selection.selected_feature)
# Display the first 20 entries of the sorted predictions as a labelled table.
format_output(sorted_pre_yield[:20])

Unnamed: 0,rank,Anode/Cathode,Solvent,Electrolyte,Current/Potential
0,1,GF/Pt,AcOH: TFE(1: 1),LiOAc,0.3 mA
1,2,GF/Pt,AcOH: TFE(1: 1),LiOAc,0.9 mA
2,3,GF/Pt,AcOH: TFE(1: 1),LiOAc,1.2 mA
3,4,GF/Pt,AcOH: TFE(1: 1),LiOAc,1.0 V
4,5,GF/Pt,AcOH: TFE(1: 1),LiOAc,1.5 V
5,6,GF/Pt,AcOH: TFE(1: 1),LiOAc,0.6 mA
6,7,BDD/BDD,AcOH: TFE(1: 1),LiOAc,0.3 mA
7,8,BDD/BDD,AcOH: TFE(1: 1),LiOAc,0.9 mA
8,9,BDD/BDD,AcOH: TFE(1: 1),LiOAc,1.2 mA
9,10,BDD/BDD,AcOH: TFE(1: 1),LiOAc,1.0 V


# Orthogonal selection

In [8]:
# Pick, from the sorted predictions, the conditions to run as the next experiments.
# NOTE(review): the exact selection criterion lives in script.ort_select —
# presumably it spreads the picks across conditions not yet covered by
# `input_data`; confirm there.
orthogonal_selection = get_orthogonal_selection(n_round=n_round,
                    input_data=input_data,sorted_pre_yield=sorted_pre_yield)
# Display the chosen conditions as a labelled table.
format_output(orthogonal_selection)

Unnamed: 0,rank,Anode/Cathode,Solvent,Electrolyte,Current/Potential
0,4,GF/Pt,AcOH: TFE(1: 1),LiOAc,1.0 V
1,8,BDD/BDD,AcOH: TFE(1: 1),LiOAc,0.9 mA
2,15,BDD/GF,AcOH: TFE(1: 1),LiOAc,1.2 mA
3,29,BDD/Fe,AcOH: TFE(1: 1),LiOAc,0.6 mA
