# Setup: imports

In [1]:
import pandas as pd 
import numpy as np
import csv
import sys 
from os import sep
from script.utils import format_output,f_des_std,get_sorted_pre_yield_ep

# Load Data

In [2]:
# Read the experimental records (one row per measured reaction).
df = pd.read_csv('./dataset/all_input_data.csv')

# One plain Python list per reaction-condition column, kept in row order.
em, sol, ele, cp = (
    df[column].tolist()
    for column in ('Anode/Cathode', 'Solvent', 'Electrolyte', 'Current/Potential')
)

# Measured yields in percent, plus the same values divided by 100.
labels = np.array(df['Yield (%)'].tolist())
labels_std = labels / 100

# Distinct values seen in each column. They come from a set, so the order of
# these lists is arbitrary and must not be relied on.
em_set, sol_set, ele_set, cp_set = (
    list(set(values)) for values in (em, sol, ele, cp)
)

# Generate descriptors

In [3]:
# Each descriptor workbook is read into a 2-D array whose first column holds
# the component name; the remaining columns of that row are its descriptor
# values. The dictionaries below map name -> descriptor row.

# electrolyte
ele_df = np.array(pd.read_excel('descriptor/descriptors of electrolytes.xlsx'))
ele_name_des = {row[0]: row[1:] for row in ele_df}

# solvents
sol_df = np.array(pd.read_excel('descriptor/descriptors of solvents.xlsx'))
sol_name_des = {row[0]: row[1:] for row in sol_df}

# em (anode/cathode pair)
em_df = np.array(pd.read_excel('descriptor/descriptors of electrodes.xlsx'))
em_name_des = {row[0]: row[1:] for row in em_df}

#cp
# One-hot code for each current/potential setting: a list of six ints with a
# single 1 at the setting's position in `_cp_levels`.
_cp_levels = ('0.3 mA', '0.6 mA', '0.9 mA', '1.2 mA', '1.0 V', '1.5 V')
cp2onehot = {
    level: [int(slot == position) for slot in range(len(_cp_levels))]
    for position, level in enumerate(_cp_levels)
}

In [4]:
# Electrode materials used to spell out the anode/cathode pairs below.
_em_core = ['Pt', 'Fe', 'GF', 'BDD']
_em_extra = ['Glassy carbon', 'RVC', 'Ni foam']
_em_all = _em_core + _em_extra

# Every anode/cathode pairing of the seven materials in which at least one
# side is Glassy carbon, RVC or Ni foam (33 pairs, anode-major order).
em_ep_list = [
    anode + '/' + cathode
    for anode in _em_all
    for cathode in _em_all
    if anode in _em_extra or cathode in _em_extra
]

# HFIP-containing solvents.
sol_ep_list = [
    'HFIP: AcOH(1: 1)',
    'HFIP: TFE(1: 1)',
    'HFIP: EtOH(1: 1)',
    'HFIP: MeCN(1: 1)',
    'HFIP',
]

# Candidate electrolytes.
ele_ep = [
    'NaO2CAd',
    'nBu4NOAc',
    'nBu4NPF6',
    'LiClO4',
    'K3PO4',
    'LiOAc',
    'NaOAc',
    'KOAc',
    'NaOPiv',
]

# Candidate solvents: 1:1 mixtures first, then the neat solvents.
sol_ep = [
    'AcOH: TFE(1: 1)',
    'HFIP: AcOH(1: 1)',
    'HFIP: TFE(1: 1)',
    'HFIP: EtOH(1: 1)',
    'HFIP: MeCN(1: 1)',
    'AcOH: EtOH(1: 1)',
    'AcOH: MeCN(1: 1)',
    'TFE: EtOH(1: 1)',
    'TFE: MeCN(1: 1)',
    'EtOH: MeCN(1: 1)',
    'AcOH',
    'TFE',
    'EtOH',
    'MeCN',
    'HFIP',
]

# Candidate anode/cathode pairs. The order is kept exactly as written because
# it determines the row order of the enumerated search space.
em_ep = [
    'GF/Pt',
    'Fe/Fe', 'Fe/GF', 'Fe/Pt', 'Fe/RVC', 'Fe/Ni foam',
    'GF/BDD', 'GF/Fe', 'GF/GF',
    'Pt/BDD',
    'BDD/BDD', 'BDD/Fe', 'BDD/GF', 'BDD/Pt',
    'Fe/BDD',
    'Pt/Fe', 'Pt/GF', 'Pt/Pt', 'Pt/Glassy carbon', 'Pt/RVC', 'Pt/Ni foam',
    'Fe/Glassy carbon',
    'GF/Glassy carbon', 'GF/RVC', 'GF/Ni foam',
    'BDD/Glassy carbon', 'BDD/RVC', 'BDD/Ni foam',
    'Glassy carbon/Pt', 'Glassy carbon/Fe', 'Glassy carbon/GF',
    'Glassy carbon/BDD', 'Glassy carbon/Glassy carbon',
    'Glassy carbon/RVC', 'Glassy carbon/Ni foam',
    'RVC/Pt', 'RVC/Fe', 'RVC/GF', 'RVC/BDD',
    'RVC/Glassy carbon', 'RVC/RVC', 'RVC/Ni foam',
    'Ni foam/Pt', 'Ni foam/Fe', 'Ni foam/GF', 'Ni foam/BDD',
    'Ni foam/Glassy carbon', 'Ni foam/RVC', 'Ni foam/Ni foam',
]
cp_ep = cp2onehot.keys()

# Enumerate the candidate conditions. Iteration is solvent-major, then
# electrolyte, electrode pair and current/potential, and each row is stored as
# [electrolyte, solvent, electrode pair, current/potential].
# A combination is kept only when its electrode pair is in em_ep_list or its
# solvent is in sol_ep_list, and it avoids the excluded electrolyte
# ('NaOPiv'), the excluded solvents and the excluded pair 'GF/Pt'.
ep_space = np.array([
    [electrolyte, solvent, electrodes, setting]
    for solvent in sol_ep
    for electrolyte in ele_ep
    for electrodes in em_ep
    for setting in cp_ep
    if (electrodes in em_ep_list or solvent in sol_ep_list)
    and electrolyte != 'NaOPiv'
    and solvent not in ("AcOH", "TFE", "AcOH: TFE(1: 1)")
    and electrodes != 'GF/Pt'
])
ep_space.shape

(22608, 4)

In [5]:
# Row keys for the combined table: the measured experiments come first,
# followed by every candidate in ep_space, whose columns are
# [electrolyte, solvent, electrode pair, current/potential].
ele_keys = ele + list(ep_space[:, 0])
sol_keys = sol + list(ep_space[:, 1])
em_keys = em + list(ep_space[:, 2])
cp_keys = cp + list(ep_space[:, 3])

# Look up the descriptor row for each key; cp uses its six-element one-hot code.
ele_des = np.array([ele_name_des[key] for key in ele_keys], dtype='float64')
sol_des = np.array([sol_name_des[key] for key in sol_keys], dtype='float64')
em_des = np.array([em_name_des[key] for key in em_keys], dtype='float64')
cp_des = np.array([cp2onehot[key] for key in cp_keys]).reshape(-1, 6)

# Column-wise join in the order electrolyte | solvent | electrodes | cp.
des = np.concatenate([ele_des, sol_des, em_des, cp_des], axis=1)
# f_des_std is a project helper; presumably it standardises the descriptor
# columns -- confirm in script.utils.
des_std = f_des_std(des)

# Prediction

In [6]:
best_model_name = 'SVR'

# Indices 0..len(labels_std)-1, i.e. every row that has a measured yield.
input_index = [*range(len(labels_std))]

# Hard-coded feature indices passed to the helper; presumably column indices
# into des_std -- TODO document where this selection comes from.
selected_feature = [12, 28, 5, 0, 32, 1, 35, 2, 31, 3, 4, 30]

# Only the first returned value is used; the second is discarded.
sorted_pre_yield, _ = get_sorted_pre_yield_ep(
    ep_space=ep_space,
    model_name=best_model_name,
    input_index=input_index,
    des_std=des_std,
    labels_std=labels_std,
    selected_feature=selected_feature,
)

# NOTE(review): ep_space rows are [electrolyte, solvent, electrode pair,
# current/potential]; check that the column headers produced by format_output
# follow the same order.
format_output(sorted_pre_yield[:4])

Unnamed: 0,rank,Anode/Cathode,Solvent,Electrolyte,Current/Potential
0,1,NaO2CAd,HFIP: AcOH(1: 1),Fe/Fe,0.3 mA
1,2,NaO2CAd,HFIP: AcOH(1: 1),GF/Fe,0.3 mA
2,3,NaO2CAd,HFIP: AcOH(1: 1),BDD/Fe,0.3 mA
3,4,NaO2CAd,HFIP: AcOH(1: 1),Pt/Fe,0.3 mA
