In [77]:
from script.condition import activity, boundary_condition

import pandas as pd
import numpy as np
import tqdm
import os
import joblib

from sklearn.preprocessing import MinMaxScaler, StandardScaler 

## Make Scaler

In [78]:
products = ['formate','co','c1','h2'] # expected CO2RR products
eU_lst = [-1.65,-1.60,-1.55,-1.50,-1.45,-1.40,-1.35,-1.30,-1.25,
          -1.20,-1.15,-1.10,-1.05,-1.00,-0.95,-0.90,-0.85,-0.80,
          -0.75,-0.70,-0.65,-0.60,-0.55,-0.50,-0.45] # list of applied potential

# eU_dict maps: applied potential -> {alloy name -> length-4 density vector},
# where the vector is ordered like `products`.
eU_dict = dict()

data_path = 'data/energy/'
csv_lst =  os.listdir(data_path) # collect Dataframe with predicted energy of alloy

# Reading and de-duplicating the CSV files does not depend on the applied
# potential, so do it once up front instead of once per potential.
energy_dict = dict()
for csv_name in csv_lst:
    df = pd.read_csv(data_path + csv_name, index_col=0)
    df = df.drop_duplicates(['FNN','Same','Sub']) # remain only unique active motifs
    name = csv_name.split('.')[0]

    # Keep only the three energy columns that the loop below consumes.
    energy_dict[name] = (df['co'].to_numpy(),
                         df['h'].to_numpy(),
                         df['oh'].to_numpy())

for eU in tqdm.tqdm(eU_lst):
    dens_dict = dict()

    for name, (co_lst, h_lst, oh_lst) in energy_dict.items():
        total = len(co_lst) # number of unique active motifs in this file
        density = np.zeros(len(products))

        for x,y,z in zip(co_lst, h_lst, oh_lst):
            product = boundary_condition(x,y,z,eU) # determine product by boundary condition
            barrier = activity(x,y,z,eU,product)  # calculate thermodynamic barrier

            if barrier <= 0: # count active motifs with no uphill at the applied potential
                density[products.index(product)] += 1

        # NOTE(review): a file with zero rows gives 0/0 -> NaN here, as in the
        # original cell; confirm whether such files can occur.
        density /= total # calculate density
        dens_dict[name] = density

    eU_dict[eU] = dens_dict

100%|██████████| 25/25 [13:16<00:00, 31.88s/it]


In [80]:
# Fit one MinMaxScaler per (applied potential, product) pair so that the
# densities computed above can be normalized later.
# scaler_dict maps: applied potential -> list of fitted scalers, ordered like `products`.

scaler_dict = {}

for eU in eU_lst:
    alloy_densities = eU_dict[eU]
    fitted_scalers = []

    for col in range(len(products)):
        # Densities of this product across every alloy, shaped as one feature column.
        column = np.array([vec[col] for vec in alloy_densities.values()]).reshape(-1,1)
        fitted_scalers.append(MinMaxScaler().fit(column))

    scaler_dict[eU] = fitted_scalers

# Persist the fitted scalers to disk.
joblib.dump(scaler_dict, 'data/scaler_dict.pkl')

['data/scaler_dict.pkl']