In [None]:
import sys
sys.path.append("../.")
import warnings
warnings.filterwarnings('ignore')

import pickle
import os
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import matplotlib as mpl

from tqdm import tqdm
from matplotlib import cm
from matplotlib.tri import Triangulation
from ast import literal_eval
from sklearn.preprocessing import MinMaxScaler

from dstar.apps import calculator, mask

# Generate our own MinMax scalers (one per product, for each applied potential)

In [None]:
# Fit one MinMaxScaler per product for every applied potential and pickle the
# result as {eU: [scaler_0, ..., scaler_3]} in ./utils/scaler.pkl.
# The scaled productivities are kept in eU_dict as {eU: {name: scaled_row}}.

eU_lst = np.arange(-1.6, -0.7, 0.05)  # Range of applied potential to calculate productivity
# NOTE(review): np.arange with a float step yields values that are not exact
# decimals; they are used as dictionary keys below, so any consumer of the
# pickle presumably has to regenerate the keys the same way -- confirm.
eU_dict = {}
scaler_by_eU = {}

path = './energy/'  # Path for dataframes with predicted binding energies
# os.listdir returns every directory entry; keep only the CSV inputs so stray
# entries (e.g. notebook checkpoint folders) are not handed to pd.read_csv.
csv_lst = sorted(f for f in os.listdir(path) if f.lower().endswith('.csv'))
if not csv_lst:
    raise FileNotFoundError('No CSV files found in ' + repr(path))

# The binding energies do not depend on the applied potential, so every file
# is read and de-duplicated exactly once instead of once per potential.
binding_energies = {}
for csv_file in tqdm(csv_lst):
    df = pd.read_csv(os.path.join(path, csv_file))
    df.drop_duplicates(['FNN', 'Same', 'Sub'], inplace=True)

    name = csv_file.split('.')[0]
    # Unpacking raises ValueError unless the name has exactly one '_'
    # (presumably '<element1>_<element2>' -- confirm); kept as a sanity check.
    el_1, el_2 = name.split('_')

    # Predicted binding energies
    binding_energies[name] = (
        df['co'].to_numpy(),
        df['h'].to_numpy(),
        df['oh'].to_numpy(),
    )

name_lst = sorted(binding_energies)

# NOTE(review): four products are scaled, as in the original hard-coded loop;
# presumably calculator.boltzman_product returns four values -- confirm.
n_products = 4

for eU in tqdm(eU_lst):
    g_dict = {}
    num_dict = {}

    for name in name_lst:
        co_arr, h_arr, oh_arr = binding_energies[name]
        # Copies are passed so each potential starts from pristine inputs,
        # exactly as when the files were re-read on every iteration.
        # Calculate delta_G_max of active motifs from the binding energy
        g_dict[name] = calculator.get_activity(
            co_arr.copy(), h_arr.copy(), oh_arr.copy(), eU
        )
        num_dict[name] = len(co_arr)  # Save the number of active motifs

    # Calculate raw productivity from the delta_G_max
    productivity_dict = {
        name: calculator.boltzman_product(g_dict[name], num_dict[name])
        for name in name_lst
    }

    # Rows become products, columns become elemental combinations
    # (transpose for product-wise scaling).
    arr = np.array(list(productivity_dict.values())).T

    scalers = []
    for i in range(n_products):
        scaler = MinMaxScaler()
        # Apply MinMaxScaler to the productivity of product i across all combinations
        arr[i] = scaler.fit_transform(arr[i].reshape(-1, 1)).flatten()
        scalers.append(scaler)

    # Map each combination back to its scaled per-product values
    productivity_dict = dict(zip(name_lst, arr.T))

    # Save productivity and scaler
    eU_dict[eU] = productivity_dict
    scaler_by_eU[eU] = scalers

# Create the output directory up front so the finished computation is not
# lost to a missing folder when the file is opened.
os.makedirs('./utils', exist_ok=True)
with open('./utils/scaler.pkl', 'wb') as fw:
    pickle.dump(scaler_by_eU, fw)