In [1]:
import numpy as np
import pandas as pd
import os

# import statistics as stcs
# import scipy.stats as stats

import pickle
import sklearn
from sklearn.preprocessing import OneHotEncoder
from sklearn.neighbors import KNeighborsRegressor
from sklearn.model_selection import KFold 
from sklearn.model_selection import GridSearchCV
from sklearn.preprocessing import StandardScaler,scale,MinMaxScaler
from sklearn.metrics import mean_squared_error

import unittest

In [2]:
df_battery = pd.read_csv('Battery_Dataset.csv')

### One Hot Encoding dataset

In [3]:
def ohe_ACE(df):
    ohe = OneHotEncoder()
    ACE = df.loc[:,['anode','cathode','electrolyte']]
    ACE = ohe.fit_transform(ACE)
    df_encoded_ACE = pd.DataFrame(ACE.toarray())
    return df_encoded_ACE

In [4]:
# df_encoded_ACE = ohe_ACE(df_battery)
# df_encoded_ACE

In [5]:
def ohe_dataframe(df):
    df_ACE_col_name = ['A1','C1','C2','C3','E1','E2','E3']
    df_encoded_ACE = ohe_ACE(df)
    for i in range(len(df_encoded_ACE.columns)):
        df_encoded_ACE = df_encoded_ACE.rename({df_encoded_ACE.columns[i]: df_ACE_col_name[i]}, axis=1) 
    df_ohe = pd.concat([df_encoded_ACE, df],axis=1)
    return df_ohe

In [6]:
# df_enbattery = ohe_dataframe(df_battery)
# df_enbattery

### Split Data

In [7]:
def data_split (df, test_ratio, output, seed):
    np.random.seed(seed)
    
    df_enbattery = ohe_dataframe(df)

    total_row = df_enbattery.shape[0]
    test_row = round(total_row *test_ratio)
    train_row = total_row -test_row
    
    indices =np.random.permutation(total_row)
    train_indx, test_idx =indices[:train_row], indices[train_row:]
    train,test = df_enbattery.iloc[train_indx,:], df_enbattery.iloc[test_idx,:]
    
    X_test = test[['A1','C1','C2','C3','E1','E2','E3','Cycle','temperature','discharge_crate']]
    y_test = test[[output]]
    
    X_train = train[['A1','C1','C2','C3','E1','E2','E3','Cycle','temperature','discharge_crate']]
    y_train = train[[output]]
    
    return train,test,X_train,y_train,X_test,y_test

#train and test are full dataframes
#X_train,y_train,X_test,y_test are dataframes that include only the inputs

In [8]:
# train,test,X_train,y_train,X_test,y_test = data_split(df_battery, 0.2, 'Discharge_Capacity (Ah)', 66)

### Kfolds

In [9]:
def kfold(n_split,rand_state):
    kf = KFold(n_splits=n_split, random_state=rand_state, shuffle= True)
    return kf

In [10]:
# kf = kfold(10,66)

### Scale train and test sets

In [11]:
def data_scale(X_train, X_test):
        
    scaler = StandardScaler(with_mean=True,with_std=True)

    scaler.fit(X_train)
    X_train_scaled = pd.DataFrame(scaler.transform(X_train))
    
    scaler.fit(X_test)
    X_test_scaled = pd.DataFrame(scaler.transform(X_test))
    
    return X_train_scaled,X_test_scaled

In [12]:
# train,test,X_train,y_train,X_test,y_test = data_split(df_battery, 0.2, 'Discharge_Capacity (Ah)', 66)
# X_train_scaled, X_test_scaled = data_scale(X_train, X_test)

### GridSearchCV for Hyperparameters

In [13]:
def grid_knn_hp(lower, upper, df, output):
    param_grid = {'n_neighbors':range(lower,upper),
                  'weights':['uniform', 'distance'],
                  'algorithm':['auto', 'ball_tree', 'kd_tree'],
                 }


    grid_search = GridSearchCV(KNeighborsRegressor(), 
                               param_grid,
                               cv=5
                              )
    
    train,test,X_train,y_train,X_test,y_test = data_split(df_battery, 0.2, output, 66)
    X_train_scaled, X_test_scaled = data_scale(X_train, X_test)
    grid_search.fit(X_train_scaled, y_train)
    
    best_knn_hp = list(grid_search.best_params_.values())
    alg = best_knn_hp[0]
    n_neigh = best_knn_hp[1]
    weight = best_knn_hp[2]
    
    return  alg, n_neigh, weight

In [14]:
# alg, n_neigh, weight = grid_knn_hp(1, 51, df_battery, 'Discharge_Capacity (Ah)')
# print(alg, n_neigh, weight)

### Train machine learning models

In [15]:
def knn_train(df, output):
    np.random.seed(66)
    
    alg, n_neigh, weight = grid_knn_hp(1, 51, df, output)
    KNN_model_train =KNeighborsRegressor( algorithm = alg, n_neighbors=n_neigh, weights = weight)

    train_results =[]
    train_results_name =['Experimental','Predicted ','RMSE']

    kf = kfold(10,66)

    train,test,X_train,y_train,X_test,y_test = data_split(df_battery, 0.2, output, 66)
    X_train_scaled, X_test_scaled = data_scale(X_train, X_test)
    
    for train_index, test_index in kf.split(X_train_scaled):
        X_training, X_validate = X_train_scaled.iloc[train_index], X_train_scaled.iloc[test_index]
        y_training, y_validate = y_train.iloc[train_index], y_train.iloc[test_index]

        np.random.seed(66)
        KNN_model_train.fit(X_training,y_training)
        y_train_predicted = KNN_model_train.predict(X_validate)

        mse = mean_squared_error(y_train_predicted, y_validate)
        RMSE = np.sqrt(mse)

        train_results.append([y_validate,y_train_predicted,RMSE])
    
    Train_results = pd.DataFrame (train_results,columns=train_results_name)
    RMSE_avg = np.average(Train_results['RMSE'])
    
    return RMSE_avg

In [16]:
# knn_train_RMSE_avg = knn_train(df_battery, 'Discharge_Capacity (Ah)')
# knn_train_RMSE_avg

### Test Machine learning models

In [17]:
def knn_test(df, output):
    np.random.seed(66)
    
    alg, n_neigh, weight = grid_knn_hp(1, 51, df, output)
    KNN_model = KNeighborsRegressor( algorithm = alg, n_neighbors=n_neigh, weights = weight)
    
    train,test,X_train,y_train,X_test,y_test = data_split(df_battery, 0.2, output, 66)
    X_train_scaled, X_test_scaled = data_scale(X_train, X_test)
    
    KNN_model.fit(X_train_scaled,y_train)
    y_predict=KNN_model.predict(X_test_scaled)
    
    mse = mean_squared_error(y_predict, y_test)
    RMSE = np.sqrt(mse)
    return RMSE

In [18]:
# knn_test_RMSE = knn_test(df_battery, 'Discharge_Capacity (Ah)')
# knn_test_RMSE

### Prepare user battery set for predictor

In [19]:
def X_set_prep(df, desc):
    ohe = OneHotEncoder()
    ACE = df.loc[:,['anode','cathode','electrolyte']]
    ohe.fit_transform(ACE)
    
    filehandler = open("ohe.obj","wb")
    pickle.dump(ohe,filehandler)
    filehandler.close()

    file = open("ohe.obj",'rb')
    ohe_loaded = pickle.load(file)
    file.close()

    ec = df_battery['electrolyte'].unique()
    column_names = ['anode','cathode','electrolyte','Cycle','temperature','discharge_crate']
    X_set = pd.DataFrame(columns = column_names)
    for i in range(len(ec)):
        X_set.loc[i] = [desc[0],desc[1],ec[i],desc[2],desc[3],desc[4]]
    
    ace = X_set.loc[:,['anode','cathode','electrolyte']]
    ace = ohe_loaded.transform(ace)
    ace = pd.DataFrame(ace.toarray())
    
    X_num = X_set.loc[:,['Cycle','temperature','discharge_crate']]

    df_ace_col_name = ['A1','C1','C2','C3','E1','E2','E3']
    for i in range(len(ace.columns)):
        ace = ace.rename({ace.columns[i]: df_ace_col_name[i]}, axis=1) 
    X_set = pd.concat([ace, X_num],axis=1)
    
    return X_set

In [20]:
# description = ['graphite','NMC', 150, 25, 1.0] #What I expect to come out of GUI
# X_set = X_set_prep(df_battery, description)
# X_set

### Predictor

In [21]:
def df_prep(df, output):    
    df_encoded_ACE = ohe_ACE(df)
    df_enbattery = ohe_dataframe(df)
    
    X_bat = df_enbattery.loc[:,['A1','C1','C2','C3','E1','E2','E3','Cycle','temperature','discharge_crate']]
    y_bat = df_enbattery.loc[:,[output]]
    
    return X_bat, y_bat

In [22]:
# X_bat, y_bat = df_prep(df_battery, 'Discharge_Capacity (Ah)')
# print(X_bat)
# print(y_bat)

In [23]:
def battery_predictor(df, output, desc):
    X_bat, y_bat = df_prep(df, output)
    X_set = X_set_prep(df, desc)
    X_bat_scaled, X_set_scaled = data_scale(X_bat, X_set)
    
    best_knn_hp = grid_knn_hp(1, 51, df, output)
    alg = best_knn_hp[0]
    n_neigh = best_knn_hp[1]
    weight = best_knn_hp[2]

    np.random.seed(66)
    KNN_model =KNeighborsRegressor(algorithm=alg, n_neighbors=n_neigh, weights=weight)
    
    KNN_model.fit(X_bat_scaled,y_bat)
    y_predict=KNN_model.predict(X_set_scaled)
    return y_predict

In [24]:
# description = ['graphite','NMC', zzzz, 15, 1.0] #What I expect to come out of GUI
# battery_predictor(df_battery, 'Discharge_Capacity (Ah)', description)

# **testing____**

In [25]:
X_bat, y_bat = df_prep(df_battery, 'Discharge_Capacity (Ah)')
y_bat

Unnamed: 0,Discharge_Capacity (Ah)
0,1.033
1,1.032
2,1.031
3,1.030
4,1.028
...,...
315,2.527
316,2.506
317,2.482
318,2.478


In [26]:
description = ['graphite','NMC', 50, 15, 1.0]
X_set = X_set_prep(df_battery, description)
X_set

Unnamed: 0,A1,C1,C2,C3,E1,E2,E3,Cycle,temperature,discharge_crate
0,1.0,0.0,0.0,1.0,1.0,0.0,0.0,50,15,1.0
1,1.0,0.0,0.0,1.0,0.0,0.0,1.0,50,15,1.0
2,1.0,0.0,0.0,1.0,0.0,1.0,0.0,50,15,1.0


In [27]:
X_bat_scaled, X_set_scaled = data_scale(X_bat, X_set)
X_bat_scaled

Unnamed: 0,0,1,2,3,4,5,6,7,8,9
0,0.0,1.290994,-0.625543,-0.723747,1.290994,-0.723747,-0.625543,-1.566699,-1.761410,-0.496252
1,0.0,1.290994,-0.625543,-0.723747,1.290994,-0.723747,-0.625543,-1.218544,-1.761410,-0.496252
2,0.0,1.290994,-0.625543,-0.723747,1.290994,-0.723747,-0.625543,-0.870388,-1.761410,-0.496252
3,0.0,1.290994,-0.625543,-0.723747,1.290994,-0.723747,-0.625543,-0.522233,-1.761410,-0.496252
4,0.0,1.290994,-0.625543,-0.723747,1.290994,-0.723747,-0.625543,-0.174078,-1.761410,-0.496252
...,...,...,...,...,...,...,...,...,...,...
315,0.0,-0.774597,-0.625543,1.381699,-0.774597,1.381699,-0.625543,0.174078,1.654658,-0.496252
316,0.0,-0.774597,-0.625543,1.381699,-0.774597,1.381699,-0.625543,0.522233,1.654658,-0.496252
317,0.0,-0.774597,-0.625543,1.381699,-0.774597,1.381699,-0.625543,0.870388,1.654658,-0.496252
318,0.0,-0.774597,-0.625543,1.381699,-0.774597,1.381699,-0.625543,1.218544,1.654658,-0.496252


In [28]:
best_knn_hp = grid_knn_hp(1, 51, df_battery, 'Discharge_Capacity (Ah)')
alg = best_knn_hp[0]
n_neigh = best_knn_hp[1]
weight = best_knn_hp[2]

print(alg, n_neigh, weight)

auto 11 uniform


In [29]:

np.random.seed(66)
KNN_model =KNeighborsRegressor(algorithm=alg, n_neighbors=n_neigh, weights=weight)

KNN_model.fit(X_bat_scaled,y_bat)
y_predict=KNN_model.predict(X_set_scaled)

y_predict

array([[1.02181818],
       [2.65581818],
       [2.49318182]])

# **testing____**

### Report Generator

In [30]:
def report_gen(df, desc):
    ec = df_battery['electrolyte'].unique()
    column_names = ['anode','cathode','electrolyte','Cycle','temperature','discharge_crate']
    user_set = pd.DataFrame(columns = column_names)
    for i in range(len(ec)):
        user_set.loc[i] = [description[0],description[1],ec[i],description[2],description[3],description[4]]
    
    CC = pd.DataFrame(battery_predictor(df, 'Charge_Capacity (Ah)', desc), columns = ['Charge_Capacity (Ah)'])
    DC = pd.DataFrame(battery_predictor(df, 'Discharge_Capacity (Ah)', desc), columns = ['Discharge_Capacity (Ah)'])
    CE = pd.DataFrame(battery_predictor(df, 'Charge_Energy (Wh)', desc), columns = ['Charge_Energy (Wh)'])
    DE = pd.DataFrame(battery_predictor(df, 'Discharge_Energy (Wh)', desc), columns = ['Discharge_Energy (Wh)'])
    CEff = pd.DataFrame(battery_predictor(df, 'Coulombic_Efficiency (%)', desc), columns = ['Coulombic_Efficiency (%)'])
    EEff = pd.DataFrame(battery_predictor(df, 'Energy_Efficiency (%)', desc), columns = ['Energy_Efficiency (%)'])
    
    report = pd.concat([user_set,CC,DC,CE,DE,CEff,EEff],axis=1)
    return report

In [31]:
description = ['graphite','NMC', 50, 15, 1.0]
report_gen(df_battery, description)

Unnamed: 0,anode,cathode,electrolyte,Cycle,temperature,discharge_crate,Charge_Capacity (Ah),Discharge_Capacity (Ah),Charge_Energy (Wh),Discharge_Energy (Wh),Coulombic_Efficiency (%),Energy_Efficiency (%)
0,graphite,NMC,A123,50,15,1.0,1.020091,1.021818,3.407818,3.219,100.273117,94.334144
1,graphite,NMC,Pan,50,15,1.0,2.653818,2.655818,10.363091,8.552083,100.260541,82.723206
2,graphite,NMC,LGC,50,15,1.0,2.491909,2.493182,9.676909,8.576583,99.7636,87.808752


In [32]:
description = ['graphite','LFP', 50, 25, 1.0]
report_gen(df_battery, description)

Unnamed: 0,anode,cathode,electrolyte,Cycle,temperature,discharge_crate,Charge_Capacity (Ah),Discharge_Capacity (Ah),Charge_Energy (Wh),Discharge_Energy (Wh),Coulombic_Efficiency (%),Energy_Efficiency (%)
0,graphite,LFP,A123,50,25,1.0,1.020091,1.021818,3.407818,3.219,100.273117,94.334144
1,graphite,LFP,Pan,50,25,1.0,2.653818,2.655818,10.363091,8.552083,100.260541,82.723206
2,graphite,LFP,LGC,50,25,1.0,2.491909,2.493182,9.676909,8.576583,99.7636,87.808752


In [33]:
description = ['graphite','LFP', 350, 25, 0.5]
report_gen(df_battery, description)

Unnamed: 0,anode,cathode,electrolyte,Cycle,temperature,discharge_crate,Charge_Capacity (Ah),Discharge_Capacity (Ah),Charge_Energy (Wh),Discharge_Energy (Wh),Coulombic_Efficiency (%),Energy_Efficiency (%)
0,graphite,LFP,A123,350,25,0.5,1.020091,1.021818,3.407818,3.219,100.273117,94.334144
1,graphite,LFP,Pan,350,25,0.5,2.653818,2.655818,10.363091,8.552083,100.260541,82.723206
2,graphite,LFP,LGC,350,25,0.5,2.491909,2.493182,9.676909,8.576583,99.7636,87.808752


**the results are all the same?**

### Plot Generator