In [1]:
import numpy as np
import pandas as pd
import os

import pickle
import sklearn
from sklearn.preprocessing import OneHotEncoder
from sklearn.neighbors import KNeighborsRegressor
from sklearn.model_selection import KFold 
from sklearn.model_selection import GridSearchCV
from sklearn.preprocessing import StandardScaler,scale,MinMaxScaler
from sklearn.metrics import mean_squared_error

import unittest

In [7]:
df_battery = pd.read_csv('../Battery Data/Battery_Dataset.csv')
df_battery.head()

Unnamed: 0,cell_id,study,Cycles,anode,cathode,electrolyte,Cycle,temperature,min_soc,max_soc,...,Min_Current (A),Max_Current (A),Min_Voltage (V),Max_Voltage (V),Charge_Capacity (Ah),Discharge_Capacity (Ah),Charge_Energy (Wh),Discharge_Energy (Wh),Coulombic_Efficiency (%),Energy_Efficiency (%)
0,SNL_18650_LFP_15C_0-100_0.5-1C_a,snl,4551.0,graphite,LFP,A123,50,15,0,100,...,-1.1,0.55,1.998,3.6,1.036,1.033,3.495,3.213,99.710425,91.93133
1,SNL_18650_LFP_15C_0-100_0.5-1C_a,snl,4551.0,graphite,LFP,A123,100,15,0,100,...,-1.1,0.55,1.996,3.6,1.035,1.032,3.492,3.212,99.710145,91.981672
2,SNL_18650_LFP_15C_0-100_0.5-1C_a,snl,4551.0,graphite,LFP,A123,150,15,0,100,...,-1.1,0.55,1.998,3.6,1.034,1.031,3.488,3.21,99.709865,92.029817
3,SNL_18650_LFP_15C_0-100_0.5-1C_a,snl,4551.0,graphite,LFP,A123,200,15,0,100,...,-1.1,0.55,1.994,3.6,1.033,1.03,3.49,3.193,99.709584,91.489971
4,SNL_18650_LFP_15C_0-100_0.5-1C_a,snl,4551.0,graphite,LFP,A123,250,15,0,100,...,-1.1,0.55,1.998,3.6,1.031,1.028,3.495,3.169,99.70902,90.672389


### One Hot Encoding dataset

In [8]:
def ohe_ACE(df):
    ohe = OneHotEncoder()
    ACE = df.loc[:,['anode','cathode','electrolyte']]
    ACE = ohe.fit_transform(ACE)
    df_encoded_ACE = pd.DataFrame(ACE.toarray())
    return df_encoded_ACE

In [9]:
df_encoded_ACE = ohe_ACE(df_battery)
df_encoded_ACE

Unnamed: 0,0,1,2,3,4,5,6
0,1.0,1.0,0.0,0.0,1.0,0.0,0.0
1,1.0,1.0,0.0,0.0,1.0,0.0,0.0
2,1.0,1.0,0.0,0.0,1.0,0.0,0.0
3,1.0,1.0,0.0,0.0,1.0,0.0,0.0
4,1.0,1.0,0.0,0.0,1.0,0.0,0.0
...,...,...,...,...,...,...,...
315,1.0,0.0,0.0,1.0,0.0,1.0,0.0
316,1.0,0.0,0.0,1.0,0.0,1.0,0.0
317,1.0,0.0,0.0,1.0,0.0,1.0,0.0
318,1.0,0.0,0.0,1.0,0.0,1.0,0.0


In [10]:
def ohe_dataframe(df):
    df_ACE_col_name = ['A1','C1','C2','C3','E1','E2','E3']
    df_encoded_ACE = ohe_ACE(df)
    for i in range(len(df_encoded_ACE.columns)):
        df_encoded_ACE = df_encoded_ACE.rename({df_encoded_ACE.columns[i]: df_ACE_col_name[i]}, axis=1) 
    df_ohe = pd.concat([df_encoded_ACE, df],axis=1)
    return df_ohe

In [11]:
df_enbattery = ohe_dataframe(df_battery)
df_enbattery

Unnamed: 0,A1,C1,C2,C3,E1,E2,E3,cell_id,study,Cycles,...,Min_Current (A),Max_Current (A),Min_Voltage (V),Max_Voltage (V),Charge_Capacity (Ah),Discharge_Capacity (Ah),Charge_Energy (Wh),Discharge_Energy (Wh),Coulombic_Efficiency (%),Energy_Efficiency (%)
0,1.0,1.0,0.0,0.0,1.0,0.0,0.0,SNL_18650_LFP_15C_0-100_0.5-1C_a,snl,4551.00,...,-1.1,0.55,1.998,3.6,1.036,1.033,3.495,3.213,99.710425,91.931330
1,1.0,1.0,0.0,0.0,1.0,0.0,0.0,SNL_18650_LFP_15C_0-100_0.5-1C_a,snl,4551.00,...,-1.1,0.55,1.996,3.6,1.035,1.032,3.492,3.212,99.710145,91.981672
2,1.0,1.0,0.0,0.0,1.0,0.0,0.0,SNL_18650_LFP_15C_0-100_0.5-1C_a,snl,4551.00,...,-1.1,0.55,1.998,3.6,1.034,1.031,3.488,3.210,99.709865,92.029817
3,1.0,1.0,0.0,0.0,1.0,0.0,0.0,SNL_18650_LFP_15C_0-100_0.5-1C_a,snl,4551.00,...,-1.1,0.55,1.994,3.6,1.033,1.030,3.490,3.193,99.709584,91.489971
4,1.0,1.0,0.0,0.0,1.0,0.0,0.0,SNL_18650_LFP_15C_0-100_0.5-1C_a,snl,4551.00,...,-1.1,0.55,1.998,3.6,1.031,1.028,3.495,3.169,99.709020,90.672389
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
315,1.0,0.0,0.0,1.0,0.0,1.0,0.0,SNL_18650_NMC_35C_0-100_0.5-2C_a,snl,786,...,-6.0,1.50,1.996,4.2,2.529,2.527,9.814,8.388,99.920917,85.469737
316,1.0,0.0,0.0,1.0,0.0,1.0,0.0,SNL_18650_NMC_35C_0-100_0.5-2C_a,snl,786,...,-6.0,1.50,1.994,4.2,2.518,2.506,9.769,8.332,99.523431,85.290204
317,1.0,0.0,0.0,1.0,0.0,1.0,0.0,SNL_18650_NMC_35C_0-100_0.5-2C_a,snl,786,...,-6.0,1.50,1.995,4.2,2.484,2.482,9.650,8.197,99.919485,84.943005
318,1.0,0.0,0.0,1.0,0.0,1.0,0.0,SNL_18650_NMC_35C_0-100_0.5-2C_a,snl,786,...,-6.0,1.50,1.995,4.2,2.477,2.478,9.617,8.208,100.040371,85.348861


### Split Data

In [12]:
def data_split (df, test_ratio, output, seed):
    np.random.seed(seed)
    
    df_enbattery = ohe_dataframe(df)

    total_row = df_enbattery.shape[0]
    test_row = round(total_row *test_ratio)
    train_row = total_row -test_row
    
    indices =np.random.permutation(total_row)
    train_indx, test_idx =indices[:train_row], indices[train_row:]
    train,test = df_enbattery.iloc[train_indx,:], df_enbattery.iloc[test_idx,:]
    
    X_test = test[['A1','C1','C2','C3','E1','E2','E3','Cycle','temperature','discharge_crate']]
    y_test = test[[output]]
    
    X_train = train[['A1','C1','C2','C3','E1','E2','E3','Cycle','temperature','discharge_crate']]
    y_train = train[[output]]
    
    return train,test,X_train,y_train,X_test,y_test

In [13]:
train,test,X_train,y_train,X_test,y_test = data_split(df_battery, 0.2, 'Discharge_Capacity (Ah)', 66)

### Kfolds

In [14]:
def kfold(n_split,rand_state):
    kf = KFold(n_splits=n_split, random_state=rand_state, shuffle= True)
    return kf

In [15]:
kf = kfold(10,66)

### Scale train and test sets

In [16]:
def data_scale(X_train, X_test):
        
    scaler = StandardScaler(with_mean=True,with_std=True)

    scaler.fit(X_train)
    X_train_scaled = pd.DataFrame(scaler.transform(X_train))
    
    scaler.fit(X_test)
    X_test_scaled = pd.DataFrame(scaler.transform(X_test))
    
    return X_train_scaled,X_test_scaled

In [17]:
train,test,X_train,y_train,X_test,y_test = data_split(df_battery, 0.2, 'Discharge_Capacity (Ah)', 66)
X_train_scaled, X_test_scaled = data_scale(X_train, X_test)
X_test_scaled

Unnamed: 0,0,1,2,3,4,5,6,7,8,9
0,0.0,-0.649786,1.381699,-0.748983,-0.649786,-0.748983,1.381699,1.449166,1.521730,-0.479632
1,0.0,1.538968,-0.723747,-0.748983,1.538968,-0.748983,-0.723747,0.751823,-0.049088,1.712972
2,0.0,-0.649786,-0.723747,1.335144,-0.649786,1.335144,-0.723747,0.403152,-1.619906,-0.479632
3,0.0,1.538968,-0.723747,-0.748983,1.538968,-0.748983,-0.723747,0.054480,-1.619906,0.616670
4,0.0,-0.649786,1.381699,-0.748983,-0.649786,-0.748983,1.381699,0.054480,1.521730,-0.479632
...,...,...,...,...,...,...,...,...,...,...
59,0.0,-0.649786,1.381699,-0.748983,-0.649786,-0.748983,1.381699,-0.991535,-1.619906,-0.479632
60,0.0,1.538968,-0.723747,-0.748983,1.538968,-0.748983,-0.723747,-1.340206,-0.049088,1.712972
61,0.0,1.538968,-0.723747,-0.748983,1.538968,-0.748983,-0.723747,1.449166,1.521730,0.616670
62,0.0,-0.649786,-0.723747,1.335144,-0.649786,1.335144,-0.723747,0.403152,1.521730,-0.479632


### GridSearchCV for Hyperparameters

In [18]:
def grid_knn_hp(lower, upper, df_battery, output):
    param_grid = {'n_neighbors':range(lower,upper),
                  'weights':['uniform', 'distance'],
                  'algorithm':['auto', 'ball_tree', 'kd_tree'],
                 }


    grid_search = GridSearchCV(KNeighborsRegressor(), 
                               param_grid,
                               cv=5
                              )
    
    train,test,X_train,y_train,X_test,y_test = data_split(df_battery, 0.2, output, 66)
    X_train_scaled, X_test_scaled = data_scale(X_train, X_test)
    grid_search.fit(X_train_scaled, y_train)
    
    best_knn_hp = list(grid_search.best_params_.values())
    alg = best_knn_hp[0]
    n_neigh = best_knn_hp[1]
    weight = best_knn_hp[2]
    
    return  alg, n_neigh, weight

In [19]:
alg, n_neigh, weight = grid_knn_hp(1, 51, df_battery, 'Discharge_Capacity (Ah)')
print(alg, n_neigh, weight)

ball_tree 12 uniform


### Train machine learning models

In [21]:
def knn_train(df, output):
    np.random.seed(66)
    
    alg, n_neigh, weight = grid_knn_hp(1, 51, df, output)
    KNN_model_train =KNeighborsRegressor( algorithm = alg, n_neighbors=n_neigh, weights = weight)

    train_results =[]
    train_results_name =['Experimental','Predicted ','RMSE']

    kf = kfold(10,66)

    train,test,X_train,y_train,X_test,y_test = data_split(df_battery, 0.2, output, 66)
    X_train_scaled, X_test_scaled = data_scale(X_train, X_test)
    
    for train_index, test_index in kf.split(X_train_scaled):
        X_training, X_validate = X_train_scaled.iloc[train_index], X_train_scaled.iloc[test_index]
        y_training, y_validate = y_train.iloc[train_index], y_train.iloc[test_index]

        np.random.seed(66)
        KNN_model_train.fit(X_training,y_training)
        y_train_predicted = KNN_model_train.predict(X_validate)

        mse = mean_squared_error(y_train_predicted, y_validate)
        RMSE = np.sqrt(mse)

        train_results.append([y_validate,y_train_predicted,RMSE])
    
    Train_results = pd.DataFrame (train_results,columns=train_results_name)
    RMSE_avg = np.average(Train_results['RMSE'])
    
    return RMSE_avg

In [22]:
knn_train_RMSE_avg = knn_train(df_battery, 'Discharge_Capacity (Ah)')
knn_train_RMSE_avg

0.5417574520648624

### Test Machine learning models

In [23]:
def knn_test(df, output):
    np.random.seed(66)
    
    alg, n_neigh, weight = grid_knn_hp(1, 51, df, output)
    KNN_model = KNeighborsRegressor( algorithm = alg, n_neighbors=n_neigh, weights = weight)
    
    train,test,X_train,y_train,X_test,y_test = data_split(df_battery, 0.2, output, 66)
    X_train_scaled, X_test_scaled = data_scale(X_train, X_test)
    
    KNN_model.fit(X_train_scaled,y_train)
    y_predict=KNN_model.predict(X_test_scaled)
    
    mse = mean_squared_error(y_predict, y_test)
    RMSE = np.sqrt(mse)
    return RMSE

In [24]:
knn_test_RMSE = knn_test(df_battery, 'Discharge_Capacity (Ah)')
knn_test_RMSE

0.5379997712882878

### Prepare user battery set for predictor

In [25]:
def X_set_in(df):
    ac = df_battery['anode'].unique()
    cc = df_battery['cathode'].unique()
    ec = df_battery['electrolyte'].unique()
    cycle = df_battery['Cycle'].unique()
    tc = df_battery['temperature'].unique()
    cr = df_battery['discharge_crate'].unique()

    X_temp = []
    for a in range(len(ac)):
        for b in range(len(cc)):
            for c in range(len(ec)):
                for d in range(len(cycle)):
                    for e in range(len(tc)):
                        for f in range(len(cr)):
                            X_temp.append([ac[a],cc[b],ec[c],cycle[d],tc[e],cr[f]])
                        
    column_names = ['anode','cathode','electrolyte','Cycle','temperature','discharge_crate']
    X_set = pd.DataFrame(X_temp, columns = column_names)
        
    return X_set

In [26]:
X_set = X_set_in(df_battery)
X_set

Unnamed: 0,anode,cathode,electrolyte,Cycle,temperature,discharge_crate
0,graphite,LFP,A123,50,15,1.0
1,graphite,LFP,A123,50,15,2.0
2,graphite,LFP,A123,50,15,0.5
3,graphite,LFP,A123,50,15,3.0
4,graphite,LFP,A123,50,25,1.0
...,...,...,...,...,...,...
1075,graphite,NMC,LGC,500,25,3.0
1076,graphite,NMC,LGC,500,35,1.0
1077,graphite,NMC,LGC,500,35,2.0
1078,graphite,NMC,LGC,500,35,0.5


In [27]:
def X_set_en(df):
    X_set = X_set_in(df)
    
    ohe = OneHotEncoder()
    ACE = df.loc[:,['anode','cathode','electrolyte']]
    ohe.fit_transform(ACE)
    
    filehandler = open("ohe.obj","wb")
    pickle.dump(ohe,filehandler)
    filehandler.close()
    file = open("ohe.obj",'rb')
    ohe_loaded = pickle.load(file)
    file.close()

    ace = X_set.loc[:,['anode','cathode','electrolyte']]
    ace = ohe_loaded.transform(ace)
    ace = pd.DataFrame(ace.toarray())
    
    X_num = X_set.loc[:,['Cycle','temperature','discharge_crate']]

    df_ace_col_name = ['A1','C1','C2','C3','E1','E2','E3']
    for i in range(len(ace.columns)):
        ace = ace.rename({ace.columns[i]: df_ace_col_name[i]}, axis=1) 
    X_set = pd.concat([ace, X_num],axis=1)
    return X_set

In [28]:
X_set = X_set_en(df_battery)
X_set

Unnamed: 0,A1,C1,C2,C3,E1,E2,E3,Cycle,temperature,discharge_crate
0,1.0,1.0,0.0,0.0,1.0,0.0,0.0,50,15,1.0
1,1.0,1.0,0.0,0.0,1.0,0.0,0.0,50,15,2.0
2,1.0,1.0,0.0,0.0,1.0,0.0,0.0,50,15,0.5
3,1.0,1.0,0.0,0.0,1.0,0.0,0.0,50,15,3.0
4,1.0,1.0,0.0,0.0,1.0,0.0,0.0,50,25,1.0
...,...,...,...,...,...,...,...,...,...,...
1075,1.0,0.0,0.0,1.0,0.0,1.0,0.0,500,25,3.0
1076,1.0,0.0,0.0,1.0,0.0,1.0,0.0,500,35,1.0
1077,1.0,0.0,0.0,1.0,0.0,1.0,0.0,500,35,2.0
1078,1.0,0.0,0.0,1.0,0.0,1.0,0.0,500,35,0.5


### Predictor

In [29]:
def df_prep(df, output):    
    df_encoded_ACE = ohe_ACE(df)
    df_enbattery = ohe_dataframe(df)
    
    X_bat = df_enbattery.loc[:,['A1','C1','C2','C3','E1','E2','E3','Cycle','temperature','discharge_crate']]
    y_bat = df_enbattery.loc[:,[output]]
    
    return X_bat, y_bat

In [30]:
X_bat, y_bat = df_prep(df_battery, 'Discharge_Capacity (Ah)')
print(X_bat)
print(y_bat)

      A1   C1   C2   C3   E1   E2   E3  Cycle  temperature  discharge_crate
0    1.0  1.0  0.0  0.0  1.0  0.0  0.0     50           15              1.0
1    1.0  1.0  0.0  0.0  1.0  0.0  0.0    100           15              1.0
2    1.0  1.0  0.0  0.0  1.0  0.0  0.0    150           15              1.0
3    1.0  1.0  0.0  0.0  1.0  0.0  0.0    200           15              1.0
4    1.0  1.0  0.0  0.0  1.0  0.0  0.0    250           15              1.0
..   ...  ...  ...  ...  ...  ...  ...    ...          ...              ...
315  1.0  0.0  0.0  1.0  0.0  1.0  0.0    300           35              1.0
316  1.0  0.0  0.0  1.0  0.0  1.0  0.0    350           35              1.0
317  1.0  0.0  0.0  1.0  0.0  1.0  0.0    400           35              1.0
318  1.0  0.0  0.0  1.0  0.0  1.0  0.0    450           35              1.0
319  1.0  0.0  0.0  1.0  0.0  1.0  0.0    500           35              1.0

[320 rows x 10 columns]
     Discharge_Capacity (Ah)
0                      1.033
1    

In [31]:
def battery_predictor(df, output):
    X_bat, y_bat = df_prep(df, output)
    X_set = X_set_en(df)
    X_bat_scaled, X_set_scaled = data_scale(X_bat, X_set)
    
    best_knn_hp = grid_knn_hp(1, 51, df, output)
    alg = best_knn_hp[0]
    n_neigh = best_knn_hp[1]
    weight = best_knn_hp[2]

    np.random.seed(66)
    KNN_model =KNeighborsRegressor(algorithm=alg, n_neighbors=n_neigh, weights=weight)
    
    KNN_model.fit(X_bat_scaled,y_bat)
    y_predict=KNN_model.predict(X_set_scaled)
    return y_predict

In [32]:
y_pred = battery_predictor(df_battery, 'Discharge_Capacity (Ah)')
y_pred

array([[0.82116667],
       [1.03016667],
       [0.82316667],
       ...,
       [2.43816667],
       [2.2115    ],
       [1.73475   ]])

### Report Generator

In [33]:
def report_gen(df):
    in_set = X_set_in(df)
    
    CC = pd.DataFrame(battery_predictor(df, 'Charge_Capacity (Ah)'), columns = ['Charge_Capacity (Ah)'])
    DC = pd.DataFrame(battery_predictor(df, 'Discharge_Capacity (Ah)'), columns = ['Discharge_Capacity (Ah)'])
    CE = pd.DataFrame(battery_predictor(df, 'Charge_Energy (Wh)'), columns = ['Charge_Energy (Wh)'])
    DE = pd.DataFrame(battery_predictor(df, 'Discharge_Energy (Wh)'), columns = ['Discharge_Energy (Wh)'])
    CEff = pd.DataFrame(battery_predictor(df, 'Coulombic_Efficiency (%)'), columns = ['Coulombic_Efficiency (%)'])
    EEff = pd.DataFrame(battery_predictor(df, 'Energy_Efficiency (%)'), columns = ['Energy_Efficiency (%)'])
    
    report = pd.concat([in_set,CC,DC,CE,DE,CEff,EEff],axis=1)
    return report

In [34]:
pd.set_option("display.max_rows", None, "display.max_columns", None)
report = report_gen(df_battery)
report

Unnamed: 0,anode,cathode,electrolyte,Cycle,temperature,discharge_crate,Charge_Capacity (Ah),Discharge_Capacity (Ah),Charge_Energy (Wh),Discharge_Energy (Wh),Coulombic_Efficiency (%),Energy_Efficiency (%)
0,graphite,LFP,A123,50,15,1.0,0.821,0.821167,2.756583,2.602583,100.200532,92.850664
1,graphite,LFP,A123,50,15,2.0,1.030083,1.030167,3.46225,3.17725,100.041947,90.123165
2,graphite,LFP,A123,50,15,0.5,0.823,0.823167,2.763583,2.612417,100.196175,93.147441
3,graphite,LFP,A123,50,15,3.0,0.7335,0.7335,2.456667,2.23375,100.00671,90.545102
4,graphite,LFP,A123,50,25,1.0,0.712917,0.714667,2.384917,2.296083,100.196184,96.060598
5,graphite,LFP,A123,50,25,2.0,0.899,0.899583,3.003167,2.804917,100.073503,93.906705
6,graphite,LFP,A123,50,25,0.5,0.712917,0.714667,2.384917,2.296083,100.208908,96.865115
7,graphite,LFP,A123,50,25,3.0,0.703333,0.702917,2.34925,2.164167,100.025904,91.844297
8,graphite,LFP,A123,50,35,1.0,0.869167,0.869833,2.908167,2.756333,100.161855,94.780514
9,graphite,LFP,A123,50,35,2.0,1.043833,1.043833,3.491417,3.25925,100.048275,92.784812


### Plot Generator