This is the main shared file where we will execute our code. 

In [1]:
# load in our packages
# Import cell

import pandas as pd
import numpy as np
import os
import json
import matplotlib.pyplot as plt
import scipy

# from pymatgen.core.structure import Structure
# from pymatgen.core.composition import Composition

# from matminer.datasets import load_dataset
# from matminer.featurizers.composition.composite import ElementProperty
# from matminer.featurizers.composition.element import ElementFraction
# from matminer.featurizers.structure.rdf import RadialDistributionFunction
# from matminer.featurizers.structure.matrix import CoulombMatrix, SineCoulombMatrix


from scipy.stats import pearsonr, skew, kurtosis
from scipy.spatial import distance
from scipy.interpolate import CubicSpline
# from scipy.stats import skew, kurtosis

from sklearn.model_selection import train_test_split
from sklearn.inspection import permutation_importance
from sklearn.impute import SimpleImputer
from sklearn.preprocessing import MinMaxScaler, StandardScaler
from sklearn.decomposition import PCA
from sklearn.model_selection import KFold, StratifiedKFold, GridSearchCV
from sklearn.linear_model import LogisticRegression
from sklearn.linear_model import LinearRegression, Ridge, Lasso

from sklearn.ensemble import AdaBoostRegressor, GradientBoostingRegressor, RandomForestRegressor
from sklearn.ensemble import AdaBoostClassifier, GradientBoostingClassifier, RandomForestClassifier
from sklearn.svm import SVR
from sklearn.svm import SVC
from sklearn.tree import DecisionTreeRegressor
from sklearn.tree import DecisionTreeClassifier
from sklearn.kernel_ridge import KernelRidge
from sklearn.neural_network import MLPRegressor

from sklearn.pipeline import Pipeline
from sklearn.metrics import mean_squared_error

# plt.style.use('../../modules/files/plot_style.mplstyle')

import warnings
warnings.filterwarnings('ignore')

**Main Code Begins Here**

## Data Import Section:

1. Import csv file containing data of all batteries as main dataframe. 
2. Using loops, import timeseries and summary of cycles for each sample point in main dataframe into two separate dataframes for preprocessing.
3. Calculate statistical parameters using deltaQ10-100(V) for each cell
4. Calculate slope and intercept of linear fit at different portions of the capacity fade vs cycle # curve. 


In [3]:
def check_strictly_increasing(listA):
    flag=-1
    if (all(i < j for i, j in zip(listA, listA[1:]))):
        flag=1
        #print("Yes, List is sorted."
    else:
        flag=0
        #print("No, List is not sorted.")
        
    return flag;
    
def repeats_remove(listV,listQ):
    flag = check_strictly_increasing(listV)
    while flag != 1:
        removed_index_list = []
        for i in range(len(listV)-1):
            if listV[i]==listV[i+1]:
                removed_index_list.append(i+1)
         
        # print(removed_index_list)
        # print(len(listV))
        for i in removed_index_list:
            listV= np.delete(listV, i)
            listQ= np.delete(listQ, i)
        #print(len(listV))

        flag=check_strictly_increasing(listV)
        #print(flag)

    return(listV,listQ)

def VQ_extract(df_temp, cycle_index):
  df_temp = df_temp[df_temp['Cycle_Index']==cycle_index]
  df_temp = df_temp[df_temp['Current']<0]
  #Bug in datafile: -ve current row carrying forward from previous cycle 
  df_temp = df_temp[df_temp['Discharge_Capacity']>0] 
  voltage= df_temp['Voltage'].values
  discharge= df_temp['Discharge_Capacity'].values
  return voltage, discharge
  
def get_timeseries_data(timeseries_path, chemistry_name):
  # curr_dir = os.getcwd()
  # data_dir = os.path.join(curr_dir,'data/SNL/SNL LFP')
  # filename ='SNL_18650_LFP_15C_0-100_0.5-1C_a_timeseries.csv'
  # timeseries_path = os.path.join(path, timeseries_name)

  df=pd.read_csv(timeseries_path)
  col_names = {'Test_Time (s)' : 'Test_Time',
              'Current (A)' : 'Current',
              'Voltage (V)' : 'Voltage',
              'Charge_Capacity (Ah)': 'Charge_Capacity',
              'Discharge_Capacity (Ah)': 'Discharge_Capacity',
              'Charge_Energy (Wh)': 'Charge_Energy',
              'Discharge_Energy (Wh)': 'Discharge_Energy',
              'Environment_Temperature (C)': 'Environment_Temperature',
              'Cell_Temperature (C)': 'Cell_Temperature'}

  df = df.rename(columns=col_names)

  voltage_10, discharge_10 = VQ_extract(df, 10)
  voltage_100, discharge_100 = VQ_extract(df, 100)

  voltage_10, discharge_10 = repeats_remove(np.flip(voltage_10,0), np.flip(discharge_10,0))
  voltage_100, discharge_100 = repeats_remove(np.flip(voltage_100,0), np.flip(discharge_100,0))

  fit_10= CubicSpline(voltage_10, discharge_10)
  fit_100= CubicSpline(voltage_100, discharge_100)
  
  if chemistry_name == 'LFP':
    voltage = np.linspace(2, 3.2, num = 100)
  elif chemistry_name == 'NCA':
    voltage = np.linspace(2.5, 3.7, num = 100)
  elif chemistry_name == 'NMC':
    voltage = np.linspace(2, 3.8, num = 100)

  # print(chemistry_name)
  del_discharge= np.subtract(fit_10(voltage), fit_100(voltage))
  del_discharge= np.absolute(del_discharge)

  mean = np.absolute(np.log(np.average(del_discharge)))
  minimum = np.absolute(np.log(np.amin(del_discharge)))
  variance = np.absolute(np.log(np.var(del_discharge)))
  skewness = np.absolute(np.log(skew(del_discharge)))
  kurt = np.absolute(np.log(kurtosis(del_discharge)))
  # print(mean, minimum, variance, skewness, kurt)
  return mean, minimum, variance, skewness, kurt;

In [14]:
def get_slopes_lifetime(cycle_data_path):
  """
  cycle_data_path_array- (array)- contains the path to the cycle data csv files
  chemistry_name- (String)- LFP, NCA, NMC
  """
  df = pd.read_csv(cycle_data_path)
  col_names = {'Test_Time (s)' : 'Test_Time',
              'Min_Current (A)' : 'Min_Current',
              'Max_Current (A)' : 'Max_Current',
              'Min_Voltage (V)' : 'Min_Voltage',
              'Max_Voltage (V)' : 'Max_Voltage',
              'Charge_Capacity (Ah)': 'Charge_Capacity',
              'Discharge_Capacity (Ah)': 'Discharge_Capacity',
              'Charge_Energy (Wh)': 'Charge_Energy',
              'Discharge_Energy (Wh)': 'Discharge_Energy',}

  df = df.rename(columns=col_names)
  
  #Calculate Lifetime
  cycle_index =df.loc[:,"Cycle_Index"]
  discharge_capacity = df.loc[:,"Discharge_Capacity"] 
  lifetime_factor = 80/100; 
  initial_discharge_capacity = discharge_capacity[4]
  final_discharge_capacity = lifetime_factor*initial_discharge_capacity
  delta = [abs(final_discharge_capacity-discharge_capacity[i]) for i in range(5,len(discharge_capacity))]
  index_lifetime=delta.index(min(delta))

  #Compute slopes
  cycle_2to100 = [i for i in range(2,100)]
  cycle_91to100 = [i for i in range(91,100)]
  discharge_2to100 = [discharge_capacity[i] for i in range(2,100)]
  discharge_91to100 = [discharge_capacity[i] for i in range(91,100)]
  slope_2to100, intercept_2to100, r_value_2to100, p_value_2to100, std_err_2to100 = scipy.stats.linregress(cycle_2to100, discharge_2to100)
  slope_91to100, intercept_91to100, r_value_91to100, p_value_91to100, std_err_91to100 = scipy.stats.linregress(cycle_91to100, discharge_91to100)
   
  return index_lifetime, slope_2to100, intercept_2to100, slope_91to100, intercept_91to100

In [13]:
#Loading main dataframe
data_dir = os.path.join('..', 'data', 'SNL')
cell_list_path = os.path.join(data_dir, 'ba_cell_list_v2.xlsx')
print(cell_list_path)

df_main = pd.read_excel(cell_list_path)
# print(df_main.head())

tags= df_main['cell ID'].values
tags = [s.replace('/', '-') for s in tags]

../data/SNL/ba_cell_list_v2.xlsx


In [17]:
mean_list = []
minimum_list = []
variance_list = []
skewness_list = []
kurt_list = []

index_lifetime_list = []
slope_2to100_list = []
intercept_2to100_list = []
slope_91to100_list = []
intercept_91to100_list = []

path = '../data/SNL'

for filename in tags:
  cycle_data_name = filename + '_cycle_data.csv'
  timeseries_name = filename + '_timeseries.csv'
  chemistry_name = 'SNL '+ filename[10:13]
  data_dir = os.path.join(path, chemistry_name)
  cycle_data_path = os.path.join(data_dir, cycle_data_name)
  timeseries_path = os.path.join(data_dir, timeseries_name)
  
  print(timeseries_name)
  mean, minimum, variance, skewness, kurt = get_timeseries_data(timeseries_path, chemistry_name[4:])
  index_lifetime, slope_2to100, intercept_2to100, slope_91to100, intercept_91to100 = get_slopes_lifetime(cycle_data_path)

  mean_list.append(mean)
  minimum_list.append(minimum)
  variance_list.append(variance)
  skewness_list.append(skewness)
  kurt_list.append(kurt)
  
  index_lifetime_list.append(index_lifetime)
  slope_2to100_list.append(slope_2to100)
  intercept_2to100_list.append(intercept_2to100)
  slope_91to100_list.append(slope_91to100)
  intercept_91to100_list.append(intercept_91to100)

# df_temp = df_main.loc[:1]
# df_temp['Mean_Q10-100'] = mean_list
# df_temp['Minimum_Q10-100'] = minimum_list
# df_temp['Variance_Q10-100'] = variance_list
# df_temp['Skewness_Q10-100'] = skewness_list
# df_temp['Kurt_Q10-100'] = kurt_list

# df_temp['index_lifetime'] = index_lifetime_list
# df_temp['slope_2to100'] = slope_2to100_list
# df_temp['intercept_2to100'] = intercept_2to100_list
# df_temp['slope_91to100'] = slope_91to100_list
# df_temp['intercept_91to100'] = intercept_91to100_list
# df_temp.head()

df_main['Mean_Q10-100'] = mean_list
df_main['Minimum_Q10-100'] = minimum_list
df_main['Variance_Q10-100'] = variance_list
df_main['Skewness_Q10-100'] = skewness_list
df_main['Kurt_Q10-100'] = kurt_list

df_main['index_lifetime'] = index_lifetime_list
df_main['slope_2to100'] = slope_2to100_list
df_main['intercept_2to100'] = intercept_2to100_list
df_main['slope_91to100'] = slope_91to100_list
df_main['intercept_91to100'] = intercept_91to100_list

df_main= df_main.fillna(0)
df_main.head()

SNL_18650_LFP_15C_0-100_0.5-1C_a_timeseries.csv
SNL_18650_LFP_15C_0-100_0.5-1C_b_timeseries.csv
SNL_18650_LFP_15C_0-100_0.5-2C_a_timeseries.csv
SNL_18650_LFP_15C_0-100_0.5-2C_b_timeseries.csv
SNL_18650_LFP_25C_0-100_0.5-0.5C_a_timeseries.csv
SNL_18650_LFP_25C_0-100_0.5-1C_a_timeseries.csv
SNL_18650_LFP_25C_0-100_0.5-1C_b_timeseries.csv
SNL_18650_LFP_25C_0-100_0.5-1C_c_timeseries.csv
SNL_18650_LFP_25C_0-100_0.5-1C_d_timeseries.csv
SNL_18650_LFP_25C_0-100_0.5-2C_a_timeseries.csv
SNL_18650_LFP_25C_0-100_0.5-2C_b_timeseries.csv
SNL_18650_LFP_25C_0-100_0.5-3C_a_timeseries.csv
SNL_18650_LFP_25C_0-100_0.5-3C_b_timeseries.csv
SNL_18650_LFP_25C_0-100_0.5-3C_c_timeseries.csv
SNL_18650_LFP_25C_0-100_0.5-3C_d_timeseries.csv
SNL_18650_LFP_25C_20-80_0.5-0.5C_a_timeseries.csv
SNL_18650_LFP_25C_20-80_0.5-0.5C_b_timeseries.csv
SNL_18650_LFP_25C_20-80_0.5-0.5C_c_timeseries.csv
SNL_18650_LFP_25C_20-80_0.5-0.5C_d_timeseries.csv
SNL_18650_LFP_25C_40-60_0.5-0.5C_a_timeseries.csv
SNL_18650_LFP_25C_40-60_0.5-

Unnamed: 0,cell ID,Cahtode,Anode,Source,Ah,form factor,Temp,max soc,min soc,Charge rate,...,Mean_Q10-100,Minimum_Q10-100,Variance_Q10-100,Skewness_Q10-100,Kurt_Q10-100,index_lifetime,slope_2to100,intercept_2to100,slope_91to100,intercept_91to100
0,SNL_18650_LFP_15C_0-100_0.5/1C_a,LFP,graphite,snl,1.1,18650,15,100,0,0.5,...,5.822216,8.863653,10.375588,1.108221,2.123452,2179,-0.000636,1.075403,0.0,1.032
1,SNL_18650_LFP_15C_0-100_0.5/1C_b,LFP,graphite,snl,1.1,18650,15,100,0,0.5,...,6.089772,11.001877,11.052529,1.202812,2.381854,2328,-0.000644,1.086408,0.0,1.042
2,SNL_18650_LFP_15C_0-100_0.5/2C_a,LFP,graphite,snl,1.1,18650,15,100,0,0.5,...,1.387485,9.516198,0.447229,1.8169,3.678823,2486,-0.000633,1.073278,0.0,1.03
3,SNL_18650_LFP_15C_0-100_0.5/2C_b,LFP,graphite,snl,1.1,18650,15,100,0,0.5,...,1.067402,6.646024,0.031399,1.710467,3.472179,3740,-0.000661,1.070783,0.0,1.025
4,SNL_18650_LFP_25C_0-100_0.5/0.5C_a,LFP,graphite,snl,1.1,18650,25,100,0,0.5,...,4.330093,4.681777,11.592573,1.616025,0.0,2883,-0.000765,1.113444,0.0,1.059


In [18]:
X_path = os.path.join(path, 'X_matrix.csv')
df_main.to_csv(X_path)