In [1]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import scipy 
import os
# import seaborn as sns
# import scipy.stats as stats
import unittest

## Battery Dataset

In [2]:
# Load the metadata table (read from the notebook's working directory).
metadata = pd.read_csv('snl_metadata_cycle_500.csv')

# Directory holding the "Cycle" CSV files; the data directory and this
# notebook must be in the same directory.
data_dir = "Cycle Data"
# os.listdir() returns names in arbitrary order. Later cells line rows up by
# position, so sort the names to make that order reproducible across runs
# and platforms.
files = sorted(os.listdir(data_dir))

# Read the first file only to capture its column layout: head(0) keeps the
# header row and no data rows. os.path.join replaces the hard-coded "\\"
# separator so the path also resolves outside Windows.
df = pd.read_csv(os.path.join(data_dir, files[0]))
df_output = df.head(0)

# Cycle numbers of interest.
cycle = [50.0, 100.0, 150.0, 200.0, 250.0, 300.0, 350.0, 400.0, 450.0, 500.0]

  

In [3]:
# Build the cycle-level dataframe: for every "Cycle" file keep the row whose
# Cycle_Index equals each value in `cycle`, then add the output
# "Coulombic efficiency" column and the cell id taken from the file name.

cell_id_2 = []    # one entry per collected row, in the same order as the rows
cycle_rows = []   # one single-row frame per (file, cycle) pair

for file in files:
    # os.path.join instead of a hard-coded "\\" so the path works on any OS.
    df = pd.read_csv(os.path.join(data_dir, file))

    for target_cycle in cycle:
        df_cycles = df.loc[df['Cycle_Index'] == target_cycle]
        # Exactly one row per (file, cycle) is required so that rows and cell
        # ids stay aligned; fail with a readable message instead of a
        # column-length mismatch further down.
        if len(df_cycles) != 1:
            raise ValueError(
                "expected exactly one row with Cycle_Index == {} in {!r}, found {}".format(
                    target_cycle, file, len(df_cycles)
                )
            )
        cycle_rows.append(df_cycles)
        # The cell id is the file name without its last 15 characters.
        cell_id_2.append(file[:-15])

# Concatenate once after the loop: DataFrame.append no longer exists in
# pandas 2.x, and growing a frame row by row is quadratic.
df_output = pd.concat(cycle_rows, ignore_index=True)

# Derived columns are computed a single time on the finished frame.
df_output['Coulombic_Efficiency (%)'] = df_output['Discharge_Capacity (Ah)']/df_output['Charge_Capacity (Ah)']*100
df_output['cell_id_2'] = cell_id_2

In [4]:
# Put the metadata columns and the collected cycle columns side by side.
# Rows are paired by index label, so both frames must list the same
# cell/cycle combinations in the same order.
df_output = pd.concat(objs=[metadata, df_output], axis='columns')

In [5]:
# Define the "Time" data directory and list its files.
# All "Time Data #" zip files need to be unzipped and all files moved to a "Time Data" directory.
# Ensure the Time data directory and this notebook are in the same directory.
data_dir2 = "Time Data"
# os.listdir() returns names in arbitrary order. The time files are matched
# to dataframe rows by position, so sort the names to make that order
# reproducible across runs and platforms.
files2 = sorted(os.listdir(data_dir2))

# Create the new columns; each cell will later hold an array of samples.
df_output['Voltages (V)'] = None
df_output['Discharge_Capacities (Ah)'] = None

In [None]:
# Pull arrays of data from the "Time" files: for every file and each of ten
# cycle numbers, collect the voltage and discharge-capacity samples.
voltages = []
discharge_capacities = []
for file in files2:
    # os.path.join instead of a hard-coded "\\" so the path works on any OS.
    df2 = pd.read_csv(os.path.join(data_dir2, file))

    for j in range(10):
        # NOTE(review): the cycle number is always read from the first ten
        # rows of df_output, whichever file is being processed; this assumes
        # every cell uses the same ten cycles - confirm.
        df_time = df2.loc[df2['Cycle_Index'] == df_output['Cycle'][j]]
        voltages.append(df_time['Voltage (V)'].values)
        discharge_capacities.append(df_time['Discharge_Capacity (Ah)'].values)

# Populate the columns with each cell holding an array of data.
# The arrays are first placed one by one into object-dtype containers and the
# columns are then assigned whole. The previous chained form
# df_output[col][i] = value may write to a temporary copy and does nothing
# when pandas Copy-on-Write is enabled.
n_rows = len(df_output)
voltage_column = np.empty(n_rows, dtype=object)
capacity_column = np.empty(n_rows, dtype=object)
for row in range(n_rows):
    # Indexing the lists raises IndexError if fewer arrays than rows exist.
    voltage_column[row] = voltages[row]
    capacity_column[row] = discharge_capacities[row]

df_output['Voltages (V)'] = voltage_column
df_output['Discharge_Capacities (Ah)'] = capacity_column

In [None]:
# Sanity check that the rows pulled from the "Cycle" files sit next to the
# matching metadata row: the cell ids must agree, and so must the cycle
# numbers. Every disagreement is printed; nothing is printed when all match.
column_pairs = [('cell_id', 'cell_id_2'), ('Cycle', 'Cycle_Index')]

for metadata_col, cycle_file_col in column_pairs:
    for row in range(len(df_output)):
        from_metadata = df_output[metadata_col][row]
        from_cycle_file = df_output[cycle_file_col][row]
        if from_metadata != from_cycle_file:
            print('not a match', from_metadata, from_cycle_file)

In [None]:
# Load the full Battery dataset: df_output without the two helper columns
# that were only needed to verify row alignment.
# `columns=` is used because pandas 2.x no longer accepts `axis` as a
# positional argument to drop(); drop() returns a new frame, so df_output
# itself is left unchanged.
df_battery = df_output.drop(columns=['cell_id_2', 'Cycle_Index'])

In [None]:
# pd.set_option("display.max_rows", None, "display.max_columns", None)
# df_battery

## Battery Dataset Filtered

In [None]:
# Create a column that indicates replicated experiments: 0 when the cell_id
# ends with 'a', 1 otherwise (replicates have a value of 1).
# The column is assigned in one step. The previous per-row chained form
# df_battery['rep'][i] = value may write to a temporary copy and does nothing
# when pandas Copy-on-Write is enabled.
df_battery['rep'] = [
    0 if cell_id[-1] == 'a' else 1
    for cell_id in df_battery['cell_id'].values
]

#Re

In [None]:
# Drop every row of replicated experiment data (rep == 1) and load the
# filtered Battery dataset: the helper column is removed and the index is
# renumbered from 0.
# `columns=` is used because pandas 2.x no longer accepts `axis` as a
# positional argument to drop().
df_battery_filtered = (
    df_battery[df_battery['rep'] < 0.5]
    .drop(columns='rep')
    .reset_index(drop=True)
)

In [None]:
# df_battery_filtered


## Decision Tree small test

In [None]:
# Replace the known text labels in the anode / cathode / electrolyte columns
# with integer codes, cell by cell. Values that are not listed below are
# left untouched.
category_codes = {
    'anode': {'graphite': 0},
    'cathode': {'LFP': 0, 'NCA': 1, 'NMC': 2},
    'electrolyte': {'A123': 0, 'Pan': 1, 'LGC': 2},
}

for row in range(len(df_battery_filtered)):
    for column, codes in category_codes.items():
        label = df_battery_filtered.at[row, column]
        if label in codes:
            df_battery_filtered.at[row, column] = codes[label]

In [None]:
import sklearn         
from sklearn import tree
from sklearn.tree import DecisionTreeRegressor, DecisionTreeClassifier
from sklearn.ensemble import BaggingRegressor, RandomForestRegressor, GradientBoostingRegressor
from sklearn.model_selection import train_test_split
from sklearn.metrics import r2_score, mean_squared_error, accuracy_score

In [None]:
# Model inputs: one row per experiment, one column per feature.
feature_columns = ['cathode', 'electrolyte', 'Cycle', 'temperature', 'discharge_crate']
x = np.asarray(df_battery_filtered[feature_columns])

# Model target, kept two-dimensional (a single column). Swap the column name
# to predict a different output:
#   'Discharge_Energy (Wh)'
#   'Coulombic_Efficiency (%)'
target_columns = ['Discharge_Capacity (Ah)']
y = np.asarray(df_battery_filtered[target_columns])

In [None]:
# Split the data, fit a shallow decision tree on the training part and report
# the mean squared error on the held-out part.
# NOTE(review): test_size=0.65 holds out most of the data for testing; the
# commented alternative below uses a conventional 20 % test split - confirm
# which one is intended.
x_train, x_test, y_train, y_test = train_test_split(x, y, test_size=0.65, random_state=2)
#x_train, x_test, y_train, y_test = train_test_split(x, y, test_size=0.2, random_state=5)

# random_state is fixed because scikit-learn permutes the features at every
# split even with the default "best" splitter; without a seed, ties between
# equally good splits can be resolved differently from run to run.
regr_DT = DecisionTreeRegressor(max_depth=3, random_state=2)
regr_DT.fit(x_train, y_train)

y_pred = regr_DT.predict(x_test)
MSE = mean_squared_error(y_test, y_pred)

# Last expression of the cell, so the notebook displays the value.
MSE
# y.mean()
# y.std()