In [27]:
import numpy as np
import matplotlib.pyplot as plt
from scipy.interpolate import UnivariateSpline
import pickle
import pandas as pd

In [28]:
# Load the batch 1 cell dictionary.
# NOTE(review): pickle.load can execute arbitrary code; only load trusted files.
# The context manager closes the file handle, which the bare open() never did.
with open(r'D:\SIT Projects\BatteryML\cleaning\batch1.pkl', 'rb') as batch1_file:
    batch1 = pickle.load(batch1_file)

# Cells excluded from the analysis (the reason is not recorded in this notebook).
for excluded_cell in ('b1c8', 'b1c10', 'b1c12', 'b1c13', 'b1c22'):
    del batch1[excluded_cell]

In [29]:
# Number of cells currently held in batch1 (displayed as the cell output).
numBat1 = len(batch1)
numBat1

41

In [30]:
# Load the batch 2 cell dictionary.
# NOTE(review): pickle.load can execute arbitrary code; only load trusted files.
# The context manager closes the file handle, which the bare open() never did.
with open(r'D:\SIT Projects\BatteryML\cleaning\batch2.pkl', 'rb') as batch2_file:
    batch2 = pickle.load(batch2_file)

In [31]:
# Five cells from batch1 carried over into batch2. The batch2 data is removed
# from batch2 and appended to the matching batch1 cell. The three lists are
# aligned by position: batch2_keys[i] continues batch1_keys[i], and add_len[i]
# is the amount added to that batch1 cell's cycle_life.
batch2_keys = [
    'b2c7',
    'b2c8',
    'b2c9',
    'b2c15',
    'b2c16',
]
batch1_keys = [
    'b1c0',
    'b1c1',
    'b1c2',
    'b1c3',
    'b1c4',
]
add_len = [
    662,
    981,
    1060,
    208,
    482,
]

In [32]:
# Append each continued batch2 run to the end of its batch1 cell.
# NOTE: this cell is not idempotent; re-running it adds add_len to cycle_life
# and appends the batch2 data a second time.
for idx, b1_key in enumerate(batch1_keys):
    b1_cell = batch1[b1_key]
    b1_cell['cycle_life'] = b1_cell['cycle_life'] + add_len[idx]

    b2_cell = batch2[batch2_keys[idx]]
    b1_summary = b1_cell['summary']
    for field in b1_summary.keys():
        continuation = b2_cell['summary'][field]
        if field == 'cycle':
            # Offset the batch2 cycle numbers by the number of batch1 entries
            # so the numbering continues instead of restarting.
            continuation = continuation + len(b1_summary[field])
        b1_summary[field] = np.hstack((b1_summary[field], continuation))

    # Per-cycle records are keyed by stringified index; continue the numbering
    # from the current number of batch1 cycle entries.
    first_new_index = len(b1_cell['cycles'].keys())
    for offset, b2_cycle_key in enumerate(b2_cell['cycles'].keys()):
        b1_cell['cycles'][str(first_new_index + offset)] = b2_cell['cycles'][b2_cycle_key]

In [33]:
# Remove the batch2 entries whose data was appended to batch1 cells.
for merged_key in ('b2c7', 'b2c8', 'b2c9', 'b2c15', 'b2c16'):
    del batch2[merged_key]

In [34]:
# Number of cells currently held in batch2 (displayed as the cell output).
numBat2 = len(batch2)
numBat2

43

In [35]:
# Load the batch 3 cell dictionary.
# NOTE(review): pickle.load can execute arbitrary code; only load trusted files.
# The context manager closes the file handle, which the bare open() never did.
with open(r'D:\SIT Projects\BatteryML\cleaning\batch3.pkl', 'rb') as batch3_file:
    batch3 = pickle.load(batch3_file)

# Cells excluded from the analysis (the reason is not recorded in this notebook).
for excluded_cell in ('b3c37', 'b3c2', 'b3c23', 'b3c32', 'b3c42', 'b3c43'):
    del batch3[excluded_cell]

In [36]:
# Number of cells currently held in batch3 (displayed as the cell output).
numBat3 = len(batch3)
numBat3

40

In [37]:
# Total number of cells across the three batches.
numBat = sum((numBat1, numBat2, numBat3))
numBat

124

In [38]:
# Combine the three batches into one dictionary keyed by cell id.
# On a duplicate key the later batch wins, as with {**a, **b, **c}.
bat_dict = {}
for batch in (batch1, batch2, batch3):
    bat_dict.update(batch)

In [39]:
# One feature row per cell, seeded with the target value.
# cycle_life is indexed with [0][0] to pull a scalar out of the nested array.
# A comprehension is used so that no loop variable named `id` is left behind
# to shadow the builtin id() in the kernel namespace.
feature_dict = {
    cell_id: {"cycle_life": cell["cycle_life"][0][0]}
    for cell_id, cell in bat_dict.items()
}

In [40]:
# Inspect the per-cycle record stored under key '0' for cell b1c0.
bat_dict["b1c0"]["cycles"]["0"]

{'I': array([0, 0], dtype=uint64),
 'Qc': array([0, 0], dtype=uint64),
 'Qd': array([0, 0], dtype=uint64),
 'Qdlin': array([0, 0], dtype=uint64),
 'T': array([0, 0], dtype=uint64),
 'Tdlin': array([0, 0], dtype=uint64),
 'V': array([0, 0], dtype=uint64),
 'dQdV': array([0, 0], dtype=uint64),
 't': array([0, 0], dtype=uint64)}

### Creating Features for Variance, Discharge, Full Model [For RUL]

In [41]:
def crop_data(id,cycle):
    """Return one cycle's discharge curve as (voltage, capacity) arrays.

    Reads ``bat_dict[id]["cycles"][cycle]`` and keeps the (Qd, V) samples with
    Qd > 1e-5. The curve is cut at the first sample whose voltage is below 2.2
    and is followed by a higher voltage. Any earlier voltage increase is
    flattened to the preceding value so the kept voltages never increase.

    Parameters
    ----------
    id : key of the cell in ``bat_dict``.
    cycle : key of the cycle inside that cell's ``"cycles"`` mapping.

    Returns
    -------
    (x, y) : the kept voltages and the matching Qd values, both reversed so
        that x is non-decreasing. Both are empty when no sample has Qd > 1e-5.

    Unlike the previous version, a curve that never rises again after dropping
    below 2.2 is kept whole instead of raising IndexError / UnboundLocalError.
    """
    cycle_data = bat_dict[id]["cycles"][cycle]

    # isolate discharging data; reshape keeps a (0, 2) shape when nothing passes
    cropped = np.array(
        [[Q, V] for Q, V in zip(cycle_data["Qd"], cycle_data["V"]) if Q > 1e-5]
    ).reshape(-1, 2)

    # Default: keep everything unless an end-of-discharge point is found.
    end = len(cropped)
    # Stop one short of the last row so the i + 1 look-ahead stays in bounds.
    for i in range(len(cropped) - 1):
        if cropped[i, 1] < cropped[i + 1, 1]:
            if cropped[i, 1] < 2.2:  # identify where discharging ends
                end = i + 1
                break
            # anomalous increase in voltage; flatten it so voltage is decreasing
            cropped[i + 1, 1] = cropped[i, 1]

    cropped = cropped[:end]
    x = np.flip(cropped[:, 1])
    y = np.flip(cropped[:, 0])
    return x,y

In [42]:
from scipy.stats import skew, kurtosis

# Common grid of 1000 evenly spaced points on [2, 3.5] at which the fitted
# splines are evaluated.
xs = np.linspace(2, 3.5, num=1000)

def interpolate_spline(id):
    """Fit smoothing splines to the cropped curves of cycles "10" and "100".

    Each curve comes from ``crop_data(id, cycle)`` and is fitted with
    ``UnivariateSpline(..., s=0.001)``.

    Returns
    -------
    (cs_10, cs_100) : the spline for cycle "10" followed by the one for "100".
    """
    fitted = {}
    # Cycle "100" is processed first, matching the original call order.
    for cycle in ("100", "10"):
        voltage, capacity = crop_data(id, cycle)
        fitted[cycle] = UnivariateSpline(voltage, capacity, s=0.001)
    return fitted["10"], fitted["100"]

def get_var(id):
    """Return log10 of the variance of (cycle-100 spline - cycle-10 spline) on ``xs``."""
    spline_10, spline_100 = interpolate_spline(id)
    delta_q = spline_100(xs) - spline_10(xs)
    variance = np.var(delta_q)
    return np.log10(variance)

def get_min(id):
    """Return the minimum of (cycle-100 spline - cycle-10 spline) on ``xs``."""
    spline_10, spline_100 = interpolate_spline(id)
    delta_q = spline_100(xs) - spline_10(xs)
    return np.min(delta_q)

def get_skew(id):
    """Return the skewness of (cycle-100 spline - cycle-10 spline) on ``xs``."""
    spline_10, spline_100 = interpolate_spline(id)
    delta_q = spline_100(xs) - spline_10(xs)
    return skew(delta_q)

def get_kurt(id):
    """Return the kurtosis of (cycle-100 spline - cycle-10 spline) on ``xs``."""
    spline_10, spline_100 = interpolate_spline(id)
    delta_q = spline_100(xs) - spline_10(xs)
    return kurtosis(delta_q)

In [43]:
# Statistics of the difference between the cycle-100 and cycle-10 splines,
# evaluated on xs, for every cell.
# The splines are fitted once per cell; calling get_var / get_min / get_skew /
# get_kurt separately refits both splines four times for the same numbers.
# The formulas below are the same ones those helpers use.
# The loop variable is `cell_id` so the builtin id() is not shadowed.
for cell_id in bat_dict.keys():
    cs_10, cs_100 = interpolate_spline(cell_id)
    delta_q = cs_100(xs) - cs_10(xs)

    cell_features = feature_dict[cell_id]
    cell_features["DeltaQ_logVar"] = np.log10(np.var(delta_q))
    cell_features["DeltaQ_Min"] = np.min(delta_q)
    cell_features["DeltaQ_Skew"] = skew(delta_q)
    cell_features["DeltaQ_Kurt"] = kurtosis(delta_q)

In [44]:
# Discharge-capacity features from the summary "QD" series.
# Index 1 is treated as cycle 2 and index 99 as cycle 100 throughout.
# The loop variable is `cell_id` so the builtin id() is not shadowed.
for cell_id in bat_dict.keys():
    qd_curve = bat_dict[cell_id]["summary"]["QD"]
    cell_features = feature_dict[cell_id]

    # difference between max discharge capacity and cycle 2
    cell_features["QD_Max-2"] = (np.max(qd_curve) - qd_curve[1])

    # discharge capacity at cycle 2
    cell_features["QD_2"] = qd_curve[1]

    # linear fit to capacity fade curve, cycles 2 to 100 (99 points)
    linear_fit = np.polyfit(np.linspace(2,100,99), qd_curve[1:100], deg=1)
    cell_features["slope_capacity_fade_2-100"] = linear_fit[0]
    cell_features["intercept_capacity_fade_2-100"] = linear_fit[1]

    # linear fit to capacity fade curve, cycles 91 to 100 (10 points)
    linear_fit_2 = np.polyfit(np.linspace(91,100,10), qd_curve[90:100], deg=1)
    cell_features["slope_capacity_fade_91-100"] = linear_fit_2[0]
    # This key has no "_fade" part, unlike the 2-100 intercept; it is kept
    # as-is because it becomes a column name in the exported CSV.
    cell_features["intercept_capacity_91-100"] = linear_fit_2[1]

In [45]:
# Charge-time, temperature and internal-resistance features from the summary.
# Index 1 is treated as cycle 2 and index 99 as cycle 100, as in the cell above.
# The loop variable is `cell_id` so the builtin id() is not shadowed.
for cell_id in bat_dict.keys():
    summary = bat_dict[cell_id]["summary"]
    cell_features = feature_dict[cell_id]

    # average charge time over five cycles, indices 1-5 (cycles 2 through 6)
    cell_features["init_avg_charge_time"] = np.mean(summary["chargetime"][1:6])

    # average temperature over time, cycles 2 through 100
    cell_features["avg_T"] = np.mean(summary["Tavg"][1:100])

    # minimum internal resistance, cycles 2 through 100
    # NOTE(review): the saved output shows min_IR == 0.0 for several cells,
    # presumably from missing IR readings stored as 0 -- confirm before
    # relying on this feature.
    cell_features["min_IR"] = np.min(summary["IR"][1:100])

    # internal resistance, difference between cycle 100 and cycle 2
    cell_features["IR_100-2"] = summary["IR"][99] - summary["IR"][1]

In [46]:
# One row per cell id (outer dict keys), one column per feature (inner keys).
feature_df = pd.DataFrame.from_dict(
    feature_dict,
    orient="index",
)

In [47]:
# Write the feature table to disk; the cell-id index goes out as the first,
# unnamed column.
feature_df.to_csv(path_or_buf="../Data/features.csv")

In [48]:
# Read the exported features back as a sanity check.
# NOTE(review): without index_col=0 the cell ids come back as an ordinary
# "Unnamed: 0" column rather than the index -- confirm that is intended.
loaded_df = pd.read_csv(filepath_or_buffer="../Data/features.csv")
loaded_df.head(5)

Unnamed: 0.1,Unnamed: 0,cycle_life,DeltaQ_logVar,DeltaQ_Min,DeltaQ_Skew,DeltaQ_Kurt,QD_Max-2,QD_2,slope_capacity_fade_2-100,intercept_capacity_fade_2-100,slope_capacity_fade_91-100,intercept_capacity_91-100,init_avg_charge_time,avg_T,min_IR,IR_100-2
0,b1c0,1852.0,-4.83617,-0.009054,0.443416,0.342752,0.468365,1.070689,-0.000207,1.091144,3.5e-05,1.072425,13.374894,31.603747,0.016444,-7.5e-05
1,b1c1,2160.0,-4.988991,-0.010529,0.072319,-0.156308,0.009288,1.075301,6e-06,1.080965,-4.1e-05,1.084767,13.40915,31.330314,0.0,-4.2e-05
2,b1c2,2237.0,-4.812421,-0.013186,0.059164,0.621248,0.008131,1.079922,1e-05,1.084846,-4.4e-05,1.089364,13.358242,31.479584,0.0,-1.4e-05
3,b1c3,1434.0,-4.371544,-0.018933,-0.35385,-1.072029,0.00653,1.079723,1.7e-05,1.084075,-4e-06,1.085131,12.02514,29.942199,0.0,3.9e-05
4,b1c4,1709.0,-4.5339,-0.017832,-0.257654,-0.549822,0.0059,1.078417,1.9e-05,1.081875,-2.9e-05,1.085538,12.041851,31.448884,0.0,-5.3e-05


### Creating SoH Estimation Label

In [49]:
import pandas as pd

# Divisor used to turn discharge capacity into SoH.
# NOTE(review): presumably the nominal cell capacity -- confirm.
SOH_REFERENCE_CAPACITY = 1.1

# Build one row per (cell, cycle) from the summary arrays plus a few
# statistics of that cycle's raw time series.
records = []
for cell_id, cell_data in bat_dict.items():
    summary = cell_data['summary']
    cycles = cell_data['cycles']
    # cycle_life is a size-1 array (it is read with [0][0] when feature_dict
    # is built). .item() extracts the scalar explicitly; int() on the array
    # itself triggers NumPy's array-to-scalar deprecation warning.
    cycle_life = int(np.asarray(cell_data['cycle_life']).item())
    charge_policy = cell_data['charge_policy']
    num_cycles = len(summary['cycle'])
    for i in range(num_cycles):
        # Per-cycle records are keyed by the stringified summary index;
        # indices without a record are skipped.
        cycle_id = str(i)
        if cycle_id not in cycles:
            continue
        cycle_data = cycles[cycle_id]

        # Span of the cycle's 't' series, or NaN when it is missing or empty.
        try:
            t_arr = cycle_data['t']
            Ti = t_arr[-1] - t_arr[0]
        except (KeyError, IndexError):
            Ti = np.nan

        # features from raw time series, cycle level -> time level features
        V_avg = np.mean(cycle_data['V'])
        I_avg = np.mean(cycle_data['I'])
        T_cycle_avg = np.mean(cycle_data['T'])
        dQdV_max = np.max(cycle_data['dQdV'])
        V_std = np.std(cycle_data['V'])

        records.append({
            'cell_id': cell_id,
            'cycle': summary['cycle'][i],
            'IR': summary['IR'][i],
            'QCharge': summary['QC'][i],
            'QDischarge': summary['QD'][i],
            'Tavg': summary['Tavg'][i],
            'Tmin': summary['Tmin'][i],
            'Tmax': summary['Tmax'][i],
            'chargetime': summary['chargetime'][i],
            'cycle_life': cycle_life,
            'charge_policy': charge_policy,
            'V_avg': V_avg,
            'I_avg': I_avg,
            'T_cycle_avg': T_cycle_avg,
            'dQdV_max': dQdV_max,
            'V_std': V_std,
            'Time_Stamp': Ti
        })

df_summary = pd.DataFrame(records)

# Calculating SoH
# SoH = discharge capacity / SOH_REFERENCE_CAPACITY. This is a plain
# elementwise division, so no per-cell groupby is needed.
# Alternative considered earlier: normalise each cell by its own second row,
# x / x.iloc[1], because the 0th row has a dummy zero in capacity.
df_summary['SoH'] = df_summary['QDischarge'] / SOH_REFERENCE_CAPACITY

# Keep rows with 0 <= SoH <= 1 (the original run noted this removed 4 outliers).
df_summary = df_summary[(df_summary['SoH'] >= 0) & (df_summary['SoH'] <= 1)]

# NOTE: this overwrites features_soh.csv on every run
# (original comment: "do not re-create files").
df_summary.to_csv('features_soh.csv')
df_summary.tail()

  cycle_life = int(cell_data['cycle_life'])


Unnamed: 0,cell_id,cycle,IR,QCharge,QDischarge,Tavg,Tmin,Tmax,chargetime,cycle_life,charge_policy,V_avg,I_avg,T_cycle_avg,dQdV_max,V_std,Time_Stamp,SoH
100496,b3c45,1796.0,0.017538,0.881796,0.881539,32.532753,30.328932,34.571934,15.404138,1801,4.8C(80%)-4.8C-newstructure,3.23441,0.813641,32.532753,-0.008491,0.515761,32.897747,0.801399
100497,b3c45,1797.0,0.017294,0.881874,0.881441,32.679857,29.591684,35.492989,15.497987,1801,4.8C(80%)-4.8C-newstructure,3.236741,0.811358,32.679857,0.0,0.516085,33.058932,0.80131
100498,b3c45,1798.0,0.017605,0.881364,0.880883,32.217071,31.470758,33.99955,16.357937,1801,4.8C(80%)-4.8C-newstructure,3.238934,0.798734,32.217071,0.0,0.514695,33.647655,0.800803
100499,b3c45,1799.0,0.017526,0.88112,0.880757,33.03563,29.526152,35.779078,16.04614,1801,4.8C(80%)-4.8C-newstructure,3.242756,0.803322,33.03563,0.0,0.511174,33.0401,0.800688
100500,b3c45,1800.0,0.017456,0.880991,0.880473,32.769553,31.031042,34.29858,16.985313,1801,4.8C(80%)-4.8C-newstructure,3.239326,0.80158,32.769553,0.0,0.5155,34.076402,0.80043


In [50]:
# First five current samples of cycle '2' for cell b1c0 (increasing current).
print(bat_dict["b1c0"]["cycles"]["2"]["I"][:5])


[0.         0.21598469 0.39595431 0.50382875 0.61164595]


In [51]:
# Show every retained row for cell b1c4.
df_summary.loc[df_summary["cell_id"] == "b1c4"]

Unnamed: 0,cell_id,cycle,IR,QCharge,QDischarge,Tavg,Tmin,Tmax,chargetime,cycle_life,charge_policy,V_avg,I_avg,T_cycle_avg,dQdV_max,V_std,Time_Stamp,SoH
7679,b1c4,1.0,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,1709,4C(80%)-4C,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000
7680,b1c4,2.0,0.016787,1.078509,1.078417,31.659106,29.640825,34.610603,12.008310,1709,4C(80%)-4C,3.129779,-0.060739,31.659106,0.000000,0.505879,53.053205,0.980379
7681,b1c4,3.0,0.016740,1.078936,1.079012,31.682169,29.737724,34.632095,12.092648,1709,4C(80%)-4C,3.120335,-0.094282,31.682169,0.000000,0.510196,53.052700,0.980920
7682,b1c4,4.0,0.016739,1.079652,1.079747,31.665242,29.748180,34.424313,12.092380,1709,4C(80%)-4C,3.118761,-0.090066,31.665242,0.000000,0.519474,53.051682,0.981588
7683,b1c4,5.0,0.016720,1.080269,1.080427,31.725986,29.840086,34.543621,12.007857,1709,4C(80%)-4C,3.114068,-0.132566,31.725986,0.000000,0.514191,53.051487,0.982206
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
9382,b1c4,1704.0,0.018766,0.882168,0.881525,31.717959,29.399857,34.925396,18.260818,1709,4C(80%)-4C,3.058598,-0.107556,31.717959,0.000000,0.564928,39.478982,0.801386
9383,b1c4,1705.0,0.018860,0.881689,0.881166,31.700614,29.533007,34.907944,19.175007,1709,4C(80%)-4C,3.041414,0.000643,31.700614,-0.000508,0.582974,39.990078,0.801060
9384,b1c4,1706.0,0.018936,0.881454,0.880753,31.511554,29.512556,34.805012,19.910152,1709,4C(80%)-4C,3.067971,-0.072615,31.511554,-0.001159,0.565173,40.299515,0.800684
9385,b1c4,1707.0,0.018953,0.880914,0.880294,31.718730,29.882748,35.023869,20.729023,1709,4C(80%)-4C,3.053547,-0.066793,31.718730,0.000000,0.569919,40.217103,0.800267
