In [1]:
import h5py
import scipy.io
import scipy.stats
from sklearn import linear_model
import pandas as pd
import numpy as np

## Read matlab files

In [2]:
matFilename = './Data/2017-05-12_batchdata_updated_struct_errorcorrect.mat'
f1 = h5py.File(matFilename)

In [3]:
matFilename = './Data/2017-06-30_batchdata_updated_struct_errorcorrect.mat'
f2 = h5py.File(matFilename)

In [4]:
batch1 = f1['batch']
batch2 = f2['batch']
batch1.keys()

<KeysViewHDF5 ['Vdlin', 'barcode', 'channel_id', 'cycle_life', 'cycles', 'policy', 'policy_readable', 'summary']>

In [5]:
f1[batch1['summary'][0,0]].keys()

<KeysViewHDF5 ['IR', 'QCharge', 'QDischarge', 'Tavg', 'Tmax', 'Tmin', 'chargetime', 'cycle']>

In [130]:
f1[batch1['cycles'][0,0]].keys()

<KeysViewHDF5 ['I', 'Qc', 'Qd', 'Qdlin', 'T', 'Tdlin', 'V', 'discharge_dQdV', 't']>

In [131]:
f1[batch1['cycles'][0,0]]['V'].shape

(1189, 1)

In [127]:
s1 = f2[batch2['summary'][3,0]]
np.argmax(s1['QDischarge'][0] < 0.88)
# f2[batch2['cycle_life'][3,0]][0,0]

334

In [6]:
num_cells = batch1['summary'].shape[0] + batch2['summary'].shape[0]

## Extract Features

In [132]:
def extract_string(batch, file, column, row):
    text = file[batch[column][row,0]]
    return u''.join(chr(c[0]) for c in text)

def extract_features(batch, file, start_from=0):
    num_cells_batch = batch['summary'].shape[0]
    batch_feats = pd.DataFrame(0, index=np.arange(num_cells_batch), columns=(
        'policy', 'barcode', 'cycle_life', 'QD2', 'QD(Max-2)', 'QD100',
        'QDiffMin', 'QDiffMean', 'QDiffVar', 'QDiffSkew', 'QDiffKurtosis',
        'QDiffStart', 'R3Coef', 'R3Intercept', 'R1Coef', 'R1Intercept'
    ))
    batch_feats['policy'] = batch_feats['policy'].astype(str)
    batch_feats['barcode'] = batch_feats['barcode'].astype(str)
    for i in range(num_cells_batch):
        c_index = start_from + i
        batch_feats.at[c_index, 'policy'] = extract_string(batch, file, 'policy', i)
        # barcode is encoded in MATLAB proprietary string format
        # Mathworks Inc have not disclosed how this encoding works
        # so no open source library is able to load it
        # batch_feats.at[c_index, 'barcode'] = extract_string(batch, file, 'barcode', i)
        batch_feats.at[c_index, 'barcode'] = "???"
        # batch_feats.at[c_index, 'cycle_life'] = file[batch['cycle_life'][i,0]][0,0]
        
        summary = file[batch['summary'][i,0]]
        cycles = file[batch['cycles'][i,0]]

        # extract the number of cycles to 0.88
        if summary['QDischarge'][0,-1] < 0.88:
            batch_feats.at[c_index, 'cycle_life'] = np.argmax(
                summary['QDischarge'][0] < 0.88
            ) + 1
        else:
            batch_feats.at[c_index, 'cycle_life'] = cycles['V'].shape[0]
        # capacity features
        batch_feats.at[c_index,'QD2'] = summary['QDischarge'][0,1]
        batch_feats.at[c_index,'QD(Max-2)'] = max(
            summary['QDischarge'][0,0:99]
        ) - batch_feats.at[c_index,'QD2']
        batch_feats.at[c_index,'QD100'] = summary['QDischarge'][0,99]

        # Q features
        Q10 = np.hstack((file[cycles['Qdlin'][9,0]]))
        Q100 = np.hstack((file[cycles['Qdlin'][99,0]]))
        Qdiff = Q100 - Q10

        batch_feats.at[c_index,'QDiffMin'] = np.log10(np.abs(np.min(Qdiff)))
        batch_feats.at[c_index,'QDiffMean'] = np.log10(np.abs(np.mean(Qdiff)))
        batch_feats.at[c_index,'QDiffVar'] = np.log10(np.abs(np.var(Qdiff)))
        batch_feats.at[c_index,'QDiffSkew'] = np.log10(np.abs(scipy.stats.skew(Qdiff)))
        batch_feats.at[c_index,'QDiffKurtosis'] = np.log10(np.abs(scipy.stats.kurtosis(Qdiff)))
        batch_feats.at[c_index,'QDiffStart'] = np.log10(np.abs(Qdiff[0]))

        # https://stackoverflow.com/questions/46779605/in-the-linearregression-method-in-sklearn-what-exactly-is-the-fit-intercept-par
        R3 = linear_model.LinearRegression(fit_intercept=True)
        R3.fit(
            np.arange(2,100).reshape(-1,1),
            summary['QDischarge'][0,1:99]
        )
        batch_feats.at[c_index,'R3Coef'] = R3.coef_[0]
        batch_feats.at[c_index,'R3Intercept'] = R3.intercept_

        R1 = linear_model.LinearRegression(fit_intercept=True)
        R1.fit(
            np.arange(91,100).reshape(-1,1),
            summary['QDischarge'][0,90:99]
        )
        batch_feats.at[c_index,'R1Coef'] = R1.coef_[0]
        batch_feats.at[c_index,'R1Intercept'] = R1.intercept_

    return batch_feats

In [133]:
batch1_feats = extract_features(batch1, f1)
numbat1 = batch1_feats.shape[0]
batch1_feats

Unnamed: 0,policy,barcode,cycle_life,QD2,QD(Max-2),QD100,QDiffMin,QDiffMean,QDiffVar,QDiffSkew,QDiffKurtosis,QDiffStart,R3Coef,R3Intercept,R1Coef,R1Intercept
0,3_6C-80PER_3_6C,???,1189,1.070689,0.468365,1.075913,-2.072648,-2.541602,-5.014861,-0.274041,0.12979,-4.261444,-0.0002102467,1.091262,3.9e-05,1.072078
1,3_6C-80PER_3_6C,???,1178,1.075301,0.009288,1.08063,-1.958457,-2.387257,-5.01396,-0.367163,0.012464,-3.803325,6.096039e-06,1.080945,-4.1e-05,1.08471
2,3_6C-80PER_3_6C,???,1176,1.079922,0.008131,1.08494,-1.764058,-2.34807,-4.737,0.033502,-0.457627,-5.056484,1.088596e-05,1.084826,-4.4e-05,1.089331
3,4C-80PER_4C,???,1225,1.079723,0.00653,1.08475,-1.722149,-2.127507,-4.442613,-0.357486,0.039579,-3.739833,1.779205e-05,1.084053,-3e-06,1.085031
4,4C-80PER_4C,???,1226,1.078417,0.0059,1.082646,-1.855177,-2.240332,-4.647744,-0.440634,0.125101,-4.380559,1.970664e-05,1.08185,-3.3e-05,1.085851
5,4_4C-80PER_4_4C,???,1073,1.076127,0.005944,1.079779,-1.598965,-1.955699,-4.178878,-0.825794,0.089031,-3.625048,1.192159e-05,1.079731,-2.5e-05,1.082215
6,4_8C-80PER_4_8C,???,635,1.075836,0.006303,1.079243,-1.421521,-1.79683,-3.768878,-0.389408,0.092745,-4.207368,-3.903811e-06,1.081126,-6.1e-05,1.085331
7,4_8C-80PER_4_8C,???,869,1.093864,0.004104,1.095762,-1.417557,-1.778697,-3.813486,-0.477013,0.049426,-3.63007,-5.892347e-06,1.097236,-3.9e-05,1.099673
8,5_4C-40PER_3_6C,???,878,1.089975,0.005939,1.094136,-1.511341,-1.892147,-4.001195,-0.482653,0.080113,-4.847953,5.998624e-06,1.094475,-1.2e-05,1.095418
9,5_4C-40PER_3_6C,???,1053,1.082974,0.005976,1.087631,-1.541748,-1.938902,-4.059384,-0.368971,0.07268,-4.42457,1.881074e-05,1.086751,-1.2e-05,1.088857


In [134]:
batch2_feats = extract_features(batch2, f2)
batch2_feats

Unnamed: 0,policy,barcode,cycle_life,QD2,QD(Max-2),QD100,QDiffMin,QDiffMean,QDiffVar,QDiffSkew,QDiffKurtosis,QDiffStart,R3Coef,R3Intercept,R1Coef,R1Intercept
0,1C_4PER_6C,???,300,1.070054,0.00353,1.038452,-0.868309,-1.146677,-2.745713,-0.310813,-0.054311,-3.862233,-0.0003516294,1.078732,-0.000647,1.104041
1,2C_10PER_6C,???,148,1.07207,0.001546,0.968339,-0.860038,-1.109668,-2.726901,-0.031052,-0.292914,-4.713663,-0.001087775,1.08426,-0.001378,1.106627
2,2C_2PER_5C,???,438,1.073539,0.006428,1.075998,-1.263094,-1.610985,-3.493259,-0.769044,0.108508,-4.377028,-1.190644e-05,1.078649,-5.7e-05,1.081674
3,2C_7PER_5_5C,???,335,1.066851,0.002059,1.046734,-0.996793,-1.246302,-2.929546,-0.374625,0.014785,-4.533756,-0.0002254496,1.072454,-0.000349,1.081733
4,3_6C_22PER_5_5C,???,444,1.072897,0.00275,1.065091,-1.193765,-1.491719,-3.352005,-0.795788,0.055902,-4.867781,-0.0001057628,1.077297,-0.000184,1.083598
5,3_6C_2PER_4_85C,???,480,1.068015,0.005005,1.070745,-1.317857,-1.653251,-3.602695,-1.100566,0.105057,-3.682376,-3.971151e-07,1.072007,-7.9e-05,1.078562
6,3_6C_30PER_6C,???,511,1.068739,0.003792,1.052199,-1.139919,-1.405886,-3.24439,-0.361251,0.005525,-4.945456,-0.0002131106,1.075614,-0.000295,1.081653
7,80PER_3_6C,???,662,1.023337,0.000764,1.010512,-1.776554,-1.983637,-4.43826,-0.112095,0.028689,-5.796818,-0.0001399865,1.024442,-0.000105,1.021342
8,80PER_3_6C,???,981,1.026484,0.005743,1.030571,-2.304632,-2.731498,-5.206001,0.037423,-0.036902,-4.821055,-8.865661e-07,1.031028,-5.9e-05,1.036086
9,80PER_3_6C,???,1060,1.03175,0.004664,1.034821,-1.33233,-2.103273,-3.909819,0.335374,0.57832,-3.993789,-8.940182e-06,1.035577,2.9e-05,1.031692


In [135]:
# remove batteries continuing from batch 1
batch2_feats.drop([7,8,9,15,16], inplace=True)
numbat2 = batch2_feats.shape[0]

In [136]:
batch_combined = pd.concat((batch1_feats, batch2_feats))
num_cells = numbat1 + numbat2
batch_combined

Unnamed: 0,policy,barcode,cycle_life,QD2,QD(Max-2),QD100,QDiffMin,QDiffMean,QDiffVar,QDiffSkew,QDiffKurtosis,QDiffStart,R3Coef,R3Intercept,R1Coef,R1Intercept
0,3_6C-80PER_3_6C,???,1189,1.070689,0.468365,1.075913,-2.072648,-2.541602,-5.014861,-0.274041,0.129790,-4.261444,-0.000210,1.091262,0.000039,1.072078
1,3_6C-80PER_3_6C,???,1178,1.075301,0.009288,1.080630,-1.958457,-2.387257,-5.013960,-0.367163,0.012464,-3.803325,0.000006,1.080945,-0.000041,1.084710
2,3_6C-80PER_3_6C,???,1176,1.079922,0.008131,1.084940,-1.764058,-2.348070,-4.737000,0.033502,-0.457627,-5.056484,0.000011,1.084826,-0.000044,1.089331
3,4C-80PER_4C,???,1225,1.079723,0.006530,1.084750,-1.722149,-2.127507,-4.442613,-0.357486,0.039579,-3.739833,0.000018,1.084053,-0.000003,1.085031
4,4C-80PER_4C,???,1226,1.078417,0.005900,1.082646,-1.855177,-2.240332,-4.647744,-0.440634,0.125101,-4.380559,0.000020,1.081850,-0.000033,1.085851
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
43,6C_31PER_4_25C,???,462,1.073565,0.007075,1.079078,-1.226029,-1.547619,-3.394968,-1.993884,0.134112,-4.417652,0.000013,1.078951,-0.000031,1.082138
44,6C_40PER_4C,???,457,1.070887,0.006366,1.076360,-1.244632,-1.577172,-3.452581,-1.389835,0.111612,-4.012170,0.000023,1.075190,-0.000017,1.078153
45,6C_4PER_4_75C,???,487,1.070473,0.005186,1.073041,-1.361351,-1.680775,-3.688418,-1.539699,0.113010,-4.279524,-0.000006,1.074844,-0.000079,1.080819
46,6C_52PER_3_5C,???,429,1.072614,0.007384,1.079520,-1.219913,-1.521692,-3.368943,-1.205534,0.132095,-4.142284,0.000030,1.077609,-0.000013,1.080783


In [137]:
# remove the batteries that do not finish in Batch 1
batch_combined.drop([8,10,12,13,22], inplace=True)
num_cells = num_cells - 5
numbat1 = numbat1 - 5

In [None]:
batch_combined.to_csv("./Data/features_combined.csv", index=False)

### Test Sketches
Just some notes

In [57]:
policy_n = f1[batch1['barcode'][0,0]]

In [58]:
policy_n.attrs['MATLAB_class']

b'string'

In [65]:
policy_n[0]

array([3707764736,          2,          1,          1,          1,
                1], dtype=uint32)

In [20]:
strlist = [u''.join(chr(c[0]) for c in policy_n)]
strlist

['3_6C-80PER_3_6C']