In [None]:
%pip install jupyter numpy pandas matplotlib scikit-learn h5py

In [None]:
import h5py
import scipy.io
import scipy.stats
from sklearn import linear_model
import pandas as pd
import numpy as np

In [None]:
# Execute the companion notebook in this kernel's namespace.
# NOTE(review): presumably this is what defines VQlinspace2, which
# extract_features calls below and which is not defined anywhere else in this
# notebook - confirm against VQlinspace2.ipynb.
%run "VQlinspace2.ipynb"

## Read MATLAB files

In [None]:
# Batch 1 data file (HDF5-based .mat file, read through h5py).
matFilename = '/Users/guancong/Desktop/sch/3110_ITP/BatteryLifeTimeML/Data/2017-05-12_batchdata_updated_struct_errorcorrect.mat'
# Open explicitly read-only: the notebook never writes to the source data, and
# without a mode argument older h5py releases may open the file writable.
# The handle is deliberately left open because later cells dereference it.
f1 = h5py.File(matFilename, 'r')

In [None]:
# Batch 2 data file (HDF5-based .mat file, read through h5py).
matFilename = '/Users/guancong/Desktop/sch/3110_ITP/BatteryLifeTimeML/Data/2017-06-30_batchdata_updated_struct_errorcorrect.mat'
# Open explicitly read-only: the notebook never writes to the source data, and
# without a mode argument older h5py releases may open the file writable.
# The handle is deliberately left open because later cells dereference it.
f2 = h5py.File(matFilename, 'r')

In [None]:
# Grab the top-level 'batch' group of each file, then show which fields
# the batch-1 group exposes.
batch1, batch2 = f1['batch'], f2['batch']
batch1.keys()

In [None]:
# Dereference the first entry of batch 1's 'summary' column and list the
# fields stored under it.
f1[batch1['summary'][0,0]].keys()

In [None]:
# Dereference the first entry of batch 1's 'cycles' column and list the
# fields stored under it.
f1[batch1['cycles'][0,0]].keys()

In [None]:
# Shape of the 'V' field for the first entry of batch 1's 'cycles' column;
# extract_features uses shape[0] of this field as the cycle count.
f1[batch1['cycles'][0,0]]['V'].shape

In [None]:
# For the batch-2 entry at index 3: zero-based position of the first
# QDischarge value below 0.88 (argmax yields 0 when no value is below it).
s1 = f2[batch2['summary'][3, 0]]
(s1['QDischarge'][0] < 0.88).argmax()
# f2[batch2['cycle_life'][3,0]][0,0]

In [None]:
# Total number of entries across both batches (rows of each 'summary' column).
num_cells = sum(b['summary'].shape[0] for b in (batch1, batch2))

## Extract Features

In [None]:
def extract_string(batch, file, column, row):
    """Decode a character-code array referenced from ``batch`` into a string.

    Looks up the reference stored at ``batch[column][row, 0]``, dereferences it
    through ``file``, and joins ``chr()`` of the first element of every row of
    the resulting array.

    Parameters:
        batch: mapping from column name to a 2-D array of references.
        file: mapping used to dereference a reference into its data.
        column: name of the column to read from ``batch``.
        row: row index within that column.

    Returns:
        The decoded text as a ``str``.
    """
    char_codes = file[batch[column][row, 0]]
    decoded = [chr(code[0]) for code in char_codes]
    return ''.join(decoded)

def extract_features(batch, file, start_from=0, eol_capacity=0.88):
    """Build the per-cell feature table for one batch.

    For every row of ``batch['summary']`` this reads the summary and per-cycle
    data through ``file`` and fills one row of the returned frame.

    Parameters:
        batch: the 'batch' group of an opened data file; its 'policy',
            'summary' and 'cycles' columns hold references into ``file``.
        file: the opened data file used to dereference those references.
        start_from: label of the first row in the returned frame; rows are
            labelled ``start_from .. start_from + n - 1``.
        eol_capacity: QDischarge threshold used to determine ``cycle_life``
            (default 0.88, the value previously hard-coded).

    Returns:
        pandas.DataFrame with one row per cell and the columns
        'policy', 'barcode', 'cycle_life', 'QD2', 'QD(Max-2)', 'QD100',
        'QDiffMin', 'QDiffMean', 'QDiffVar', 'QDiffSkew', 'QDiffKurtosis',
        'QDiffStart', 'R3Coef', 'R3Intercept', 'R1Coef', 'R1Intercept',
        'QDiffLinVar'.

    Notes:
        Indexes 9 and 99 of the per-cycle data and indexes up to 99 of
        QDischarge are read unconditionally, so a cell with fewer than 100
        recorded cycles raises an indexing error.
    """
    num_cells_batch = batch['summary'].shape[0]
    # Label the rows start_from .. start_from + n - 1 so they match the
    # c_index used for every write below. With a fixed 0-based index a
    # non-zero start_from would leave the zero-filled placeholder rows in
    # place and append the real rows after them.
    batch_feats = pd.DataFrame(
        0.0,
        index=np.arange(start_from, start_from + num_cells_batch),
        columns=(
            'policy', 'barcode', 'cycle_life', 'QD2', 'QD(Max-2)', 'QD100',
            'QDiffMin', 'QDiffMean', 'QDiffVar', 'QDiffSkew', 'QDiffKurtosis',
            'QDiffStart', 'R3Coef', 'R3Intercept', 'R1Coef', 'R1Intercept',
            'QDiffLinVar'
        )
    )
    # The text columns must not stay float so that strings can be stored.
    batch_feats['policy'] = batch_feats['policy'].astype(str)
    batch_feats['barcode'] = batch_feats['barcode'].astype(str)
    for i in range(num_cells_batch):
        c_index = start_from + i
        batch_feats.at[c_index, 'policy'] = extract_string(batch, file, 'policy', i)
        # barcode is encoded in MATLAB proprietary string format
        # Mathworks Inc have not disclosed how this encoding works
        # so no open source library is able to load it
        batch_feats.at[c_index, 'barcode'] = "???"

        summary = file[batch['summary'][i,0]]
        cycles = file[batch['cycles'][i,0]]

        # Read the discharge-capacity series once; every capacity feature
        # below slices this in-memory array instead of re-reading the file.
        qd = summary['QDischarge'][0]

        # cycle_life: 1-based position of the first value below the threshold
        # when the final value is below it, otherwise the number of cycles.
        if qd[-1] < eol_capacity:
            batch_feats.at[c_index, 'cycle_life'] = np.argmax(
                qd < eol_capacity
            ) + 1
        else:
            batch_feats.at[c_index, 'cycle_life'] = cycles['V'].shape[0]

        # capacity features
        batch_feats.at[c_index,'QD2'] = qd[1]
        batch_feats.at[c_index,'QD(Max-2)'] = max(
            qd[0:99]
        ) - batch_feats.at[c_index,'QD2']
        batch_feats.at[c_index,'QD100'] = qd[99]

        # Q features: difference between the Qdlin curves at index 99 and 9
        Q10 = np.hstack((file[cycles['Qdlin'][9,0]]))
        Q100 = np.hstack((file[cycles['Qdlin'][99,0]]))
        Qdiff = Q100 - Q10

        # Each statistic is stored as log10 of its absolute value.
        batch_feats.at[c_index,'QDiffMin'] = np.log10(np.abs(np.min(Qdiff)))
        batch_feats.at[c_index,'QDiffMean'] = np.log10(np.abs(np.mean(Qdiff)))
        batch_feats.at[c_index,'QDiffVar'] = np.log10(np.abs(np.var(Qdiff)))
        batch_feats.at[c_index,'QDiffSkew'] = np.log10(np.abs(scipy.stats.skew(Qdiff)))
        batch_feats.at[c_index,'QDiffKurtosis'] = np.log10(np.abs(scipy.stats.kurtosis(Qdiff)))
        batch_feats.at[c_index,'QDiffStart'] = np.log10(np.abs(Qdiff[0]))

        # Linear fit of discharge capacity against cycle number 2..99.
        # https://stackoverflow.com/questions/46779605/in-the-linearregression-method-in-sklearn-what-exactly-is-the-fit-intercept-par
        R3 = linear_model.LinearRegression(fit_intercept=True)
        R3.fit(
            np.arange(2,100).reshape(-1,1),
            qd[1:99]
        )
        batch_feats.at[c_index,'R3Coef'] = R3.coef_[0]
        batch_feats.at[c_index,'R3Intercept'] = R3.intercept_

        # Linear fit of discharge capacity against cycle number 91..99.
        R1 = linear_model.LinearRegression(fit_intercept=True)
        R1.fit(
            np.arange(91,100).reshape(-1,1),
            qd[90:99]
        )
        batch_feats.at[c_index,'R1Coef'] = R1.coef_[0]
        batch_feats.at[c_index,'R1Intercept'] = R1.intercept_

        # Linearly Interpolated Var(QD100-QD10)
        # NOTE(review): VQlinspace2 is defined outside this notebook; only the
        # first element of its return value is used here - confirm its
        # contract against VQlinspace2.ipynb.
        Q10Lin, _ = VQlinspace2(
            file[cycles['I'][9,0]][0],
            file[cycles['V'][9,0]][0],
            file[cycles['t'][9,0]][0]
        )
        Q100Lin, _ = VQlinspace2(
            file[cycles['I'][99,0]][0],
            file[cycles['V'][99,0]][0],
            file[cycles['t'][99,0]][0]
        )
        QDiffLin = Q100Lin - Q10Lin
        batch_feats.at[c_index,'QDiffLinVar'] = np.log10(np.abs(np.var(
            QDiffLin
        )))

    return batch_feats

In [None]:
# Feature table for batch 1, plus the number of rows it contains.
batch1_feats = extract_features(batch=batch1, file=f1)
numbat1 = len(batch1_feats)
batch1_feats

In [None]:
# Feature table for batch 2.
batch2_feats = extract_features(batch=batch2, file=f2)
batch2_feats

In [None]:
# Batch-2 rows 7, 8, 9, 15 and 16 are batteries continuing from batch 1;
# remove them in place and record how many batch-2 rows remain.
batch2_feats.drop(index=[7, 8, 9, 15, 16], inplace=True)
numbat2 = len(batch2_feats)

In [None]:
# Stack the two per-batch tables (batch 1 rows first) and total the counts.
batch_combined = pd.concat([batch1_feats, batch2_feats], axis=0)
num_cells = sum((numbat1, numbat2))
batch_combined

In [None]:
# Remove the batteries that do not finish in Batch 1.
# batch1_feats and batch2_feats both carry 0-based row labels, so in the
# concatenated frame a label such as 10 matches one row from each batch.
# Dropping by label on the combined frame would therefore also delete
# unrelated batch-2 cells and leave the counters out of sync with the data.
# Drop from the batch-1 table only, then rebuild the combined table; this also
# makes the cell safe to re-run.
unfinished_batch1 = [8, 10, 12, 13, 22]
batch_combined = pd.concat(
    (batch1_feats.drop(index=unfinished_batch1), batch2_feats)
)
numbat1 = batch1_feats.shape[0] - len(unfinished_batch1)
num_cells = numbat1 + numbat2

In [None]:
# Write the combined feature table to disk without the row labels.
batch_combined.to_csv(path_or_buf="./features_combined.csv", index=False)

### Test Sketches
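Scratch notes exploring how the `barcode` field is stored in the MATLAB file and whether it can be decoded as text.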

In [None]:
# Dereference the first entry of batch 1's 'barcode' column.
# Despite its name, policy_n holds barcode data here, not a policy.
policy_n = f1[batch1['barcode'][0,0]]

In [None]:
# Show the MATLAB_class attribute recorded on the barcode dataset.
policy_n.attrs['MATLAB_class']

In [None]:
# Peek at the first row of the raw barcode data.
policy_n[0]

In [None]:
# Try decoding the barcode data the same way extract_string does: chr() of the
# first element of each row, joined into a single string.
strlist = [''.join([chr(row[0]) for row in policy_n])]
strlist