# Libraries

In [1]:
import os

import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import scipy.io

## Load B0005 file

In [2]:
# File path
# Location of the B0005 .mat file. Set the B0005_MAT_PATH environment variable
# to point at a local copy of the file; when it is not set, the original
# hard-coded location is used so existing setups keep working unchanged.
data_file_path = os.environ.get(
    'B0005_MAT_PATH',
    'C:/Users/ibrah/Desktop/Masters/Dissertation Progress/Datasets/Experimental Datasets/Battery Data Set/B0005.mat',
)

In [3]:
# Name of the top-level entry of the .mat file that holds this battery's data
battery = 'B0005'
# Read the MATLAB file; simplify_cells=True asks scipy for a simplified
# dict structure instead of nested MATLAB struct/cell arrays
mat = scipy.io.loadmat(file_name=data_file_path, simplify_cells=True)

In [4]:
# Build one long DataFrame holding every sample of every charge and discharge
# cycle of the battery. Cycles of any other type are skipped.

raw_cycles = mat[battery]['cycle']

n_cycles = 0     # running count of charge + discharge cycles
n_charge = 0     # running count of charge cycles only
n_discharge = 0  # running count of discharge cycles only

# Per-cycle frames are collected here and concatenated once after the loop.
# Calling pd.concat inside the loop re-copies every accumulated row on each
# iteration, which is quadratic in the number of cycles.
cycle_frames = []

for idx, cycle in enumerate(raw_cycles):

    cycle_kind = cycle['type']
    if cycle_kind not in ('charge', 'discharge'):
        continue

    n_cycles += 1

    # creates a string with the cycle timestamp: the first five entries of
    # 'time' are truncated to int, the sixth (seconds) keeps its fraction
    time_string = '{}-{}-{} {}:{}:{}'.format(
        *(int(part) for part in cycle['time'][:5]),
        str(cycle['time'][5])
    )

    # reads cycle data
    df_aux = pd.DataFrame(cycle['data'])

    if cycle_kind == 'charge':
        n_charge += 1
        cycle_type_value = n_charge

        # Fill the Capacity column of a charge cycle with the mean capacity of
        # the next discharge cycle, looking at most two entries ahead; NaN when
        # neither of the next two entries is a discharge.
        capacity_value = np.nan
        for nxt in (idx + 1, idx + 2):
            if nxt < len(raw_cycles) and raw_cycles[nxt]['type'] == 'discharge':
                capacity_value = pd.DataFrame(raw_cycles[nxt]['data']).Capacity.mean()
                break

        df_aux = (df_aux
                    .assign(Capacity=capacity_value)
                    .rename(columns={'Current_charge': 'Current', 'Voltage_charge': 'Voltage'})
                 )
    else:  # discharge
        n_discharge += 1
        cycle_type_value = n_discharge
        df_aux = df_aux.rename(columns={'Current_load': 'Current', 'Voltage_load': 'Voltage'})

    # create auxiliary columns with the remaining cycle information
    df_aux = df_aux.assign(
        cycle=n_cycles,                 # position among charge/discharge cycles
        cycle_idx=idx,                  # position in the raw cycle list
        cycle_type=cycle_type_value,    # position among cycles of the same type
        type=cycle_kind,
        ambient_temperature=cycle['ambient_temperature'],
        timestamp=pd.to_datetime(time_string),
    )

    cycle_frames.append(df_aux)

# Rearrange the dataframe.
# Column ordering; 'index' is produced by reset_index() below and holds the
# row position of each sample within its own cycle.
cols = ['cycle', 'cycle_type', 'cycle_idx', 'index', 'type', 'ambient_temperature',
        'timestamp', 'Voltage_measured', 'Current_measured', 'Temperature_measured',
        'Current', 'Voltage', 'Time', 'Capacity']

if cycle_frames:
    # combine the data of all cycles in a single pass
    df = pd.concat(cycle_frames, axis=0).reset_index()[cols]
else:
    # no charge/discharge cycle found: keep the expected schema, with no rows
    df = pd.DataFrame(columns=cols)

In [5]:
# Preview the first five rows of the assembled frame
df.head(n=5)

Unnamed: 0,cycle,cycle_type,cycle_idx,index,type,ambient_temperature,timestamp,Voltage_measured,Current_measured,Temperature_measured,Current,Voltage,Time,Capacity
0,1,1,0,0,charge,24,2008-04-02 13:08:17.921,3.873017,-0.001201,24.655358,0.0,0.003,0.0,1.856487
1,1,1,0,1,charge,24,2008-04-02 13:08:17.921,3.479394,-4.030268,24.66648,-4.036,1.57,2.532,1.856487
2,1,1,0,2,charge,24,2008-04-02 13:08:17.921,4.000588,1.512731,24.675394,1.5,4.726,5.5,1.856487
3,1,1,0,3,charge,24,2008-04-02 13:08:17.921,4.012395,1.509063,24.693865,1.5,4.742,8.344,1.856487
4,1,1,0,4,charge,24,2008-04-02 13:08:17.921,4.019708,1.511318,24.705069,1.5,4.753,11.125,1.856487


In [6]:
# Keep a single row per cycle (charge and discharge) by dropping repeated
# combinations of the cycle-level columns
cycles = (
    df
    .filter(items=['cycle', 'cycle_type', 'type', 'timestamp'])
    .drop_duplicates(keep='first')
)
cycles

Unnamed: 0,cycle,cycle_type,type,timestamp
0,1,1,charge,2008-04-02 13:08:17.921
789,2,1,discharge,2008-04-02 15:25:41.593
986,3,2,charge,2008-04-02 16:37:51.984
1926,4,2,discharge,2008-04-02 19:43:48.406
2122,5,3,charge,2008-04-02 20:55:40.812
...,...,...,...,...
583759,334,168,charge,2008-05-26 21:41:33.468
587251,335,167,discharge,2008-05-27 15:52:41.359
587549,336,169,charge,2008-05-27 17:53:59.765
591153,337,168,discharge,2008-05-27 20:45:42.125


## EDA

In [7]:
# Summary statistics (describe) for the selected columns of the full frame
eda = (
    df
    .loc[:, ['cycle', 'ambient_temperature',
             'timestamp', 'Voltage_measured', 'Current_measured', 'Temperature_measured',
             'Current', 'Voltage', 'Time', 'Capacity']]
    .describe()
)

In [8]:
# Display the summary-statistics table built in the previous cell
eda

Unnamed: 0,cycle,ambient_temperature,timestamp,Voltage_measured,Current_measured,Temperature_measured,Current,Voltage,Time,Capacity
count,591458.0,591458.0,591458,591458.0,591458.0,591458.0,591458.0,591458.0,591458.0,591453.0
mean,189.704094,24.0,2008-05-09 02:17:02.448848128,4.103945,0.369404,26.369701,0.634476,4.02493,4763.856934,1.535012
min,1.0,24.0,2008-04-02 13:08:17.921000,0.003365,-4.47966,23.214802,-4.468,0.0,0.0,1.287453
25%,119.0,24.0,2008-05-01 14:41:41.577999872,4.089426,0.040636,24.488485,0.057,4.24,1928.17575,1.370513
50%,191.0,24.0,2008-05-10 16:11:56.108999936,4.20514,0.171961,25.47963,0.261,4.305,4467.5155,1.516957
75%,264.0,24.0,2008-05-18 23:53:33.780999936,4.205908,1.259778,27.348768,1.498,4.656,7503.824,1.700311
max,338.0,24.0,2008-05-28 11:09:42.046000,8.393141,1.531301,41.450232,1.9984,5.002,10807.328,1.856487
std,86.25014,0.0,,0.213469,0.907558,2.772424,0.737029,1.208825,3147.859026,0.174426


In [9]:
# Descriptive statistics of the cycle_type counter, computed separately for
# charge and discharge rows; transposed so each cycle type becomes a column
ct = (
    df
    .groupby('type')['cycle_type']
    .describe()
    .T
)
ct

type,charge,discharge
count,541173.0,50285.0
mean,96.606471,88.125942
std,42.702483,45.699687
min,1.0,1.0
25%,62.0,50.0
50%,97.0,88.0
75%,133.0,127.0
max,170.0,168.0


In [10]:
# Left join (the DataFrame.join default) of ct onto eda, aligned on the
# statistic name in the index, adding the 'charge' and 'discharge' columns.
# Any copy of those columns already present in eda is dropped first so that
# re-running this cell does not fail on overlapping column names.
eda = eda.drop(columns=ct.columns, errors='ignore').join(ct)
eda


Unnamed: 0,cycle,ambient_temperature,timestamp,Voltage_measured,Current_measured,Temperature_measured,Current,Voltage,Time,Capacity,charge,discharge
count,591458.0,591458.0,591458,591458.0,591458.0,591458.0,591458.0,591458.0,591458.0,591453.0,541173.0,50285.0
mean,189.704094,24.0,2008-05-09 02:17:02.448848128,4.103945,0.369404,26.369701,0.634476,4.02493,4763.856934,1.535012,96.606471,88.125942
min,1.0,24.0,2008-04-02 13:08:17.921000,0.003365,-4.47966,23.214802,-4.468,0.0,0.0,1.287453,1.0,1.0
25%,119.0,24.0,2008-05-01 14:41:41.577999872,4.089426,0.040636,24.488485,0.057,4.24,1928.17575,1.370513,62.0,50.0
50%,191.0,24.0,2008-05-10 16:11:56.108999936,4.20514,0.171961,25.47963,0.261,4.305,4467.5155,1.516957,97.0,88.0
75%,264.0,24.0,2008-05-18 23:53:33.780999936,4.205908,1.259778,27.348768,1.498,4.656,7503.824,1.700311,133.0,127.0
max,338.0,24.0,2008-05-28 11:09:42.046000,8.393141,1.531301,41.450232,1.9984,5.002,10807.328,1.856487,170.0,168.0
std,86.25014,0.0,,0.213469,0.907558,2.772424,0.737029,1.208825,3147.859026,0.174426,42.702483,45.699687


In [11]:
# Number of rows recorded for each cycle type
df.loc[:, 'type'].value_counts()

type
charge       541173
discharge     50285
Name: count, dtype: int64

In [None]:
# Show the statistics table with one row per variable
eda.T

Unnamed: 0,count,mean,min,25%,50%,75%,max,std
cycle,591458.0,189.704094,1.0,119.0,191.0,264.0,338.0,86.25014
ambient_temperature,591458.0,24.0,24.0,24.0,24.0,24.0,24.0,0.0
timestamp,591458.0,2008-05-09 02:17:02.448848128,2008-04-02 13:08:17.921000,2008-05-01 14:41:41.577999872,2008-05-10 16:11:56.108999936,2008-05-18 23:53:33.780999936,2008-05-28 11:09:42.046000,
Voltage_measured,591458.0,4.103945,0.003365,4.089426,4.20514,4.205908,8.393141,0.213469
Current_measured,591458.0,0.369404,-4.47966,0.040636,0.171961,1.259778,1.531301,0.907558
Temperature_measured,591458.0,26.369701,23.214802,24.488485,25.47963,27.348768,41.450232,2.772424
Current,591458.0,0.634476,-4.468,0.057,0.261,1.498,1.9984,0.737029
Voltage,591458.0,4.02493,0.0,4.24,4.305,4.656,5.002,1.208825
Time,591458.0,4763.856934,0.0,1928.17575,4467.5155,7503.824,10807.328,3147.859026
Capacity,591453.0,1.535012,1.287453,1.370513,1.516957,1.700311,1.856487,0.174426


In [13]:
# Number of rows recorded for each cycle, without sorting by frequency
df['cycle'].value_counts(sort=False)

cycle
1       789
2       197
3       940
4       196
5       937
       ... 
334    3492
335     298
336    3604
337     300
338       5
Name: count, Length: 338, dtype: int64

# Save to CSV

In [None]:
# Write the assembled frame to disk, omitting the row index
df.to_csv(path_or_buf="vars_b5.csv", index=False)