# Libraries

In [1]:
import os

import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import scipy.io

# Load B0018 file

In [2]:
# File path
# The dataset location can be overridden through the BATTERY_DATA_FILE
# environment variable so the notebook can run on another machine; when the
# variable is not set, the original local path is used unchanged.
data_file_path = os.environ.get(
    'BATTERY_DATA_FILE',
    'C:/Users/ibrah/Desktop/Masters/Dissertation Progress/Datasets/Experimental Datasets/Battery Data Set/B0018.mat',
)

In [3]:
# Read the B0018 .mat file into memory
# `battery` is the key under which the battery record is looked up in `mat`
battery = 'B0018'
# simplify_cells=True makes scipy return a simplified dict structure
mat = scipy.io.loadmat(file_name=data_file_path, simplify_cells=True)

In [4]:
# Build one long dataframe with every sample of every charge and discharge cycle.
#
# Each kept cycle contributes one frame (one row per entry of its 'data' record)
# plus bookkeeping columns: cycle, cycle_type, cycle_idx, type,
# ambient_temperature and timestamp. The per-cycle frames are collected in a
# list and concatenated ONCE after the loop: calling pd.concat inside the loop
# re-copies everything accumulated so far on every iteration.

i = 0  # count all cycles
j = 0  # count only charge cycles
k = 0  # count only discharge cycles

all_cycles = mat[battery]['cycle']  # looked up once instead of on every access
cycle_frames = []                   # one frame per charge/discharge cycle

for idx, cycle in enumerate(all_cycles):

    cycle_kind = cycle['type']

    # any cycle that is neither a charge nor a discharge is skipped
    if cycle_kind not in ('charge', 'discharge'):
        continue

    i += 1

    # creates a string with the cycle timestamp; the first five components
    # are truncated to int, the sixth (seconds) keeps its fractional part
    year, month, day, hour, minute = (int(part) for part in cycle['time'][:5])
    time_string = '{}-{}-{} {}:{}:{}'.format(
        year, month, day, hour, minute, str(cycle['time'][5])
    )

    # reads cycle data
    df_aux = pd.DataFrame(cycle['data'])

    if cycle_kind == 'charge':
        j += 1
        cycle_type_value = j

        # a charging cycle gets the (mean) Capacity of the next discharging
        # cycle, searched one and then two positions ahead; NaN if neither
        # position holds a discharge
        capacity_value = np.nan
        for lookahead in (idx + 1, idx + 2):
            if lookahead < len(all_cycles) and all_cycles[lookahead]['type'] == 'discharge':
                capacity_value = pd.DataFrame(all_cycles[lookahead]['data']).Capacity.mean()
                break

        df_aux = (df_aux
                    .assign(Capacity = capacity_value)
                    .rename(columns={'Current_charge': 'Current', 'Voltage_charge': 'Voltage'})
                )
    else:
        # discharge cycle: only the column names need harmonising
        k += 1
        cycle_type_value = k
        df_aux = df_aux.rename(columns={'Current_load': 'Current', 'Voltage_load': 'Voltage'})

    # create auxiliary columns with the remaining cycle information
    df_aux = df_aux.assign(
            cycle = i,
            cycle_idx = idx,
            cycle_type = cycle_type_value,
            type = cycle_kind,
            ambient_temperature = cycle['ambient_temperature'],
            timestamp = pd.to_datetime(time_string)
        )

    cycle_frames.append(df_aux)

# fail with a clear message instead of an obscure KeyError on the column selection below
if not cycle_frames:
    raise ValueError(
        'No charge/discharge cycles found for battery {!r}'.format(battery)
    )

# combine the data from all cycles in a single pass
df = pd.concat(cycle_frames, axis=0)

# rearrange the dataframe
# Starting from the indexes: the within-cycle row number becomes the 'index' column
df = df.reset_index()
# Columns ordering
cols = ['cycle', 'cycle_type', 'cycle_idx', 'index', 'type', 'ambient_temperature',
        'timestamp', 'Voltage_measured', 'Current_measured','Temperature_measured',
        'Current', 'Voltage', 'Time', 'Capacity']
df = df[cols]

In [5]:
# Preview the first five rows of the combined frame
df.head(n=5)

Unnamed: 0,cycle,cycle_type,cycle_idx,index,type,ambient_temperature,timestamp,Voltage_measured,Current_measured,Temperature_measured,Current,Voltage,Time,Capacity
0,1,1,0,0,charge,24,2008-07-07 12:26:45.750,3.865713,0.001014,23.735721,0.0,-0.007,0.0,1.855005
1,1,1,0,1,charge,24,2008-07-07 12:26:45.750,3.447651,-4.034445,23.743956,-4.036,1.553,2.484,1.855005
2,1,1,0,2,charge,24,2008-07-07 12:26:45.750,4.005559,1.517435,23.773723,1.507,4.721,5.109,1.855005
3,1,1,0,3,charge,24,2008-07-07 12:26:45.750,4.015989,1.514558,23.777077,1.507,4.737,7.562,1.855005
4,1,1,0,4,charge,24,2008-07-07 12:26:45.750,4.02323,1.517284,23.79271,1.507,4.743,10.062,1.855005


In [6]:
# Keep a single record per cycle (charge and discharge) by dropping the
# repeated combinations of the cycle-level columns
cycle_columns = ['cycle', 'cycle_type', 'type', 'timestamp']
cycles = df.filter(items=cycle_columns).drop_duplicates()
cycles

Unnamed: 0,cycle,cycle_type,type,timestamp
0,1,1,charge,2008-07-07 12:26:45.750
2816,2,1,discharge,2008-07-07 15:15:28.875
3182,3,2,charge,2008-07-07 18:20:14.250
6959,4,2,discharge,2008-07-07 21:53:06.125
7321,5,3,charge,2008-07-07 23:23:14.531
...,...,...,...,...
311208,262,130,discharge,2008-08-20 00:43:05.406
311410,263,133,charge,2008-08-20 02:06:37.718
312876,264,131,discharge,2008-08-20 05:02:00.296
313077,265,134,charge,2008-08-20 05:49:31.828


# Exploratory Data Analysis (EDA)

In [7]:
# Summary statistics (describe) for the cycle counter, ambient temperature,
# timestamp and every signal column
eda_columns = [
    'cycle', 'ambient_temperature', 'timestamp',
    'Voltage_measured', 'Current_measured', 'Temperature_measured',
    'Current', 'Voltage', 'Time', 'Capacity',
]
eda = df.loc[:, eda_columns].describe()

In [8]:
# Show the current contents of the `eda` summary-statistics table
eda

Unnamed: 0,cycle,ambient_temperature,timestamp,Voltage_measured,Current_measured,Temperature_measured,Current,Voltage,Time,Capacity
count,314676.0,314676.0,314676,314674.0,314674.0,314674.0,314676.0,314676.0,314676.0,312497.0
mean,114.52834,24.0,2008-07-27 12:07:19.933793024,4.076628,0.306064,25.913199,0.709639,4.017773,4722.178797,1.595307
min,1.0,24.0,2008-07-07 12:26:45.750000,2.278634,-4.508828,21.81017,-4.513,-0.007,0.0,1.341051
25%,45.0,24.0,2008-07-12 19:49:15.046000128,4.048386,0.036304,23.617034,0.058,4.224,1946.809,1.439271
50%,106.0,24.0,2008-07-31 00:11:53.156000,4.196494,0.15809,24.840642,0.306,4.284,4387.703,1.614007
75%,178.0,24.0,2008-08-08 02:32:53.249999872,4.199208,1.208289,27.520903,1.507,4.602,7426.613,1.731517
max,266.0,24.0,2008-08-20 08:37:19.515000,4.299586,1.536047,38.878688,1.999,4.996,10815.375,1.855005
std,77.04633,0.0,,0.241513,0.978067,2.989441,0.731413,1.1278,3111.160007,0.158633


In [9]:
# Statistics of the cycle_type counter, computed separately for charge and
# discharge rows, then flipped so each type becomes a column
cycle_type_stats = df.groupby('type')['cycle_type'].describe()
ct = cycle_type_stats.T
ct

type,charge,discharge
count,279810.0,34866.0
mean,57.870859,59.962657
std,38.949441,37.957008
min,1.0,1.0
25%,23.0,27.0
50%,54.0,57.0
75%,90.0,92.0
max,134.0,132.0


In [10]:
# Append the per-type cycle_type statistics (ct) as extra columns of eda.
# DataFrame.join defaults to a LEFT join on the index, so every row of eda
# (the describe() statistics) is kept and ct is aligned to it by row label.
# Columns added by an earlier run of this cell are dropped first, so running
# the cell again does not fail on overlapping column names.
eda = eda.drop(columns=ct.columns, errors='ignore').join(ct)
eda

Unnamed: 0,cycle,ambient_temperature,timestamp,Voltage_measured,Current_measured,Temperature_measured,Current,Voltage,Time,Capacity,charge,discharge
count,314676.0,314676.0,314676,314674.0,314674.0,314674.0,314676.0,314676.0,314676.0,312497.0,279810.0,34866.0
mean,114.52834,24.0,2008-07-27 12:07:19.933793024,4.076628,0.306064,25.913199,0.709639,4.017773,4722.178797,1.595307,57.870859,59.962657
min,1.0,24.0,2008-07-07 12:26:45.750000,2.278634,-4.508828,21.81017,-4.513,-0.007,0.0,1.341051,1.0,1.0
25%,45.0,24.0,2008-07-12 19:49:15.046000128,4.048386,0.036304,23.617034,0.058,4.224,1946.809,1.439271,23.0,27.0
50%,106.0,24.0,2008-07-31 00:11:53.156000,4.196494,0.15809,24.840642,0.306,4.284,4387.703,1.614007,54.0,57.0
75%,178.0,24.0,2008-08-08 02:32:53.249999872,4.199208,1.208289,27.520903,1.507,4.602,7426.613,1.731517,90.0,92.0
max,266.0,24.0,2008-08-20 08:37:19.515000,4.299586,1.536047,38.878688,1.999,4.996,10815.375,1.855005,134.0,132.0
std,77.04633,0.0,,0.241513,0.978067,2.989441,0.731413,1.1278,3111.160007,0.158633,38.949441,37.957008


In [11]:
# Number of records (rows) per cycle type
df.loc[:, 'type'].value_counts()

type
charge       279810
discharge     34866
Name: count, dtype: int64

In [12]:
# Flip the summary table so each variable is a row and each statistic a column
eda.T

Unnamed: 0,count,mean,min,25%,50%,75%,max,std
cycle,314676.0,114.52834,1.0,45.0,106.0,178.0,266.0,77.04633
ambient_temperature,314676.0,24.0,24.0,24.0,24.0,24.0,24.0,0.0
timestamp,314676.0,2008-07-27 12:07:19.933793024,2008-07-07 12:26:45.750000,2008-07-12 19:49:15.046000128,2008-07-31 00:11:53.156000,2008-08-08 02:32:53.249999872,2008-08-20 08:37:19.515000,
Voltage_measured,314674.0,4.076628,2.278634,4.048386,4.196494,4.199208,4.299586,0.241513
Current_measured,314674.0,0.306064,-4.508828,0.036304,0.15809,1.208289,1.536047,0.978067
Temperature_measured,314674.0,25.913199,21.81017,23.617034,24.840642,27.520903,38.878688,2.989441
Current,314676.0,0.709639,-4.513,0.058,0.306,1.507,1.999,0.731413
Voltage,314676.0,4.017773,-0.007,4.224,4.284,4.602,4.996,1.1278
Time,314676.0,4722.178797,0.0,1946.809,4387.703,7426.613,10815.375,3111.160007
Capacity,312497.0,1.595307,1.341051,1.439271,1.614007,1.731517,1.855005,0.158633


In [13]:
# Number of rows (records) per cycle; sort=False turns off ordering by frequency
df['cycle'].value_counts(sort=False)

cycle
1      2816
2       366
3      3777
4       362
5      3693
       ... 
262     202
263    1466
264     201
265    1399
266     200
Name: count, Length: 266, dtype: int64

# Save to CSV

In [None]:
# Write the combined frame to disk without the row index column
df.to_csv(path_or_buf="vars_b18.csv", index=False)