# Libraries

In [15]:
import os

import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import scipy.io

# Load B0007 file

In [16]:
# File path
# Location of the B0007.mat file. Set the BATTERY_DATA_FILE environment variable to load the
# file from somewhere else; when it is not set, the original hard-coded location is used, so
# existing runs behave exactly as before.
data_file_path = os.environ.get(
    'BATTERY_DATA_FILE',
    'C:/Users/ibrah/Desktop/Masters/Dissertation Progress/Datasets/Experimental Datasets/Battery Data Set/B0007.mat',
)

In [17]:
# Read the MATLAB file for battery B0007.
# simplify_cells=True asks scipy for a simplified dict structure, which is what lets the
# later cells index the result with plain keys such as mat[battery]['cycle'].
mat = scipy.io.loadmat(data_file_path, simplify_cells=True)

# Top-level key of the loaded file under which this battery's cycles are stored
battery = 'B0007'

In [18]:
# Create the dataframe with all of the battery's charge and discharge cycles.
#
# Every cycle whose type is 'charge' or 'discharge' contributes one row per sample of its
# 'data' record; cycles of any other type are skipped. Each row also carries:
#   cycle               - running number over all kept cycles (charge and discharge together)
#   cycle_type          - running number within the cycle's own type
#   cycle_idx           - position of the cycle in the raw cycle list
#   type                - 'charge' or 'discharge'
#   ambient_temperature - value stored with the cycle
#   timestamp           - cycle start time parsed from the cycle's 'time' vector
#   index               - row position inside the cycle (added by reset_index below)

raw_cycles = mat[battery]['cycle']  # looked up once instead of on every access
n_cycles = len(raw_cycles)

# The per-cycle frames are collected here and concatenated once after the loop.
# Concatenating inside the loop re-copies the whole accumulated frame on every
# iteration, which is quadratic in the number of rows.
cycle_frames = []

i = 0  # count all cycles
j = 0  # count only charge cycles
k = 0  # count only discharge cycles

for idx, cycle in enumerate(raw_cycles):

    cycle_kind = cycle['type']
    if cycle_kind not in ('charge', 'discharge'):
        continue

    i += 1

    # creates a string with the cycle timestamp
    # (the first five fields are truncated to integers, the seconds field keeps its fraction)
    time_string = '{}-{}-{} {}:{}:{}'.format(
        str(int(cycle['time'][0])),
        str(int(cycle['time'][1])),
        str(int(cycle['time'][2])),
        str(int(cycle['time'][3])),
        str(int(cycle['time'][4])),
        str(cycle['time'][5])
    )

    # reads cycle data
    df_aux = pd.DataFrame(cycle['data'])

    if cycle_kind == 'charge':
        j += 1
        cycle_type_value = j

        # A charging cycle gets the capacity of the next discharging cycle, if any.
        # Only the next two positions in the cycle list are checked; when neither is
        # a discharge, the capacity stays NaN.
        capacity_value = np.nan
        for next_idx in (idx + 1, idx + 2):
            if next_idx < n_cycles and raw_cycles[next_idx]['type'] == 'discharge':
                capacity_value = pd.DataFrame(raw_cycles[next_idx]['data']).Capacity.mean()
                break

        df_aux = (df_aux
                    .assign(Capacity = capacity_value)
                    .rename(columns={'Current_charge': 'Current', 'Voltage_charge': 'Voltage'})
                )
    else:
        # only 'discharge' can reach this branch because of the type check above
        k += 1
        cycle_type_value = k
        df_aux = df_aux.rename(columns={'Current_load': 'Current', 'Voltage_load': 'Voltage'})

    # create auxiliary columns with the remaining cycle information
    df_aux = df_aux.assign(
            cycle = i,
            cycle_idx = idx,
            cycle_type = cycle_type_value,
            type = cycle_kind,
            ambient_temperature = cycle['ambient_temperature'],
            timestamp = pd.to_datetime(time_string)
        )

    cycle_frames.append(df_aux)

if not cycle_frames:
    # Without this check the column selection below fails with an opaque KeyError.
    raise ValueError(
        "No 'charge' or 'discharge' cycles found for battery {!r}".format(battery)
    )

# combine the data from all cycles in a single pass
df = pd.concat(cycle_frames, axis=0)

# rearrange the dataframe
# Starting from the indexes: the per-cycle row position becomes the 'index' column
df = df.reset_index()
# Columns ordering
cols = ['cycle', 'cycle_type', 'cycle_idx', 'index', 'type', 'ambient_temperature',
        'timestamp', 'Voltage_measured', 'Current_measured','Temperature_measured',
        'Current', 'Voltage', 'Time', 'Capacity']
df = df[cols]

In [19]:
# Preview the first 10 rows of the assembled dataframe
df.head(n=10)

Unnamed: 0,cycle,cycle_type,cycle_idx,index,type,ambient_temperature,timestamp,Voltage_measured,Current_measured,Temperature_measured,Current,Voltage,Time,Capacity
0,1,1,0,0,charge,24,2008-04-02 13:08:17.921,3.866123,-0.00383,24.434244,-0.0006,0.002,0.0,1.891052
1,1,1,0,1,charge,24,2008-04-02 13:08:17.921,3.6449,-2.261867,24.441053,-2.2697,2.576,2.532,1.891052
2,1,1,0,2,charge,24,2008-04-02 13:08:17.921,4.001099,1.489161,24.445727,1.4995,4.719,5.5,1.891052
3,1,1,0,3,charge,24,2008-04-02 13:08:17.921,4.011041,1.491029,24.459603,1.4995,4.745,8.344,1.891052
4,1,1,0,4,charge,24,2008-04-02 13:08:17.921,4.017485,1.491413,24.458385,1.4995,4.745,11.125,1.891052
5,1,1,0,5,charge,24,2008-04-02 13:08:17.921,4.023051,1.489503,24.459908,1.4995,4.745,13.891,1.891052
6,1,1,0,6,charge,24,2008-04-02 13:08:17.921,4.027732,1.488795,24.470894,1.4995,4.757,16.672,1.891052
7,1,1,0,7,charge,24,2008-04-02 13:08:17.921,4.031854,1.489901,24.478263,1.4995,4.757,19.5,1.891052
8,1,1,0,8,charge,24,2008-04-02 13:08:17.921,4.036218,1.490722,24.478615,1.4995,4.77,22.282,1.891052
9,1,1,0,9,charge,24,2008-04-02 13:08:17.921,4.039863,1.488136,24.483268,1.4995,4.77,25.063,1.891052


In [20]:
# One record per cycle (charge and discharge): keep only the cycle-level columns
# and drop the rows that repeat them
cycles = df.loc[:, ['cycle', 'cycle_type', 'type', 'timestamp']].drop_duplicates()
cycles

Unnamed: 0,cycle,cycle_type,type,timestamp
0,1,1,charge,2008-04-02 13:08:17.921
789,2,1,discharge,2008-04-02 15:25:41.593
986,3,2,charge,2008-04-02 16:37:51.984
1926,4,2,discharge,2008-04-02 19:43:48.406
2122,5,3,charge,2008-04-02 20:55:40.812
...,...,...,...,...
583759,334,168,charge,2008-05-26 21:41:33.468
587251,335,167,discharge,2008-05-27 15:52:41.359
587549,336,169,charge,2008-05-27 17:53:59.765
591153,337,168,discharge,2008-05-27 20:45:42.125


# EDA

In [21]:
# Summary statistics for the cycle number, the timestamp and the measurement columns
eda = df.loc[:, [
    'cycle', 'ambient_temperature', 'timestamp',
    'Voltage_measured', 'Current_measured', 'Temperature_measured',
    'Current', 'Voltage', 'Time', 'Capacity',
]].describe()

In [22]:
# Summary statistics of the cycle_type counter, computed separately for charge and
# discharge rows and transposed so that each cycle type becomes a column
ct = df.groupby('type')['cycle_type'].describe().T
ct

type,charge,discharge
count,541173.0,50285.0
mean,96.606471,88.125942
std,42.702483,45.699687
min,1.0,1.0
25%,62.0,50.0
50%,97.0,88.0
75%,133.0,127.0
max,170.0,168.0


In [23]:
# Add the per-type cycle_type statistics (ct) to eda as extra columns.
# DataFrame.join defaults to a left join on the index, so every statistic row of eda is
# kept and ct is aligned to it by row label.
# Any ct columns already present in eda from an earlier run of this cell are dropped first,
# so re-running the cell no longer fails with "columns overlap but no suffix specified".
eda = eda.drop(columns=ct.columns, errors='ignore').join(ct)
eda

Unnamed: 0,cycle,ambient_temperature,timestamp,Voltage_measured,Current_measured,Temperature_measured,Current,Voltage,Time,Capacity,charge,discharge
count,591458.0,591458.0,591458,591458.0,591458.0,591458.0,591458.0,591458.0,591458.0,591453.0,541173.0,50285.0
mean,189.704094,24.0,2008-05-09 02:17:02.448848128,4.099742,0.386975,26.119363,0.678232,3.87444,4763.856934,1.610818,96.606471,88.125942
min,1.0,24.0,2008-04-02 13:08:17.921000,0.002932,-2.261867,22.969923,-2.2697,0.0,0.0,1.400455,1.0,1.0
25%,119.0,24.0,2008-05-01 14:41:41.577999872,4.053077,0.042134,24.264849,0.0681,4.25,1928.17575,1.482535,62.0,50.0
50%,191.0,24.0,2008-05-10 16:11:56.108999936,4.209444,0.197802,25.29919,0.3325,4.326,4467.5155,1.59041,97.0,88.0
75%,264.0,24.0,2008-05-18 23:53:33.780999936,4.212002,1.487893,27.051967,1.4995,4.669,7503.824,1.739646,133.0,127.0
max,338.0,24.0,2008-05-28 11:09:42.046000,8.332909,1.507169,42.332522,2.0,4.998,10807.328,1.891052,170.0,168.0
std,86.25014,0.0,,0.220993,0.927493,2.720203,0.749201,1.428302,3147.859026,0.144378,42.702483,45.699687


In [24]:
# Number of rows (records) for each cycle type
df.loc[:, 'type'].value_counts()

type
charge       541173
discharge     50285
Name: count, dtype: int64

In [None]:
# Transposed view of the summary: one row per column of eda, one column per statistic
eda.T

Unnamed: 0,count,mean,min,25%,50%,75%,max,std
cycle,591458.0,189.704094,1.0,119.0,191.0,264.0,338.0,86.25014
ambient_temperature,591458.0,24.0,24.0,24.0,24.0,24.0,24.0,0.0
timestamp,591458.0,2008-05-09 02:17:02.448848128,2008-04-02 13:08:17.921000,2008-05-01 14:41:41.577999872,2008-05-10 16:11:56.108999936,2008-05-18 23:53:33.780999936,2008-05-28 11:09:42.046000,
Voltage_measured,591458.0,4.099742,0.002932,4.053077,4.209444,4.212002,8.332909,0.220993
Current_measured,591458.0,0.386975,-2.261867,0.042134,0.197802,1.487893,1.507169,0.927493
Temperature_measured,591458.0,26.119363,22.969923,24.264849,25.29919,27.051967,42.332522,2.720203
Current,591458.0,0.678232,-2.2697,0.0681,0.3325,1.4995,2.0,0.749201
Voltage,591458.0,3.87444,0.0,4.25,4.326,4.669,4.998,1.428302
Time,591458.0,4763.856934,0.0,1928.17575,4467.5155,7503.824,10807.328,3147.859026
Capacity,591453.0,1.610818,1.400455,1.482535,1.59041,1.739646,1.891052,0.144378


In [26]:
# Number of rows (records) in each cycle, without sorting the result by count
df['cycle'].value_counts(sort=False)

cycle
1       789
2       197
3       940
4       196
5       937
       ... 
334    3492
335     298
336    3604
337     300
338       5
Name: count, Length: 338, dtype: int64

# Save to CSV

In [None]:
# Write the assembled dataframe to vars_b7.csv, leaving out the row index
df.to_csv(path_or_buf='vars_b7.csv', index=False)