# Processing the RPT profile of the TUM cycling dataset

In [2]:
import matplotlib.pyplot as plt
import matplotlib.image as mpimg
import matplotlib 
%matplotlib notebook
import sys
import numpy as np
import pandas as pd
import os
import pickle
import random
from os import listdir
import tensorflow as tf
from tqdm import tqdm
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestRegressor
from sklearn.metrics import mean_absolute_percentage_error, mean_absolute_error, mean_squared_error, r2_score
from sklearn.preprocessing import StandardScaler
# import keras_tuner as kt
from tensorflow import keras
from timeit import default_timer as timer
import seaborn as sns
from scipy.io import loadmat
from pykalman import KalmanFilter

In [8]:
CU_list = listdir('E:\Thesis CEVT\Dataset\TUM\CU_Cyclic')
CU_list.sort()
print(CU_list)

['CU000_cyc', 'CU001_cyc', 'CU002_cyc', 'CU003_cyc', 'CU004_cyc', 'CU005_cyc', 'CU006_cyc', 'CU007_cyc', 'CU008_cyc', 'CU009_cyc', 'CU010_cyc', 'CU011_cyc', 'CU012_cyc', 'CU013_cyc', 'CU014_cyc', 'CU015_cyc', 'CU016_cyc', 'CU017_cyc', 'CU018_cyc', 'CU019_cyc', 'CU020_cyc', 'CU021_cyc', 'CU022_cyc', 'CU023_cyc', 'CU024_cyc', 'CU025_cyc', 'CU026_cyc', 'CU027_cyc', 'CU028_cyc', 'CU029_cyc', 'CU030_cyc', 'CU031_cyc', 'CU032_cyc', 'CU033_cyc', 'CU034_cyc', 'CU035_cyc', 'CU036_cyc', 'CU037_cyc', 'CU038_cyc', 'CU039_cyc', 'CU040_cyc', 'CU041_cyc', 'CU042_cyc', 'CU043_cyc', 'CU044_cyc', 'CU045_cyc', 'CU046_cyc', 'CU047_cyc', 'CU048_cyc', 'CU049_cyc', 'CU050_cyc', 'CU051_cyc', 'CU052_cyc', 'CU053_cyc', 'CU054_cyc', 'CU055_cyc', 'CU056_cyc', 'CU057_cyc', 'CU058_cyc', 'CU059_cyc', 'CU060_cyc', 'CU061_cyc', 'CU062_cyc', 'CU063_cyc', 'CU064_cyc', 'CU065_cyc', 'CU066_cyc', 'CU067_cyc', 'CU068_cyc', 'CU069_cyc', 'CU070_cyc', 'CU071_cyc', 'CU072_cyc', 'CU073_cyc', 'CU074_cyc', 'CU075_cyc', 'CU076_cyc'

In [9]:
for CU_id in tqdm(CU_list):
    Cell_file = listdir(f'E:\Thesis CEVT\Dataset\TUM\CU_Cyclic\{CU_id}')

 99%|█████████▉| 82/83 [00:02<00:00, 28.93it/s]


NotADirectoryError: [WinError 267] The directory name is invalid: 'E:\\Thesis CEVT\\Dataset\\TUM\\CU_Cyclic\\out_data_CU000.pickle'

In [10]:
out_data = dict()
for CU_id in tqdm(CU_list):
    Cell_file = listdir(f'E:\Thesis CEVT\Dataset\TUM\CU_Cyclic\{CU_id}')
    for cell in Cell_file:
        if '.mat' in cell:
            file_path = f'E:\Thesis CEVT\Dataset\TUM\CU_Cyclic\{CU_id}\{cell}'
            mat_data = loadmat(file_path, squeeze_me=True, struct_as_record=False)
            columns_to_extract = [
                'Time', 'DataSet', 'tStep', 'Line', 'Command', 'U', 'I', 'Ah', 'AhStep', 'AhSet',
                 'Wh',  'T1', 'RAC', 'RDC', 'CycCount', 
                'State']
            dataset = mat_data.get('Dataset')
            data = {}
            for field in columns_to_extract:
                field_value = getattr(dataset, field, None)
                if field_value is not None:
                    if isinstance(field_value, np.ndarray):
                        data[field] = field_value.squeeze()
            df = pd.DataFrame(data)
            if df['Time'].iloc[-1] < 100:
                df['Time'] = df['Time']*3600

            discharge_1 = df[(df['Command']=='Discharge') & (df['Line']==20) | (df['Line']==21)]
            discharge_2 = df[(df['Command']=='Discharge') & (df['Line']==27) | (df['Line']==28)]
            dis_cap_1 = -sum(discharge_1['Time'].diff().iloc[1:]*discharge_1['I'].iloc[1:])/3600
            dis_cap_2 = -sum(discharge_2['Time'].diff().iloc[1:]*discharge_2['I'].iloc[1:])/3600
            dis_cap = (dis_cap_1 + dis_cap_2) / 2
            charge_1 = df[(df['Command']=='Charge') & (df['Line']==16) | (df['Line']==17)]
            charge_2 = df[(df['Command']=='Charge') & (df['Line']==23) | (df['Line']==24)]
            cha_cap_1 = sum(charge_1['Time'].diff().iloc[1:]*charge_1['I'].iloc[1:])/3600
            cha_cap_2 = sum(charge_2['Time'].diff().iloc[1:]*charge_2['I'].iloc[1:])/3600
            cha_cap = (cha_cap_1 + cha_cap_2) / 2

            if f'{cell[7:10]}' in out_data.keys():
                out_data[f'{cell[7:10]}']['Capacity_dis'].append(dis_cap)
                out_data[f'{cell[7:10]}']['Capacity_cha'].append(cha_cap)
                out_data[f'{cell[7:10]}']['Current'].append(charge_2['I'].values)
                out_data[f'{cell[7:10]}']['Voltage'].append(charge_2['U'].values)
                out_data[f'{cell[7:10]}']['Temperature'].append(charge_2['T1'].values)
                out_data[f'{cell[7:10]}']['Time'].append(charge_2['Time'].values)
            else:
                out_data[f'{cell[7:10]}'] = dict()
                out_data[f'{cell[7:10]}']['Capacity_dis'] = [dis_cap]
                out_data[f'{cell[7:10]}']['Capacity_cha'] = [cha_cap]
                out_data[f'{cell[7:10]}']['Current'] = [charge_2['I'].values]
                out_data[f'{cell[7:10]}']['Voltage'] = [charge_2['U'].values]
                out_data[f'{cell[7:10]}']['Temperature'] = [charge_2['T1'].values]
                out_data[f'{cell[7:10]}']['Time'] = [charge_2['Time'].values]

  0%|          | 0/83 [00:12<?, ?it/s]


KeyboardInterrupt: 

In [22]:
print(cell)

BW-VTC-246_2532_CU_cyc_002_BW-VTC-CYC.mat


In [3]:
out_data.keys()

NameError: name 'out_data' is not defined

In [24]:
len(out_data['236']['Capacity_dis'])

3

In [None]:
#for cell_id in out_data.keys():
#    plt.figure(figsize=(12, 8))
#    plt.plot(out_data[cell_id]['Capacity_dis'])
# plt.ylim(2, 2.5)
    
matplotlib.pyplot.close()

In [4]:
with open('E:\Thesis CEVT\Dataset\TUM\CU_Cyclic\out_data_CU000.pickle', 'wb') as handle:
    pickle.dump(out_data, handle, protocol=pickle.HIGHEST_PROTOCOL)

NameError: name 'out_data' is not defined

In [25]:
for cell_id in out_data.keys():
    cell_data = {
        'Voltage': out_data[cell_id]['Voltage'],
        'Current': out_data[cell_id]['Current'],
        'Temperature': out_data[cell_id]['Temperature'],
        'Capacity_dis': out_data[cell_id]['Capacity_dis'],
        'Capacity_cha': out_data[cell_id]['Capacity_cha'],
        'Time': out_data[cell_id]['Time']
    }
    # Example operation on cell_data, e.g., printing
    print(f"Data for cell ID {cell_id}: {cell_data}")

Data for cell ID 210: {'Voltage': [array([2.54739029, 2.58284749, 2.93360691, 3.01862795, 3.07619824,
       3.12118695, 3.15855045, 3.19057631, 3.21878957, 3.24395275,
       3.26663773, 3.28760704, 3.30667006, 3.32420803, 3.34060222,
       3.35585263, 3.36976863, 3.38273147, 3.39474117, 3.40598835,
       3.41609175, 3.42543262, 3.43420161, 3.44201744, 3.44907076,
       3.45574281, 3.46165234, 3.46698999, 3.47156511, 3.47537771,
       3.47861842, 3.48128725, 3.48376544, 3.48586237, 3.4879593 ,
       3.4898656 , 3.4917719 , 3.4936782 , 3.49577514, 3.49768144,
       3.49958774, 3.50168467, 3.50359097, 3.5056879 , 3.50778483,
       3.50969113, 3.51159744, 3.51369437, 3.51560067, 3.5176976 ,
       3.5196039 , 3.5215102 , 3.52360713, 3.52551343, 3.52741974,
       3.52951667, 3.53142297, 3.53332927, 3.53523557, 3.5373325 ,
       3.5392388 , 3.54133573, 3.54324203, 3.54514834, 3.54724527,
       3.5493422 , 3.5512485 , 3.55334543, 3.55544236, 3.55753929,
       3.55963623, 3.561923

In [None]:
len(cell_data)

In [None]:

voltage_data = np.array(out_data[cell_id]['Voltage'])  # Convert to NumPy array if it's not already
print((voltage_data < 4.0) & (voltage_data > 3.4))


In [1]:
colors = plt.cm.jet(np.linspace(0,0.5,len(out_data[cell_id]['Voltage'])))
for cell_id in out_data.keys():
    plt.figure(figsize=[10, 6])
    print(f"{cell_id} ICA analysis")

    for cycle_index, voltage in enumerate(out_data[cell_id]['Voltage']):
        current = out_data[cell_id]['Current'][cycle_index]
        time = out_data[cell_id]['Time'][cycle_index]

        # Calculate dQ and dV, ensure arrays are prepared properly
        time_diff = np.diff(time, prepend=time[0])
        dQ = current * time_diff / 3600  # Ah
        dV = np.diff(voltage, prepend=voltage[0])
        dq_dv = np.divide(dQ, dV, out=np.zeros_like(dQ), where=dV!=0)
        

        # Apply Kalman Filter to dQ/dV
        kf = KalmanFilter(initial_state_mean=0, n_dim_obs=1, n_dim_state=1,
                          transition_matrices=[1], observation_matrices=[1],
                          initial_state_covariance=1, observation_covariance=1,
                          transition_covariance=0.1)
        dq_dv_filtered, _ = kf.filter(dq_dv.reshape(-1, 1))

        # Ensure the voltage array is prepared for plotting
        # Trimming or adjusting voltage array based on filtered dQ/dV length
        voltage_for_plot = voltage[1:len(dq_dv_filtered.ravel())+1]

        if len(voltage_for_plot) != len(dq_dv_filtered.ravel()):
            # If lengths still don't match, further adjust to ensure exact match
            min_length = min(len(voltage_for_plot), len(dq_dv_filtered.ravel()))
            voltage_for_plot = voltage_for_plot[:min_length]
            dq_dv_filtered = dq_dv_filtered[:min_length]

        plt.plot(voltage_for_plot, dq_dv_filtered.ravel(), color=colors[cycle_index],label=f'Cycle {cycle_index+1}')


    plt.xlabel('Voltage (V)')
    plt.ylabel('dQ/dV (Filtered)')
    plt.title(f'ICA Analysis for Cell {cell_id}')
    plt.show()

NameError: name 'plt' is not defined

In [None]:
for cell_id in out_data.keys():
    for i, voltage in enumerate(out_data[cell_id]['Voltage']):
        plt.plot(voltage, label=f'Cycle {i}')
    #plt.legend()
    plt.xlabel('Time')
    plt.ylabel('Voltage')
    plt.title(f'Voltage over Time for Cell_id {cell_id}')
    plt.show()
else:
    print(f"No data available for cell ID: {cell_id}")




In [None]:
cell_id = '236'
for i in range(len(out_data[cell_id]['Current'])):
    plt.plot(out_data[cell_id]['Voltage'][i], label=f'{i}')
plt.legend()

In [None]:
file_path = f'/mimer/NOBACKUP/groups/yizhou_battery/Fast_diagnostic_Thesis/CU_Cyclic/CU003_cyc/BW-VTC-271_3054_CU_cyc_003_BW-VTC-CYC2.mat'
mat_data = loadmat(file_path, squeeze_me=True, struct_as_record=False)
columns_to_extract = [
    'Time', 'DataSet', 'tStep', 'Line', 'Command', 'U', 'I', 'Ah', 'AhStep', 'AhSet',
     'Wh',  'T1', 'RAC', 'RDC', 'CycCount', 
    'State']
dataset = mat_data.get('Dataset')
data = {}
for field in columns_to_extract:
    field_value = getattr(dataset, field, None)
    if field_value is not None:
        if isinstance(field_value, np.ndarray):
            data[field] = field_value.squeeze()
df = pd.DataFrame(data)
# df['Time'] = df['Time']*3600

In [None]:
print(df[(df['Command']=='Charge')]['Line'].unique())
print(df[(df['Command']=='Discharge')]['Line'].unique())

In [None]:
discharge_1 = df[(df['Command']=='Discharge') & (df['Line']==20) | (df['Line']==21)]
discharge_2 = df[(df['Command']=='Discharge') & (df['Line']==27) | (df['Line']==28)]

In [None]:
discharge_1['Time'].diff().iloc[1:]

In [None]:
dis_cap_1 = -sum(discharge_1['Time'].diff().iloc[1:]*discharge_1['I'].iloc[1:])/3600
dis_cap_2 = -sum(discharge_2['Time'].diff().iloc[1:]*discharge_2['I'].iloc[1:])/3600
print(f'The discharge capacity is: {(dis_cap_1+dis_cap_2)/2}Ah')

In [None]:
dis_cap_2

In [None]:
data = {}
for field in columns_to_extract:
    field_value = getattr(dataset, field, None)
    if field_value is not None:
        if isinstance(field_value, np.ndarray):
            data[field] = field_value.squeeze()
df = pd.DataFrame(data)

In [None]:
df['Time'] = df['Time']*3600

In [None]:
df['Command'].unique()

In [None]:
df[df['Command']=='Discharge']['Line'].unique()

In [None]:
discharge_1 = df[(df['Command']=='Discharge') & (df['Line']==20) | (df['Line']==21)]
discharge_2 = df[(df['Command']=='Discharge') & (df['Line']==27) | (df['Line']==28)]

In [None]:
dis_cap_1 = -sum(discharge_1['Time'].diff().iloc[1:]/3600*discharge_1['I'].iloc[1:])/3600
dis_cap_2 = -sum(discharge_2['Time'].diff().iloc[1:]/3600*discharge_2['I'].iloc[1:])/3600
dis_cap = (dis_cap_1 + dis_cap_2) / 2

In [None]:
dis_cap

In [None]:
plt.plot(df[(df['Command']=='Discharge') & (df['Line']==20) | (df['Line']==21)]['I'])

In [None]:
plt.plot(df[(df['Command']=='Discharge') & (df['Line']==21)]['I'])

In [None]:
plt.plot(df['U'])