In [1]:
%pip install numpy pandas matplotlib scipy
from pathlib import Path

import numpy as np
import pandas as pd
import scipy
import scipy.io
from matplotlib import pyplot as plt

# Directory holding the raw per-battery MATLAB files; each battery is read from
# `<battery_id>.mat` inside it. The path is relative, so it resolves against the
# kernel's current working directory.
RAW_DIR = Path('../data/Battery/1. BatteryAgingARC-FY08Q4')
# Output directory for the CSV files written by the processing cell.
PROCESSED_DIR = Path('../data/processed')
# Create the output directory (and any missing parents) up front; this is a
# no-op when it already exists, so re-running the cell is safe.
PROCESSED_DIR.mkdir(parents=True, exist_ok=True)

# Batteries to process. Each ID is used both as the .mat file stem and as the
# name of the variable looked up inside that file.
BATTERY_IDS = ['B0005', 'B0006', 'B0007', 'B0018']

Note: you may need to restart the kernel to use updated packages.



[notice] A new release of pip available: 22.3 -> 26.0.1
[notice] To update, run: python.exe -m pip install --upgrade pip


In [3]:
from pathlib import Path

def _as_1d_floats(values) -> np.ndarray:
    """Return *values* as a flat float array (a scalar becomes a length-1 array)."""
    return np.atleast_1d(np.asarray(values, dtype=float)).ravel()


def extract_discharge_cycles(mat_path: Path, battery_id: str) -> pd.DataFrame:
    """Summarise every discharge cycle stored in one battery ``.mat`` file.

    The file is loaded with ``simplify_cells=True`` and the cycles are read
    from ``mat[battery_id]['cycle']``. Cycles whose ``type`` is not
    ``'discharge'`` are skipped; the remaining ones are numbered from 1 in
    the order they appear in the file.

    Parameters
    ----------
    mat_path : Path
        Location of the MATLAB file to read.
    battery_id : str
        Name of the top-level variable inside the file. It is also copied
        into the ``battery_id`` column of every row.

    Returns
    -------
    pd.DataFrame
        One row per discharge cycle with the columns ``battery_id``,
        ``cycle_index``, ``discharge_capacity`` (first ``Capacity`` value),
        ``max_temperature`` (maximum of ``Temperature_measured``),
        ``avg_voltage_load`` (mean of ``Voltage_measured``) and
        ``time_to_discharge`` (last ``Time`` sample). A measurement that is
        missing or empty is reported as NaN. When the file holds no
        discharge cycles the frame has these columns and zero rows.

    Raises
    ------
    KeyError
        If ``battery_id`` is not a variable in the file, or a cycle lacks
        one of the ``type``, ``data``, ``Temperature_measured``,
        ``Voltage_measured`` or ``Time`` fields.
    """
    columns = [
        'battery_id',
        'cycle_index',
        'discharge_capacity',
        'max_temperature',
        'avg_voltage_load',
        'time_to_discharge',
    ]

    mat = scipy.io.loadmat(mat_path, simplify_cells=True)
    cycles = mat[battery_id]['cycle']

    # The loader squeezes singleton dimensions, so a file holding exactly one
    # cycle yields a single dict instead of a list of dicts.
    if isinstance(cycles, dict):
        cycles = [cycles]

    discharge_cycles = (cycle for cycle in cycles if cycle['type'] == 'discharge')

    records = []
    for discharge_index, cycle in enumerate(discharge_cycles, start=1):
        data = cycle['data']

        # Squeezing also turns one-sample signals into scalars and leaves
        # unrecorded values as empty arrays; normalise both to 1-D arrays so
        # every column ends up holding plain floats.
        capacity = _as_1d_floats(data.get('Capacity', np.nan))
        temperature = _as_1d_floats(data['Temperature_measured'])
        voltage = _as_1d_floats(data['Voltage_measured'])
        elapsed = _as_1d_floats(data['Time'])

        records.append({
            'battery_id':        battery_id,
            'cycle_index':       discharge_index,
            'discharge_capacity': capacity[0] if capacity.size else np.nan,
            'max_temperature':   temperature.max() if temperature.size else np.nan,
            'avg_voltage_load':  voltage.mean() if voltage.size else np.nan,
            'time_to_discharge': elapsed[-1] if elapsed.size else np.nan,
        })

    # Passing the column list keeps the schema stable even with zero rows.
    return pd.DataFrame(records, columns=columns)

In [20]:
# Convert each battery's raw file into its own CSV and keep the frames so a
# combined table can be exported afterwards.
dfs = []

for battery_id in BATTERY_IDS:
    mat_path = RAW_DIR / f'{battery_id}.mat'
    df = extract_discharge_cycles(mat_path, battery_id)

    # State of health: every cycle's capacity expressed as a fraction of the
    # capacity measured on this battery's first discharge cycle.
    capacity = df['discharge_capacity']
    df['soh'] = capacity / capacity.iloc[0]

    csv_path = PROCESSED_DIR / f'{battery_id}.csv'
    df.to_csv(csv_path, index=False)
    dfs.append(df)
    print(f'{battery_id}: {len(df)} discharge cycles')

# Stack the per-battery frames under a fresh 0..n-1 index and persist them.
all_batteries_df = pd.concat(dfs, ignore_index=True)
combined_csv_path = PROCESSED_DIR / 'all_batteries.csv'
all_batteries_df.to_csv(combined_csv_path, index=False)

all_batteries_df.head()


B0005: 168 discharge cycles
B0006: 168 discharge cycles
B0007: 168 discharge cycles
B0018: 132 discharge cycles


Unnamed: 0,battery_id,cycle_index,discharge_capacity,max_temperature,avg_voltage_load,time_to_discharge,soh
0,B0005,1,1.856487,38.982181,3.529829,3690.234,1.0
1,B0005,2,1.846327,39.033398,3.53732,3672.344,0.994527
2,B0005,3,1.835349,38.818797,3.543737,3651.641,0.988614
3,B0005,4,1.835263,38.762305,3.543666,3631.563,0.988567
4,B0005,5,1.834646,38.665393,3.542343,3629.172,0.988235


In [18]:
# Read the combined CSV back from disk and summarise what was stored:
# column dtypes, overall shape, per-column null counts and basic statistics.
df = pd.read_csv(PROCESSED_DIR / 'all_batteries.csv')

column_types = df.dtypes
null_counts = df.isnull().sum()

print(column_types)
print(f'\nShape: {df.shape}')
print(f'Nulls:\n{null_counts}')
df.describe()

battery_id                str
cycle_index             int64
discharge_capacity    float64
max_temperature       float64
avg_voltage_load      float64
time_to_discharge     float64
soh                   float64
dtype: object

Shape: (636, 7)
Nulls:
battery_id            0
cycle_index           0
discharge_capacity    0
max_temperature       0
avg_voltage_load      0
time_to_discharge     0
soh                   0
dtype: int64


Unnamed: 0,cycle_index,discharge_capacity,max_temperature,avg_voltage_load,time_to_discharge,soh
count,636.0,636.0,636.0,636.0,636.0,636.0
mean,80.764151,1.581652,39.571456,3.498623,3116.977701,0.828482
std,47.137103,0.198765,1.438533,0.047532,242.197224,0.109306
min,1.0,1.153818,36.372088,3.403885,2742.843,0.566893
25%,40.0,1.421123,38.370834,3.46672,2891.99625,0.748067
50%,80.0,1.559695,39.671952,3.50252,3084.281,0.823178
75%,120.0,1.763486,40.870487,3.541059,3311.828,0.925262
max,168.0,2.035338,42.332522,3.573551,3690.234,1.0
