In [1]:
import os

import numpy as np
import pandas as pd
import scipy.io
import torch
import torch.optim as optim
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import MinMaxScaler

In [2]:
def extract_features(cycles, battery_name="BXXXX"):
    """Summarise every discharge cycle of one battery into a feature row.

    Parameters
    ----------
    cycles : iterable
        Cycle records. Each record is read as ``cycle["type"][0]`` for the
        cycle kind and, for discharge cycles only, ``cycle["data"][0, 0]`` for
        the measurements, which must expose ``Voltage_measured``,
        ``Current_measured``, ``Temperature_measured`` and ``Capacity``.
    battery_name : str, optional
        Label written to the ``battery`` column of every row.

    Returns
    -------
    pandas.DataFrame
        One row per discharge cycle with the columns ``battery``, ``cycle``,
        ``avg_voltage``, ``min_voltage``, ``max_voltage``, ``avg_current``,
        ``max_temp``, ``avg_temp``, ``capacity`` and ``RUL``. ``cycle`` is the
        1-based position of the record inside ``cycles`` (non-discharge
        records are counted too), and ``RUL`` is the largest discharge
        ``cycle`` minus the row's own ``cycle``. When ``cycles`` contains no
        discharge record an empty frame with the same columns is returned.
    """
    feature_columns = [
        "battery", "cycle",
        "avg_voltage", "min_voltage", "max_voltage",
        "avg_current", "max_temp", "avg_temp", "capacity",
    ]

    rows = []
    for i, cycle in enumerate(cycles, start=1):
        # Only discharge cycles carry the measurements used below.
        if cycle["type"][0] != "discharge":
            continue

        data = cycle["data"][0, 0]

        voltage = data["Voltage_measured"][0]
        current = data["Current_measured"][0]
        temp = data["Temperature_measured"][0]
        capacity = data["Capacity"][0][0]

        rows.append({
            "battery": battery_name,
            "cycle": i,
            "avg_voltage": np.mean(voltage),
            "min_voltage": np.min(voltage),
            "max_voltage": np.max(voltage),
            "avg_current": np.mean(current),
            "max_temp": np.max(temp),
            "avg_temp": np.mean(temp),
            "capacity": capacity,
        })

    # Without this guard an input with no discharge cycles builds a
    # column-less frame and the RUL computation fails with KeyError.
    if not rows:
        return pd.DataFrame(columns=feature_columns + ["RUL"])

    df = pd.DataFrame(rows)

    # RUL target: distance (in cycle index) to the last recorded discharge.
    failure_cycle = df["cycle"].max()
    df["RUL"] = failure_cycle - df["cycle"]

    return df


In [3]:
# Folder that holds the battery .mat files. It can be overridden with the
# BATTERY_DATA_DIR environment variable so the notebook is not tied to one
# machine; the original location is kept as the fallback.
data_dir = os.environ.get(
    "BATTERY_DATA_DIR",
    r"C:\Users\deban\Desktop\Imp Docs\KonoPowah\Dataset",
)
batteries = ["B0005.mat", "B0006.mat", "B0007.mat", "B0018.mat"]

all_dfs = []
for fname in batteries:
    path = os.path.join(data_dir, fname)
    mat = scipy.io.loadmat(path)

    # The top-level variable inside each file is named after the file itself,
    # e.g. "B0005" for "B0005.mat".
    key = os.path.splitext(fname)[0]
    battery = mat[key][0, 0]
    cycles = battery["cycle"][0]

    df_batt = extract_features(cycles, battery_name=key)
    all_dfs.append(df_batt)

# Single frame with the per-cycle features of every battery, re-indexed 0..n-1.
df_all = pd.concat(all_dfs, ignore_index=True)
print(df_all.head())


  battery  cycle  avg_voltage  min_voltage  max_voltage  avg_current  \
0   B0005      2     3.529829     2.612467     4.191492    -1.818702   
1   B0005      4     3.537320     2.587209     4.189773    -1.817560   
2   B0005      6     3.543737     2.651917     4.188187    -1.816487   
3   B0005      8     3.543666     2.592948     4.188461    -1.825589   
4   B0005     10     3.542343     2.547420     4.188299    -1.826114   

    max_temp   avg_temp  capacity  RUL  
0  38.982181  32.572328  1.856487  612  
1  39.033398  32.725235  1.846327  610  
2  38.818797  32.642862  1.835349  608  
3  38.762305  32.514876  1.835263  606  
4  38.665393  32.382349  1.834646  604  


In [4]:
# Render the combined per-cycle feature table as the cell's output.
df_all

Unnamed: 0,battery,cycle,avg_voltage,min_voltage,max_voltage,avg_current,max_temp,avg_temp,capacity,RUL
0,B0005,2,3.529829,2.612467,4.191492,-1.818702,38.982181,32.572328,1.856487,612
1,B0005,4,3.537320,2.587209,4.189773,-1.817560,39.033398,32.725235,1.846327,610
2,B0005,6,3.543737,2.651917,4.188187,-1.816487,38.818797,32.642862,1.835349,608
3,B0005,8,3.543666,2.592948,4.188461,-1.825589,38.762305,32.514876,1.835263,606
4,B0005,10,3.542343,2.547420,4.188299,-1.826114,38.665393,32.382349,1.834646,604
...,...,...,...,...,...,...,...,...,...,...
631,B0018,309,3.455229,2.347109,4.186282,-1.782780,38.847490,31.818465,1.362737,10
632,B0018,311,3.461974,2.488429,4.185356,-1.772521,38.802834,32.000672,1.363405,8
633,B0018,314,3.452222,2.399218,4.181407,-1.769870,38.726991,31.739922,1.351865,5
634,B0018,317,3.447650,2.278634,4.185252,-1.788485,38.671920,31.526136,1.354797,2


In [5]:
# Integer code for each battery identifier, numbered in the order listed.
battery_mapping = {
    name: code
    for code, name in enumerate(('B0005', 'B0006', 'B0007', 'B0018'))
}
# Store the integer code next to the string identifier.
df_all['battery_idx'] = df_all['battery'].map(battery_mapping)

In [6]:
def normalize_cycles(group):
    """Add a ``t_normalized`` column that rescales ``cycle`` to [0, 1].

    Parameters
    ----------
    group : pandas.DataFrame
        Rows of one battery; must contain a ``cycle`` column.

    Returns
    -------
    pandas.DataFrame
        A copy of ``group`` (the input is not modified) with
        ``t_normalized = (cycle - min) / (max - min)``. When every row has the
        same ``cycle`` value the column is 0.0 rather than the NaN that 0/0
        would produce.
    """
    group = group.copy()
    first_cycle = group['cycle'].min()
    span = group['cycle'].max() - first_cycle
    elapsed = group['cycle'] - first_cycle

    if span == 0:
        # Single distinct cycle: there is no range to scale over, so pin the
        # normalised time to the start instead of dividing by zero.
        group['t_normalized'] = elapsed * 0.0
    else:
        group['t_normalized'] = elapsed / span
    return group

# Normalise each battery on its own and stitch the pieces back together.
# The groups are iterated explicitly instead of using groupby(...).apply(...):
# apply operating on the grouping column is deprecated in recent pandas, and
# once grouping columns are excluded the 'battery' column would be lost.
# Row order matches the previous result (groups in sorted key order).
df_all = pd.concat(
    [normalize_cycles(group) for _, group in df_all.groupby('battery')],
    ignore_index=True,
)

  df_all = df_all.groupby('battery').apply(normalize_cycles).reset_index(drop=True)


In [7]:
# Keep only the model inputs and target, giving the physical quantities
# descriptive names.
df_model = df_all[
    ['battery_idx', 't_normalized', 'avg_temp', 'avg_current', 'capacity']
].rename(columns={
    'avg_temp': 'T_celsius',
    'avg_current': 'I_amperes',
    'capacity': 'C_target',
})

# One min-max scaler per quantity so each can be inverted independently.
# NOTE(review): both scalers are fitted on every row of df_model; if a
# held-out split is taken from df_model afterwards, its min/max values have
# already influenced the scaling. Confirm this is acceptable.
scaler_T, scaler_I = MinMaxScaler(), MinMaxScaler()

df_model['T_celsius_norm'] = scaler_T.fit_transform(df_model[['T_celsius']])
df_model['I_amperes_norm'] = scaler_I.fit_transform(df_model[['I_amperes']])

print("Model-ready data shape:", df_model.shape)
print(df_model.head())

Model-ready data shape: (636, 7)
   battery_idx  t_normalized  T_celsius  I_amperes  C_target  T_celsius_norm  \
0            0      0.000000  32.572328  -1.818702  1.856487        0.562840   
1            0      0.003268  32.725235  -1.817560  1.846327        0.597688   
2            0      0.006536  32.642862  -1.816487  1.835349        0.578915   
3            0      0.009804  32.514876  -1.825589  1.835263        0.549747   
4            0      0.013072  32.382349  -1.826114  1.834646        0.519543   

   I_amperes_norm  
0        0.375020  
1        0.377386  
2        0.379609  
3        0.360754  
4        0.359667  


In [8]:
# 80/20 split with a fixed seed; stratifying on the battery index keeps every
# battery represented in both partitions.
train_data, test_data = train_test_split(
    df_model,
    test_size=0.2,
    random_state=42,
    stratify=df_model['battery_idx'],
)

print(
    f"Training samples: {len(train_data)}",
    f"Test samples: {len(test_data)}",
    sep="\n",
)

# Persist both partitions without the index column.
train_data.to_csv('train_data.csv', index=False)
test_data.to_csv('test_data.csv', index=False)

# Per-battery row counts, to check the stratification.
print(
    "\nTraining set battery distribution:",
    train_data['battery_idx'].value_counts().sort_index(),
    sep="\n",
)
print(
    "\nTest set battery distribution:",
    test_data['battery_idx'].value_counts().sort_index(),
    sep="\n",
)

Training samples: 508
Test samples: 128

Training set battery distribution:
battery_idx
0    134
1    134
2    134
3    106
Name: count, dtype: int64

Test set battery distribution:
battery_idx
0    34
1    34
2    34
3    26
Name: count, dtype: int64
