In [7]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
import os
from sklearn.model_selection import train_test_split
from sklearn.metrics import mean_squared_error, mean_absolute_error, r2_score
from sklearn.preprocessing import MinMaxScaler
#!pip install tensorflow==2.12.0
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import LSTM, Dense, Dropout

In [2]:
Data_Path = 'D:\\Masrafe\\Coding\\Git_Hub_code\\ml_project\\SOC_Prediction\\Dataset\\Lithium-Ion Battery Drive Cycle Dataset\\2_preprocessed'

In [3]:
# Collect data from all CSVs
all_data = []
for folder in os.listdir(Data_Path):
    folder_path = os.path.join(Data_Path, folder)
    if os.path.isdir(folder_path):
        for file in os.listdir(folder_path):
            if file.endswith('.csv'):
                df = pd.read_csv(os.path.join(folder_path, file))
                all_data.append(df)

df_all = pd.concat(all_data, ignore_index=True)


In [4]:
# Use a subset for simplicity (you can expand to all data)
sequence_df = df_all[['Voltage', 'Current', 'Temperature', 'SOC']].copy()

# Normalize
scaler = MinMaxScaler()
sequence_df[:] = scaler.fit_transform(sequence_df)

# Create sequences (window of past 20 timesteps to predict next SOC)
def create_sequences(data, sequence_length=20):
    X, y = [], []
    for i in range(len(data) - sequence_length):
        X.append(data[i:i + sequence_length, :-1])  # features
        y.append(data[i + sequence_length, -1])     # SOC
    return np.array(X), np.array(y)

data_np = sequence_df.to_numpy()
X_lstm, y_lstm = create_sequences(data_np, sequence_length=20)

X_train_lstm, X_test_lstm, y_train_lstm, y_test_lstm = train_test_split(X_lstm, y_lstm, test_size=0.2, random_state=42)


In [5]:
model = Sequential()
model.add(LSTM(64, input_shape=(X_train_lstm.shape[1], X_train_lstm.shape[2]), return_sequences=False))
model.add(Dropout(0.2))
model.add(Dense(1))
model.compile(optimizer='adam', loss='mse')

# Train
model.fit(X_train_lstm, y_train_lstm, validation_split=0.2, epochs=10, batch_size=64)

# Predict and Evaluate
y_pred_lstm = model.predict(X_test_lstm).flatten()
print("LSTM RMSE:", np.sqrt(mean_squared_error(y_test_lstm, y_pred_lstm)))
print("LSTM R²:", r2_score(y_test_lstm, y_pred_lstm))


Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10
LSTM RMSE: 0.01617499361125265
LSTM R²: 0.9964573685445215


In [9]:
rmse = np.sqrt(mean_squared_error(y_test_lstm, y_pred_lstm))
mae = mean_absolute_error(y_test_lstm, y_pred_lstm)
r2 = r2_score(y_test_lstm, y_pred_lstm)

In [12]:
epochs = 10
print(f"Test: Epoch {epochs}")
print(f"Test R²: {r2:.3f}")
print(f"Test RMSE: {rmse:.3f}")
print(f"Test MAE: {mae:.3f}")

Test: Epoch 10
Test R²: 0.996
Test RMSE: 0.016
Test MAE: 0.011


In [13]:
from sklearn.metrics import accuracy_score, f1_score, roc_auc_score
from sklearn.preprocessing import KBinsDiscretizer
from tensorflow.keras.utils import to_categorical

# Step 1: Define number of bins for classification
n_bins = 10  # You can choose 5, 10, etc.

# Step 2: Bin SOC into discrete classes using KBinsDiscretizer
discretizer = KBinsDiscretizer(n_bins=n_bins, encode='ordinal', strategy='uniform')
y_test_cls = discretizer.fit_transform(y_test_lstm.reshape(-1, 1)).astype(int).flatten()
y_pred_cls = discretizer.transform(y_pred_lstm.reshape(-1, 1)).astype(int).flatten()

# Step 3: Compute classification metrics
accuracy = accuracy_score(y_test_cls, y_pred_cls)
f1 = f1_score(y_test_cls, y_pred_cls, average='macro')  # macro = class-agnostic average
y_test_onehot = to_categorical(y_test_cls, num_classes=n_bins)
y_pred_onehot = to_categorical(y_pred_cls, num_classes=n_bins)

# Step 4: Calculate AUC using one-vs-rest
try:
    auc = roc_auc_score(y_test_onehot, y_pred_onehot, multi_class='ovr')
except ValueError:
    auc = None  # In case some bins are missing

# Step 5: Print results in your format
epochs = 10  # change to your actual training epoch count
print(f"Test: Epoch {epochs}")
print(f"Test ACC: {accuracy:.3f}")
print(f"Test F1: {f1:.3f}")
print(f"Test AUC: {auc:.3f}" if auc is not None else "Test AUC: N/A (insufficient class coverage)")


Test: Epoch 10
Test ACC: 0.897
Test F1: 0.898
Test AUC: 0.942
