In [1]:
import os
import pandas as pd
import numpy as np
from sklearn.preprocessing import StandardScaler
import matplotlib.pyplot as plt

# Set project directory
project_dir = r"C:\Users\aknin\Desktop\MLproject-main"
data_path = os.path.join(project_dir, "margednew.csv")
output_path = os.path.join(project_dir, "normalized_margednew.csv")




# Define feature columns and label column
FEATURE_COLS = ['Voltage [V]', 'Current [A]', 'Temperature [degC]', 'Capacity [Ah]', 'Cumulative_Capacity_Ah']
LABEL_COL = 'SOC [-]'



# %%
import torch
from torch.utils.data import Dataset, DataLoader
from sklearn.model_selection import train_test_split

# Define constants
SEQUENCE_LENGTH = 20
BATCH_SIZE = 128
device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")

# Custom dataset class
class BatteryDatasetLSTM(Dataset):
    def __init__(self, data_tensor, labels_tensor, sequence_length=SEQUENCE_LENGTH):
        self.sequence_length = sequence_length
        self.features = data_tensor
        self.labels = labels_tensor

    def __len__(self):
        return len(self.features) - self.sequence_length + 1

    def __getitem__(self, idx):
        sequence = self.features[idx:idx + self.sequence_length]
        label = self.labels[idx + self.sequence_length - 1]
        return sequence, label

# Load normalized data
normalized_path = os.path.join(project_dir, "normalized_margednew.csv")
df = pd.read_csv(normalized_path)

# Convert to tensors
features_tensor = torch.tensor(df[FEATURE_COLS].values, dtype=torch.float32).to(device)
labels_tensor = torch.tensor(df[LABEL_COL].values, dtype=torch.float32).to(device)

# Split the data (row-based since it's a single file)
train_data, temp_data = train_test_split(df, test_size=0.2, random_state=42)
val_data, test_data = train_test_split(temp_data, test_size=0.5, random_state=42)

# Extract features and labels for each split
train_features = torch.tensor(train_data[FEATURE_COLS].values, dtype=torch.float32).to(device)
train_labels = torch.tensor(train_data[LABEL_COL].values, dtype=torch.float32).to(device)
val_features = torch.tensor(val_data[FEATURE_COLS].values, dtype=torch.float32).to(device)
val_labels = torch.tensor(val_data[LABEL_COL].values, dtype=torch.float32).to(device)
test_features = torch.tensor(test_data[FEATURE_COLS].values, dtype=torch.float32).to(device)
test_labels = torch.tensor(test_data[LABEL_COL].values, dtype=torch.float32).to(device)

# Create datasets with sequences
train_dataset = BatteryDatasetLSTM(train_features, train_labels)
val_dataset = BatteryDatasetLSTM(val_features, val_labels)
test_dataset = BatteryDatasetLSTM(test_features, test_labels)

# Create data loaders
train_loader = DataLoader(train_dataset, batch_size=BATCH_SIZE, shuffle=True)
val_loader = DataLoader(val_dataset, batch_size=BATCH_SIZE, shuffle=False)
test_loader = DataLoader(test_dataset, batch_size=BATCH_SIZE, shuffle=False)

print(f"Train data shape: {train_features.shape}, Labels shape: {train_labels.shape}")
print(f"Validation data shape: {val_features.shape}, Labels shape: {val_labels.shape}")
print(f"Test data shape: {test_features.shape}, Labels shape: {test_labels.shape}")

# %%
import torch
import torch.nn as nn
import torch.optim as optim
import optuna
from sklearn.metrics import mean_absolute_error, mean_squared_error

# Define SoCLSTM
class SoCLSTM(nn.Module):
    def __init__(self, input_size, hidden_size, num_layers):
        super(SoCLSTM, self).__init__()
        self.hidden_size = hidden_size
        self.num_layers = num_layers
        self.lstm = nn.LSTM(input_size, hidden_size, num_layers, batch_first=True)
        self.fc = nn.Linear(hidden_size, 1)

    def forward(self, x):
        h0 = torch.zeros(self.num_layers, x.size(0), self.hidden_size, dtype=x.dtype, device=x.device)
        c0 = torch.zeros(self.num_layers, x.size(0), self.hidden_size, dtype=x.dtype, device=x.device)
        out, _ = self.lstm(x, (h0, c0))
        out = self.fc(out[:, -1, :])
        return out

# Training function
def train_and_validate(model, criterion, optimizer, train_loader, val_loader, epochs, device, patience=20, min_delta=0.001):
    history = {'train_loss': [], 'val_loss': []}
    best_val_loss = float('inf')
    epochs_no_improve = 0
    
    for epoch in range(epochs):
        model.train()
        train_loss = 0.0
        for sequences, labels in train_loader:
            sequences, labels = sequences.to(device), labels.to(device).unsqueeze(1)
            optimizer.zero_grad()
            outputs = model(sequences)
            loss = criterion(outputs, labels)
            loss.backward()
            optimizer.step()
            train_loss += loss.item()
        train_loss /= len(train_loader)
        history['train_loss'].append(train_loss)

        model.eval()
        val_loss = 0.0
        with torch.no_grad():
            for sequences, labels in val_loader:
                sequences, labels = sequences.to(device), labels.to(device).unsqueeze(1)
                outputs = model(sequences)
                loss = criterion(outputs, labels)
                val_loss += loss.item()
        val_loss /= len(val_loader)
        history['val_loss'].append(val_loss)

        if val_loss < best_val_loss - min_delta:
            best_val_loss = val_loss
            epochs_no_improve = 0
        else:
            epochs_no_improve += 1

        print(f'Epoch {epoch + 1}/{epochs}, Train Loss: {train_loss}, Validation Loss: {val_loss}')

        if epochs_no_improve >= patience:
            print('Early stopping triggered')
            break

    return history

# Test function to evaluate model performance and plot
def test_model(model, criterion, test_loader, device, project_dir):
    model.eval()
    test_loss = 0.0
    all_predictions = []
    all_labels = []
    with torch.no_grad():
        for sequences, labels in test_loader:
            sequences, labels = sequences.to(device), labels.to(device).unsqueeze(1)
            outputs = model(sequences)
            loss = criterion(outputs, labels)
            test_loss += loss.item()
            all_predictions.extend(outputs.cpu().numpy().flatten())
            all_labels.extend(labels.cpu().numpy().flatten())
    test_loss /= len(test_loader)
    
    # Calculate MAE and MSE
    mae = mean_absolute_error(all_labels, all_predictions)
    mse = mean_squared_error(all_labels, all_predictions)
    
    print(f"Test Loss: {test_loss}")
    print(f"Mean Absolute Error (MAE): {mae}")
    print(f"Mean Squared Error (MSE): {mse}")

    # Plot true vs predicted values
    plt.figure(figsize=(10, 6))
    plt.scatter(all_labels, all_predictions, alpha=0.5, label='Predicted vs True SOC')
    plt.plot([0, 1], [0, 1], 'r--', label='Perfect Prediction (y=x)')
    plt.xlabel('True SOC')
    plt.ylabel('Predicted SOC')
    plt.title('True vs Predicted SOC on Test Set')
    plt.legend()
    plt.grid(True)
    plt.xlim(0, 1)
    plt.ylim(0, 1)
    plt.savefig(os.path.join(project_dir, "true_vs_predicted_soc_test.png"))
    plt.close()
    print("Plot saved as true_vs_predicted_soc_test.png")

    return test_loss, mae, mse

# Optuna optimization
def objective(trial):
    hidden_size = trial.suggest_int('hidden_size', 10, 100)
    num_layers = trial.suggest_int('num_layers', 1, 5)
    learning_rate = trial.suggest_float('learning_rate', 1e-5, 1e-1, log=True)

    model = SoCLSTM(input_size=len(FEATURE_COLS), hidden_size=hidden_size, num_layers=num_layers).to(device)
    optimizer = optim.Adam(model.parameters(), lr=learning_rate)
    criterion = nn.MSELoss()

    history = train_and_validate(model, criterion, optimizer, train_loader, val_loader, epochs=10, device=device)
    return history['val_loss'][-1]

study = optuna.create_study(direction='minimize')
study.optimize(objective, n_trials=10)

best_hyperparams = study.best_trial.params
print(f"Best hyperparameters: {best_hyperparams}")

# Train with best hyperparameters
best_model = SoCLSTM(input_size=len(FEATURE_COLS), hidden_size=best_hyperparams['hidden_size'], num_layers=best_hyperparams['num_layers']).to(device)
optimizer = optim.Adam(best_model.parameters(), lr=best_hyperparams['learning_rate'])
criterion = nn.MSELoss()

history = train_and_validate(best_model, criterion, optimizer, train_loader, val_loader, epochs=20, device=device)

# Evaluate on test set
test_loss, mae, mse = test_model(best_model, criterion, test_loader, device, project_dir)

# Save the model
model_path = os.path.join(project_dir, "soc_lstm_model_new.pth")
torch.save({'model_state_dict': best_model.state_dict(), 'input_size': len(FEATURE_COLS)}, model_path)
print(f"Model saved to {model_path}")

Train data shape: torch.Size([394401, 5]), Labels shape: torch.Size([394401])
Validation data shape: torch.Size([49300, 5]), Labels shape: torch.Size([49300])
Test data shape: torch.Size([49301, 5]), Labels shape: torch.Size([49301])


[I 2025-06-13 23:12:04,627] A new study created in memory with name: no-name-610d4609-79f9-4a4e-895c-90e2029fba71


Epoch 1/10, Train Loss: 0.10544118075827977, Validation Loss: 0.04248309842856128
Epoch 2/10, Train Loss: 0.017745783054910473, Validation Loss: 0.008282716034220093
Epoch 3/10, Train Loss: 0.006691808950681466, Validation Loss: 0.0053986847655926986
Epoch 4/10, Train Loss: 0.004407424970843034, Validation Loss: 0.0035209903052951168
Epoch 5/10, Train Loss: 0.0029648820444342296, Validation Loss: 0.0024878918802500776
Epoch 6/10, Train Loss: 0.002294846634493233, Validation Loss: 0.0020961718416773988
Epoch 7/10, Train Loss: 0.0020279297261614603, Validation Loss: 0.0019213483494542386
Epoch 8/10, Train Loss: 0.0018925105803004013, Validation Loss: 0.0018164350005340825
Epoch 9/10, Train Loss: 0.0018058593746802468, Validation Loss: 0.0017464849174262742


[I 2025-06-13 23:17:06,334] Trial 0 finished with value: 0.0016955208711005227 and parameters: {'hidden_size': 16, 'num_layers': 2, 'learning_rate': 2.7385549904995332e-05}. Best is trial 0 with value: 0.0016955208711005227.


Epoch 10/10, Train Loss: 0.001741568366797882, Validation Loss: 0.0016955208711005227
Epoch 1/10, Train Loss: 0.0030026472242887354, Validation Loss: 0.0011234702328292393
Epoch 2/10, Train Loss: 0.001098015338645089, Validation Loss: 0.0010121379305796764
Epoch 3/10, Train Loss: 0.0009868217537960047, Validation Loss: 0.0010387762864373644
Epoch 4/10, Train Loss: 0.0009088300777012832, Validation Loss: 0.0009029049164608937
Epoch 5/10, Train Loss: 0.0008650479030597165, Validation Loss: 0.0008548974388862907
Epoch 6/10, Train Loss: 0.0008098913072866399, Validation Loss: 0.0007825319656483658
Epoch 7/10, Train Loss: 0.0007625368278826261, Validation Loss: 0.0007841487479525247
Epoch 8/10, Train Loss: 0.0007330657096729553, Validation Loss: 0.0007747975478903028
Epoch 9/10, Train Loss: 0.0007129237470852763, Validation Loss: 0.00074909019142088


[I 2025-06-13 23:43:30,205] Trial 1 finished with value: 0.0007430996654173194 and parameters: {'hidden_size': 84, 'num_layers': 4, 'learning_rate': 0.007436223908156232}. Best is trial 1 with value: 0.0007430996654173194.


Epoch 10/10, Train Loss: 0.0007123447163001231, Validation Loss: 0.0007430996654173194
Epoch 1/10, Train Loss: 0.0030256724617862348, Validation Loss: 0.001266029893575656
Epoch 2/10, Train Loss: 0.0012071230711491257, Validation Loss: 0.0011311392739027318
Epoch 3/10, Train Loss: 0.0010952719699513283, Validation Loss: 0.001030504693795319
Epoch 4/10, Train Loss: 0.0010254423603193613, Validation Loss: 0.001029018427675392
Epoch 5/10, Train Loss: 0.0009856246211973599, Validation Loss: 0.0010003328338142369
Epoch 6/10, Train Loss: 0.0009545735724050205, Validation Loss: 0.0009661438729984377
Epoch 7/10, Train Loss: 0.0009312241397430382, Validation Loss: 0.0009143743928298089
Epoch 8/10, Train Loss: 0.0009100689523459997, Validation Loss: 0.0009250949467007563
Epoch 9/10, Train Loss: 0.000898529111600451, Validation Loss: 0.0008967581764979461


[I 2025-06-13 23:49:00,423] Trial 2 finished with value: 0.0008861488264782109 and parameters: {'hidden_size': 63, 'num_layers': 1, 'learning_rate': 0.0020971178732315508}. Best is trial 1 with value: 0.0007430996654173194.


Epoch 10/10, Train Loss: 0.0008812766873653109, Validation Loss: 0.0008861488264782109
Epoch 1/10, Train Loss: 0.0021596380479897916, Validation Loss: 0.0011742964701804241
Epoch 2/10, Train Loss: 0.001130601178393377, Validation Loss: 0.0011054359034422805
Epoch 3/10, Train Loss: 0.0010502355487149262, Validation Loss: 0.0010939965516490466
Epoch 4/10, Train Loss: 0.0010076770380335843, Validation Loss: 0.0009657148455545292
Epoch 5/10, Train Loss: 0.0009566791316694617, Validation Loss: 0.0010291439470674583
Epoch 6/10, Train Loss: 0.0009198714534456009, Validation Loss: 0.0009056757368000195
Epoch 7/10, Train Loss: 0.0009202738080232958, Validation Loss: 0.0012102151216671143
Epoch 8/10, Train Loss: 0.0008960962125248378, Validation Loss: 0.0009708249860942615
Epoch 9/10, Train Loss: 0.0008908997395675385, Validation Loss: 0.001632559565294032


[I 2025-06-13 23:52:10,400] Trial 3 finished with value: 0.0008470373357274603 and parameters: {'hidden_size': 18, 'num_layers': 1, 'learning_rate': 0.030925785087872734}. Best is trial 1 with value: 0.0007430996654173194.


Epoch 10/10, Train Loss: 0.0008882011025924191, Validation Loss: 0.0008470373357274603
Epoch 1/10, Train Loss: 0.07226208363775113, Validation Loss: 0.07413724461483523
Epoch 2/10, Train Loss: 0.07175507979548496, Validation Loss: 0.07252845002598868
Epoch 3/10, Train Loss: 0.07172996901449524, Validation Loss: 0.0712472211009789
Epoch 4/10, Train Loss: 0.07179012668719739, Validation Loss: 0.07106197657246972
Epoch 5/10, Train Loss: 0.0718015989314071, Validation Loss: 0.07289365764437099
Epoch 6/10, Train Loss: 0.07355709765215225, Validation Loss: 0.07102905409748381
Epoch 7/10, Train Loss: 0.07172547932412042, Validation Loss: 0.07117676309314726
Epoch 8/10, Train Loss: 0.07175060974447314, Validation Loss: 0.07129819762598666
Epoch 9/10, Train Loss: 0.07184138698146801, Validation Loss: 0.07097293561025296


[I 2025-06-14 01:07:19,306] Trial 4 finished with value: 0.07124307641634052 and parameters: {'hidden_size': 59, 'num_layers': 4, 'learning_rate': 0.02866306537488151}. Best is trial 1 with value: 0.0007430996654173194.


Epoch 10/10, Train Loss: 0.0717695917653755, Validation Loss: 0.07124307641634052
Epoch 1/10, Train Loss: 0.003882461062495084, Validation Loss: 0.0011871361215610944
Epoch 2/10, Train Loss: 0.0011490620226182794, Validation Loss: 0.0011221533415400559
Epoch 3/10, Train Loss: 0.0010399187338455897, Validation Loss: 0.0009274432992676051
Epoch 4/10, Train Loss: 0.0009487000431607164, Validation Loss: 0.0009543580507725077
Epoch 5/10, Train Loss: 0.000912308651741049, Validation Loss: 0.0009003540498661143
Epoch 6/10, Train Loss: 0.0008881592207497526, Validation Loss: 0.0009239862710382268
Epoch 7/10, Train Loss: 0.000870919467927669, Validation Loss: 0.0009095009505764431
Epoch 8/10, Train Loss: 0.0008595768080131419, Validation Loss: 0.0008695020244325332
Epoch 9/10, Train Loss: 0.0008471605445128079, Validation Loss: 0.0010199701546816822


[I 2025-06-14 01:23:59,355] Trial 5 finished with value: 0.0008221709345427578 and parameters: {'hidden_size': 76, 'num_layers': 3, 'learning_rate': 0.0019807848778297522}. Best is trial 1 with value: 0.0007430996654173194.


Epoch 10/10, Train Loss: 0.0008333156556162244, Validation Loss: 0.0008221709345427578
Epoch 1/10, Train Loss: 0.04238028302908096, Validation Loss: 0.0034838974214755503
Epoch 2/10, Train Loss: 0.002516089250004381, Validation Loss: 0.0020541410358324696
Epoch 3/10, Train Loss: 0.0018471156004063107, Validation Loss: 0.001646479703015479
Epoch 4/10, Train Loss: 0.0016505391481967818, Validation Loss: 0.0015775606047043606
Epoch 5/10, Train Loss: 0.0015217383680700479, Validation Loss: 0.0013949049251736744
Epoch 6/10, Train Loss: 0.0014323755812015032, Validation Loss: 0.001442277572995988
Epoch 7/10, Train Loss: 0.0013836553759830155, Validation Loss: 0.0013097684986954506
Epoch 8/10, Train Loss: 0.001339310904231864, Validation Loss: 0.0012911488001965893
Epoch 9/10, Train Loss: 0.0013094856321045948, Validation Loss: 0.001287999812940647


[I 2025-06-14 01:40:34,473] Trial 6 finished with value: 0.0015514186923355927 and parameters: {'hidden_size': 49, 'num_layers': 5, 'learning_rate': 0.0001121436948486914}. Best is trial 1 with value: 0.0007430996654173194.


Epoch 10/10, Train Loss: 0.0012784657385991978, Validation Loss: 0.0015514186923355927
Epoch 1/10, Train Loss: 0.13879457627296796, Validation Loss: 0.06810049382593347
Epoch 2/10, Train Loss: 0.06246605214870146, Validation Loss: 0.04707794892239756
Epoch 3/10, Train Loss: 0.01693222955170545, Validation Loss: 0.00994949400665111
Epoch 4/10, Train Loss: 0.008999521342619268, Validation Loss: 0.00804949670937229
Epoch 5/10, Train Loss: 0.0071281190681477345, Validation Loss: 0.006258643252407958
Epoch 6/10, Train Loss: 0.0055605929059255686, Validation Loss: 0.00491144880457334
Epoch 7/10, Train Loss: 0.004359902041693176, Validation Loss: 0.003877940771601127
Epoch 8/10, Train Loss: 0.0035664251043674602, Validation Loss: 0.0032994924446510137
Epoch 9/10, Train Loss: 0.0031535977650373947, Validation Loss: 0.00299452927251854


[I 2025-06-14 01:50:01,333] Trial 7 finished with value: 0.002759796775861251 and parameters: {'hidden_size': 39, 'num_layers': 3, 'learning_rate': 1.3360903117234277e-05}. Best is trial 1 with value: 0.0007430996654173194.


Epoch 10/10, Train Loss: 0.002895265312659591, Validation Loss: 0.002759796775861251
Epoch 1/10, Train Loss: 0.10499521132527158, Validation Loss: 0.06890117863453732
Epoch 2/10, Train Loss: 0.03377116451894781, Validation Loss: 0.009131954134172136
Epoch 3/10, Train Loss: 0.008034341814861276, Validation Loss: 0.006776756983008501
Epoch 4/10, Train Loss: 0.0051065049398074614, Validation Loss: 0.003376471850267971
Epoch 5/10, Train Loss: 0.0025806101806214628, Validation Loss: 0.002111812229665603
Epoch 6/10, Train Loss: 0.0019906956154880605, Validation Loss: 0.0018528335127729798
Epoch 7/10, Train Loss: 0.0018189574489659327, Validation Loss: 0.0017412412994441098
Epoch 8/10, Train Loss: 0.001729796116942063, Validation Loss: 0.001669147867682858
Epoch 9/10, Train Loss: 0.0016711091139484657, Validation Loss: 0.0016556241703515274


[I 2025-06-14 01:58:19,928] Trial 8 finished with value: 0.001584121975139956 and parameters: {'hidden_size': 25, 'num_layers': 4, 'learning_rate': 3.898331915171574e-05}. Best is trial 1 with value: 0.0007430996654173194.


Epoch 10/10, Train Loss: 0.0016294749708855882, Validation Loss: 0.001584121975139956
Epoch 1/10, Train Loss: 0.001861276898885827, Validation Loss: 0.001183523645060839
Epoch 2/10, Train Loss: 0.0010918928311597121, Validation Loss: 0.0011637381543682821
Epoch 3/10, Train Loss: 0.0009925103804892751, Validation Loss: 0.0009462819501807083
Epoch 4/10, Train Loss: 0.0009196379134456562, Validation Loss: 0.000893769235880394
Epoch 5/10, Train Loss: 0.0008761620078173766, Validation Loss: 0.0013167839939163158
Epoch 6/10, Train Loss: 0.0008525151513457751, Validation Loss: 0.000828430574295929
Epoch 7/10, Train Loss: 0.0008336222438981541, Validation Loss: 0.0008047848665897392
Epoch 8/10, Train Loss: 0.0008193661313573331, Validation Loss: 0.0008208427040317757
Epoch 9/10, Train Loss: 0.0008125453770430296, Validation Loss: 0.0009874823702627693


[I 2025-06-14 02:02:25,512] Trial 9 finished with value: 0.0007856705419325941 and parameters: {'hidden_size': 38, 'num_layers': 1, 'learning_rate': 0.0173897258756589}. Best is trial 1 with value: 0.0007430996654173194.


Epoch 10/10, Train Loss: 0.0008069819331674602, Validation Loss: 0.0007856705419325941
Best hyperparameters: {'hidden_size': 84, 'num_layers': 4, 'learning_rate': 0.007436223908156232}
Epoch 1/20, Train Loss: 0.0038533748067433233, Validation Loss: 0.0013604099188063903
Epoch 2/20, Train Loss: 0.0011198556775732663, Validation Loss: 0.0011862180670156321
Epoch 3/20, Train Loss: 0.0010100673968206967, Validation Loss: 0.002251499039639714
Epoch 4/20, Train Loss: 0.0009301368571633318, Validation Loss: 0.0009811100609334537
Epoch 5/20, Train Loss: 0.0008937840962389743, Validation Loss: 0.000968434374690191
Epoch 6/20, Train Loss: 0.0008735571017254722, Validation Loss: 0.0008636295043792783
Epoch 7/20, Train Loss: 0.000842403344613629, Validation Loss: 0.0008694385444543957
Epoch 8/20, Train Loss: 0.0008177894561400874, Validation Loss: 0.0008720928667595132
Epoch 9/20, Train Loss: 0.0007881302368995781, Validation Loss: 0.0007755608185766858
Epoch 10/20, Train Loss: 0.00076787560807714