In [4]:
import os
import glob
import re
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from itertools import permutations
from sklearn.preprocessing import MinMaxScaler
from sklearn.metrics import mean_absolute_error, r2_score
from torch import nn, optim
import torch
from torch.utils.data import DataLoader, TensorDataset

device = torch.device("cuda" if torch.cuda.is_available() else "cpu")


In [5]:
def extract_charging_current(filename):
    match = re.search(r'_(\d+)C', filename)
    return float(match.group(1)) if match else 0.0

def load_battery_data(battery_folder):
    data_list = []
    for file in glob.glob(os.path.join(battery_folder, "*.xlsx")):
        df = pd.read_excel(file)
        df = df[['Test_Time(s)', 'Voltage(V)', 'Current(A)', 'Surface_Temp(degC)']].dropna()
        df['Charging_Current'] = extract_charging_current(file)
        data_list.append(df)
    return pd.concat(data_list, ignore_index=True)

battery_folders = {
    "LFP": "/home/aadya/scratch/EE499/battery/lfp_25degC",  # change paths
    "NMC": "/home/aadya/scratch/EE499/battery/nmc_25degC",
    "NCA": "/home/aadya/scratch/EE499/battery/nca_25degC"
}

all_data = {name: load_battery_data(path) for name, path in battery_folders.items()}


In [6]:

class LSTMModel(nn.Module):
    def __init__(self, input_dim=4, hidden_dim=64, num_layers=2):
        super().__init__()
        self.lstm = nn.LSTM(input_dim, hidden_dim, num_layers, batch_first=True)
        self.fc = nn.Linear(hidden_dim, 1)

    def forward(self, x):
        out, _ = self.lstm(x)
        return self.fc(out[:, -1, :])


In [7]:
def prepare_sequences(df, lookback=10):
    features = ['Test_Time(s)', 'Voltage(V)', 'Current(A)', 'Charging_Current']
    target = 'Surface_Temp(degC)'

    scaler_X = MinMaxScaler()
    scaler_y = MinMaxScaler()

    X_scaled = scaler_X.fit_transform(df[features])
    y_scaled = scaler_y.fit_transform(df[[target]])

    X_seq, y_seq = [], []
    for i in range(len(X_scaled) - lookback):
        X_seq.append(X_scaled[i:i + lookback])
        y_seq.append(y_scaled[i + lookback])

    return torch.tensor(X_seq, dtype=torch.float32), torch.tensor(y_seq, dtype=torch.float32), scaler_y


In [8]:
def train_model(model, train_loader, val_loader, epochs=10, save_path=None):
    criterion = nn.MSELoss()
    optimizer = optim.Adam(model.parameters(), lr=0.001)

    train_losses, val_losses = [], []

    for epoch in range(epochs):
        model.train()
        epoch_train_loss = 0
        for xb, yb in train_loader:
            xb, yb = xb.to(device), yb.to(device)
            optimizer.zero_grad()
            pred = model(xb)
            loss = criterion(pred, yb)
            loss.backward()
            optimizer.step()
            epoch_train_loss += loss.item()

        model.eval()
        val_loss = 0
        with torch.no_grad():
            for xb, yb in val_loader:
                xb, yb = xb.to(device), yb.to(device)
                pred = model(xb)
                val_loss += criterion(pred, yb).item()

        train_losses.append(epoch_train_loss / len(train_loader))
        val_losses.append(val_loss / len(val_loader))
        print(f"Epoch {epoch+1} Train Loss: {train_losses[-1]:.4f} Val Loss: {val_losses[-1]:.4f}")

    if save_path:
        torch.save(model.state_dict(), save_path)

    return train_losses, val_losses


In [None]:
lookback = 10
batch_size = 64
results = {}

os.makedirs("plots", exist_ok=True)
os.makedirs("models", exist_ok=True)

for train1, train2, test in permutations(battery_folders.keys(), 3):
    print(f"\nTraining on {train1} + {train2} + 20% of {test} → Testing on remaining 80% of {test}")

    df_train = pd.concat([
        all_data[train1],
        all_data[train2],
        all_data[test].iloc[:int(0.2 * len(all_data[test]))]
    ], ignore_index=True)

    df_test = all_data[test].iloc[int(0.2 * len(all_data[test])):].reset_index(drop=True)

    X_train, y_train, scaler_y = prepare_sequences(df_train, lookback)
    X_test, y_test, _ = prepare_sequences(df_test, lookback)

    train_dataset = TensorDataset(X_train, y_train)
    test_dataset = TensorDataset(X_test, y_test)

    train_loader = DataLoader(train_dataset, batch_size=batch_size, shuffle=True)
    test_loader = DataLoader(test_dataset, batch_size=batch_size)

    model = LSTMModel().to(device)
    model_path = f"models/model_{train1}_{train2}_to_{test}.pt"
    train_losses, val_losses = train_model(model, train_loader, test_loader, epochs=5, save_path=model_path)

    model.eval()
    preds, targets = [], []
    with torch.no_grad():
        for xb, yb in test_loader:
            pred = model(xb.to(device)).cpu().numpy()
            preds.extend(pred)
            targets.extend(yb.cpu().numpy())

    preds = scaler_y.inverse_transform(np.array(preds))
    targets = scaler_y.inverse_transform(np.array(targets))

    mae = mean_absolute_error(targets, preds)
    r2 = r2_score(targets, preds)

    results[f"{train1}+{train2}->{test}"] = {"mae": mae, "r2": r2, "losses": (train_losses, val_losses)}

    # Save loss plot
    plt.figure()
    plt.plot(train_losses, label='Train Loss')
    plt.plot(val_losses, label='Val Loss')
    plt.title(f"Loss Curve: {train1}+{train2} → {test}")
    plt.legend()
    plt.grid()
    plt.savefig(f"plots/loss_{train1}_{train2}_to_{test}.png")
    plt.close()

    # Save prediction plot
    plt.figure()
    plt.plot(targets[:200], label='True')
    plt.plot(preds[:200], label='Predicted')
    plt.title(f"Prediction: {train1}+{train2} → {test}")
    plt.legend()
    plt.grid()
    plt.savefig(f"plots/pred_{train1}_{train2}_to_{test}.png")
    plt.close()



Training on LFP + NMC + 20% of NCA → Testing on remaining 80% of NCA


  return torch.tensor(X_seq, dtype=torch.float32), torch.tensor(y_seq, dtype=torch.float32), scaler_y


Epoch 1 Train Loss: 0.0008 Val Loss: 0.0041
Epoch 2 Train Loss: 0.0004 Val Loss: 0.0037
Epoch 3 Train Loss: 0.0003 Val Loss: 0.0037
Epoch 4 Train Loss: 0.0003 Val Loss: 0.0039
Epoch 5 Train Loss: 0.0003 Val Loss: 0.0035

Training on LFP + NCA + 20% of NMC → Testing on remaining 80% of NMC
Epoch 1 Train Loss: 0.0012 Val Loss: 0.0040
Epoch 2 Train Loss: 0.0007 Val Loss: 0.0035
Epoch 3 Train Loss: 0.0005 Val Loss: 0.0056
Epoch 4 Train Loss: 0.0005 Val Loss: 0.0072
Epoch 5 Train Loss: 0.0004 Val Loss: 0.0101

Training on NMC + LFP + 20% of NCA → Testing on remaining 80% of NCA
Epoch 1 Train Loss: 0.0008 Val Loss: 0.0040
Epoch 2 Train Loss: 0.0004 Val Loss: 0.0035
Epoch 3 Train Loss: 0.0003 Val Loss: 0.0037
Epoch 4 Train Loss: 0.0003 Val Loss: 0.0044
Epoch 5 Train Loss: 0.0003 Val Loss: 0.0045

Training on NMC + NCA + 20% of LFP → Testing on remaining 80% of LFP
Epoch 1 Train Loss: 0.0008 Val Loss: 0.0035
Epoch 2 Train Loss: 0.0005 Val Loss: 0.0035
Epoch 3 Train Loss: 0.0004 Val Loss: 0.003

In [None]:
print("\nPerformance Summary:")
summary_df = pd.DataFrame([
    {"Split": k, "MAE": v["mae"], "R2": v["r2"]}
    for k, v in results.items()
])
print(summary_df)
summary_df.to_csv("results_summary.csv", index=False)
