# Train LSTM Model (Point Prediction)

This notebook trains an LSTM model for RUL prediction using sequences of past cycles.

- **Phase 1** (this notebook): point prediction only, with no uncertainty quantification.
- **Phase 2**: Monte Carlo Dropout will be added in `06_add_uncertainty_lstm_mc.ipynb`.


In [None]:
import os
import sys
from pathlib import Path

import joblib
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import seaborn as sns
from sklearn.metrics import mean_absolute_error, mean_squared_error, r2_score
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler

# TensorFlow for LSTM
import tensorflow as tf
from tensorflow import keras
from tensorflow.keras import layers

# Add project root to path.
# The location can be overridden with the BATTERY_RUL_ROOT environment variable
# so the notebook is not tied to one machine; when the variable is unset (or
# empty) the original hard-coded location is used unchanged.
project_root = Path(
    os.environ.get("BATTERY_RUL_ROOT") or "/Users/siddhantaggarwal/Desktop/Battery_RUL"
).expanduser().resolve()
# Guard the insert so re-running this cell does not pile up duplicate entries.
if str(project_root) not in sys.path:
    sys.path.append(str(project_root))

# Set style
plt.style.use('seaborn-v0_8')
sns.set_palette("husl")

# Set random seeds for reproducibility (NumPy and TensorFlow global generators)
np.random.seed(42)
tf.random.set_seed(42)


In [None]:
# Read the feature table (EMD features included) from the processed-data folder
processed_dir = project_root / "data" / "processed"
features_path = processed_dir / "rul_features_with_emd.parquet"
df = pd.read_parquet(features_path)

print(f"Dataset shape: {df.shape}")

# Rows without a RUL value cannot be used as targets, so they are dropped
df_clean = df.dropna(subset=['RUL']).copy()
print(f"After removing NaN RUL: {len(df_clean)} rows")
print(f"RUL range: [{df_clean['RUL'].min():.1f}, {df_clean['RUL'].max():.1f}] cycles")


In [None]:
# Prepare features
# Every column that is NOT listed here is used as a model input.
exclude_cols = [
    'battery_id', 'filename', 'type', 'start_time', 'test_id', 'uid',
    'split', 'cycle_index', 'EOL_cycle', 'RUL', 'SOH', 'Capacity',
    'Re', 'Rct', 'ambient_temperature'
]

# Keep the original column order; the set only speeds up the membership test
excluded_lookup = set(exclude_cols)
feature_cols = [name for name in df_clean.columns if name not in excluded_lookup]
print(f"Total features: {len(feature_cols)}")

# Partition the rows using the precomputed 'split' column
train_idx = df_clean['split'] == 'train'
test_idx = df_clean['split'] == 'test'

df_train = df_clean.loc[train_idx].copy()
df_test = df_clean.loc[test_idx].copy()

print(f"Train: {len(df_train)}, Test: {len(df_test)}")


In [None]:
# Create sequences for LSTM
# Use past N cycles to predict RUL at current cycle
sequence_length = 20  # Number of past cycles to use

def create_sequences(df_subset, feature_cols, target_col, seq_len=sequence_length):
    """Build sliding-window samples for the LSTM, one battery at a time.

    For each battery the rows are ordered by ``cycle_index``; every run of
    ``seq_len`` consecutive rows becomes one input window, and its target is
    the ``target_col`` value of the row immediately after the window. Windows
    never mix rows from different batteries. Missing feature values are
    replaced with 0 before windowing.

    Parameters
    ----------
    df_subset : pandas.DataFrame
        Must contain ``battery_id``, ``cycle_index``, all of ``feature_cols``
        and ``target_col``.
    feature_cols : list of str
        Columns used as per-timestep inputs.
    target_col : str
        Column holding the value to predict.
    seq_len : int, optional
        Number of rows per window (defaults to ``sequence_length``).

    Returns
    -------
    tuple of numpy.ndarray
        ``(X, y)`` where ``X`` has shape ``(n_windows, seq_len, n_features)``
        and ``y`` has shape ``(n_windows,)``. When no battery has more than
        ``seq_len`` rows, ``X`` is an empty array that still has shape
        ``(0, seq_len, n_features)`` so downstream shape unpacking keeps working.

    Raises
    ------
    ValueError
        If ``seq_len`` is smaller than 1.
    """
    if seq_len < 1:
        raise ValueError(f"seq_len must be >= 1, got {seq_len}")

    windows = []
    targets = []

    # A single groupby pass replaces one boolean-mask scan per battery;
    # sort=False visits batteries in order of first appearance, like unique().
    for _, battery_rows in df_subset.groupby('battery_id', sort=False):
        battery_rows = battery_rows.sort_values('cycle_index')

        features = battery_rows[feature_cols].fillna(0).values
        target = battery_rows[target_col].values

        # The range is empty for batteries with seq_len rows or fewer,
        # so no separate length check is needed.
        for end in range(seq_len, len(features)):
            windows.append(features[end - seq_len:end])
            targets.append(target[end])

    if not windows:
        # np.array([]) would be 1-D; keep the 3-D layout callers rely on.
        return np.empty((0, seq_len, len(feature_cols))), np.empty((0,))

    return np.array(windows), np.array(targets)

# Turn the train and test partitions into windowed arrays
print(f"Creating sequences with length {sequence_length}...")
X_train_seq, y_train_seq = create_sequences(
    df_train, feature_cols, target_col='RUL', seq_len=sequence_length
)
X_test_seq, y_test_seq = create_sequences(
    df_test, feature_cols, target_col='RUL', seq_len=sequence_length
)

# Report the resulting array shapes
print(f"Train sequences: {X_train_seq.shape}")
print(f"Test sequences: {X_test_seq.shape}")
print(f"Feature shape per timestep: {X_train_seq.shape[2]}")


In [None]:
# Normalize features
# StandardScaler works on a 2-D (rows, features) matrix, so the 3-D windows are
# flattened to one row per timestep, scaled, and folded back afterwards.
# NOTE: this cell overwrites X_train_seq / X_test_seq with their scaled
# versions, so re-running it on its own would scale already-scaled data.
n_samples, n_timesteps, n_features = X_train_seq.shape
X_train_reshaped = X_train_seq.reshape(-1, n_features)
X_test_reshaped = X_test_seq.reshape(-1, n_features)

# Fit scaler on training data only; the test data reuses the same statistics
scaler = StandardScaler()
X_train_scaled = scaler.fit_transform(X_train_reshaped)
X_test_scaled = scaler.transform(X_test_reshaped)

# Reshape back to (samples, timesteps, features)
X_train_seq = X_train_scaled.reshape(n_samples, n_timesteps, n_features)
X_test_seq = X_test_scaled.reshape(X_test_seq.shape[0], n_timesteps, n_features)

print("✅ Features normalized")


In [None]:
# Assemble the LSTM network one layer at a time
# (point prediction - no dropout for now)
model = keras.Sequential()
model.add(
    layers.LSTM(
        64,
        return_sequences=True,
        input_shape=(sequence_length, len(feature_cols)),
    )
)
model.add(layers.LSTM(32, return_sequences=False))
model.add(layers.Dense(16, activation='relu'))
model.add(layers.Dense(1))  # single output: the predicted RUL

# Mean-squared-error objective, with MAE tracked as an additional metric
adam = keras.optimizers.Adam(learning_rate=0.001)
model.compile(optimizer=adam, loss='mse', metrics=['mae'])

model.summary()


In [None]:
# Train model
# Stop when validation loss has not improved for 10 epochs and restore the
# weights from the best epoch.
early_stopping = keras.callbacks.EarlyStopping(
    monitor='val_loss',
    patience=10,
    restore_best_weights=True
)

# Split train into train/val for early stopping
# NOTE(review): the split is random over individual windows. If X_train_seq
# holds overlapping windows from the same batteries, validation samples are not
# independent of the training samples; a per-battery hold-out would be a
# stricter check - confirm whether that matters for this experiment.
X_train_final, X_val_final, y_train_final, y_val_final = train_test_split(
    X_train_seq, y_train_seq, test_size=0.2, random_state=42
)

history = model.fit(
    X_train_final, y_train_final,
    validation_data=(X_val_final, y_val_final),
    epochs=50,
    batch_size=32,
    callbacks=[early_stopping],
    verbose=1
)

print("✅ Model trained!")


In [None]:
# Evaluate model
def calculate_metrics(y_true, y_pred, name):
    """Print and return regression metrics for a single-output prediction.

    Parameters
    ----------
    y_true : array-like
        Ground-truth target values.
    y_pred : array-like
        Predicted values, one per entry of ``y_true``.
    name : str
        Label printed in the report header (e.g. ``"Train"``).

    Returns
    -------
    dict
        Keys ``'mae'``, ``'rmse'``, ``'r2'`` and ``'mape'`` (MAPE in percent).
    """
    # Flatten both inputs: with one array shaped (n, 1) and the other (n,),
    # the subtraction in the MAPE term would broadcast to (n, n) and silently
    # produce a wrong value.
    y_true = np.asarray(y_true, dtype=float).ravel()
    y_pred = np.asarray(y_pred, dtype=float).ravel()

    mae = mean_absolute_error(y_true, y_pred)
    rmse = np.sqrt(mean_squared_error(y_true, y_pred))
    r2 = r2_score(y_true, y_pred)
    # The 1e-6 term only prevents division by zero; a target of exactly 0
    # still contributes a very large percentage error to the mean.
    mape = np.mean(np.abs((y_true - y_pred) / (np.abs(y_true) + 1e-6))) * 100

    print(f"\n{name} Metrics:")
    print(f"  MAE:  {mae:.2f} cycles")
    print(f"  RMSE: {rmse:.2f} cycles")
    print(f"  R²:   {r2:.3f}")
    print(f"  MAPE: {mape:.2f}%")
    return {'mae': mae, 'rmse': rmse, 'r2': r2, 'mape': mape}

# Run the trained network on both sets and flatten each output to 1-D
y_train_pred, y_test_pred = (
    model.predict(inputs, verbose=0).flatten()
    for inputs in (X_train_seq, X_test_seq)
)

# Report the metrics for each set (train first, then test)
train_metrics = calculate_metrics(y_train_seq, y_train_pred, "Train")
test_metrics = calculate_metrics(y_test_seq, y_test_pred, "Test")


In [None]:
# Plot the per-epoch training curves: loss on the left panel, MAE on the right
fig, axes = plt.subplots(1, 2, figsize=(12, 4))

# (train key, validation key, train label, validation label, y-axis label, title)
panel_specs = [
    ('loss', 'val_loss', 'Train Loss', 'Val Loss', 'Loss (MSE)', 'Training History - Loss'),
    ('mae', 'val_mae', 'Train MAE', 'Val MAE', 'MAE', 'Training History - MAE'),
]

for ax, (train_key, val_key, train_label, val_label, y_label, title) in zip(axes, panel_specs):
    ax.plot(history.history[train_key], label=train_label)
    ax.plot(history.history[val_key], label=val_label)
    ax.set_xlabel('Epoch')
    ax.set_ylabel(y_label)
    ax.set_title(title)
    ax.legend()
    ax.grid(True, alpha=0.3)

plt.tight_layout()
plt.show()


In [None]:
# Save model and results
# Everything is written under <project_root>/results/models, created on demand.
models_dir = project_root / "results" / "models"
models_dir.mkdir(parents=True, exist_ok=True)

# Save model
# NOTE(review): the .h5 file name is kept as-is because other notebooks may
# load the model by this name - confirm before switching formats.
model_path = models_dir / "lstm_rul_point_model.h5"
model.save(model_path)
print(f"✅ Saved model: {model_path}")

# Save scaler (needed to apply the same normalization at inference time)
scaler_path = models_dir / "lstm_scaler.pkl"
joblib.dump(scaler, scaler_path)
print(f"✅ Saved scaler: {scaler_path}")

# Save predictions: one row per test sequence
results = pd.DataFrame({
    'actual_rul': y_test_seq,
    'predicted_rul': y_test_pred
})

results_path = models_dir / "lstm_predictions_point.csv"
results.to_csv(results_path, index=False)
print(f"✅ Saved predictions: {results_path}")

# Save metrics: one row per metric, with train and test values side by side
metric_keys = ['mae', 'rmse', 'r2', 'mape']
metrics = pd.DataFrame({
    'metric': ['MAE', 'RMSE', 'R²', 'MAPE'],
    'train': [train_metrics[key] for key in metric_keys],
    'test': [test_metrics[key] for key in metric_keys]
})

metrics_path = models_dir / "lstm_metrics_point.csv"
metrics.to_csv(metrics_path, index=False)
print(f"✅ Saved metrics: {metrics_path}")

print("\n📊 Summary:")
print(f"   Test MAE: {test_metrics['mae']:.2f} cycles")
print(f"   Test RMSE: {test_metrics['rmse']:.2f} cycles")
print(f"   Test R²: {test_metrics['r2']:.3f}")
print("\n✅ Phase 1 Complete: LSTM point prediction model saved!")
print("   Next: Train Transformer model, then compare all 3.")
