## Clone Repo

In [None]:
# Shell escape: fetch the `lstm` branch of the project repository.
# The import cell later adds /content/IntroCompSust_Ass2 to sys.path, so the
# clone must land there (assumes the working directory is /content — confirm).
!git clone -b lstm https://github.com/Maustrian3/IntroCompSust_Ass2.git

### Install dependencies

In [None]:
# Shell escape: install the third-party packages listed below.
# torch, numpy and matplotlib are imported directly by this notebook;
# pandas and scikit-learn are presumably required by src.lstm — confirm.
!pip install torch pandas numpy scikit-learn matplotlib

### Import libraries

In [None]:
import sys
from pathlib import Path

# Make the cloned repository importable as a package root so that
# `src.lstm` resolves (assumes the repo was cloned into /content — confirm).
sys.path.append('/content/IntroCompSust_Ass2')

# Project helpers: artifact loading, dataset wrapper, the two recurrent
# model classes, and the training / evaluation entry points.
from src.lstm import (
    load_preprocessed_artifacts,
    SequenceDataset,
    LSTMPredictor,
    GRUPredictor,
    train_model,
    evaluate_model,
)
import torch
from torch.utils.data import DataLoader
import matplotlib.pyplot as plt
import numpy as np

# Quick environment sanity check: report the installed PyTorch version and
# whether a CUDA device is visible to it.
print(f"PyTorch version: {torch.__version__}")
print(f"CUDA available: {torch.cuda.is_available()}")


KeyboardInterrupt



KeyboardInterrupt: 

### Config

In [None]:
# Relative path to the `data` folder one level above the current working
# directory. NOTE(review): not referenced by any cell visible in this notebook.
DATA_DIR = Path("..", "data")

### Load artifacts from Google Drive

In [None]:
from pathlib import Path

from google.colab import drive

# Expose Google Drive inside the Colab filesystem.
drive.mount("/content/drive")

# Drive folder that is expected to hold lstm_dataset.npz and scalers.pkl
BASE_DIR = Path("/content/drive/MyDrive/Colab/IntroCompSust_Ass2/")

# The loader returns three (X, y) splits followed by the two scalers and
# the sequence length / column metadata; unpack everything in one go.
(
    (X_train, y_train),
    (X_val, y_val),
    (X_test, y_test),
    feature_scaler,
    target_scaler,
    seq_len,
    feature_cols,
    target_col,
) = load_preprocessed_artifacts(base_dir=BASE_DIR)

# Later cells address each split as a single (X, y) pair.
train_data, val_data, test_data = (
    (X_train, y_train),
    (X_val, y_val),
    (X_test, y_test),
)

# Report the array shapes of every split as a quick sanity check.
print("Shapes:")
print("  X_train:", X_train.shape, "y_train:", y_train.shape)
print("  X_val:  ", X_val.shape, "y_val:  ", y_val.shape)
print("  X_test: ", X_test.shape, "y_test:", y_test.shape)

### Create DataLoaders

In [None]:
print("\nCreating DataLoaders...")

# Wrap each (X, y) split in the project's dataset class, in train/val/test order.
train_dataset, val_dataset, test_dataset = (
    SequenceDataset(*split) for split in (train_data, val_data, test_data)
)

# Only the training loader shuffles; validation and test keep their order.
train_loader = DataLoader(train_dataset, batch_size=64, shuffle=True)
val_loader, test_loader = (
    DataLoader(dataset, batch_size=64, shuffle=False)
    for dataset in (val_dataset, test_dataset)
)

# len() of a DataLoader is its number of batches.
print(f"Train batches: {len(train_loader)}")
print(f"Val batches: {len(val_loader)}")
print(f"Test batches: {len(test_loader)}")

### Create Model

In [None]:
# The size of the third axis of the training inputs becomes the model's
# input width (presumably the per-timestep feature count — confirm).
input_dim = train_data[0].shape[2]

# Run on the GPU when PyTorch can see one, otherwise fall back to the CPU.
if torch.cuda.is_available():
    device = 'cuda'
else:
    device = 'cpu'

print(f"\nDevice: {device}")
print(f"Input dimension: {input_dim}")

# Choose LSTM or GRU by swapping which of the two lines is commented out.
model = LSTMPredictor(
    input_dim=input_dim,
    hidden_dim=64,
    num_layers=2,
    dropout=0.2,
)
# model = GRUPredictor(input_dim=input_dim, hidden_dim=64, num_layers=2, dropout=0.2)

# Count every element of every parameter tensor registered on the model.
n_params = sum(param.numel() for param in model.parameters())
print(f"Total parameters: {n_params:,}")

### Train

In [None]:
print("\nTraining model...")

# train_model hands back the model together with the per-call train and
# validation loss histories, which the plotting cell consumes.
model, train_losses, val_losses = train_model(
    model, train_loader, val_loader, epochs=50, lr=0.001, device=device
)

### Evaluate

In [None]:
print("\nEvaluating on test set...")

# The target scaler is forwarded to evaluate_model (presumably so results
# come back in original units — confirm against src.lstm).
predictions, actuals = evaluate_model(
    model, test_loader, target_scaler=target_scaler, device=device
)

### Plot Results

In [None]:
# One row, two panels: loss history on the left, prediction scatter on the right.
fig, axes = plt.subplots(1, 2, figsize=(15, 5))
loss_ax, scatter_ax = axes

# Loss curves
loss_ax.plot(train_losses, label='Train Loss')
loss_ax.plot(val_losses, label='Val Loss')
loss_ax.set(
    xlabel='Epoch',
    ylabel='Loss',
    title='Training and Validation Loss',
)
loss_ax.legend()
loss_ax.grid(True)

# Predictions vs Actuals
scatter_ax.scatter(actuals, predictions, alpha=0.3, s=1)
# Dashed red y = x reference line across the range of the actual values;
# points lying on it are exact predictions.
actual_range = [actuals.min(), actuals.max()]
scatter_ax.plot(actual_range, actual_range, 'r--', lw=2)
scatter_ax.set(
    xlabel='Actual Precipitation (mm)',
    ylabel='Predicted Precipitation (mm)',
    title='Predictions vs Actuals',
)
scatter_ax.grid(True)

plt.tight_layout()
plt.show()