### Install dependencies

In [None]:
!pip install torch pandas numpy scikit-learn matplotlib

### Import libraries

In [None]:
from src.dataloader import load_random_gauges
from src.lstm import (
    SequencePreprocessor,
    SequenceDataset,
    LSTMPredictor,
    GRUPredictor,
    train_model,
    evaluate_model
)
import torch
from torch.utils.data import DataLoader
import matplotlib.pyplot as plt
import numpy as np
from pathlib import Path

# Report the installed PyTorch version and whether torch can see a CUDA device.
print(
    f"PyTorch version: {torch.__version__}",
    f"CUDA available: {torch.cuda.is_available()}",
    sep="\n",
)

### Config

In [None]:
# Folder holding the gauge data, one level above the current working directory.
DATA_DIR = Path("..") / "data"

# Seed the global torch and NumPy generators so torch-driven randomness (for
# example the shuffled training DataLoader) repeats after Restart & Run All.
# NOTE(review): this does not by itself guarantee bit-identical results on GPU.
SEED = 42
torch.manual_seed(SEED)
np.random.seed(SEED)

### Load data

In [None]:
# Announce the load, fetch the gauges from DATA_DIR, then report how many came back.
# NOTE(review): presumably n_samples/seed select a seeded random subset - confirm
# against src.dataloader.load_random_gauges.
print("Loading gauge data...")
gauges = load_random_gauges(
    DATA_DIR,
    seed=42,
    n_samples=50,
)
print(f"Loaded {len(gauges)} gauges")

### Preprocess data

In [None]:
print("\nPreprocessing data...")

# Configure the preprocessor: 'prec' is the target column, each sample looks
# back over the past 14 days, and 20% / 10% are passed as test / validation sizes.
preprocessor = SequencePreprocessor(
    sequence_length=14,
    target_col='prec',
    val_size=0.1,
    test_size=0.2,
)

# preprocess() hands back three items, unpacked here as train / val / test.
(train_data, val_data, test_data) = preprocessor.preprocess(gauges)

### Create DataLoaders

In [None]:
print("\nCreating DataLoaders...")

# One batch size shared by all three loaders, so the splits cannot drift apart
# when the value is tuned.
BATCH_SIZE = 64

# Each *_data value is unpacked positionally into SequenceDataset.
train_dataset = SequenceDataset(*train_data)
val_dataset = SequenceDataset(*val_data)
test_dataset = SequenceDataset(*test_data)

# Only the training split is shuffled; validation and test keep their stored order.
train_loader = DataLoader(train_dataset, batch_size=BATCH_SIZE, shuffle=True)
val_loader = DataLoader(val_dataset, batch_size=BATCH_SIZE, shuffle=False)
test_loader = DataLoader(test_dataset, batch_size=BATCH_SIZE, shuffle=False)

# len() of a DataLoader is its number of batches, not its number of samples.
print(f"Train batches: {len(train_loader)}")
print(f"Val batches: {len(val_loader)}")
print(f"Test batches: {len(test_loader)}")

### Create model

In [None]:
# The model's input width is read from axis 2 of the first training array.
input_dim = train_data[0].shape[2]

# Run on the GPU whenever torch reports a usable CUDA device, otherwise on CPU.
if torch.cuda.is_available():
    device = 'cuda'
else:
    device = 'cpu'

print(f"\nDevice: {device}")
print(f"Input dimension: {input_dim}")

# Pick one recurrent architecture; the GRU variant takes the same arguments.
model = LSTMPredictor(
    input_dim=input_dim,
    hidden_dim=64,
    num_layers=2,
    dropout=0.2,
)
# model = GRUPredictor(input_dim=input_dim, hidden_dim=64, num_layers=2, dropout=0.2)

# Sum the element counts of every parameter tensor the model exposes.
print(f"Total parameters: {sum(p.numel() for p in model.parameters()):,}")

### Train

In [None]:
print("\nTraining model...")

# train_model returns the model together with the training and validation
# loss histories; `model` is rebound to whatever it hands back.
model, train_losses, val_losses = train_model(
    model, train_loader, val_loader,
    device=device,
    lr=0.001,
    epochs=50,
)

### Evaluate

In [None]:
print("\nEvaluating on test set...")

# The preprocessor is passed along with the model and test loader.
# NOTE(review): presumably so predictions can be mapped back to the original
# target scale - confirm in src.lstm.evaluate_model.
predictions, actuals = evaluate_model(
    model,
    test_loader,
    preprocessor,
    device=device,
)

In [None]:
### Plot results

In [None]:
# Two side-by-side panels: loss history on the left, prediction scatter on the right.
fig, axes = plt.subplots(1, 2, figsize=(15, 5))
loss_ax, scatter_ax = axes

# Left panel: training and validation loss against epoch index.
loss_ax.plot(train_losses, label='Train Loss')
loss_ax.plot(val_losses, label='Val Loss')
loss_ax.set(
    xlabel='Epoch',
    ylabel='Loss',
    title='Training and Validation Loss',
)
loss_ax.legend()
loss_ax.grid(True)

# Right panel: each test sample as a point; the dashed red diagonal spans the
# range of the actual values and marks where prediction equals actual.
scatter_ax.scatter(actuals, predictions, alpha=0.3, s=1)
diagonal = [actuals.min(), actuals.max()]
scatter_ax.plot(diagonal, diagonal, 'r--', lw=2)
scatter_ax.set(
    xlabel='Actual Precipitation (mm)',
    ylabel='Predicted Precipitation (mm)',
    title='Predictions vs Actuals',
)
scatter_ax.grid(True)

plt.tight_layout()
plt.show()