# Deep Learning Prediction Workflow
Predict daily household electricity consumption from weather + recent usage.
Trains three sequence models (LSTM, GRU, temporal CNN), visualizes performance, and saves artifacts for CLI inference.


## Setup
Install/verify dependencies. Kaggle API download matches the `notebooks/downloader.ipynb` approach (KaggleApi with targeted files).


In [8]:
%pip install -q torch pandas scikit-learn matplotlib seaborn joblib kaggle holidays
# Comment out the line above if your environment already has these dependencies


In [9]:
import os
import subprocess
from pathlib import Path

# Where the project lives on GitHub and where it should land on the Colab VM.
# Replace GITHUB_URL with your own repository URL if you work from a fork.
GITHUB_URL = "https://github.com/HolikGellert/Projektmunka.git"
REPO_PATH = "/content/Projektmunka"

repo_dir = Path(REPO_PATH)

print("Cloning repository from GitHub...")
if repo_dir.exists():
    # Any existing path is treated as a finished checkout; nothing is re-cloned.
    print(f"✓ Repository already exists at {REPO_PATH}")
else:
    # check=True turns a failed `git clone` into a CalledProcessError.
    subprocess.run(["git", "clone", GITHUB_URL, REPO_PATH], check=True)
    print(f"✓ Cloned to {REPO_PATH}")

# Sanity check: later cells import from the prediction/ package inside the repo.
has_prediction = (repo_dir / "prediction").exists()
print(
    "✓ Found prediction/ directory"
    if has_prediction
    else "✗ prediction/ directory not found"
)

Cloning repository from GitHub...
✓ Cloned to /content/Projektmunka
✗ prediction/ directory not found


## Imports & Paths
Add the repo root to the Python path so modules inside `prediction/src` are importable.

**For Colab (GPU):** Run the Git clone cell above first. It clones the repository to `/content/Projektmunka`, which is where the next cell looks for the project.

**For Local:** Run `jupyter notebook prediction/notebooks/prediction_workflow.ipynb` from the repo root.

In [10]:
import sys
from pathlib import Path

# Default location: where the Colab Git clone cell puts the repository.
REPO_ROOT = Path('/content/Projektmunka')

if not (REPO_ROOT / 'prediction').is_dir():
    # Not on Colab (or nothing was cloned there): fall back to the current working
    # directory and its parents, so a local Jupyter session started anywhere inside
    # a checkout (e.g. prediction/notebooks) still finds the repo root.
    working_dir = Path.cwd()
    local_root = next(
        (
            candidate
            for candidate in (working_dir, *working_dir.parents)
            if (candidate / 'prediction').is_dir()
        ),
        None,
    )
    if local_root is None:
        raise RuntimeError(
            f'Repository not found at {REPO_ROOT}.\n'
            'Make sure:\n'
            '1. You ran the Git clone cell above\n'
            '2. Your GitHub repo URL is correct\n'
            '3. The repo contains prediction/ subdirectory\n'
            '4. For local runs, you started Jupyter from inside the repository'
        )
    REPO_ROOT = local_root

print(f'✓ Repo root: {REPO_ROOT}')
print(f'✓ Prediction dir exists: {(REPO_ROOT / "prediction").is_dir()}')

# Put the repo root first on sys.path so `import prediction...` resolves to this checkout.
if str(REPO_ROOT) not in sys.path:
    sys.path.insert(0, str(REPO_ROOT))

import torch
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns

sns.set_theme(style='whitegrid')
print(f'✓ CUDA available: {torch.cuda.is_available()}')

RuntimeError: Repository not found at /content/Projektmunka.
Make sure:
1. You ran the Git clone cell above
2. Your GitHub repo URL is correct
3. The repo contains prediction/ subdirectory

## Data prep
Merge energy + weather, add temporal features, scale, and build sliding-window sequences for the models.


In [None]:
from prediction.config import PredictionConfig as Config
from prediction.src.data_prep import PredictionDataPrep

# Pipeline: raw load -> feature engineering -> sequence building -> train/val/test split.
prep = PredictionDataPrep()
raw_df = prep.load_raw()
merged_df = prep.add_features(raw_df)
seq_data = prep.build_sequences(merged_df)

# The split returns the three X parts first, then the three matching y parts.
splits = prep.train_val_test_split(seq_data)
X_train, X_val, X_test, y_train, y_val, y_test = splits

# Quick size report to confirm each stage produced data.
print('Merged shape:', merged_df.shape)
print('Sequence tensor:', seq_data.X.shape)
print('Split sizes:', *(len(part) for part in (X_train, X_val, X_test)))


ModuleNotFoundError: No module named 'prediction'

## Build dataloaders
Wraps the train/validation/test splits in DataLoaders and saves the scaler.


In [None]:
from prediction.src.train_utils import create_dataloaders, save_scaler, make_loader

# One batch size for all three loaders.
batch_size = Config.BATCH_SIZE

train_loader, val_loader = create_dataloaders(
    X_train,
    y_train,
    X_val,
    y_val,
    batch_size=batch_size,
)
# The test loader is built without shuffling.
test_loader = make_loader(X_test, y_test, batch_size=batch_size, shuffle=False)

# Save the scaler that came out of sequence building (presumably for reuse at inference time).
save_scaler(seq_data.scaler)

# Cell output: number of batches in the (train, val, test) loaders.
tuple(len(loader) for loader in (train_loader, val_loader, test_loader))


## Train models
Train an LSTM, GRU, and temporal CNN; log histories and persist weights.


In [None]:
from prediction.src.models import LSTMRegressor, GRURegressor, TemporalCNNRegressor
from prediction.src.train_utils import train_model, save_model, save_metadata, evaluate_mae

# All three architectures take the same number of input features.
input_size = len(seq_data.feature_cols)

# name -> (model instance, path its weights are saved to)
models = {
    'lstm': (LSTMRegressor(input_size=input_size), Config.LSTM_MODEL_PATH),
    'gru': (GRURegressor(input_size=input_size), Config.GRU_MODEL_PATH),
    'tcn': (TemporalCNNRegressor(input_size=input_size), Config.CNN_MODEL_PATH),
}

# Hyperparameters shared by every training run, read from the config once.
train_kwargs = {
    'epochs': Config.EPOCHS,
    'lr': Config.LEARNING_RATE,
    'weight_decay': Config.WEIGHT_DECAY,
    'grad_clip': Config.GRAD_CLIP,
    'max_batches': Config.MAX_TRAIN_BATCHES,
    'early_stop_patience': Config.EARLY_STOP_PATIENCE,
}

histories, metrics = {}, {}
for name, (model, path) in models.items():
    print(f'\nTraining {name.upper()}...')
    histories[name] = train_model(model, train_loader, val_loader, **train_kwargs)

    # Score on validation first, then test, before the weights are written out.
    val_mae = evaluate_mae(model, val_loader, max_batches=Config.MAX_EVAL_BATCHES)
    test_mae = evaluate_mae(model, test_loader, max_batches=Config.MAX_EVAL_BATCHES)
    save_model(model, path)

    metrics[name] = {'val_mae': val_mae, 'test_mae': test_mae}

# Record the input layout and scores alongside the saved weights.
metadata = {
    'feature_cols': seq_data.feature_cols,
    'lookback': seq_data.lookback,
    'metrics': metrics,
}
save_metadata(metadata)

# Cell output: per-model validation/test MAE.
metrics


## Plot training curves


In [None]:
# One shared axis: solid line = training loss, dashed line = validation loss, per model.
fig, ax = plt.subplots(figsize=(8, 4))
for model_name, curves in histories.items():
    ax.plot(curves['train_loss'], label=f'{model_name} train')
    ax.plot(curves['val_loss'], label=f'{model_name} val', linestyle='--')

ax.set(
    xlabel='Epoch',
    ylabel='MSE Loss',
    title='Training vs Validation Loss',
)
ax.legend()
plt.show()


## Inspect predictions
Visual sanity check on a validation batch.


In [None]:
# Choose best model based on validation MAE and visualize a batch
model_key = min(metrics, key=lambda k: metrics[k]['val_mae'])
best_model = models[model_key][0]
best_model.eval()

# The model may sit on the GPU after training (train_model is not visible here, so this
# is a precaution). Run inference on whatever device holds the weights, falling back to
# the CPU for a parameter-free model.
first_param = next(best_model.parameters(), None)
device = first_param.device if first_param is not None else torch.device('cpu')

batch_X, batch_y = next(iter(val_loader))
with torch.no_grad():
    # Move the batch to the model's device; copy the result back to the CPU because
    # .numpy() cannot be called on a CUDA tensor.
    preds = best_model(batch_X.to(device)).squeeze().cpu().numpy()
truth = batch_y.squeeze().cpu().numpy()

plt.figure(figsize=(8, 4))
plt.plot(preds, label='pred')
plt.plot(truth, label='true')
plt.title(f'Validation sample predictions ({model_key})')
plt.legend()
plt.show()


## CLI inference
Artifacts saved to `prediction/models`. Run `python prediction/predict.py --model lstm` and follow prompts to get a next-day prediction.
