In [None]:
import os
import pandas as pd
import torch

from lightning.pytorch import Trainer
from pytorch_forecasting import TimeSeriesDataSet, NBeats
from pytorch_forecasting.metrics import RMSE

# ---------------- config ----------------
# Input CSV holding the training data, and the CSV the forecasts are written to.
DATA_PATH = r"./../Data Given for Challenge/train.csv"
OUT_PATH = "nbeats_weather_forecast.csv"

# Window sizes, measured in time steps.
MAX_ENCODER_LENGTH = 72  # lookback length (3 days hourly)
MAX_PRED_LENGTH = 24  # forecast horizon (1 day)

# Optimisation settings.
BATCH_SIZE = 64
EPOCHS = 5
LR = 1e-3

# Use the GPU when torch can see one, otherwise fall back to the CPU.
if torch.cuda.is_available():
    DEVICE = "cuda"
else:
    DEVICE = "cpu"

# ---------------- load & preprocess ----------------
df = pd.read_csv(DATA_PATH)

# Parse the raw "Timestamp" column into a datetime column named "timestamp"
# and drop the original text column.
df["timestamp"] = pd.to_datetime(df["Timestamp"])
df = df.drop(columns=["Timestamp"])

# Ensure Location is a string so it can serve as the categorical group id.
df["Location"] = df["Location"].astype(str)

# Put every location's rows in chronological order *before* numbering them:
# cumcount() numbers rows in their current order, so an unsorted CSV would
# otherwise produce a time_idx that does not follow time.
df = df.sort_values(["Location", "timestamp"]).reset_index(drop=True)

# Add time index (per location): 0, 1, 2, ... within each location.
# NOTE(review): this treats consecutive rows as consecutive time steps, i.e.
# it assumes evenly spaced readings with no gaps -- confirm against the data.
df["time_idx"] = df.groupby("Location").cumcount()

# ---------------- feature columns ----------------
# Every column except the bookkeeping columns and the target "out".
feature_cols = [c for c in df.columns if c not in ["timestamp", "time_idx", "Location", "out"]]

# ---------------- build dataset ----------------
# Hold out the last MAX_PRED_LENGTH steps of every location for validation.
# Building the training set from the full frame would let the model train on
# exactly the windows it is later validated on, so val_loss would not be a
# hold-out metric.
last_idx = df.groupby("Location")["time_idx"].transform("max")
train_df = df[df["time_idx"] <= last_idx - MAX_PRED_LENGTH]

# NOTE(review): a location needs at least MAX_ENCODER_LENGTH + MAX_PRED_LENGTH
# rows left after the cutoff to contribute training windows -- confirm that
# every location is long enough.
ts_dataset = TimeSeriesDataSet(
    train_df,
    time_idx="time_idx",
    target="out",
    group_ids=["Location"],
    max_encoder_length=MAX_ENCODER_LENGTH,
    max_prediction_length=MAX_PRED_LENGTH,
    time_varying_known_reals=[],
    time_varying_unknown_reals=["out"],   # ONLY target allowed for N-BEATS
    target_normalizer=None,
)

# train/val split
# The dataset above already contains only pre-cutoff rows, so it is the
# training set. Validation reuses its configuration on the full frame;
# predict=True keeps one window per location whose decoder covers that
# location's final MAX_PRED_LENGTH steps (the held-out part).
train_ds = ts_dataset
val_ds   = TimeSeriesDataSet.from_dataset(ts_dataset, df, predict=True, stop_randomization=True)

train_loader = train_ds.to_dataloader(train=True, batch_size=BATCH_SIZE, num_workers=0)
val_loader   = val_ds.to_dataloader(train=False, batch_size=BATCH_SIZE, num_workers=0)

# ---------------- model ----------------
# N-BEATS network whose dimensions are taken from the dataset definition;
# optimised with RMSE.
model = NBeats.from_dataset(
    ts_dataset,
    loss=RMSE(),
    learning_rate=LR,
    log_interval=10,
    log_val_interval=1,
)

# ---------------- train ----------------
# DEVICE "cuda" maps to Lightning's "gpu" accelerator; any other value
# selects "cpu".
trainer = Trainer(
    accelerator={"cuda": "gpu"}.get(DEVICE, "cpu"),
    devices=1,
    max_epochs=EPOCHS,
    gradient_clip_val=0.1,
)

trainer.fit(model, train_dataloaders=train_loader, val_dataloaders=val_loader)

# ---------------- predict ----------------
# When extra returns are requested, predict() hands back ONE `Prediction`
# object (fields: output, x, index, decoder_lengths, y). Unpacking it into two
# names raises "too many values to unpack", so read the fields by name.
prediction = model.predict(val_loader, mode="prediction", return_x=True, return_index=True)
x = prediction.x  # raw network inputs, kept available under the original name
preds = prediction.output.detach().cpu().numpy()  # (n_windows, horizon)
n_windows, horizon = preds.shape

# build prediction dataframe
# prediction.index has one row per window: the group id ("Location") and the
# time_idx of the FIRST forecast step. Expand it to one row per forecast step
# so it lines up with the flattened (row-major) prediction matrix.
window_index = prediction.index.reset_index(drop=True)
df_preds = window_index.loc[
    window_index.index.repeat(horizon), ["Location", "time_idx"]
].reset_index(drop=True)
step_offsets = torch.arange(horizon).repeat(n_windows).numpy()
df_preds["time_idx"] = df_preds["time_idx"].to_numpy() + step_offsets
df_preds["pred"] = preds.reshape(-1)

# map time_idx back to timestamp
# time_idx restarts at 0 for every location, so the lookup must be keyed on
# (Location, time_idx); a plain time_idx -> timestamp dict would mix locations.
df_preds = df_preds.merge(
    df[["Location", "time_idx", "timestamp"]],
    on=["Location", "time_idx"],
    how="left",
    validate="many_to_one",
)

df_preds = df_preds[["timestamp", "Location", "pred"]]
df_preds.to_csv(OUT_PATH, index=False)

print(f"✅ Saved predictions to {OUT_PATH}")
print(df_preds.head())


c:\Users\kumar\AppData\Local\Programs\Python\Python313\Lib\site-packages\lightning\pytorch\utilities\parsing.py:210: Attribute 'loss' is an instance of `nn.Module` and is already saved during checkpointing. It is recommended to ignore them using `self.save_hyperparameters(ignore=['loss'])`.
c:\Users\kumar\AppData\Local\Programs\Python\Python313\Lib\site-packages\lightning\pytorch\utilities\parsing.py:210: Attribute 'logging_metrics' is an instance of `nn.Module` and is already saved during checkpointing. It is recommended to ignore them using `self.save_hyperparameters(ignore=['logging_metrics'])`.
💡 Tip: For seamless cloud uploads and versioning, try installing [litmodels](https://pypi.org/project/litmodels/) to enable LitModelCheckpoint, which syncs automatically with the Lightning model registry.
GPU available: True (cuda), used: True
TPU available: False, using: 0 TPU cores
HPU available: False, using: 0 HPUs
c:\Users\kumar\AppData\Local\Programs\Python\Python313\Lib\site-packages\

Sanity Checking: |          | 0/? [00:00<?, ?it/s]

c:\Users\kumar\AppData\Local\Programs\Python\Python313\Lib\site-packages\lightning\pytorch\trainer\connectors\data_connector.py:433: The 'val_dataloader' does not have many workers which may be a bottleneck. Consider increasing the value of the `num_workers` argument` to `num_workers=21` in the `DataLoader` to improve performance.


                                                                           

c:\Users\kumar\AppData\Local\Programs\Python\Python313\Lib\site-packages\lightning\pytorch\trainer\connectors\data_connector.py:433: The 'train_dataloader' does not have many workers which may be a bottleneck. Consider increasing the value of the `num_workers` argument` to `num_workers=21` in the `DataLoader` to improve performance.


Epoch 4: 100%|██████████| 2679/2679 [01:31<00:00, 29.23it/s, v_num=0, train_loss_step=109.0, val_loss=175.0, train_loss_epoch=262.0]  

`Trainer.fit` stopped: `max_epochs=5` reached.


Epoch 4: 100%|██████████| 2679/2679 [01:31<00:00, 29.15it/s, v_num=0, train_loss_step=109.0, val_loss=175.0, train_loss_epoch=262.0]

💡 Tip: For seamless cloud uploads and versioning, try installing [litmodels](https://pypi.org/project/litmodels/) to enable LitModelCheckpoint, which syncs automatically with the Lightning model registry.
GPU available: True (cuda), used: True
TPU available: False, using: 0 TPU cores
HPU available: False, using: 0 HPUs
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]





c:\Users\kumar\AppData\Local\Programs\Python\Python313\Lib\site-packages\lightning\pytorch\trainer\connectors\data_connector.py:433: The 'predict_dataloader' does not have many workers which may be a bottleneck. Consider increasing the value of the `num_workers` argument` to `num_workers=21` in the `DataLoader` to improve performance.


ValueError: too many values to unpack (expected 2)