In [1]:
# Enable IPython's autoreload extension in mode 2 so that edits to imported
# modules are picked up without restarting the kernel.
%load_ext autoreload
%autoreload 2

In [2]:
from typing import List
from datasetsforecast.long_horizon import LongHorizon
import pandas as pd
import numpy as np
import torch
from typing import Optional

import os, pathlib
from glob import glob

from torch.utils.data import Dataset, DataLoader, Subset
from tqdm.auto import tqdm

from dataset import LongHorizonUnivariateDataModule, LongHorizonUnivariateDataset
from dataset import ElectricityUnivariateDataModule, ElectricityUnivariateDataset

import pytorch_lightning as pl
from omegaconf import OmegaConf
from utils.model_factory import instantiate

from statsforecast.models import AutoETS, ETS, Theta, AutoCES

from metrics import SMAPE, MAPE, CRPS

import os
# Expose only device index 7 to CUDA for this process (machine-specific choice).
os.environ.update({'CUDA_VISIBLE_DEVICES': '7'})

# Base results directory (not referenced by the cells visible in this notebook).
RESULTS_DIR = './results'
from metrics import SMAPE, MAPE, CRPS

import logging

# Configure the root logger to emit INFO-level records and above.
logging.basicConfig(level=logging.INFO)
# Logger named after this module/notebook namespace.
logger = logging.getLogger(__name__)


In [None]:
# Disabled one-off checkpoint migration, kept for reference. For every path in
# `model_list` it would load the checkpoint on CPU, overwrite the backbone
# `_target_` stored in the hyper-parameters with 'modules.AqOutTransformer',
# and save the file back in place.
# NOTE(review): `model_list` is only assigned in a later cell, so this snippet
# cannot run top-to-bottom where it currently sits.
# for ckpt in tqdm(model_list):
#     print(ckpt)
#     model = torch.load(ckpt, map_location='cpu')
#     model['hyper_parameters']['cfg']['model']['nn']['backbone']['_target_'] = 'modules.AqOutTransformer'
#     torch.save(model, ckpt)

In [5]:
import os
from glob import glob

# --- 1. Report the expected directory layout, one nesting level at a time ---
# Each entry: (folder to probe, header printed when it exists, per-item indent).
layout_levels = [
    ('./data/', "   ./data/ exists", "      "),
    ('./data/electricity/', "\n   ./data/electricity/ contents:", "      "),
    ('./data/electricity/datasets/', "\n   ./data/electricity/datasets/ contents:", "         "),
]

print("1. Checking data directory structure:")
for depth, (folder, header, indent) in enumerate(layout_levels):
    if not os.path.exists(folder):
        # Only the top-level folder gets an explicit "missing" message; a
        # missing deeper level simply ends the walk.
        if depth == 0:
            print("   ./data/ does not exist!")
        break
    print(header)
    for entry in os.listdir(folder):
        print(f"{indent}{entry}")

# --- 2. Any CSV anywhere below ./data/ (listing capped at 20 paths) ---------
print("\n2. Searching for any CSV files in data directory:")
csv_files = glob("./data/**/*.csv", recursive=True)
print(f"   Found {len(csv_files)} CSV files:")
for csv_path in csv_files[:20]:
    print(f"   {csv_path}")

# --- 3. df_y.csv anywhere below the current working directory ---------------
print("\n3. Searching for df_y.csv files:")
df_y_files = glob("**/df_y.csv", recursive=True)
print(f"   Found {len(df_y_files)} df_y.csv files:")
for df_y_path in df_y_files:
    print(f"   {df_y_path}")

1. Checking data directory structure:
   ./data/ exists
      downloads
      electricity
      emhires

   ./data/electricity/ contents:
      datasets

   ./data/electricity/datasets/ contents:
         .ipynb_checkpoints

2. Searching for any CSV files in data directory:
   Found 0 CSV files:

3. Searching for df_y.csv files:
   Found 0 df_y.csv files:


In [None]:
# File name of the checkpoint to pick up inside every matching run directory.
checkpoint_name = "model-epoch=14.ckpt"

# Run selection: these values must match the run directory names on disk.
backbone = "modules.NBEATSAQCAT"
maxnorm = True

# Remaining hyper-parameters that are encoded in the run directory name.
blocks = 30
lr = 0.0005
width = 1024
layers = 3
warmup = 400
train_q = 1
quantile_embed_num = 100
quantile_embed_dim = 64
loss = "MQNLoss"
seed = "*"  # glob wildcard: match every seed of the selected configuration

# Build the glob pattern for the selected backbone family. Each family encodes
# a different set of hyper-parameters in its run directory name.
if 'NBEATS' in backbone:
    checkpoint_pattern = f"lightning_logs/MHLV/model=model.AnyQuantileForecaster-backbone={backbone}-history=168-lr={lr}-width={width}-layers={layers}-blocks={blocks}-warmup={warmup}-maxnorm={maxnorm}-loss=losses.{loss}-seed={seed}/checkpoints/{checkpoint_name}"
elif 'Cnn' in backbone:
    if loss == "MQNLoss":
        checkpoint_pattern = f"lightning_logs/MHLV/model=model.GeneralAnyQuantileForecaster-backbone={backbone}-history=168-lr={lr}-width={width}-train_q={train_q}-quantile_embed_num={quantile_embed_num}-quantile_embed_dim={quantile_embed_dim}-maxnorm={maxnorm}-loss=losses.{loss}-seed={seed}/checkpoints/{checkpoint_name}"
    else:
        # For other losses the loss segment is matched with a wildcard.
        checkpoint_pattern = f"lightning_logs/MHLV/model=model.GeneralAnyQuantileForecaster-backbone={backbone}-history=168-lr={lr}-width={width}-train_q={train_q}-quantile_embed_num={quantile_embed_num}-quantile_embed_dim={quantile_embed_dim}-maxnorm={maxnorm}*-seed={seed}/checkpoints/{checkpoint_name}"
elif 'Transformer' in backbone:
    checkpoint_pattern = f"lightning_logs/MHLV/model=model.GeneralAnyQuantileForecaster-backbone={backbone}-history=168-lr={lr}-width={width}-blocks={blocks}-train_q={train_q}-quantile_embed_num={quantile_embed_num}-quantile_embed_dim={quantile_embed_dim}-maxnorm={maxnorm}-loss=losses.{loss}-seed={seed}/checkpoints/{checkpoint_name}"
else:
    # Without this branch an unknown backbone surfaces later as a confusing
    # NameError on `checkpoint_pattern`.
    raise ValueError(
        f"Unsupported backbone {backbone!r}: expected a name containing "
        "'NBEATS', 'Cnn' or 'Transformer'"
    )

# glob returns matches in arbitrary order; sort so that model_list[0] and the
# worker numbering used downstream are reproducible across runs and machines.
model_list = sorted(glob(checkpoint_pattern))

print(f"Searching for: {checkpoint_pattern}")
print(f"Found {len(model_list)} matches:")
for m in model_list:
    print(f"  {m}")

if not model_list:
    raise FileNotFoundError(f"No checkpoints found matching pattern: {checkpoint_pattern}")

# Recover the training config from the first checkpoint's hyper-parameters.
hparams = torch.load(model_list[0], map_location='cpu')['hyper_parameters']
cfg = OmegaConf.create(hparams).cfg

# Override the split boundaries stored in the checkpoint config before
# building the datamodule used for test-time evaluation.
cfg.dataset.split_boundaries = ['2006-01-01', '2017-01-01', '2018-01-01', '2019-01-01']
dm = instantiate(cfg.dataset)
dm.setup(stage='test')
test_loader = dm.test_dataloader()

model_list

In [None]:
# One DataFrame per test batch, keeping only the listed batch entries; every
# tensor is moved to CPU and split along its first axis into per-row values.
dfs = [
    pd.DataFrame.from_dict({
        key: list(tensor.cpu().numpy())
        for key, tensor in batch.items()
        if key in ['target', 'history', 'series_id', 'quantiles']
    })
    for batch in tqdm(test_loader)
]

# Stack all batches into a single frame with a fresh 0..N-1 index.
df = pd.concat(dfs, axis=0, ignore_index=True)

In [None]:
# Number of quantile slots of each kind per sample. The evaluation cell below
# slices the first `num_deterministic` entries as the fixed grid and the rest
# as the random ones.
num_random = 100
num_deterministic = 101

# Validate total quantiles
TOTAL_QUANTILES = num_random + num_deterministic
print(f"Expected total quantiles: {TOTAL_QUANTILES}")

# Fail with a clear message instead of a bare IndexError from `.iloc[0]`.
if df.empty:
    raise ValueError("Test dataframe is empty; cannot validate the quantile layout")

# Measured once from the first sample and reused for the report and the check.
quantiles_per_sample = len(df.quantiles.iloc[0])
print(f"Actual quantiles per sample: {quantiles_per_sample}")

# Verify the split makes sense
if quantiles_per_sample != TOTAL_QUANTILES:
    raise ValueError(
        f"Quantile count mismatch! Expected {TOTAL_QUANTILES}, "
        f"got {quantiles_per_sample}"
    )

In [None]:
def predict_checkpoint(checkpoint_file, trainer):
    """Run test-set inference for one checkpoint and return predictions on CPU.

    Args:
        checkpoint_file: Path to a Lightning checkpoint whose
            ``hyper_parameters`` entry stores the training config under ``cfg``.
        trainer: ``pl.Trainer`` used to run ``predict``.

    Returns:
        A detached CPU tensor obtained by concatenating the per-batch outputs
        of ``trainer.predict`` along the first dimension.

    Note:
        Reads the module-level datamodule ``dm`` for the test dataloader.
    """
    # Deserialize on CPU (as the checkpoint-discovery cell does) so loading
    # does not depend on the GPU index the checkpoint was saved from.
    hparams = torch.load(checkpoint_file, map_location='cpu')['hyper_parameters']
    cfg = OmegaConf.create(hparams).cfg

    # `load_from_checkpoint` is a classmethod: resolve the model class from the
    # config and call it on the class rather than on a throwaway instance
    # (recent Lightning releases reject the call on an instance).
    model_cls = type(instantiate(cfg.model, cfg))
    model = model_cls.load_from_checkpoint(checkpoint_file, map_location='cpu')

    batch_outputs = trainer.predict(model, dataloaders=dm.test_dataloader())
    return torch.cat(batch_outputs).detach().cpu()


trainer = pl.Trainer(accelerator='gpu', devices=1)

# One prediction tensor per checkpoint, in `model_list` order.
# NOTE: an earlier, disabled variant re-sorted the random-quantile slice of
# each prediction along the last axis before ensembling; it relied on an
# undefined `mid_idx` and is intentionally not applied here.
predictions = [predict_checkpoint(ckpt, trainer) for ckpt in model_list]

# Ensemble = element-wise mean over checkpoints.
predictions_ensemble = torch.stack(predictions).mean(dim=0)

In [None]:
# Separate CRPS accumulators for the randomly sampled quantiles and for the
# fixed (deterministic) quantile grid.
crps_rnd = CRPS()
crps_fixed = CRPS()

# zip() would silently truncate if predictions and samples were misaligned.
if len(df) != len(predictions_ensemble):
    raise ValueError(
        f"Prediction/sample count mismatch: {len(predictions_ensemble)} predictions "
        f"for {len(df)} samples"
    )


def _as_batch(values):
    """Convert an array-like to a float32 tensor with a leading batch axis of 1."""
    return torch.as_tensor(np.asarray(values), dtype=torch.float32)[None]


for target, pred, q in tqdm(zip(df.target, predictions_ensemble, df.quantiles), total=len(df)):

    # Skip samples whose target contains inf or NaN values.
    if not np.isfinite(target).all():
        continue

    q = np.asarray(q)
    pred = np.asarray(pred)
    target = np.asarray(target)

    # Quantile layout: deterministic grid first, random quantiles after it.
    # The quantile axis is the LAST axis of a prediction (the export cell
    # repeats targets along p.shape[-1] and quantiles along p.shape[1]), so
    # slice with an ellipsis; plain `pred[:k]` would cut the first axis instead.
    q_deterministic = q[..., :num_deterministic]
    q_random = q[..., num_deterministic:]

    predictions_deterministic = pred[..., :num_deterministic]
    predictions_random = pred[..., num_deterministic:]

    # NOTE(review): assumes CRPS.update takes batched preds/target/q with
    # quantiles on the last axis of preds — confirm against metrics.CRPS.
    crps_rnd.update(
        preds=_as_batch(predictions_random),
        target=_as_batch(target),
        q=_as_batch(q_random)
    )

    crps_fixed.update(
        preds=_as_batch(predictions_deterministic),
        target=_as_batch(target),
        q=_as_batch(q_deterministic)
    )

print("CRPS random quants", crps_rnd.compute().cpu().numpy())
print("CRPS mandatory quants", crps_fixed.compute().cpu().numpy())

In [None]:
# Output folder is keyed by backbone class name, maxnorm flag and loss name.
RESULTS_PATH = f'results/MHLV/{backbone.split(".")[-1]}-maxnorm={maxnorm}-loss={loss}'
os.makedirs(RESULTS_PATH, exist_ok=True)

# Row selectors depend only on `df`, so build them once rather than once per
# worker. Keys are the distinct series ids, values are boolean row masks.
series_masks = {
    series_id: (df.series_id == series_id).values
    for series_id in df.series_id.unique()
}

# One pickle per (worker, series). Each checkpoint is treated as one "worker".
for worker, p in enumerate(tqdm(predictions)):
    for series_id, mask in series_masks.items():
        p_series = p[mask].numpy()
        forec = pd.DataFrame({f"forec{worker+1}": p_series.ravel()})

        # Actuals and quantile levels are only written alongside the first
        # worker's forecasts, so they are only built for that worker.
        if worker == 0:
            target_series = np.array(list(df.target[mask]))
            # NOTE(review): with nan_to_num's default nan=0.0 this maps +inf to
            # NaN but also turns existing NaN targets into 0.0 — confirm intended.
            target_series = np.nan_to_num(target_series, posinf=np.nan)
            # Repeat each target across the last prediction axis so it lines
            # up element-wise with the flattened forecasts.
            target_series = np.repeat(target_series[..., None], p.shape[-1], axis=-1)

            quantile_series = np.array(list(df.quantiles[mask]))
            # Repeat the quantile levels across axis 1 of the prediction.
            quantile_series = np.repeat(quantile_series[:, None], p.shape[1], axis=1)

            forec['actuals'] = target_series.ravel()
            forec['quants'] = quantile_series.ravel()
            forec = forec[['actuals', 'quants', 'forec1']]

        forec.to_pickle(os.path.join(RESULTS_PATH, f'e1w{worker+1}_{series_id}.pickle'))
                