In [10]:
import pandas as pd 
import numpy as np
# import matplotlib.pyplot as plt
from tqdm import tqdm

# import cufflinks as cf
# from plotly import __version__
# from plotly.offline import download_plotlyjs, init_notebook_mode, plot, iplot

# cf.go_offline()

%matplotlib inline
from sklearn.metrics import mean_absolute_error, mean_absolute_percentage_error, mean_squared_error

from neuralforecast.auto import AutoTCN
from neuralforecast.tsdataset import TimeSeriesDataset
from neuralforecast.core import NeuralForecast
from neuralforecast.models import TCN

from datetime import datetime, timedelta
# import plotly.graph_objects as go
# import matplotlib.colors as mcolors
from neuralforecast.losses.pytorch import MQLoss
from tqdm import tqdm

In [11]:
# Load and process the MEASLES_ARIZONA data
df = pd.read_csv("../outbreaks_disease_location.csv")
value_columns = [str(i) for i in range(60)]
series_values = df[value_columns].fillna(0).astype(float)
start_dates = pd.to_datetime(df["start_date"])

# Shuffle and split
shuffled_indices = df.sample(frac=1, random_state=42).index
split_point = int(0.8 * len(df))
train_indices = shuffled_indices[:split_point]
test_indices = shuffled_indices[split_point:]

In [12]:
train_records = []
for i, row in series_values.iloc[train_indices].iterrows():
    dates = pd.date_range(start="2000-01-01", periods=60, freq="W-SAT")
    for t, value in enumerate(row):
        train_records.append({"unique_id": f"Y{i+1}", "ds": dates[t], "y": value})
df_train = pd.DataFrame(train_records)

test_start_dates = start_dates.loc[test_indices] - pd.Timedelta(weeks=4)
df_test_all = []

for idx in test_indices:
    start_date = test_start_dates.loc[idx]
    row = series_values.loc[idx]
    dates = pd.date_range(start=start_date, periods=60, freq="W-SAT")
    for t, value in enumerate(row):
        df_test_all.append({"unique_id": f"Y_{idx}", "ds": dates[t], "y": value})

df_test_all = pd.DataFrame(df_test_all)

In [13]:
df_train["date"] = pd.to_datetime(df_train["ds"])
df_train.set_index("date", inplace = True)

df_test_all["date"] = pd.to_datetime(df_test_all["ds"])
df_test_all.set_index("date", inplace = True)

In [14]:
df_test_all

Unnamed: 0_level_0,unique_id,ds,y
date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
2022-08-13,Y_5607,2022-08-13,20775.0
2022-08-20,Y_5607,2022-08-20,16570.0
2022-08-27,Y_5607,2022-08-27,13995.0
2022-09-03,Y_5607,2022-09-03,12145.0
2022-09-10,Y_5607,2022-09-10,10686.0
...,...,...,...
1939-05-20,Y_7270,1939-05-20,0.0
1939-05-27,Y_7270,1939-05-27,0.0
1939-06-03,Y_7270,1939-06-03,0.0
1939-06-10,Y_7270,1939-06-10,0.0


In [15]:
class FixedModelTCNProcessor:
    def __init__(self, dates=[]):
        self.dates = dates
        self.forecasts = []
        self.eval_pairs = []
        self.input_size = None
        self.nf = None
        self.config = None
        self.testset = None
        self.maes = []
        self.mses = []
        self.mapes = []
        self.nmses = []
        self.reference_dates = {}
        self.metrics_df = pd.DataFrame(columns=["Unique_id","Reference Date", "MAE", "MSE", "MAPE", "NMSE"])
        self.display_df = pd.DataFrame(columns=["Unique_id","Reference Date", "Target End Date", "GT" , "Quantile", "Prediction"])
        self.model = None
        self.selected_input_size = None

    def create_fixed_model(self, df_train, df_test, test_ids, h, freq, level=[]):
        input_length = df_train.groupby("unique_id").size().min()
        max_input_size = max(8, input_length - h - 1)
        self.testset = df_test

        def config(trial):
            return {
                "input_size": trial.suggest_int("input_size", 8, max_input_size),
                "learning_rate": trial.suggest_float("learning_rate", 1e-4, 1e-2, log=True),
                "batch_size": trial.suggest_categorical("batch_size", [16, 32, 64]),
                "random_seed": trial.suggest_int("random_seed", 1, 99999),
                "start_padding_enabled": True,
                "accelerator": "gpu",
                "devices": 2,
                "strategy": "auto"
            }

        self.config = config

        if not level:
            nf = NeuralForecast(models=[AutoTCN(h=h, backend="optuna", config=self.config)], freq=freq)
        else:
            nf = NeuralForecast(models=[AutoTCN(h=h, backend="optuna", loss=MQLoss(level=level), config=self.config)], freq=freq)

        nf.fit(df=df_train)
        nf.save(path='./saved_models/TCN/',
                model_index=None,
                overwrite=True,
                save_dataset=True)
        self.nf = nf
        self.model = nf.models[0].model
        self.selected_input_size = self.model.hparams['input_size']

        for uid in tqdm(test_ids, desc="Forecasting with AutoTCN"):
            series_df = df_test[df_test["unique_id"] == uid]
            if len(series_df) <= self.selected_input_size:
                continue
            reference_date = series_df["ds"].iloc[-4]
            self.reference_dates[uid] = reference_date
            df = series_df[series_df["ds"] < reference_date]
            forecast = nf.predict(df=df).set_index("ds")
            forecast_horizon = forecast.index
            ground_truth = series_df[series_df["ds"].isin(forecast_horizon)].set_index("ds")
            self.forecasts.append(forecast)
            self.eval_pairs.append((forecast, ground_truth))
            self.dates.append(reference_date.strftime("%Y-%m-%d"))

    def calculate_metrics(self):
        for forecast_df, truth_df in self.eval_pairs:
            y_true = truth_df.iloc[:, 1]
            y_pred = forecast_df.iloc[:, 0]
            self.maes.append(mean_absolute_error(y_true, y_pred))
            self.mses.append(mean_squared_error(y_true, y_pred))
            self.mapes.append(mean_absolute_percentage_error(y_true, y_pred))
            self.nmses.append(self.mses[-1] / np.var(y_true))

    def create_metrics_df(self):
        for i in range(len(self.dates)):
            self.metrics_df.loc[len(self.metrics_df)] = [
                self.eval_pairs[i][0]["unique_id"].iloc[0],
                self.dates[i],
                self.maes[i],
                self.mses[i],
                self.mapes[i],
                self.nmses[i]
            ]
    
    def efficient_create_display_df(self, model_name="AutoTCN"):
        records = []
        testset_indexed = self.testset.set_index(["unique_id", "ds"])

        for i in tqdm(range(len(self.forecasts)), desc="Generating display_df"):
            forecast_df = self.forecasts[i]
            uid = forecast_df["unique_id"].iloc[0]
            reference_date = self.dates[i]
            gt_series = testset_indexed.loc[uid]["y"] if uid in testset_indexed.index.get_level_values(0) else pd.Series()

            for col in forecast_df.columns:
                if col == "unique_id":
                    continue

                if "lo" in col or "hi" in col:
                    number = int(col.split("-")[-1])
                    alpha = 1 - (number / 100)
                    quantile = 1 - (alpha / 2) if "hi" in col else alpha / 2
                elif col in [model_name, f"{model_name}-median"]:
                    quantile = 0.5
                else:
                    continue

                preds = forecast_df[col]
                for idx, pred in preds.items():
                    records.append({
                        "Unique_id": uid,
                        "Reference Date": reference_date,
                        "Target End Date": idx,
                        "GT": gt_series.get(idx, np.nan),
                        "Quantile": quantile,
                        "Prediction": pred
                    })

        self.display_df = pd.DataFrame(records).sort_values(
            by=["Unique_id", "Reference Date", "Target End Date", "GT", "Quantile"]
        ).reset_index(drop=True)

    
    

    def efficient_compute_wis(self):
        df = self.display_df.sort_values(by=["Unique_id", "Reference Date", "Target End Date", "Quantile"])
        results = []
        grouped = df.groupby(["Unique_id", "Reference Date", "Target End Date"])
        for (uid, ref_date, tgt_date), group in tqdm(grouped, desc="Computing WIS"):
            gt = group["GT"].iloc[0]
            if 0.5 not in group["Quantile"].values:
                continue
            median_pred = group[group["Quantile"] == 0.5]["Prediction"].iloc[0]
            ae = abs(median_pred - gt)
            wis = ae
            results.append({
                "Unique_id": uid,
                "Reference Date": ref_date,
                "Target End Date": tgt_date,
                "GT": gt,
                "WIS": wis
            })
        return pd.DataFrame(results)

In [16]:
test_ids = [f"Y_{i}" for i in test_indices]

In [17]:
processor = FixedModelTCNProcessor()

In [18]:
processor.create_fixed_model(df_train, df_test_all, test_ids, h=4, freq="W-SAT", level = [10,20,30,40,50,60,70,80, 85,90,95])

[32m[I 2026-02-07 14:41:52,056][0m A new study created in memory with name: no-name-06b108c0-b221-4aaa-b332-0b582c1199e6[0m
[rank: 0] Seed set to 89546
GPU available: True (cuda), used: True
TPU available: False, using: 0 TPU cores
💡 Tip: For seamless cloud logging and experiment tracking, try installing [litlogger](https://pypi.org/project/litlogger/) to enable LitLogger, which logs metrics and artifacts automatically to the Lightning Experiments platform.
W0207 14:41:52.163000 724528 /sfs/gpfs/tardis/home/aa5dw/.local/lib/python3.12/site-packages/torch/multiprocessing/spawn.py:165] Terminating process 528552 via signal SIGTERM
[33m[W 2026-02-07 14:41:52,176][0m Trial 0 failed with parameters: {'input_size': 53, 'learning_rate': 0.0031426705112991005, 'batch_size': 64, 'random_seed': 89546} because of the following error: ProcessRaisedException('\n\n-- Process 0 terminated with the following error:\nTraceback (most recent call last):\n  File "/home/aa5dw/.local/lib/python3.12/sit

ProcessRaisedException: 

-- Process 0 terminated with the following error:
Traceback (most recent call last):
  File "/home/aa5dw/.local/lib/python3.12/site-packages/torch/multiprocessing/spawn.py", line 87, in _wrap
    fn(i, *args)
  File "/home/aa5dw/.local/lib/python3.12/site-packages/pytorch_lightning/strategies/launchers/multiprocessing.py", line 173, in _wrapping_function
    results = function(*args, **kwargs)
              ^^^^^^^^^^^^^^^^^^^^^^^^^
  File "/home/aa5dw/.local/lib/python3.12/site-packages/pytorch_lightning/trainer/trainer.py", line 630, in _fit_impl
    self._run(model, ckpt_path=ckpt_path, weights_only=weights_only)
  File "/home/aa5dw/.local/lib/python3.12/site-packages/pytorch_lightning/trainer/trainer.py", line 1033, in _run
    self.strategy.setup_environment()
  File "/home/aa5dw/.local/lib/python3.12/site-packages/pytorch_lightning/strategies/ddp.py", line 154, in setup_environment
    super().setup_environment()
  File "/home/aa5dw/.local/lib/python3.12/site-packages/pytorch_lightning/strategies/strategy.py", line 129, in setup_environment
    self.accelerator.setup_device(self.root_device)
  File "/home/aa5dw/.local/lib/python3.12/site-packages/pytorch_lightning/accelerators/cuda.py", line 47, in setup_device
    torch.cuda.set_device(device)
  File "/home/aa5dw/.local/lib/python3.12/site-packages/torch/cuda/__init__.py", line 584, in set_device
    torch._C._cuda_setDevice(device)
  File "/home/aa5dw/.local/lib/python3.12/site-packages/torch/cuda/__init__.py", line 412, in _lazy_init
    raise RuntimeError(
RuntimeError: Cannot re-initialize CUDA in forked subprocess. To use CUDA with multiprocessing, you must use the 'spawn' start method


In [10]:
processor.efficient_create_display_df()

Generating display_df: 100%|██████████| 2160/2160 [00:16<00:00, 129.48it/s]


In [11]:
processor.display_df

Unnamed: 0,Unique_id,Reference Date,Target End Date,GT,Quantile,Prediction
0,Y_1,2011-10-15,2011-10-15,0.0,0.025,-1.503800
1,Y_1,2011-10-15,2011-10-15,0.0,0.050,-1.135317
2,Y_1,2011-10-15,2011-10-15,0.0,0.075,-0.977841
3,Y_1,2011-10-15,2011-10-15,0.0,0.100,-0.485586
4,Y_1,2011-10-15,2011-10-15,0.0,0.150,-0.466544
...,...,...,...,...,...,...
198715,Y_9999,1957-03-02,1957-03-23,0.0,0.850,1.503745
198716,Y_9999,1957-03-02,1957-03-23,0.0,0.900,1.503082
198717,Y_9999,1957-03-02,1957-03-23,0.0,0.925,1.372682
198718,Y_9999,1957-03-02,1957-03-23,0.0,0.950,1.540235


In [12]:
processor.display_df.to_csv('../../output/forecasts_TCN.csv',index=None)

In [13]:
wis_table = processor.efficient_compute_wis()

Computing WIS: 100%|██████████| 8640/8640 [00:03<00:00, 2633.64it/s]


In [14]:
wis_table

Unnamed: 0,Unique_id,Reference Date,Target End Date,GT,WIS
0,Y_1,2011-10-15,2011-10-15,0.0,0.220162
1,Y_1,2011-10-15,2011-10-22,0.0,0.005858
2,Y_1,2011-10-15,2011-10-29,0.0,0.019468
3,Y_1,2011-10-15,2011-11-05,0.0,0.079281
4,Y_1000,2022-06-11,2022-06-11,0.0,0.220162
...,...,...,...,...,...
8635,Y_9998,1955-03-26,1955-04-16,0.0,0.950220
8636,Y_9999,1957-03-02,1957-03-02,0.0,0.910064
8637,Y_9999,1957-03-02,1957-03-09,0.0,0.817035
8638,Y_9999,1957-03-02,1957-03-16,0.0,0.924174


In [13]:
wis_table.to_csv('../../eval/WIS_TCN.csv')

In [14]:
np.mean(wis_table['WIS'].values)

84.412364978793

In [15]:
wis_dfs = [wis_table.iloc[i::4].reset_index(drop=True) for i in range(4)]

In [19]:
np.mean(wis_dfs[3]['WIS'].values)

86.18797034887922