In [7]:
import time
import numpy as np
import pandas as pd
from tqdm import tqdm
from sklearn.metrics import mean_absolute_error, mean_squared_error, mean_absolute_percentage_error
from statsforecast import StatsForecast
from statsforecast.models import AutoETS
import random

In [8]:

def get_quantile_format(fctdf,model_name):
    qfctdf=pd.DataFrame()
    map_col={}
    for col in fctdf.columns:
        if col == "unique_id":
            continue
    
        quantile = None
        if "lo" in col:
            number = int(col[-2:])
            alpha = 1 - (number / 100)
            quantile = round(alpha / 2, 3)
        elif "hi" in col:
            number = int(col[-2:])
            alpha = 1 - (number / 100)
            quantile = round(1 - (alpha / 2), 3)
        elif col.endswith("-median") or col == model_name:
            quantile = 0.5
        else:
            continue
        map_col[col]=quantile
    
    qfctdf=fctdf.set_index('ds').melt(id_vars='unique_id',var_name='output_type_id',ignore_index=False)
    qfctdf.output_type_id=qfctdf.output_type_id.map(map_col)
    qfctdf=qfctdf.reset_index()
    qfctdf=qfctdf.rename(columns={'ds':'target_end_date','Reference Date':'reference_date'})
    # qfctdf.loc[:,'reference_date']=reference_date

    uid=fctdf.unique_id.unique()[0]
    gt_series = df_test_all[df_test_all.unique_id==uid]#.loc[df_col.index]

    targ_dates=fctdf['ds'].values
    gt = gt_series[gt_series.ds.isin(targ_dates)]
    gt=gt.rename(columns={'ds':'target_end_date','y':'GT'})

    qfctdf=qfctdf.merge(gt,on=['target_end_date','unique_id'])
    uid=fctdf.unique_id.unique()
    # qfctdf.loc[:,'unique_id']=uid
    return qfctdf
def get_test_fcts(df_test_all,model_name,saved_path):
    nf2 = NeuralForecast.load(path=saved_path)
    all_fctdf=pd.DataFrame()
    uids=df_test_all.unique_id.unique()
    for uid in uids:
        df_test=df_test_all[df_test_all.unique_id==uid]
        for i in range(8,56):
            test_data=df_test.iloc[:i]
            # uid = test_data.unique_id.unique()[0]
            reference_date = test_data.index[-1]
            fctdf=nf2.predict(test_data)
            qfctdf=get_quantile_format(fctdf,model_name=model_name)
            qfctdf.loc[:,'model']=model_name
            qfctdf.loc[:,'reference_date']=reference_date
            all_fctdf=pd.concat([all_fctdf,qfctdf])
    return all_fctdf

def get_wis(all_fctdf):
    results = []
    grouped = all_fctdf.groupby(["unique_id", "reference_date", "target_end_date"])
    
    for (uid, ref_date, tgt_date), group in grouped:
        gt = group["GT"].iloc[0]
        group = group.sort_values("output_type_id")
    
        if not np.any(np.isclose(group["output_type_id"], 0.5)):
            continue
    
        median_pred = group[np.isclose(group["output_type_id"], 0.5)]["value"].iloc[0]
        ae = abs(median_pred - gt)
    
        lo = group[group["output_type_id"] < 0.5]
        hi = group[group["output_type_id"] > 0.5][::-1].reset_index(drop=True)
    
        wis_components = []
        for i in range(min(len(lo), len(hi))):
            alpha = hi.iloc[i]["output_type_id"] - lo.iloc[i]["output_type_id"]
            lo_pred, hi_pred = lo.iloc[i]["value"], hi.iloc[i]["value"]
            interval_score = (
                hi_pred - lo_pred
                + (2 / alpha) * (lo_pred - gt) * (gt < lo_pred)
                + (2 / alpha) * (gt - hi_pred) * (gt > hi_pred)
            )
            wis_components.append(interval_score)
    
        wis = (ae + np.sum(wis_components)) / (1 + len(wis_components))
        results.append({
            "Unique_id": uid,
            "Reference Date": ref_date,
            "Target End Date": tgt_date,
            "GT": gt,
            "WIS": wis})
    return pd.DataFrame(results)

In [9]:
# Load and process the MEASLES_ARIZONA data
df = pd.read_csv("../outbreaks_disease_location.csv")
value_columns = [str(i) for i in range(60)]
series_values = df[value_columns].fillna(0).astype(float)
start_dates = pd.to_datetime(df["start_date"])

# Shuffle and split
shuffled_indices = df.sample(frac=1, random_state=42).index
split_point = int(0.8 * len(df))
train_indices = shuffled_indices[:split_point]
test_indices = shuffled_indices[split_point:]

In [10]:
seed_value=256
random.seed(seed_value)
N=100
sub_test_indices=random.sample(list(test_indices),N)

In [11]:
train_records = []
for i, row in series_values.iloc[train_indices].iterrows():
    dates = pd.date_range(start="2000-01-01", periods=60, freq="W-SAT")
    for t, value in enumerate(row):
        train_records.append({"unique_id": f"Y{i+1}", "ds": dates[t], "y": value})
df_train = pd.DataFrame(train_records)

test_start_dates = start_dates.loc[test_indices] - pd.Timedelta(weeks=4)
df_test_all = []

for idx in sub_test_indices:
    start_date = test_start_dates.loc[idx]
    row = series_values.loc[idx]
    dates = pd.date_range(start=start_date, periods=60, freq="W-SAT")
    for t, value in enumerate(row):
        df_test_all.append({"unique_id": f"Y_{idx}", "ds": dates[t], "y": value})

df_test_all = pd.DataFrame(df_test_all)

In [12]:
uids=df_test_all.unique_id.unique()
m='AutoETS'
freq='W-SAT'
h=4
allforecasts=pd.DataFrame()
for uid in uids:
    filt_data=df_test_all[df_test_all.unique_id==uid]
    # allqfctdf=pd.DataFrame()
    # for i in range(8,56):
    for i in range(8,56):
        data=filt_data.iloc[:i]
        reference_date = data.iloc[-1]['ds']
        sf=StatsForecast(models=[AutoETS(model="AZN")], freq=freq, n_jobs=-1)
        sf.fit(data)
        forecast = sf.predict(h=h, level = [10,20,30,40,50,60,70,80, 85,90,95])
        # reference_date=data.iloc[-1]['ds']
        # forecast.loc[:,'reference_date']=reference_date
        qfctdf=get_quantile_format(forecast,model_name=m)
    
        qfctdf.loc[:,'reference_date']=reference_date
        qfctdf.loc[:,'model']=m
    
        # allqfctdf=pd.concat([allforecasts,qfctdf])
        allforecasts=pd.concat([allforecasts,qfctdf])
        
allforecasts.to_csv(f'../../../output/forecasts/{m}_{N}_{seed_value}.csv',index=None)
        
evaldf=get_wis(allforecasts)
evaldf.loc[:,'model']=m
        #             evaldf.loc[:,'model']=m
evaldf.to_csv(f'../../../eval/eval_fcts/{m}_{N}_{seed_value}.csv',index=None)
        # qfctdf.to_csv('')

In [4]:
class FixedETSProcessor:
    def __init__(self):
        self.forecasts = []
        self.eval_pairs = []
        self.dates = []
        self.unique_ids = []

        self.maes = []
        self.mses = []
        self.mapes = []
        self.nmses = []

        self.metrics_df = pd.DataFrame(columns=["Reference Date", "MAE", "MSE", "MAPE", "NMSE"])
        self.display_df = pd.DataFrame(columns=["Unique_id", "Reference Date", "Target End Date", "GT", "Quantile", "Prediction"])

    def create_fixed_model(self, df_long, h, freq="W-SAT", level=[80, 95]):
        df_fit = df_long.groupby("unique_id").apply(lambda g: g.iloc[:-h]).reset_index(drop=True)
        df_truth = df_long.groupby("unique_id").apply(lambda g: g.iloc[-h:]).reset_index(drop=True)

        start = time.time()
        self.sf = StatsForecast(models=[AutoETS(model="AZN")], freq=freq, n_jobs=-1)
        self.sf.fit(df_fit)
        forecast = self.sf.predict(h=h, level=level)
        print(f"ETS fit time: {time.time() - start:.2f} sec")

        forecast.set_index(["unique_id", "ds"], inplace=True)
        df_truth.set_index(["unique_id", "ds"], inplace=True)

        print("Processing forecasts per series...")
        for uid in tqdm(df_fit["unique_id"].unique(), desc="Fitting per series"):
            f = forecast.loc[uid].copy()
            f["unique_id"] = uid
            t = df_truth.loc[uid]
            self.forecasts.append(f)
            self.eval_pairs.append((f, t))
            self.unique_ids.append(uid)
            self.dates.append(df_fit[df_fit["unique_id"] == uid]["ds"].max().strftime("%Y-%m-%d"))

    def calculate_metrics(self):
        for forecast_df, truth_df in self.eval_pairs:
            y_true = truth_df.iloc[:, 0]
            y_pred = forecast_df.iloc[:, 0]
            mae = mean_absolute_error(y_true, y_pred)
            mse = mean_squared_error(y_true, y_pred)
            mape = mean_absolute_percentage_error(y_true, y_pred)
            nmse = mse / np.var(y_true)

            self.maes.append(mae)
            self.mses.append(mse)
            self.mapes.append(mape)
            self.nmses.append(nmse)

    def create_metrics_df(self):
        self.metrics_df = pd.DataFrame({
            "Reference Date": self.dates,
            "MAE": self.maes,
            "MSE": self.mses,
            "MAPE": self.mapes,
            "NMSE": self.nmses,
        })

    def create_display_df(self):
        records = []
        print("Generating display DataFrame...")
        for i in tqdm(range(len(self.forecasts)), desc="Building display_df"):
            forecast_df = self.forecasts[i]
            reference_date = self.dates[i]
            unique_id = self.unique_ids[i]
            truth_series = self.eval_pairs[i][1].iloc[:, 0]

            for col in forecast_df.columns:
                if col == "unique_id":
                    continue
                if "lo" in col or "hi" in col:
                    number = int(col.split("-")[-1])
                    alpha = 1 - (number / 100)
                    quantile = 1 - (alpha / 2) if "hi" in col else alpha / 2
                elif col == "AutoETS":
                    quantile = 0.5
                else:
                    continue

                preds = forecast_df[col]
                for idx, pred in preds.items():
                    records.append({
                        "Unique_id": unique_id,
                        "Reference Date": reference_date,
                        "Target End Date": idx,
                        "GT": truth_series.get(idx, np.nan),
                        "Quantile": quantile,
                        "Prediction": pred
                    })

        self.display_df = pd.DataFrame(records).sort_values(
            by=["Unique_id", "Reference Date", "Target End Date", "GT", "Quantile"]
        ).reset_index(drop=True)

    def compute_wis(self):
        df = self.display_df.sort_values(by=["Unique_id", "Reference Date", "Target End Date", "Quantile"])
        records = []
        grouped = df.groupby(["Unique_id", "Reference Date", "Target End Date"])

        print("Computing WIS for each forecasted point...")
        for (uid, ref_date, tgt_date), group in tqdm(grouped, desc="Computing WIS"):
            gt = group["GT"].iloc[0]
            preds = group.set_index("Quantile")["Prediction"]

            if 0.5 not in preds.index:
                continue

            ae = abs(preds[0.5] - gt)
            quantiles = sorted(q for q in preds.index if q != 0.5)
            n = len(quantiles) // 2
            interval_scores = []

            for i in range(n):
                lo_q = quantiles[i]
                hi_q = quantiles[-(i + 1)]
                lo = preds[lo_q]
                hi = preds[hi_q]
                alpha = hi_q - lo_q

                interval_score = (
                    (hi - lo)
                    + (2 / alpha) * max(lo - gt, 0)
                    + (2 / alpha) * max(gt - hi, 0)
                )
                interval_scores.append(interval_score)

            wis = (ae + np.sum(interval_scores)) / (1 + len(interval_scores))
            records.append({
                "Unique_id": uid,
                "Reference Date": ref_date,
                "Target End Date": tgt_date,
                "GT": gt,
                "WIS": wis
            })

        return pd.DataFrame(records)

In [5]:
processor = FixedETSProcessor()

In [6]:
processor.create_fixed_model(df_long=df_long, h=4, freq="W-SAT", level=[10,20,30,40,50,60,70,80,85,90,95])

ETS fit time: 3.83 sec
Processing forecasts per series...


Fitting per series: 100%|██████████| 10799/10799 [05:31<00:00, 32.60it/s]


In [7]:
processor.create_display_df()

Generating display DataFrame...


Building display_df: 100%|██████████| 10799/10799 [00:18<00:00, 579.06it/s]


In [14]:
processor.display_df

Unnamed: 0,Unique_id,Reference Date,Target End Date,GT,Quantile,Prediction
0,Y_1,2025-06-14,2025-06-21,0.0,0.025,-10.364071
1,Y_1,2025-06-14,2025-06-21,0.0,0.050,-8.697803
2,Y_1,2025-06-14,2025-06-21,0.0,0.075,-7.612082
3,Y_1,2025-06-14,2025-06-21,0.0,0.100,-6.776702
4,Y_1,2025-06-14,2025-06-21,0.0,0.150,-5.480544
...,...,...,...,...,...,...
993503,Y_9999,1955-03-19,1955-04-16,0.0,0.850,9.489182
993504,Y_9999,1955-03-19,1955-04-16,0.0,0.900,11.722034
993505,Y_9999,1955-03-19,1955-04-16,0.0,0.925,13.161119
993506,Y_9999,1955-03-19,1955-04-16,0.0,0.950,15.031458


In [15]:
wis_df = processor.compute_wis()

Computing WIS for each forecasted point...


Computing WIS: 100%|██████████| 43196/43196 [00:16<00:00, 2558.04it/s]


In [16]:
wis_df

Unnamed: 0,Unique_id,Reference Date,Target End Date,GT,WIS
0,Y_1,2025-06-14,2025-06-21,0.0,8.960481
1,Y_1,2025-06-14,2025-06-28,0.0,12.671400
2,Y_1,2025-06-14,2025-07-05,0.0,15.518974
3,Y_1,2025-06-14,2025-07-12,0.0,17.919618
4,Y_10,2016-10-01,2016-10-08,0.0,0.238880
...,...,...,...,...,...
43191,Y_9998,1954-12-25,1955-01-22,0.0,0.945395
43192,Y_9999,1955-03-19,1955-03-26,0.0,9.180771
43193,Y_9999,1955-03-19,1955-04-02,0.0,11.647238
43194,Y_9999,1955-03-19,1955-04-09,0.0,13.675754


In [18]:
np.mean(wis_df['WIS'].values)

1671.3781628538525

In [17]:
wis_dfs = [wis_df.iloc[i::4].reset_index(drop=True) for i in range(4)]

In [22]:
np.mean(wis_dfs[3]['WIS'].values)

2167.149866930106

In [23]:
wis_df.to_csv('ETS_DF_WIS.csv')