In [None]:
# 1. Imports and Setup

import os
import sys

# Make the parent directory importable so that the `src` package used below
# resolves when the notebook is run from its own folder. The membership check
# keeps the cell idempotent: re-running it does not pile up duplicate entries
# in sys.path.
_parent_dir = os.path.abspath("..")
if _parent_dir not in sys.path:
    sys.path.append(_parent_dir)

from src.notebook_setup import *
from src.evaluation import run_statistical_comparison
from src.visualization import plot_dm_results

# Fresh tracker for this run; every tuning/evaluation cell below receives it.
# ExperimentTracker is presumably provided by the star import above -- confirm
# in src/notebook_setup.py.
tracker = ExperimentTracker()

In [None]:
# 2. Data Loading and Configuration

# === DATASET CONFIGURATION ===
# Single source of truth for the dataset used in this notebook; the cells
# below read their settings from this mapping.
DATASET_CONFIG = dict(
    # CSV file to load (path is relative to the notebook's working directory).
    file_path="../datasets/kaggle_btcusd_hourly.csv",
    # Column holding the timestamps.
    time_column="datetime",
    # Column holding the values to forecast.
    target_column="btc_usd",
    # Frequency alias: 'MS'=monthly start, 'QS'=quarterly, 'YS'=yearly, 'H'=hourly, 'D'=daily
    frequency="H",
    # Number of periods to hold out for testing (168 hourly points = 7 days).
    test_periods=168,
    # Seasonal cycle length: 12=monthly, 4=quarterly, 1=yearly, 24=hourly, 7=daily
    seasonal_period=24,
    # Display name, used in printed summaries and plot titles.
    name="Kaggle BTC/USD Hourly",
    # Ratio at which CV splits start (e.g. 0.9 = last 10% used for validation);
    # intended for bigger datasets.
    cv_start_ratio=0.9,
    # Label describing the unit of the target values (used as the y-axis label).
    value_unit="BTC/USD Close Price",
)

# === LOAD AND PREPROCESS DATA ===
# Resolve the column names once so the loading steps below stay readable.
time_col = DATASET_CONFIG["time_column"]
target_col = DATASET_CONFIG["target_column"]

# Read the raw CSV and turn the timestamp column into real datetimes before
# handing the frame to darts.
df = pd.read_csv(DATASET_CONFIG["file_path"])
df[time_col] = pd.to_datetime(df[time_col])

# Build the series and cast its values to float32 in one go.
series = TimeSeries.from_dataframe(df, time_col, target_col).astype(np.float32)

# Smoke-test switch: when enabled, only the most recent SMOKE_TEST_POINTS
# observations are kept, so the rest of the notebook runs on a short series.
SMOKE_TEST = True
SMOKE_TEST_POINTS = 2000

if SMOKE_TEST:
    smoke_msg = f"!!! SMOKE TEST ENABLED: Using last {SMOKE_TEST_POINTS} points !!!"
    # Banner followed by an underline of matching width.
    print(smoke_msg, "=" * len(smoke_msg), sep="\n")
    series = series[-SMOKE_TEST_POINTS:]

# === TRAIN/TEST SPLIT ===
# Test set is held out completely - models never see it during training or validation.
#
# The split is positional: the last `test_periods` observations form the test
# set and everything before them is the training set. For a regularly spaced
# series this yields the same partition as stepping back `test_periods`
# monthly/quarterly/yearly/daily/hourly offsets from the end, but it needs no
# per-frequency lookup table, so an unlisted frequency alias can no longer fall
# back to a day-based offset and silently produce a test set of the wrong size.
#
# `freq` is not needed by the positional split; it is kept as a convenience
# alias in the notebook namespace.
freq = DATASET_CONFIG["frequency"]
test_periods = DATASET_CONFIG["test_periods"]

# Fail early, with a readable message, when the hold-out would leave no
# training data (or would itself be empty).
if not 0 < test_periods < len(series):
    raise ValueError(
        f"test_periods={test_periods} is invalid for a series of length "
        f"{len(series)}; it must be at least 1 and smaller than the series length."
    )

train, test = series[:-test_periods], series[-test_periods:]

# === SCALING (for Deep Learning models) ===
# IMPORTANT: the scaler is fitted on the training split only, so no information
# from the held-out test set leaks into the scaling parameters.
scaler = Scaler()
train_scaled = scaler.fit_transform(train)
# The already-fitted scaler is merely applied here (no re-fitting), first to the
# test split and then to the full series.
test_scaled, series_scaled = (scaler.transform(ts) for ts in (test, series))

# --- Split summary ---
n_total, n_train, n_test = len(series), len(train), len(test)
print(f"Dataset: {DATASET_CONFIG['name']}")
print(f"Total: {n_total} | Train: {n_train} | Test: {n_test}")
print(f"Train period: {train.start_time()} to {train.end_time()}")
print(f"Test period:  {test.start_time()} to {test.end_time()}")
print(f"Seasonal period: {DATASET_CONFIG['seasonal_period']}")

# --- Overview plot: full series with the split boundary marked ---
plot_title = DATASET_CONFIG["name"]
if SMOKE_TEST:
    # Make it obvious on the figure that only a truncated series is shown.
    plot_title = plot_title + " [SMOKE TEST]"

series.plot(label="Full Series", figsize=(18, 5))
plt.axvline(x=test.start_time(), color="red", linestyle="--", label="Train/Test Split")
plt.title(plot_title)
plt.ylabel(DATASET_CONFIG["value_unit"])
plt.legend()
plt.show()

In [None]:
# 3. Constants

# Values taken straight from the dataset configuration.
SEASONAL_PERIOD, TEST_PERIODS, CV_START_RATIO = (
    DATASET_CONFIG[key]
    for key in ("seasonal_period", "test_periods", "cv_start_ratio")
)

# Hyper-parameter grids, built for the dataset's seasonal period.
STAT_GRIDS = get_statistical_grids(SEASONAL_PERIOD)
DL_GRIDS = get_dl_grids(SEASONAL_PERIOD)

# Tuning options. TUNING_CONFIG is presumably supplied by the star import from
# src.notebook_setup -- confirm there.
USE_FULL_GRID, N_ITER = TUNING_CONFIG["USE_FULL_GRID"], TUNING_CONFIG["N_ITER"]

In [None]:
# 4. Statistical Models

# Keyword arguments that are identical for every statistical model run below.
_stat_run_kwargs = dict(
    test_periods=TEST_PERIODS,
    seasonal_period=SEASONAL_PERIOD,
    cv_start_ratio=CV_START_RATIO,
)

# Each call tunes and evaluates one model on the unscaled training split,
# using that model's entry in STAT_GRIDS, and is handed the shared tracker.

# Holt-Winters
run_tuning_and_eval(
    tracker, "Holt-Winters", ExponentialSmoothing, STAT_GRIDS["Holt-Winters"], train,
    **_stat_run_kwargs,
)

# AutoARIMA
run_tuning_and_eval(
    tracker, "AutoARIMA", AutoARIMA, STAT_GRIDS["AutoARIMA"], train,
    **_stat_run_kwargs,
)

# Prophet
run_tuning_and_eval(
    tracker, "Prophet", Prophet, STAT_GRIDS["Prophet"], train,
    **_stat_run_kwargs,
)

In [None]:
# 5. Deep Learning Models

# Keyword arguments shared by every deep-learning run below. The models are
# given the scaled training split; the fitted scaler and the unscaled training
# split are passed alongside it.
_dl_run_kwargs = dict(
    is_dl=True,
    use_full_grid=USE_FULL_GRID,
    n_iter=N_ITER,
    scaler=scaler,
    original_train=train,
    test_periods=TEST_PERIODS,
    seasonal_period=SEASONAL_PERIOD,
    cv_start_ratio=CV_START_RATIO,
)

# TiDE
run_tuning_and_eval(
    tracker, "TiDE", TiDEModel, DL_GRIDS["TiDE"], train_scaled,
    **_dl_run_kwargs,
)

# N-BEATS
run_tuning_and_eval(
    tracker, "N-BEATS", NBEATSModel, DL_GRIDS["N-BEATS"], train_scaled,
    **_dl_run_kwargs,
)

# TFT
run_tuning_and_eval(
    tracker, "TFT", TFTModel, DL_GRIDS["TFT"], train_scaled,
    **_dl_run_kwargs,
)

In [None]:
# 6. Foundation Models (Chronos, TimeGPT)

# Run the foundation models through the shared helper. It is given the tracker,
# the unscaled train and test splits, and the dataset's frequency alias.
# NOTE(review): results are presumably recorded on `tracker` like the other
# models -- confirm in run_foundation_models.
run_foundation_models(tracker, train, test, DATASET_CONFIG["frequency"])

In [None]:
# 7. Results Comparison (Cross-Validation Metrics)

# Pull the accumulated results from the tracker and print only the columns of
# interest as a plain-text table without the index.
results_df = tracker.get_results_df()
cv_columns = ["Model", "RMSE", "MAPE", "Tuning Time (s)", "Combinations"]

print("=== Cross-Validation Results ===")
print(results_df[cv_columns].to_string(index=False))

In [None]:
# 8. Train Final Predictions for ALL Models (for Statistical Comparison)

# Names of all models that have an entry in the tracker's results table.
tracked_results = tracker.get_results_df()
all_models = tracked_results["Model"].tolist()

# Request final predictions for every one of those models.
final_predictions = get_final_predictions(
    tracker,
    train,
    test,
    scaler,
    train_scaled,
    DATASET_CONFIG["frequency"],
    models_to_predict=all_models,
)

In [None]:
# 8b. Statistical Significance Testing (Diebold-Mariano Test)

# The forecast horizon handed to the comparison is the number of held-out test
# periods.
# NOTE(review): with h equal to the test-set length, a Diebold-Mariano test
# that applies the Harvey-Leybourne-Newbold small-sample correction degenerates
# (the correction factor becomes zero when h equals the number of forecasts) --
# confirm how run_statistical_comparison treats `h`.
dm_horizon = DATASET_CONFIG["test_periods"]
dm_results_df = run_statistical_comparison(
    tracker, final_predictions, test, h=dm_horizon
)

# Render the comparison as a table figure and show it when one is returned.
dataset_label = DATASET_CONFIG["name"]
fig_dm = plot_dm_results(dm_results_df, dataset_label)
if fig_dm:
    fig_dm.show()

In [None]:
# 9. Visualize Final Predictions with Best and Fastest Models (Interactive)

# Optional labelling details; .get() yields None when a key is absent from the
# dataset configuration.
_forecast_labels = dict(
    target_column=DATASET_CONFIG.get("target_column"),
    value_unit=DATASET_CONFIG.get("value_unit"),
)

fig = plot_forecast_comparison(
    train, test, final_predictions, DATASET_CONFIG["name"], **_forecast_labels
)

# The helper may hand back a falsy value when there is nothing to draw.
if not fig:
    print("No predictions to visualize.")
else:
    fig.show()

In [None]:
# 10. All Models Comparison (Interactive)

# Build the comparison figure from the tracker's results table and the final
# test-set predictions.
fig_comparison = plot_model_comparison(
    tracker.get_results_df(),
    DATASET_CONFIG["name"],
    value_unit=DATASET_CONFIG.get("value_unit"),
    test_predictions=final_predictions,
)

# Guarded like the other plotting cells in this notebook: if the helper returns
# no figure, report it instead of failing with an AttributeError on None.
if fig_comparison is not None:
    fig_comparison.show()
else:
    print("No model comparison to visualize.")

In [None]:
# 11. Export Static PNG Images

# `fig_dm` is only defined when the Diebold-Mariano cell has been run; pass
# None for it otherwise so the export call still works.
dm_figure = fig_dm if "fig_dm" in locals() else None

export_plots(fig, fig_comparison, DATASET_CONFIG["name"], fig_dm=dm_figure)