In [2]:
# 04_multihorizon_baseline_no_pmdarima.ipynb
# ------------------------------------------------------------------
# PURPOSE:
#   1) For each coin, load the multi-horizon train/test CSVs (which
#      contain columns like 'Close_t+1', 'Close_t+7', etc.).
#   2) Compare two baselines:
#        (A) Naive = "today's Close" for all future predictions
#        (B) A simple ARIMA( p,d,q ) using statsmodels.tsa, 
#            with brute-force search over a small range of (p,d,q).
#   3) Evaluate the RMSE & MAE for each horizon, store in a CSV.
# ------------------------------------------------------------------

import os
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns

from statsmodels.tsa.arima.model import ARIMA

sns.set_theme(font_scale=1.1)

DATA_FOLDER = "../data"

# EXAMPLE coin list (edit to match your actual coin abbreviations or filenames)
all_coins = [
    "ADA", "AVAX", "BCH", "BNB", "BTC",
    "DOGE", "DOT", "ETH", "LEO", "LINK",
    "LTC", "MATIC", "NEAR", "SHIB", "SOL",
    "TON", "TRX", "UNI", "XRP"
]

# Forecast horizons to evaluate
horizons = ["Close_t+1", "Close_t+7", "Close_t+30", "Close_t+90"]

# -------------------- Metrics -----------------------
def rmse(y_true, y_pred):
    return np.sqrt(np.mean((y_true - y_pred) ** 2))

def mae(y_true, y_pred):
    return np.mean(np.abs(y_true - y_pred))

# -------------------- ARIMA Search -----------------------
def find_best_arima(train_series, p_max=2, d_max=2, q_max=2):
    """
    Brute-force search over p,d,q in [0..p_max, 0..d_max, 0..q_max].
    Returns:
        best_order: (p,d,q)
        best_fit:   statsmodels ARIMAResults object
    """
    best_aic = float("inf")
    best_order = None
    best_fit = None

    for p in range(p_max+1):
        for d in range(d_max+1):
            for q in range(q_max+1):
                try:
                    model = ARIMA(train_series, order=(p, d, q))
                    fitted = model.fit(method_kwargs={"warn_convergence": False})
                    if fitted.aic < best_aic:
                        best_aic = fitted.aic
                        best_order = (p, d, q)
                        best_fit = fitted
                except:
                    # Some combos fail to converge or raise errors; just ignore
                    pass
    return best_order, best_fit

# --------------------------------------------------------
# MAIN LOGIC
# --------------------------------------------------------
results = []

for coin in all_coins:
    print(f"\n=== Multi-Horizon Baseline for {coin} ===")

    # 1) Load the train/test CSV that you created in the previous notebook
    train_csv = f"{coin.lower()}_train_multi.csv"
    test_csv  = f"{coin.lower()}_test_multi.csv"
    train_path = os.path.join(DATA_FOLDER, train_csv)
    test_path  = os.path.join(DATA_FOLDER, test_csv)

    # Skip if missing files
    if not (os.path.exists(train_path) and os.path.exists(test_path)):
        print(f"  [{coin}] Missing multi-horizon train/test CSV. Skipping coin.")
        continue

    # Read the CSVs
    train_df = pd.read_csv(train_path, parse_dates=["Date"], index_col="Date")
    test_df  = pd.read_csv(test_path,  parse_dates=["Date"], index_col="Date")

    # Force strictly daily frequency by reindexing and forward-filling
    train_df = train_df.asfreq("D").ffill()
    test_df  = test_df.asfreq("D").ffill()

    # Now we can explicitly set freq="D" without ValueError
    train_df.index.freq = "D"
    test_df.index.freq  = "D"

    # Basic checks
    if len(train_df) < 20:
        print(f"  [{coin}] Train set <20 rows, skipping ARIMA.")
        continue
    if len(test_df) == 0:
        print(f"  [{coin}] Test set empty, skipping coin.")
        continue

    # 2) Fit an ARIMA model on the train's 'Close'
    train_close = train_df["Close"].copy()
    best_order, best_fit = find_best_arima(train_close, p_max=2, d_max=2, q_max=2)

    if best_fit is None:
        print(f"  [{coin}] No ARIMA model found. Skipping coin.")
        continue

    print(f"  [{coin}] Best ARIMA order = {best_order}, AIC={best_fit.aic:.1f}")

    # 3) Forecast the entire test length
    steps_needed = len(test_df)
    try:
        forecast_vals = best_fit.forecast(steps=steps_needed)
        test_df["arima_pred_close"] = forecast_vals.values
    except ValueError as e:
        print(f"  [{coin}] ARIMA forecast error: {e}")
        continue

    # 4) Evaluate for each horizon
    for horizon_col in horizons:
        if horizon_col not in test_df.columns:
            continue
        if test_df[horizon_col].isna().all():
            print(f"  [{coin}] {horizon_col} is all NaN, skipping horizon.")
            continue

        # (A) Naive baseline => "today's Close" as the forecast
        test_df[f"naive_{horizon_col}"] = test_df["Close"]

        # True future price
        y_true = test_df[horizon_col]
        # Naive pred
        y_naive = test_df[f"naive_{horizon_col}"]
        # ARIMA pred (single model forecasting the test set)
        y_arima = test_df["arima_pred_close"]

        # Calculate metrics
        naive_rmse_val = rmse(y_true, y_naive)
        naive_mae_val  = mae(y_true, y_naive)
        arima_rmse_val = rmse(y_true, y_arima)
        arima_mae_val  = mae(y_true, y_arima)

        print(f"    {coin} - {horizon_col}: NaiveRMSE={naive_rmse_val:.2f}, "
              f"ARIMARMSE={arima_rmse_val:.2f}")

        results.append({
            "Coin": coin,
            "Horizon": horizon_col,
            "Naive_RMSE": naive_rmse_val,
            "Naive_MAE": naive_mae_val,
            "ARIMA_Order": best_order,
            "ARIMA_RMSE": arima_rmse_val,
            "ARIMA_MAE": arima_mae_val
        })

# 5) Save final results
results_df = pd.DataFrame(results)
out_csv = os.path.join(DATA_FOLDER, "baseline_results_multihorizon_no_pmdarima.csv")
results_df.to_csv(out_csv, index=False)
print(f"\n=== Saved multi-horizon baseline results to: {out_csv} ===")
print(results_df.head(30))




=== Multi-Horizon Baseline for ADA ===


  warn('Non-stationary starting autoregressive parameters'
  warn('Non-invertible starting MA parameters found.'
  warn('Non-invertible starting MA parameters found.'


  [ADA] Best ARIMA order = (1, 1, 1), AIC=-6952.5
    ADA - Close_t+1: NaiveRMSE=0.02, ARIMARMSE=0.25
    ADA - Close_t+7: NaiveRMSE=0.07, ARIMARMSE=0.26
    ADA - Close_t+30: NaiveRMSE=0.19, ARIMARMSE=0.29
    ADA - Close_t+90: NaiveRMSE=0.31, ARIMARMSE=0.33

=== Multi-Horizon Baseline for AVAX ===


  warn('Non-stationary starting autoregressive parameters'
  warn('Non-invertible starting MA parameters found.'
  warn('Non-stationary starting autoregressive parameters'
  warn('Non-invertible starting MA parameters found.'


  [AVAX] Best ARIMA order = (2, 1, 2), AIC=6036.5
    AVAX - Close_t+1: NaiveRMSE=1.90, ARIMARMSE=10.24
    AVAX - Close_t+7: NaiveRMSE=4.96, ARIMARMSE=10.23
    AVAX - Close_t+30: NaiveRMSE=9.87, ARIMARMSE=9.48
    AVAX - Close_t+90: NaiveRMSE=15.75, ARIMARMSE=10.63

=== Multi-Horizon Baseline for BCH ===


  warn('Non-invertible starting MA parameters found.'
  warn('Non-stationary starting autoregressive parameters'
  warn('Non-invertible starting MA parameters found.'


  [BCH] Best ARIMA order = (2, 1, 2), AIC=23847.2
    BCH - Close_t+1: NaiveRMSE=17.90, ARIMARMSE=134.23
    BCH - Close_t+7: NaiveRMSE=47.39, ARIMARMSE=135.45
    BCH - Close_t+30: NaiveRMSE=89.07, ARIMARMSE=140.33
    BCH - Close_t+90: NaiveRMSE=148.57, ARIMARMSE=141.41

=== Multi-Horizon Baseline for BNB ===


  warn('Non-stationary starting autoregressive parameters'
  warn('Non-invertible starting MA parameters found.'


  [BNB] Best ARIMA order = (2, 1, 2), AIC=16685.7
    BNB - Close_t+1: NaiveRMSE=14.28, ARIMARMSE=268.69
    BNB - Close_t+7: NaiveRMSE=36.99, ARIMARMSE=273.23
    BNB - Close_t+30: NaiveRMSE=67.11, ARIMARMSE=289.82
    BNB - Close_t+90: NaiveRMSE=135.59, ARIMARMSE=318.08

=== Multi-Horizon Baseline for BTC ===


  warn('Non-stationary starting autoregressive parameters'
  warn('Non-invertible starting MA parameters found.'
  warn('Non-stationary starting autoregressive parameters'
  warn('Non-invertible starting MA parameters found.'


  [BTC] Best ARIMA order = (1, 2, 1), AIC=65985.8
    BTC - Close_t+1: NaiveRMSE=1281.27, ARIMARMSE=20360.46
    BTC - Close_t+7: NaiveRMSE=3346.80, ARIMARMSE=20548.18
    BTC - Close_t+30: NaiveRMSE=7423.05, ARIMARMSE=21491.09
    BTC - Close_t+90: NaiveRMSE=14679.01, ARIMARMSE=23158.23

=== Multi-Horizon Baseline for DOGE ===


  warn('Non-stationary starting autoregressive parameters'
  warn('Non-invertible starting MA parameters found.'
  warn('Non-invertible starting MA parameters found.'


  [DOGE] Best ARIMA order = (2, 1, 2), AIC=-21069.5
    DOGE - Close_t+1: NaiveRMSE=0.01, ARIMARMSE=0.09
    DOGE - Close_t+7: NaiveRMSE=0.02, ARIMARMSE=0.10
    DOGE - Close_t+30: NaiveRMSE=0.06, ARIMARMSE=0.11
    DOGE - Close_t+90: NaiveRMSE=0.10, ARIMARMSE=0.12

=== Multi-Horizon Baseline for DOT ===


  warn('Non-stationary starting autoregressive parameters'
  warn('Non-invertible starting MA parameters found.'
  warn('Non-stationary starting autoregressive parameters'
  warn('Non-invertible starting MA parameters found.'
  warn('Non-stationary starting autoregressive parameters'
  warn('Non-invertible starting MA parameters found.'


  [DOT] Best ARIMA order = (1, 1, 1), AIC=4067.3
    DOT - Close_t+1: NaiveRMSE=0.33, ARIMARMSE=2.33
    DOT - Close_t+7: NaiveRMSE=0.90, ARIMARMSE=2.33
    DOT - Close_t+30: NaiveRMSE=1.92, ARIMARMSE=2.27
    DOT - Close_t+90: NaiveRMSE=2.84, ARIMARMSE=2.58

=== Multi-Horizon Baseline for ETH ===


  warn('Non-stationary starting autoregressive parameters'
  warn('Non-stationary starting autoregressive parameters'
  warn('Non-invertible starting MA parameters found.'
  warn('Non-stationary starting autoregressive parameters'
  warn('Non-invertible starting MA parameters found.'
  warn('Non-stationary starting autoregressive parameters'
  warn('Non-invertible starting MA parameters found.'


  [ETH] Best ARIMA order = (2, 1, 2), AIC=30533.4
    ETH - Close_t+1: NaiveRMSE=81.77, ARIMARMSE=1194.25
    ETH - Close_t+7: NaiveRMSE=219.21, ARIMARMSE=1206.49
    ETH - Close_t+30: NaiveRMSE=435.89, ARIMARMSE=1253.25
    ETH - Close_t+90: NaiveRMSE=760.86, ARIMARMSE=1286.58

=== Multi-Horizon Baseline for LEO ===


  warn('Non-stationary starting autoregressive parameters'
  warn('Non-invertible starting MA parameters found.'
  warn('Non-invertible starting MA parameters found.'
  warn('Non-invertible starting MA parameters found.'


  [LEO] Best ARIMA order = (0, 1, 1), AIC=-2364.5
    LEO - Close_t+1: NaiveRMSE=0.13, ARIMARMSE=2.17
    LEO - Close_t+7: NaiveRMSE=0.33, ARIMARMSE=2.25
    LEO - Close_t+30: NaiveRMSE=0.91, ARIMARMSE=2.59
    LEO - Close_t+90: NaiveRMSE=1.83, ARIMARMSE=3.38

=== Multi-Horizon Baseline for LINK ===


  warn('Non-stationary starting autoregressive parameters'
  warn('Non-stationary starting autoregressive parameters'
  warn('Non-invertible starting MA parameters found.'
  warn('Non-stationary starting autoregressive parameters'
  warn('Non-invertible starting MA parameters found.'


  [LINK] Best ARIMA order = (1, 1, 1), AIC=5646.5
    LINK - Close_t+1: NaiveRMSE=0.73, ARIMARMSE=8.03
    LINK - Close_t+7: NaiveRMSE=1.87, ARIMARMSE=8.17
    LINK - Close_t+30: NaiveRMSE=3.93, ARIMARMSE=8.81
    LINK - Close_t+90: NaiveRMSE=6.76, ARIMARMSE=9.54

=== Multi-Horizon Baseline for LTC ===


  warn('Non-stationary starting autoregressive parameters'
  warn('Non-stationary starting autoregressive parameters'
  warn('Non-invertible starting MA parameters found.'
  warn('Non-stationary starting autoregressive parameters'
  warn('Non-invertible starting MA parameters found.'
  warn('Non-stationary starting autoregressive parameters'


  [LTC] Best ARIMA order = (1, 1, 1), AIC=21973.4
    LTC - Close_t+1: NaiveRMSE=3.19, ARIMARMSE=28.25
    LTC - Close_t+7: NaiveRMSE=7.81, ARIMARMSE=28.53
    LTC - Close_t+30: NaiveRMSE=13.94, ARIMARMSE=30.18
    LTC - Close_t+90: NaiveRMSE=22.62, ARIMARMSE=33.92

=== Multi-Horizon Baseline for MATIC ===


  warn('Non-stationary starting autoregressive parameters'
  warn('Non-invertible starting MA parameters found.'
  warn('Non-stationary starting autoregressive parameters'
  warn('Non-invertible starting MA parameters found.'
  warn('Non-stationary starting autoregressive parameters'
  warn('Non-invertible starting MA parameters found.'
  warn('Non-stationary starting autoregressive parameters'
  warn('Non-invertible starting MA parameters found.'
  warn('Non-stationary starting autoregressive parameters'
  warn('Non-invertible starting MA parameters found.'


  [MATIC] Best ARIMA order = (2, 1, 2), AIC=-4312.4
    MATIC - Close_t+1: NaiveRMSE=0.03, ARIMARMSE=0.28
    MATIC - Close_t+7: NaiveRMSE=0.08, ARIMARMSE=0.28
    MATIC - Close_t+30: NaiveRMSE=0.16, ARIMARMSE=0.29
    MATIC - Close_t+90: NaiveRMSE=0.24, ARIMARMSE=0.36

=== Multi-Horizon Baseline for NEAR ===


  warn('Non-invertible starting MA parameters found.'
  warn('Non-stationary starting autoregressive parameters'
  warn('Non-invertible starting MA parameters found.'
  warn('Non-invertible starting MA parameters found.'
  warn('Non-invertible starting MA parameters found.'


  [NEAR] Best ARIMA order = (2, 1, 2), AIC=1549.6
    NEAR - Close_t+1: NaiveRMSE=0.36, ARIMARMSE=2.03
    NEAR - Close_t+7: NaiveRMSE=0.92, ARIMARMSE=2.03
    NEAR - Close_t+30: NaiveRMSE=1.52, ARIMARMSE=1.90
    NEAR - Close_t+90: NaiveRMSE=2.25, ARIMARMSE=1.46

=== Multi-Horizon Baseline for SHIB ===
  [SHIB] Best ARIMA order = (1, 0, 0), AIC=-30091.0
    SHIB - Close_t+1: NaiveRMSE=0.00, ARIMARMSE=0.00
    SHIB - Close_t+7: NaiveRMSE=0.00, ARIMARMSE=0.00
    SHIB - Close_t+30: NaiveRMSE=0.00, ARIMARMSE=0.00
    SHIB - Close_t+90: NaiveRMSE=0.00, ARIMARMSE=0.00

=== Multi-Horizon Baseline for SOL ===


  warn('Non-invertible starting MA parameters found.'
  warn('Non-invertible starting MA parameters found.'
  warn('Non-stationary starting autoregressive parameters'
  warn('Non-invertible starting MA parameters found.'
  warn('Non-stationary starting autoregressive parameters'
  warn('Non-invertible starting MA parameters found.'
  warn('Non-stationary starting autoregressive parameters'
  warn('Non-invertible starting MA parameters found.'
  warn('Non-stationary starting autoregressive parameters'
  warn('Non-invertible starting MA parameters found.'
  warn('Non-stationary starting autoregressive parameters'
  warn('Non-invertible starting MA parameters found.'


  [SOL] Best ARIMA order = (0, 1, 1), AIC=7743.9
    SOL - Close_t+1: NaiveRMSE=6.90, ARIMARMSE=74.87
    SOL - Close_t+7: NaiveRMSE=17.99, ARIMARMSE=75.99
    SOL - Close_t+30: NaiveRMSE=39.43, ARIMARMSE=82.83
    SOL - Close_t+90: NaiveRMSE=54.21, ARIMARMSE=85.23

=== Multi-Horizon Baseline for TON ===


  warn('Non-invertible starting MA parameters found.'
  warn('Non-invertible starting MA parameters found.'


  [TON] Best ARIMA order = (1, 1, 1), AIC=-1036.3
    TON - Close_t+1: NaiveRMSE=0.24, ARIMARMSE=1.22
    TON - Close_t+7: NaiveRMSE=0.58, ARIMARMSE=1.22
    TON - Close_t+30: NaiveRMSE=0.92, ARIMARMSE=1.16
    TON - Close_t+90: NaiveRMSE=1.67, ARIMARMSE=1.05

=== Multi-Horizon Baseline for TRX ===


  warn('Non-invertible starting MA parameters found.'
  warn('Non-stationary starting autoregressive parameters'
  warn('Non-invertible starting MA parameters found.'
  warn('Non-stationary starting autoregressive parameters'
  warn('Non-invertible starting MA parameters found.'
  warn('Non-stationary starting autoregressive parameters'
  warn('Non-invertible starting MA parameters found.'


  [TRX] Best ARIMA order = (1, 0, 2), AIC=-16847.5
    TRX - Close_t+1: NaiveRMSE=0.01, ARIMARMSE=0.09
    TRX - Close_t+7: NaiveRMSE=0.02, ARIMARMSE=0.09
    TRX - Close_t+30: NaiveRMSE=0.03, ARIMARMSE=0.10
    TRX - Close_t+90: NaiveRMSE=0.05, ARIMARMSE=0.12

=== Multi-Horizon Baseline for UNI ===


  warn('Non-stationary starting autoregressive parameters'
  warn('Non-invertible starting MA parameters found.'
  warn('Non-stationary starting autoregressive parameters'
  warn('Non-invertible starting MA parameters found.'
  warn('Non-stationary starting autoregressive parameters'
  warn('Non-invertible starting MA parameters found.'
  warn('Non-stationary starting autoregressive parameters'
  warn('Non-invertible starting MA parameters found.'
  warn('Non-stationary starting autoregressive parameters'
  warn('Non-invertible starting MA parameters found.'
  warn('Non-invertible starting MA parameters found.'
  warn('Non-stationary starting autoregressive parameters'
  warn('Non-invertible starting MA parameters found.'


  [UNI] Best ARIMA order = (1, 1, 1), AIC=3440.3
    UNI - Close_t+1: NaiveRMSE=0.62, ARIMARMSE=3.44
    UNI - Close_t+7: NaiveRMSE=1.52, ARIMARMSE=3.50
    UNI - Close_t+30: NaiveRMSE=3.05, ARIMARMSE=3.60
    UNI - Close_t+90: NaiveRMSE=4.77, ARIMARMSE=3.53

=== Multi-Horizon Baseline for XRP ===


  warn('Non-invertible starting MA parameters found.'
  warn('Non-invertible starting MA parameters found.'
  warn('Non-stationary starting autoregressive parameters'
  warn('Non-invertible starting MA parameters found.'
  warn('Non-stationary starting autoregressive parameters'
  warn('Non-invertible starting MA parameters found.'
  warn('Non-stationary starting autoregressive parameters'
  warn('Non-invertible starting MA parameters found.'
  warn('Non-stationary starting autoregressive parameters'
  warn('Non-invertible starting MA parameters found.'
  warn('Non-stationary starting autoregressive parameters'
  warn('Non-invertible starting MA parameters found.'
  warn('Non-stationary starting autoregressive parameters'
  warn('Non-invertible starting MA parameters found.'
  warn('Non-stationary starting autoregressive parameters'
  warn('Non-invertible starting MA parameters found.'


  [XRP] Best ARIMA order = (2, 0, 1), AIC=-11497.6
    XRP - Close_t+1: NaiveRMSE=0.04, ARIMARMSE=0.54
    XRP - Close_t+7: NaiveRMSE=0.12, ARIMARMSE=0.56
    XRP - Close_t+30: NaiveRMSE=0.33, ARIMARMSE=0.71
    XRP - Close_t+90: NaiveRMSE=0.65, ARIMARMSE=0.95

=== Saved multi-horizon baseline results to: ../data\baseline_results_multihorizon_no_pmdarima.csv ===
    Coin     Horizon    Naive_RMSE     Naive_MAE ARIMA_Order    ARIMA_RMSE  \
0    ADA   Close_t+1      0.024669      0.014846   (1, 1, 1)      0.249718   
1    ADA   Close_t+7      0.071919      0.044098   (1, 1, 1)      0.256589   
2    ADA  Close_t+30      0.187761      0.112849   (1, 1, 1)      0.294923   
3    ADA  Close_t+90      0.307139      0.226240   (1, 1, 1)      0.334300   
4   AVAX   Close_t+1      1.901590      1.342636   (2, 1, 2)     10.238011   
5   AVAX   Close_t+7      4.963243      3.685372   (2, 1, 2)     10.231983   
6   AVAX  Close_t+30      9.870560      7.373302   (2, 1, 2)      9.478157   
7   AVAX  C