In [6]:
# 04_multihorizon_baseline_ratio.ipynb

import os
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns

from statsmodels.tsa.arima.model import ARIMA

sns.set_theme(font_scale=1.1)

DATA_FOLDER = '../data'

all_coins = [
    "Aave",
    "BinanceCoin",
    "Bitcoin",
    "Cardano",
    "ChainLink",
    "Cosmos",
    "CryptocomCoin",
    "Dogecoin",
    "EOS",
    "Ethereum",
    "Iota",
    "Litecoin",
    "Monero",
    "NEM",
    "Polkadot",
    "Solana",
    "Stellar",
    "Tether",
    "Tron",
    "Uniswap",
    "USDCoin",
    "WrappedBitcoin",
    "XRP"
]

def rmse(y_true, y_pred):
    return np.sqrt(np.mean((y_true - y_pred)**2))

def mae(y_true, y_pred):
    return np.mean(np.abs(y_true - y_pred))

def find_best_arima(train_series, p_range=(0,2), d_range=(0,2), q_range=(0,2)):
    best_aic = float('inf')
    best_order = None
    best_model_fit = None

    for p in range(p_range[0], p_range[1]+1):
        for d in range(d_range[0], d_range[1]+1):
            for q in range(q_range[0], q_range[1]+1):
                try:
                    model = ARIMA(train_series, order=(p,d,q))
                    fitted = model.fit()
                    if fitted.aic < best_aic:
                        best_aic = fitted.aic
                        best_order = (p,d,q)
                        best_model_fit = fitted
                except:
                    continue
    return best_order, best_model_fit

# We'll look for these horizon columns
horizons = ['Close_t+1','Close_t+7','Close_t+30','Close_t+90']

results = []

for coin in all_coins:
    print(f"\n=== Multi-Horizon Baseline for {coin} ===")
    train_csv = f"{coin.lower()}_train_multi.csv"
    test_csv  = f"{coin.lower()}_test_multi.csv"

    train_path = os.path.join(DATA_FOLDER, train_csv)
    test_path  = os.path.join(DATA_FOLDER, test_csv)

    if not (os.path.exists(train_path) and os.path.exists(test_path)):
        print(f"[{coin}] Missing multi-horizon train/test, skipping coin.")
        continue

    train_df = pd.read_csv(train_path, parse_dates=['Date'])
    test_df  = pd.read_csv(test_path,  parse_dates=['Date'])

    if len(train_df) < 20:
        print(f"[{coin}] Train set <20 rows, skipping ARIMA.")
        continue
    if len(test_df) == 0:
        print(f"[{coin}] Test set empty, skipping coin.")
        continue

    # ARIMA on train's 'Close'
    train_close = train_df['Close']
    best_order, best_fit = find_best_arima(train_close)
    if best_fit is None:
        print(f"[{coin}] No ARIMA found for p,d,q up to (2,2,2). Skipping coin.")
        continue

    # We'll forecast the entire test length
    steps_needed = len(test_df)
    if steps_needed <= 0:
        print(f"[{coin}] steps_needed={steps_needed}, skipping coin.")
        continue

    try:
        forecast_values = best_fit.forecast(steps=steps_needed)
        test_df['arima_pred_close'] = forecast_values.values
    except ValueError as e:
        print(f"[{coin}] ARIMA forecast error: {e}")
        continue

    # Evaluate each horizon
    for horizon_col in horizons:
        if horizon_col not in test_df.columns:
            continue

        # If it's all NaN, skip
        if test_df[horizon_col].isna().all():
            print(f"[{coin}] {horizon_col} is all NaN, skipping horizon.")
            continue

        # Naive approach
        test_df[f'naive_{horizon_col}'] = test_df['Close']  # same-day price
        y_true = test_df[horizon_col]
        y_naive = test_df[f'naive_{horizon_col}']

        naive_rmse_val = rmse(y_true, y_naive)
        naive_mae_val  = mae(y_true, y_naive)

        # ARIMA approach
        y_arima = test_df['arima_pred_close']
        arima_rmse_val = rmse(y_true, y_arima)
        arima_mae_val  = mae(y_true, y_arima)

        print(f"{coin} - {horizon_col}: NaiveRMSE={naive_rmse_val:.2f}, ARIMARMSE={arima_rmse_val:.2f}")

        results.append({
            'Coin': coin,
            'Horizon': horizon_col,
            'Naive_RMSE': naive_rmse_val,
            'Naive_MAE': naive_mae_val,
            'ARIMA_Order': best_order,
            'ARIMA_RMSE': arima_rmse_val,
            'ARIMA_MAE': arima_mae_val
        })

# Save results
results_df = pd.DataFrame(results)
out_csv = os.path.join(DATA_FOLDER, 'baseline_results_multihorizon.csv')
results_df.to_csv(out_csv, index=False)
print(f"\n=== Saved multi-horizon baseline results to: {out_csv}")
print(results_df.head(30))



=== Multi-Horizon Baseline for Aave ===


  warn('Non-invertible starting MA parameters found.'
  warn('Non-invertible starting MA parameters found.'
  warn('Non-stationary starting autoregressive parameters'
  warn('Non-invertible starting MA parameters found.'
  warn('Non-stationary starting autoregressive parameters'
  warn('Non-invertible starting MA parameters found.'
  warn('Non-stationary starting autoregressive parameters'
  warn('Non-invertible starting MA parameters found.'


Aave - Close_t+1: NaiveRMSE=18.27, ARIMARMSE=31.15
Aave - Close_t+7: NaiveRMSE=37.91, ARIMARMSE=26.02
Aave - Close_t+30: NaiveRMSE=63.20, ARIMARMSE=70.79
Aave - Close_t+90: NaiveRMSE=110.12, ARIMARMSE=101.00

=== Multi-Horizon Baseline for BinanceCoin ===


  warn('Non-invertible starting MA parameters found.'
  warn('Non-invertible starting MA parameters found.'
  warn('Non-stationary starting autoregressive parameters'
  warn('Non-invertible starting MA parameters found.'


BinanceCoin - Close_t+1: NaiveRMSE=13.63, ARIMARMSE=115.62
BinanceCoin - Close_t+7: NaiveRMSE=40.09, ARIMARMSE=138.55
BinanceCoin - Close_t+30: NaiveRMSE=112.77, ARIMARMSE=213.25
BinanceCoin - Close_t+90: NaiveRMSE=227.23, ARIMARMSE=278.97

=== Multi-Horizon Baseline for Bitcoin ===


  warn('Non-invertible starting MA parameters found.'
  warn('Non-stationary starting autoregressive parameters'
  warn('Non-invertible starting MA parameters found.'
  warn('Non-stationary starting autoregressive parameters'
  warn('Non-invertible starting MA parameters found.'


Bitcoin - Close_t+1: NaiveRMSE=902.24, ARIMARMSE=15788.44
Bitcoin - Close_t+7: NaiveRMSE=2590.01, ARIMARMSE=16604.60
Bitcoin - Close_t+30: NaiveRMSE=5575.82, ARIMARMSE=18916.86
Bitcoin - Close_t+90: NaiveRMSE=14373.22, ARIMARMSE=21099.40

=== Multi-Horizon Baseline for Cardano ===


  warn('Non-invertible starting MA parameters found.'
  warn('Non-invertible starting MA parameters found.'
  warn('Non-stationary starting autoregressive parameters'
  warn('Non-invertible starting MA parameters found.'


Cardano - Close_t+1: NaiveRMSE=0.04, ARIMARMSE=0.48
Cardano - Close_t+7: NaiveRMSE=0.10, ARIMARMSE=0.51
Cardano - Close_t+30: NaiveRMSE=0.27, ARIMARMSE=0.62
Cardano - Close_t+90: NaiveRMSE=0.64, ARIMARMSE=0.94

=== Multi-Horizon Baseline for ChainLink ===


  warn('Non-invertible starting MA parameters found.'
  warn('Non-invertible starting MA parameters found.'
  warn('Non-stationary starting autoregressive parameters'
  warn('Non-stationary starting autoregressive parameters'
  warn('Non-stationary starting autoregressive parameters'
  warn('Non-stationary starting autoregressive parameters'
  warn('Non-stationary starting autoregressive parameters'
  warn('Non-invertible starting MA parameters found.'
  warn('Non-stationary starting autoregressive parameters'
  warn('Non-invertible starting MA parameters found.'


ChainLink - Close_t+1: NaiveRMSE=1.28, ARIMARMSE=12.53
ChainLink - Close_t+7: NaiveRMSE=3.05, ARIMARMSE=13.21
ChainLink - Close_t+30: NaiveRMSE=6.33, ARIMARMSE=16.23
ChainLink - Close_t+90: NaiveRMSE=11.75, ARIMARMSE=19.29

=== Multi-Horizon Baseline for Cosmos ===


  warn('Non-stationary starting autoregressive parameters'
  warn('Non-invertible starting MA parameters found.'


Cosmos - Close_t+1: NaiveRMSE=0.94, ARIMARMSE=10.35
Cosmos - Close_t+7: NaiveRMSE=2.75, ARIMARMSE=11.03
Cosmos - Close_t+30: NaiveRMSE=5.83, ARIMARMSE=13.23
Cosmos - Close_t+90: NaiveRMSE=12.38, ARIMARMSE=14.90

=== Multi-Horizon Baseline for CryptocomCoin ===


  warn('Non-invertible starting MA parameters found.'
  warn('Non-stationary starting autoregressive parameters'
  warn('Non-invertible starting MA parameters found.'
  warn('Non-stationary starting autoregressive parameters'
  warn('Non-invertible starting MA parameters found.'
  warn('Non-stationary starting autoregressive parameters'
  warn('Non-invertible starting MA parameters found.'


CryptocomCoin - Close_t+1: NaiveRMSE=0.01, ARIMARMSE=0.06
CryptocomCoin - Close_t+7: NaiveRMSE=0.02, ARIMARMSE=0.07
CryptocomCoin - Close_t+30: NaiveRMSE=0.05, ARIMARMSE=0.08
CryptocomCoin - Close_t+90: NaiveRMSE=0.10, ARIMARMSE=0.08

=== Multi-Horizon Baseline for Dogecoin ===


  warn('Non-invertible starting MA parameters found.'
  warn('Non-invertible starting MA parameters found.'
  warn('Non-stationary starting autoregressive parameters'
  warn('Non-invertible starting MA parameters found.'
  warn('Non-stationary starting autoregressive parameters'
  warn('Non-invertible starting MA parameters found.'


Dogecoin - Close_t+1: NaiveRMSE=0.00, ARIMARMSE=0.02
Dogecoin - Close_t+7: NaiveRMSE=0.01, ARIMARMSE=0.02
Dogecoin - Close_t+30: NaiveRMSE=0.07, ARIMARMSE=0.08
Dogecoin - Close_t+90: NaiveRMSE=0.13, ARIMARMSE=0.15

=== Multi-Horizon Baseline for EOS ===


  warn('Non-invertible starting MA parameters found.'


EOS - Close_t+1: NaiveRMSE=0.22, ARIMARMSE=0.98
EOS - Close_t+7: NaiveRMSE=0.55, ARIMARMSE=1.16
EOS - Close_t+30: NaiveRMSE=1.17, ARIMARMSE=1.76
EOS - Close_t+90: NaiveRMSE=2.20, ARIMARMSE=2.60

=== Multi-Horizon Baseline for Ethereum ===


  warn('Non-invertible starting MA parameters found.'
  warn('Non-stationary starting autoregressive parameters'
  warn('Non-invertible starting MA parameters found.'
  warn('Non-stationary starting autoregressive parameters'
  warn('Non-invertible starting MA parameters found.'
  warn('Non-stationary starting autoregressive parameters'
  warn('Non-invertible starting MA parameters found.'
  warn('Non-stationary starting autoregressive parameters'
  warn('Non-invertible starting MA parameters found.'


Ethereum - Close_t+1: NaiveRMSE=44.39, ARIMARMSE=678.14
Ethereum - Close_t+7: NaiveRMSE=124.29, ARIMARMSE=717.90
Ethereum - Close_t+30: NaiveRMSE=326.50, ARIMARMSE=927.72
Ethereum - Close_t+90: NaiveRMSE=757.58, ARIMARMSE=1316.09

=== Multi-Horizon Baseline for Iota ===


  warn('Non-invertible starting MA parameters found.'
  warn('Non-invertible starting MA parameters found.'
  warn('Non-stationary starting autoregressive parameters'
  warn('Non-invertible starting MA parameters found.'
  warn('Non-stationary starting autoregressive parameters'
  warn('Non-stationary starting autoregressive parameters'
  warn('Non-invertible starting MA parameters found.'
  warn('Non-stationary starting autoregressive parameters'
  warn('Non-invertible starting MA parameters found.'


Iota - Close_t+1: NaiveRMSE=0.05, ARIMARMSE=0.53
Iota - Close_t+7: NaiveRMSE=0.15, ARIMARMSE=0.60
Iota - Close_t+30: NaiveRMSE=0.36, ARIMARMSE=0.80
Iota - Close_t+90: NaiveRMSE=0.79, ARIMARMSE=0.93

=== Multi-Horizon Baseline for Litecoin ===


  warn('Non-invertible starting MA parameters found.'
  warn('Non-invertible starting MA parameters found.'
  warn('Non-stationary starting autoregressive parameters'
  warn('Non-invertible starting MA parameters found.'
  warn('Non-stationary starting autoregressive parameters'
  warn('Non-invertible starting MA parameters found.'


Litecoin - Close_t+1: NaiveRMSE=5.13, ARIMARMSE=50.49
Litecoin - Close_t+7: NaiveRMSE=14.60, ARIMARMSE=53.88
Litecoin - Close_t+30: NaiveRMSE=29.77, ARIMARMSE=68.38
Litecoin - Close_t+90: NaiveRMSE=59.27, ARIMARMSE=81.69

=== Multi-Horizon Baseline for Monero ===


  warn('Non-stationary starting autoregressive parameters'
  warn('Non-invertible starting MA parameters found.'
  warn('Non-stationary starting autoregressive parameters'
  warn('Non-invertible starting MA parameters found.'


Monero - Close_t+1: NaiveRMSE=6.06, ARIMARMSE=78.42
Monero - Close_t+7: NaiveRMSE=16.59, ARIMARMSE=83.44
Monero - Close_t+30: NaiveRMSE=43.56, ARIMARMSE=110.81
Monero - Close_t+90: NaiveRMSE=77.50, ARIMARMSE=137.59

=== Multi-Horizon Baseline for NEM ===


  warn('Non-invertible starting MA parameters found.'
  warn('Non-invertible starting MA parameters found.'
  warn('Non-invertible starting MA parameters found.'
  warn('Non-invertible starting MA parameters found.'


NEM - Close_t+1: NaiveRMSE=0.02, ARIMARMSE=0.19
NEM - Close_t+7: NaiveRMSE=0.06, ARIMARMSE=0.19
NEM - Close_t+30: NaiveRMSE=0.11, ARIMARMSE=0.21
NEM - Close_t+90: NaiveRMSE=0.18, ARIMARMSE=0.22

=== Multi-Horizon Baseline for Polkadot ===


  warn('Non-stationary starting autoregressive parameters'
  warn('Non-stationary starting autoregressive parameters'
  warn('Non-stationary starting autoregressive parameters'
  warn('Non-stationary starting autoregressive parameters'
  warn('Non-stationary starting autoregressive parameters'
  warn('Non-stationary starting autoregressive parameters'
  warn('Non-stationary starting autoregressive parameters'
  warn('Non-invertible starting MA parameters found.'


Polkadot - Close_t+1: NaiveRMSE=1.81, ARIMARMSE=7.79
Polkadot - Close_t+7: NaiveRMSE=4.61, ARIMARMSE=6.70
Polkadot - Close_t+30: NaiveRMSE=4.81, ARIMARMSE=8.69
Polkadot - Close_t+90: NaiveRMSE=17.07, ARIMARMSE=24.20

=== Multi-Horizon Baseline for Solana ===


  warn('Non-stationary starting autoregressive parameters'
  warn('Non-invertible starting MA parameters found.'


Solana - Close_t+1: NaiveRMSE=1.14, ARIMARMSE=10.93
Solana - Close_t+7: NaiveRMSE=3.52, ARIMARMSE=12.96
Solana - Close_t+30: NaiveRMSE=14.52, ARIMARMSE=24.27
Solana - Close_t+90: NaiveRMSE=26.62, ARIMARMSE=34.41

=== Multi-Horizon Baseline for Stellar ===


  warn('Non-stationary starting autoregressive parameters'
  warn('Non-invertible starting MA parameters found.'
  warn('Non-stationary starting autoregressive parameters'
  warn('Non-invertible starting MA parameters found.'


Stellar - Close_t+1: NaiveRMSE=0.01, ARIMARMSE=0.15
Stellar - Close_t+7: NaiveRMSE=0.04, ARIMARMSE=0.17
Stellar - Close_t+30: NaiveRMSE=0.07, ARIMARMSE=0.20
Stellar - Close_t+90: NaiveRMSE=0.14, ARIMARMSE=0.24

=== Multi-Horizon Baseline for Tether ===


  warn('Non-invertible starting MA parameters found.'


Tether - Close_t+1: NaiveRMSE=0.00, ARIMARMSE=0.00
Tether - Close_t+7: NaiveRMSE=0.00, ARIMARMSE=0.00
Tether - Close_t+30: NaiveRMSE=0.00, ARIMARMSE=0.00
Tether - Close_t+90: NaiveRMSE=0.00, ARIMARMSE=0.00

=== Multi-Horizon Baseline for Tron ===


  warn('Non-invertible starting MA parameters found.'
  warn('Non-invertible starting MA parameters found.'
  warn('Non-stationary starting autoregressive parameters'
  warn('Non-invertible starting MA parameters found.'
  warn('Non-stationary starting autoregressive parameters'
  warn('Non-invertible starting MA parameters found.'


Tron - Close_t+1: NaiveRMSE=0.00, ARIMARMSE=0.02
Tron - Close_t+7: NaiveRMSE=0.01, ARIMARMSE=0.03
Tron - Close_t+30: NaiveRMSE=0.03, ARIMARMSE=0.04
Tron - Close_t+90: NaiveRMSE=0.04, ARIMARMSE=0.05

=== Multi-Horizon Baseline for Uniswap ===


  warn('Non-stationary starting autoregressive parameters'
  warn('Non-stationary starting autoregressive parameters'
  warn('Non-stationary starting autoregressive parameters'
  warn('Non-stationary starting autoregressive parameters'
  warn('Non-invertible starting MA parameters found.'
  warn('Non-stationary starting autoregressive parameters'
  warn('Non-stationary starting autoregressive parameters'
  warn('Non-stationary starting autoregressive parameters'
  warn('Non-invertible starting MA parameters found.'
  warn('Non-stationary starting autoregressive parameters'
  warn('Non-invertible starting MA parameters found.'
  warn('Non-stationary starting autoregressive parameters'
  warn('Non-invertible starting MA parameters found.'
  warn('Non-invertible starting MA parameters found.'
  warn('Non-stationary starting autoregressive parameters'
  warn('Non-invertible starting MA parameters found.'


Uniswap - Close_t+1: NaiveRMSE=1.71, ARIMARMSE=5.53
Uniswap - Close_t+7: NaiveRMSE=3.35, ARIMARMSE=6.11
Uniswap - Close_t+30: NaiveRMSE=7.40, ARIMARMSE=11.17
Uniswap - Close_t+90: NaiveRMSE=9.45, ARIMARMSE=5.05

=== Multi-Horizon Baseline for USDCoin ===


  warn('Non-invertible starting MA parameters found.'
  warn('Non-invertible starting MA parameters found.'
  warn('Non-stationary starting autoregressive parameters'


USDCoin - Close_t+1: NaiveRMSE=0.00, ARIMARMSE=0.00
USDCoin - Close_t+7: NaiveRMSE=0.00, ARIMARMSE=0.00
USDCoin - Close_t+30: NaiveRMSE=0.00, ARIMARMSE=0.00
USDCoin - Close_t+90: NaiveRMSE=0.00, ARIMARMSE=0.00

=== Multi-Horizon Baseline for WrappedBitcoin ===


  warn('Non-stationary starting autoregressive parameters'
  warn('Non-invertible starting MA parameters found.'


WrappedBitcoin - Close_t+1: NaiveRMSE=1667.78, ARIMARMSE=26119.09
WrappedBitcoin - Close_t+7: NaiveRMSE=4788.57, ARIMARMSE=27637.09
WrappedBitcoin - Close_t+30: NaiveRMSE=10129.64, ARIMARMSE=31865.84
WrappedBitcoin - Close_t+90: NaiveRMSE=24861.82, ARIMARMSE=34435.13

=== Multi-Horizon Baseline for XRP ===


  warn('Non-invertible starting MA parameters found.'
  warn('Non-invertible starting MA parameters found.'
  warn('Non-stationary starting autoregressive parameters'
  warn('Non-invertible starting MA parameters found.'
  warn('Non-stationary starting autoregressive parameters'
  warn('Non-invertible starting MA parameters found.'


XRP - Close_t+1: NaiveRMSE=0.03, ARIMARMSE=0.17
XRP - Close_t+7: NaiveRMSE=0.10, ARIMARMSE=0.22
XRP - Close_t+30: NaiveRMSE=0.24, ARIMARMSE=0.33
XRP - Close_t+90: NaiveRMSE=0.34, ARIMARMSE=0.43

=== Saved multi-horizon baseline results to: ../data\baseline_results_multihorizon.csv
             Coin     Horizon    Naive_RMSE    Naive_MAE ARIMA_Order  \
0            Aave   Close_t+1     18.265138    15.094809   (1, 1, 2)   
1            Aave   Close_t+7     37.906837    32.636441   (1, 1, 2)   
2            Aave  Close_t+30     63.201110    52.020578   (1, 1, 2)   
3            Aave  Close_t+90    110.123263   101.843564   (1, 1, 2)   
4     BinanceCoin   Close_t+1     13.630565     5.007229   (2, 1, 2)   
5     BinanceCoin   Close_t+7     40.087401    16.957944   (2, 1, 2)   
6     BinanceCoin  Close_t+30    112.771262    61.253317   (2, 1, 2)   
7     BinanceCoin  Close_t+90    227.229684   143.219464   (2, 1, 2)   
8         Bitcoin   Close_t+1    902.244539   452.576487   (2, 1, 2)  

