In [1]:
from ray import tune
import matplotlib.pyplot as plt
from src.dataset.data_preparation import prepare_data
from config.base import HORIZON, TEST_LENGTH_MULTIPLIER

# Build the train/test frames. prepare_data returns three values; the third is
# kept as `hist_exog` (presumably the historical exogenous inputs — confirm
# against src.dataset.data_preparation).
train_df, test_df, hist_exog = prepare_data(
    horizon=HORIZON,
    test_length_multiplier=TEST_LENGTH_MULTIPLIER,
)

Loading and preparing data...

Total data shape: (2922, 33)
Train set shape: (2915, 33)
Test set shape: (7, 33)
  Train set covers: 2017-01-01 00:00:00 to 2024-12-24 00:00:00
  Test set covers: 2024-12-25 00:00:00 to 2024-12-31 00:00:00


In [5]:
from neuralforecast import NeuralForecast
from neuralforecast.models import NHITS, LSTM
from neuralforecast.utils import PredictionIntervals
from config.base import CV_N_WINDOWS, CV_STEP_SIZE

# NHITS forecaster on daily data.
# The horizon and look-back window are derived from the shared HORIZON constant
# (imported from config.base in the data-preparation cell, which also provides
# train_df) instead of repeating the literal 7, so the model stays in sync with
# the split produced by prepare_data.
nf = NeuralForecast(
    models=[
        NHITS(
            h=HORIZON,                   # Forecast horizon
            input_size=2 * HORIZON,      # Length of input sequence: two horizons of history
            max_steps=100,               # Number of steps to train
            n_freq_downsample=[2, 1, 1],
        ),
        # Disabled alternative model, kept for reference:
        # LSTM(input_size=2 * HORIZON,
        #        h=HORIZON,              # Forecast horizon
        #        max_steps=500,                # Number of steps to train
        #        scaler_type='standard',       # Type of scaler to normalize data
        #        encoder_hidden_size=64,       # Defines the size of the hidden state of the LSTM
        #        decoder_hidden_size=64,
        # )
    ],
    freq='D')

# Disabled plain fit/predict path with prediction intervals, kept for reference:
# nf.fit(
#     train_df,
#     val_size=7,
#     prediction_intervals=PredictionIntervals(n_windows=2))

# nf.predict(level=[90])

# Cross-validation over the training frame. Both the window count and the
# spacing between windows now come from config.base; previously CV_STEP_SIZE
# was imported but a hard-coded 7 was passed instead.
cv_df = nf.cross_validation(
    df=train_df,
    n_windows=CV_N_WINDOWS,
    step_size=CV_STEP_SIZE,
    # prediction_intervals=PredictionIntervals(n_windows=2)
)

print(cv_df)

Seed set to 1
GPU available: True (cuda), used: True
TPU available: False, using: 0 TPU cores
HPU available: False, using: 0 HPUs
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]

  | Name         | Type          | Params | Mode 
-------------------------------------------------------
0 | loss         | MAE           | 0      | train
1 | padder_train | ConstantPad1d | 0      | train
2 | scaler       | TemporalNorm  | 0      | train
3 | blocks       | ModuleList    | 2.4 M  | train
-------------------------------------------------------
2.4 M     Trainable params
0         Non-trainable params
2.4 M     Total params
9.640     Total estimated model params size (MB)
34        Modules in train mode
0         Modules in eval mode


Sanity Checking: |          | 0/? [00:00<?, ?it/s]

Training: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

`Trainer.fit` stopped: `max_steps=100` reached.
GPU available: True (cuda), used: True
TPU available: False, using: 0 TPU cores
HPU available: False, using: 0 HPUs
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]


Predicting: |          | 0/? [00:00<?, ?it/s]

    unique_id         ds     cutoff          NHITS            y
0     Bitcoin 2024-01-17 2024-01-16   43140.320312  42742.65234
1     Bitcoin 2024-01-18 2024-01-16   43228.617188  41262.05859
2     Bitcoin 2024-01-19 2024-01-16   43280.667969  41618.40625
3     Bitcoin 2024-01-20 2024-01-16   43340.562500  41665.58594
4     Bitcoin 2024-01-21 2024-01-16   43213.472656  41545.78516
..        ...        ...        ...            ...          ...
338   Bitcoin 2024-12-20 2024-12-17  106404.296875  97755.92969
339   Bitcoin 2024-12-21 2024-12-17  106513.250000  97224.72656
340   Bitcoin 2024-12-22 2024-12-17  106205.039062  95104.93750
341   Bitcoin 2024-12-23 2024-12-17  106279.968750  94686.24219
342   Bitcoin 2024-12-24 2024-12-17  105847.414062  98676.09375

[343 rows x 5 columns]


In [7]:
# Disabled experiment: every line in this cell is commented out, so running it
# executes nothing. It sketches a StatsForecast AutoARIMA cross-validation on
# the 'unique_id', 'ds' and 'y' columns of train_df.
# from statsforecast import StatsForecast
# from statsforecast.models import AutoARIMA

# sf = StatsForecast(models=[AutoARIMA(season_length=7)], freq='D', verbose=False) # 'D' = daily frequency
# df = sf.cross_validation(
#     df=train_df[['unique_id', 'ds', 'y']],
#     h=24,
#     step_size=24,
#     n_windows=2
# ) # NOTE(review): the original note said "Ensure val_size is adequate", but no val_size is passed here — confirm intent


In [1]:
import pandas as pd

# Load the processed feature-selection dataset (presumably the 7-day-horizon
# variant, judging by the file name — confirm).
df = pd.read_parquet(path='data/processed/feature_selection_7_mc.parquet')
# Per-horizon files under data/raw, currently disabled:
# df1 = pd.read_parquet('data/raw/feature_selection_7.parquet')
# df2 = pd.read_parquet('data/raw/feature_selection_14.parquet')
# df3 = pd.read_parquet('data/raw/feature_selection_30.parquet')
# df4 = pd.read_parquet('data/raw/feature_selection_60.parquet')
# df5 = pd.read_parquet('data/raw/feature_selection_90.parquet')

In [3]:
# Plain-text preview of the first five rows of the loaded frame.
print(df.head(n=5))

# Disabled: column counts of the per-horizon frames.
# print(len(df1.columns))
# print(len(df2.columns))
# print(len(df3.columns))
# print(len(df4.columns))
# print(len(df5.columns))

# Disabled: column names of each frame.
# print(df.columns)
# print(df1.columns)
# print(df2.columns)
# print(df3.columns)
# print(df4.columns)
# print(df5.columns)

  unique_id         ds         y  btc_close_ema_21_dist_norm  \
0   Bitcoin 2017-01-02  0.023193                    0.118906   
1   Bitcoin 2017-01-03  0.021389                    0.125047   
2   Bitcoin 2017-01-04  0.100960                    0.190063   
3   Bitcoin 2017-01-05 -0.130575                    0.070082   
4   Bitcoin 2017-01-06 -0.116209                   -0.040466   

   estimated_transaction_volume_usd_blockchain  MVRV_cbbi  Confidence_cbbi  \
0                                 1.131921e+08     0.0030           0.0055   
1                                 6.177315e+07     0.0109           0.0103   
2                                 9.192380e+07     0.0253           0.0071   
3                                 1.321019e+08    -0.0047           0.0107   
4                                -1.141164e+08    -0.0074          -0.0091   

   Fear Greed  bearish_sentiment  market_narrative_sentiment  ...  \
0        70.0               59.0                        20.0  ...   
1       

Raw feature list:
Index(['unique_id', 'ds', 'y', 'btc_sma_5', 'btc_ema_5', 'btc_sma_14', 'btc_ema_14',
       'btc_sma_21', 'btc_ema_21', 'btc_sma_50', 'btc_ema_50',
       'btc_sma_14_50_diff', 'btc_ema_14_50_diff', 'btc_sma_14_50_ratio',
       'btc_sma_14_slope', 'btc_ema_14_slope', 'btc_sma_21_slope',
       'btc_ema_21_slope', 'btc_sma_50_slope', 'btc_ema_50_slope',
       'btc_close_ema_21_dist', 'btc_close_ema_21_dist_norm', 'btc_rsi_14',
       'btc_macd', 'btc_macd_signal', 'btc_macd_diff', 'btc_bb_high',
       'btc_bb_low', 'btc_bb_mid', 'btc_bb_width', 'btc_atr_14',
       'btc_volatility_index', 'btc_trading_volume',
       'active_addresses_blockchain', 'hash_rate_blockchain',
       'miner_revenue_blockchain', 'difficulty_blockchain',
       'estimated_transaction_volume_usd_blockchain', 'PiCycle_cbbi',
       'RUPL_cbbi', 'RHODL_cbbi', 'Puell_cbbi', '2YMA_cbbi', 'Trolololo_cbbi',
       'MVRV_cbbi', 'ReserveRisk_cbbi', 'Woobull_cbbi', 'Confidence_cbbi',
       'Fear Greed', 'positive_sentiment', 'negative_sentiment',
       'bullish_sentiment', 'bearish_sentiment', 'risk_uncertainty_sentiment',
       'problem_malicious_sentiment', 'active_trading_sentiment',
       'long_term_investment_sentiment', 'market_narrative_sentiment',
       'core_technology_sentiment', 'development_ecosystem_sentiment',
       'news_events_sentiment', 'regulations_sentiment',
       'community_social_sentiment', 'price_sentiment', 'volume_sentiment',
       'marketcap_sentiment', 'Gold_Price', 'Gold_Share', 'Gold_Volatility',
       'Oil_Crude_Price', 'Oil_Brent_Price', 'Oil_Volatility', 'DJI', 'GSPC',
       'IXIC', 'NYFANG', 'CBOE_Volatility', 'EM_ETF', 'DXY', 'EURUSD'],
      dtype='object')

Reduced feature list for the 7-day horizon:
Index(['unique_id', 'ds', 'y', 'Oil_Volatility', 'marketcap_sentiment',
       'EM_ETF', 'btc_volatility_index', 'btc_sma_50_slope', 'btc_bb_width',
       'Gold_Volatility', 'Fear Greed', 'bearish_sentiment', 'RHODL_cbbi',
       'btc_macd_diff', 'btc_close_ema_21_dist_norm',
       'active_addresses_blockchain', 'btc_trading_volume',
       'core_technology_sentiment', 'bullish_sentiment', 'volume_sentiment',
       'btc_sma_21_slope', 'market_narrative_sentiment',
       'hash_rate_blockchain'],
      dtype='object')

Reduced feature list for the 14-day horizon:
Index(['unique_id', 'ds', 'y', 'EM_ETF', 'btc_close_ema_21_dist_norm',
       'RHODL_cbbi', 'Gold_Volatility', 'PiCycle_cbbi', 'btc_macd_diff',
       'estimated_transaction_volume_usd_blockchain', 'CBOE_Volatility',
       'btc_sma_14_50_ratio', 'Fear Greed', 'marketcap_sentiment',
       'market_narrative_sentiment', 'btc_bb_width', 'volume_sentiment',
       'regulations_sentiment', 'EURUSD', 'problem_malicious_sentiment',
       'active_addresses_blockchain', 'btc_trading_volume', 'btc_sma_14_slope',
       'hash_rate_blockchain'],
      dtype='object')

Reduced feature list for the 30-day horizon:
Index(['unique_id', 'ds', 'y', 'btc_sma_50_slope', 'EM_ETF', 'Fear Greed',
       'Gold_Volatility', 'btc_sma_21_slope', 'btc_rsi_14',
       'miner_revenue_blockchain', 'btc_macd_diff',
       'market_narrative_sentiment', 'CBOE_Volatility', 'marketcap_sentiment',
       'problem_malicious_sentiment', 'volume_sentiment', 'RHODL_cbbi',
       'bullish_sentiment', 'bearish_sentiment', 'btc_trading_volume'],
      dtype='object')

Reduced feature list for the 60-day horizon:
Index(['unique_id', 'ds', 'y', 'EM_ETF', 'miner_revenue_blockchain',
       'Gold_Volatility', 'btc_sma_21_slope', 'Oil_Brent_Price', 'btc_rsi_14',
       'market_narrative_sentiment', 'btc_sma_14_50_ratio',
       'bearish_sentiment', 'btc_sma_50_slope', 'btc_bb_width',
       'bullish_sentiment', 'btc_volatility_index', 'btc_trading_volume',
       'DXY', 'active_addresses_blockchain', 'PiCycle_cbbi'],
      dtype='object')

Reduced feature list for the 90-day horizon:
Index(['unique_id', 'ds', 'y', 'Fear Greed', 'btc_sma_50_slope',
       'Gold_Volatility', 'EURUSD', 'bearish_sentiment', 'EM_ETF',
       'estimated_transaction_volume_usd_blockchain'],
      dtype='object')

In [3]:
from IPython.display import display
import pandas as pd
from src.dataset.data_preparation import prepare_pipeline_data

# Run the project's data-preparation pipeline with its default arguments and
# unpack the four values it returns.
(train_df, test_df, hist_exog_list, data_info) = prepare_pipeline_data()

# Read the stored CSV (cross-validation results, judging by the path) into cv_df.
cv_df = pd.read_csv(filepath_or_buffer='results/results_7d/cv/cv_df_stat.csv')


📊 STEP 1: DATA PREPARATION
----------------------------------------
Loading and preparing data...

Total data shape: (2922, 23)
Train set shape: (2915, 23)
Test set shape: (7, 23)
  Train set covers: 2017-01-01 00:00:00 to 2024-12-24 00:00:00
  Test set covers: 2024-12-25 00:00:00 to 2024-12-31 00:00:00
✅ Data prepared successfully
   • Training samples: 2,915
   • Test samples: 7
   • Features: 20 exogenous variables
   • Forecast horizon: 7 days


In [4]:
import pandas as pd

from utilsforecast.evaluation import evaluate
from utilsforecast.losses import mse, mae, rmse

In [17]:
# Rebind cv_df to the CSV stored under the 90-day results directory.
cv_df = pd.read_csv(filepath_or_buffer='results/results_90d/cv/cv_df.csv')

In [18]:
# Score every model column with MSE, MAE and RMSE ('cutoff' is dropped first so
# it is not passed to evaluate), then record, per row, the name of the column
# holding the lowest score as `best_model`.
evaluation_df = evaluate(
    df=cv_df.drop(columns=['cutoff']),
    metrics=[mse, mae, rmse],
).assign(
    best_model=lambda scores: scores.drop(columns=['unique_id', 'metric']).idxmin(axis='columns')
)
evaluation_df.head()

Unnamed: 0,unique_id,metric,AutoARIMA,AutoETS,AutoTheta,AutoNBEATSx,AutoTSMixerx,AutoiTransformer,AutoBiTCN,best_model
0,Bitcoin,mse,33724200.0,70174070.0,68576590.0,197683500.0,186903000.0,154660400.0,160884500.0,AutoARIMA
1,Bitcoin,mae,4229.54,5731.123,5726.52,9319.29,9013.437,8959.529,10578.66,AutoARIMA
2,Bitcoin,rmse,5807.254,8376.996,8281.098,14060.0,13671.25,12436.25,12684.02,AutoARIMA


In [19]:
# Count, for each (metric, winning model) pair, how many rows of evaluation_df
# it covers, ordered by ascending count.
summary_df = (
    evaluation_df
    .groupby(['metric', 'best_model'])
    .size()
    .sort_values()
    .reset_index(name='nr. of unique_ids')
    .rename(columns={'best_model': 'model'})
)
summary_df

Unnamed: 0,metric,model,nr. of unique_ids
0,mae,AutoARIMA,1
1,mse,AutoARIMA,1
2,rmse,AutoARIMA,1
