In [1]:
from ray import tune
import matplotlib.pyplot as plt
from src.dataset.data_preparation import prepare_data
from config.base import HORIZON, TEST_LENGTH_MULTIPLIER

# Load the prepared frames; both split parameters come from config.base.
train_df, test_df, hist_exog = prepare_data(
    horizon=HORIZON,
    test_length_multiplier=TEST_LENGTH_MULTIPLIER,
)

Loading and preparing data...

Total data shape: (2922, 33)
Train set shape: (2915, 33)
Test set shape: (7, 33)
  Train set covers: 2017-01-01 00:00:00 to 2024-12-24 00:00:00
  Test set covers: 2024-12-25 00:00:00 to 2024-12-31 00:00:00


In [5]:
from neuralforecast import NeuralForecast
from neuralforecast.models import NHITS, LSTM
from neuralforecast.utils import PredictionIntervals
from config.base import CV_N_WINDOWS, CV_STEP_SIZE

# Cross-validate an NHITS forecaster on the training frame.
# The horizon and the CV step are taken from config.base (HORIZON is imported
# in the first cell, CV_STEP_SIZE in this one) rather than repeated as a
# literal 7, so this cell stays in sync with prepare_data(horizon=HORIZON, ...).
# NOTE(review): results change if HORIZON or CV_STEP_SIZE is not 7 in
# config.base — confirm the configured values match the intended experiment.
nf = NeuralForecast(
    models=[
        NHITS(
            h=HORIZON,                   # Forecast horizon
            input_size=2 * HORIZON,      # Length of input sequence (two horizons)
            max_steps=100,               # Number of steps to train
            n_freq_downsample=[2, 1, 1],
        ),
        # Disabled alternative model, kept for reference:
        # LSTM(input_size=2 * HORIZON,
        #        h=HORIZON,              # Forecast horizon
        #        max_steps=500,                # Number of steps to train
        #        scaler_type='standard',       # Type of scaler to normalize data
        #        encoder_hidden_size=64,       # Defines the size of the hidden state of the LSTM
        #        decoder_hidden_size=64,
        # )
    ],
    freq='D')

# Disabled alternative: plain fit/predict with prediction intervals.
# nf.fit(
#     train_df,
#     val_size=HORIZON,
#     prediction_intervals=PredictionIntervals(n_windows=2))

# nf.predict(level=[90])

cv_df = nf.cross_validation(
    df=train_df,
    n_windows=CV_N_WINDOWS,
    step_size=CV_STEP_SIZE,  # was a hard-coded 7 while CV_STEP_SIZE sat unused
    # prediction_intervals=PredictionIntervals(n_windows=2)
)

# Bare last expression so Jupyter renders the frame with its rich display.
cv_df

Seed set to 1
GPU available: True (cuda), used: True
TPU available: False, using: 0 TPU cores
HPU available: False, using: 0 HPUs
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]

  | Name         | Type          | Params | Mode 
-------------------------------------------------------
0 | loss         | MAE           | 0      | train
1 | padder_train | ConstantPad1d | 0      | train
2 | scaler       | TemporalNorm  | 0      | train
3 | blocks       | ModuleList    | 2.4 M  | train
-------------------------------------------------------
2.4 M     Trainable params
0         Non-trainable params
2.4 M     Total params
9.640     Total estimated model params size (MB)
34        Modules in train mode
0         Modules in eval mode


Sanity Checking: |          | 0/? [00:00<?, ?it/s]

Training: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

`Trainer.fit` stopped: `max_steps=100` reached.
GPU available: True (cuda), used: True
TPU available: False, using: 0 TPU cores
HPU available: False, using: 0 HPUs
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]


Predicting: |          | 0/? [00:00<?, ?it/s]

    unique_id         ds     cutoff          NHITS            y
0     Bitcoin 2024-01-17 2024-01-16   43140.320312  42742.65234
1     Bitcoin 2024-01-18 2024-01-16   43228.617188  41262.05859
2     Bitcoin 2024-01-19 2024-01-16   43280.667969  41618.40625
3     Bitcoin 2024-01-20 2024-01-16   43340.562500  41665.58594
4     Bitcoin 2024-01-21 2024-01-16   43213.472656  41545.78516
..        ...        ...        ...            ...          ...
338   Bitcoin 2024-12-20 2024-12-17  106404.296875  97755.92969
339   Bitcoin 2024-12-21 2024-12-17  106513.250000  97224.72656
340   Bitcoin 2024-12-22 2024-12-17  106205.039062  95104.93750
341   Bitcoin 2024-12-23 2024-12-17  106279.968750  94686.24219
342   Bitcoin 2024-12-24 2024-12-17  105847.414062  98676.09375

[343 rows x 5 columns]


In [7]:
# Disabled experiment: AutoARIMA cross-validation with StatsForecast.
# NOTE(review): h=24 / step_size=24 below do not match the 7-step horizon used
# by the NHITS cell in this notebook, and no val_size argument is passed despite
# the trailing remark on the last line — confirm both before re-enabling.
# from statsforecast import StatsForecast
# from statsforecast.models import AutoARIMA

# sf = StatsForecast(models=[AutoARIMA(season_length=7)], freq='D', verbose=False) # Replace 'D' with your freq
# df = sf.cross_validation(
#     df=train_df[['unique_id', 'ds', 'y']],
#     h=24,
#     step_size=24,
#     n_windows=2
# ) # Ensure val_size is adequate


In [6]:
import pandas as pd

# Read the processed feature set and the three per-horizon selections
# (file suffixes 7, 14, 30) in one pass; names are kept for the cells below.
df, df1, df2, df3 = (
    pd.read_parquet(parquet_path)
    for parquet_path in (
        'data/processed/final_feature_selected_data.parquet',
        'data/final/feature_selection_7.parquet',
        'data/final/feature_selection_14.parquet',
        'data/final/feature_selection_30.parquet',
    )
)

In [7]:
# Column index of each loaded feature set, in load order.
tuple(frame.columns for frame in (df, df1, df2, df3))

(Index(['unique_id', 'ds', 'y', 'Oil_Crude_Price', 'btc_rsi_14',
        'Gold_Volatility', 'btc_sma_14_50_ratio', 'bearish_sentiment',
        'btc_sma_21_slope', 'Puell_cbbi', 'Oil_Volatility', 'Fear Greed',
        'bullish_sentiment', 'regulations_sentiment', 'marketcap_sentiment',
        'volume_sentiment', 'hash_rate_blockchain', 'btc_macd_diff',
        'btc_trading_volume'],
       dtype='object'),
 Index(['unique_id', 'ds', 'y', 'Oil_Volatility', 'marketcap_sentiment',
        'EM_ETF', 'btc_volatility_index', 'btc_sma_50_slope', 'btc_bb_width',
        'Gold_Volatility', 'Fear Greed', 'bearish_sentiment', 'RHODL_cbbi',
        'btc_macd_diff', 'btc_close_ema_21_dist_norm',
        'active_addresses_blockchain', 'btc_trading_volume',
        'core_technology_sentiment', 'bullish_sentiment', 'volume_sentiment',
        'btc_sma_21_slope', 'market_narrative_sentiment',
        'hash_rate_blockchain'],
       dtype='object'),
 Index(['unique_id', 'ds', 'y', 'EM_ETF', 'btc_close_

In [8]:
# Number of columns in each loaded feature set, in load order.
tuple(frame.shape[1] for frame in (df, df1, df2, df3))

(19, 23, 24, 20)