In [5]:
import json
import pandas as pd
import pandas as pd
from pmdarima import auto_arima
from joblib import Parallel, delayed
import matplotlib.pyplot as plt
from statsmodels.graphics.tsaplots import plot_acf

In [2]:
df = pd.read_csv("../data/final/dataset.csv", parse_dates=["Date"], index_col="Date")

# Define the list of technical indicators
technical_features = [
    'btc_sma_14',
    'btc_ema_14', 'btc_rsi_14', 'btc_macd', 'btc_macd_signal',
    'btc_macd_diff', 'btc_bb_high', 'btc_bb_low', 'btc_bb_mid',
    'btc_bb_width', 'btc_atr_14', 'btc_trading_volume',
    'btc_volatility_index'
]

# Every remaining column: drop the technical indicators and 'btc_close'.
# The exclusion collection must be flat. Nesting `technical_features` inside
# another list would compare each column name against the list object itself,
# so no technical indicator would ever be filtered out.
excluded_columns = set(technical_features) | {'btc_close'}
non_technical_features = [col for col in df.columns if col not in excluded_columns]

In [4]:
from statsmodels.tsa.stattools import adfuller

# Function to perform ADF test and determine d
def find_d_value(series, max_d=5):
    """Return the lowest differencing order at which `series` passes the ADF test.

    The series is tested as-is (d = 0) and then again after each successive
    lag-1 difference, up to `max_d` differences. The first order whose ADF
    p-value is <= 0.05 is reported and returned.

    Parameters
    ----------
    series : pandas.Series
        Series to test; `series.name` is used in the printed messages.
    max_d : int, default 5
        Highest differencing order to try.

    Returns
    -------
    int
        The first order in [0, max_d] whose p-value is <= 0.05, or `max_d`
        when none of the tested orders reaches that threshold.
    """
    test_series = series
    for i in range(max_d + 1):
        if i > 0:
            # Order-i differencing means applying a lag-1 difference i times,
            # so difference the previous iteration's result once more.
            # `series.diff(i)` would instead compute the lag-i difference
            # x[t] - x[t-i], which is a different transformation for i >= 2.
            test_series = test_series.diff().dropna()

        result = adfuller(test_series)
        p_value = result[1]

        if p_value <= 0.05:
            print(f"Column '{series.name}' is stationary after {i} differences (d = {i})")
            return i
    print(f"Column '{series.name}' could not be made stationary within {max_d} differences")
    return max_d

# Map each non-technical feature column to the d value found for it
d_values = {
    feature: find_d_value(df[feature])
    for feature in non_technical_features
}

# Show the collected mapping
print("d values for each column:")
print(d_values)

Column 'google_trends_bitcoin' is stationary after 0 differences (d = 0)
Column 'active_addresses_blockchain' is stationary after 1 differences (d = 1)
Column 'hash_rate_blockchain' is stationary after 1 differences (d = 1)
Column 'miner_revenue_blockchain' is stationary after 1 differences (d = 1)
Column 'Gold_Close' is stationary after 1 differences (d = 1)
Column 'Oil_Close' is stationary after 1 differences (d = 1)
Column 'DJI' is stationary after 1 differences (d = 1)
Column 'GSPC' is stationary after 1 differences (d = 1)
Column 'IXIC' is stationary after 1 differences (d = 1)
Column 'NYSE FANG+' is stationary after 1 differences (d = 1)
Column 'ARK Innovation ETF' is stationary after 1 differences (d = 1)
Column 'CBOE Volatility Index' is stationary after 0 differences (d = 0)
Column 'iShares MSCI Emerging Markets ETF' is stationary after 1 differences (d = 1)
Column 'Shanghai Composite Index' is stationary after 0 differences (d = 0)
Column 'USD Index (DXY)' is stationary after

In [8]:
# Function to fit auto_arima and return the model
def fit_auto_arima(series, seasonal_period=12):
    """Run `auto_arima` on `series` with a seasonal search and return its result.

    Parameters
    ----------
    series
        Data passed to `auto_arima` as its first argument.
    seasonal_period : int, default 12
        Forwarded to `auto_arima` as `m` (e.g., 12 for monthly data).

    Returns
    -------
    The object returned by `auto_arima`.
    """
    search_options = dict(
        seasonal=True,
        m=seasonal_period,            # seasonality
        max_p=3, max_d=1, max_q=3,    # non-seasonal order limits
        max_P=2, max_D=1, max_Q=2,    # seasonal order limits
        trace=True,                   # print progress
        error_action='ignore',        # ignore fitting errors
        suppress_warnings=True,
    )
    return auto_arima(series, **search_options)

# Function to process each column in parallel
def process_column(column_name, series, seasonal_period=12):
    """Fit a model for one column via `fit_auto_arima` and summarise the result.

    Prints a progress line before fitting and the selected orders afterwards.

    Returns
    -------
    dict
        Keys 'column' (the given name), 'order', 'seasonal_order' (read from
        the fitted model) and 'model' (the fitted model itself).
    """
    print(f"Processing column: {column_name}")
    fitted = fit_auto_arima(series, seasonal_period)
    order, seasonal_order = fitted.order, fitted.seasonal_order
    print(f"Best SARIMA parameters for {column_name}: {order}, {seasonal_order}")
    return dict(
        column=column_name,
        order=order,
        seasonal_order=seasonal_order,
        model=fitted,
    )

# Seasonal period (set based on your data's seasonality)
seasonal_period = 12  # Example: 12 for monthly data with yearly seasonality

# Process all columns in parallel
results = Parallel(n_jobs=-1)(
    delayed(process_column)(column, df[column], seasonal_period)
    for column in non_technical_features
)

# Store results in a dictionary keyed by column name
sarima_params = {result['column']: result for result in results}

# Print the results
print("\nSARIMA parameters for each column:")
for column, result in sarima_params.items():
    print(f"Column: {column}")
    print(f"Order: {result['order']}")
    print(f"Seasonal Order: {result['seasonal_order']}")
    # `aic` is a method on the fitted pmdarima model. It has to be called;
    # formatting the bare attribute prints the bound-method repr, not the score.
    print(f"Model AIC: {result['model'].aic()}")
    print("------")

Processing column: DJI
Processing column: IXIC
Processing column: ARK Innovation ETF
Processing column: active_addresses_blockchain
Processing column: GSPC
Processing column: miner_revenue_blockchain
Processing column: NYSE FANG+
Processing column: google_trends_bitcoin
Processing column: Gold_Close
Processing column: Oil_Close
Performing stepwise search to minimize aic
Performing stepwise search to minimize aic
Performing stepwise search to minimize aic
Performing stepwise search to minimize aic
Performing stepwise search to minimize aic
Performing stepwise search to minimize aic
Performing stepwise search to minimize aic
Performing stepwise search to minimize aic
Performing stepwise search to minimize aic
Processing column: hash_rate_blockchain
Performing stepwise search to minimize aic
Processing column: CBOE Volatility Index
Performing stepwise search to minimize aic
Performing stepwise search to minimize aic
 ARIMA(2,1,2)(1,0,1)[12] intercept   : AIC=14472.728, Time=1.43 sec
 ARIM

In [14]:
# Load the raw historical data file, parsing "Date" and using it as the index
btc_price = pd.read_csv(
    "../data/raw/historical_data.csv",
    index_col="Date",
    parse_dates=["Date"],
)

# Columns of `btc_price` to fit models for
high_low_vol_col = ['High', 'Low', 'Volume']

# Function to fit auto_arima and return the model
# NOTE(review): a behaviourally identical `fit_auto_arima` is already defined in
# an earlier cell of this notebook; this redefinition shadows it.
def fit_auto_arima(series, seasonal_period=12):
    """Run `auto_arima` on `series` with a seasonal search and return its result.

    Parameters
    ----------
    series
        Data passed to `auto_arima` as its first argument.
    seasonal_period : int, default 12
        Forwarded to `auto_arima` as `m` (e.g., 12 for monthly data).

    Returns
    -------
    The object returned by `auto_arima`.
    """
    search_options = dict(
        seasonal=True,
        m=seasonal_period,            # seasonality
        max_p=3, max_d=1, max_q=3,    # non-seasonal order limits
        max_P=2, max_D=1, max_Q=2,    # seasonal order limits
        trace=True,                   # print progress
        error_action='ignore',        # ignore fitting errors
        suppress_warnings=True,
    )
    return auto_arima(series, **search_options)

# Function to process each column in parallel
# NOTE(review): a behaviourally identical `process_column` is already defined in
# an earlier cell of this notebook; this redefinition shadows it.
def process_column(column_name, series, seasonal_period=12):
    """Fit a model for one column via `fit_auto_arima` and summarise the result.

    Prints a progress line before fitting and the selected orders afterwards.

    Returns
    -------
    dict
        Keys 'column' (the given name), 'order', 'seasonal_order' (read from
        the fitted model) and 'model' (the fitted model itself).
    """
    print(f"Processing column: {column_name}")
    fitted = fit_auto_arima(series, seasonal_period)
    order, seasonal_order = fitted.order, fitted.seasonal_order
    print(f"Best SARIMA parameters for {column_name}: {order}, {seasonal_order}")
    return dict(
        column=column_name,
        order=order,
        seasonal_order=seasonal_order,
        model=fitted,
    )

# Seasonal period (set based on your data's seasonality)
seasonal_period = 12  # Example: 12 for monthly data with yearly seasonality

# Process all columns in parallel
results = Parallel(n_jobs=-1)(
    delayed(process_column)(column, btc_price[column], seasonal_period)
    for column in high_low_vol_col
)

# Store results in a dictionary keyed by column name
sarima_params2 = {result['column']: result for result in results}

# Print the results
print("\nSARIMA parameters for each column:")
for column, result in sarima_params2.items():
    print(f"Column: {column}")
    print(f"Order: {result['order']}")
    print(f"Seasonal Order: {result['seasonal_order']}")
    # `aic` is a method on the fitted pmdarima model. It has to be called;
    # formatting the bare attribute prints the bound-method repr, not the score.
    print(f"Model AIC: {result['model'].aic()}")
    print("------")

Processing column: Low
Processing column: VolumeProcessing column: High

Performing stepwise search to minimize aic
Performing stepwise search to minimize aic
Performing stepwise search to minimize aic
 ARIMA(2,1,2)(1,0,1)[12] intercept   : AIC=177649.548, Time=1.26 sec
 ARIMA(0,1,0)(0,0,0)[12] intercept   : AIC=178860.476, Time=0.02 sec
 ARIMA(1,1,0)(1,0,0)[12] intercept   : AIC=178366.563, Time=0.22 sec
 ARIMA(0,1,1)(0,0,1)[12] intercept   : AIC=177773.305, Time=0.36 sec
 ARIMA(0,1,0)(0,0,0)[12]             : AIC=178858.489, Time=0.02 sec
 ARIMA(2,1,2)(0,0,1)[12] intercept   : AIC=177650.333, Time=0.75 sec
 ARIMA(2,1,2)(1,0,1)[12] intercept   : AIC=60120.907, Time=3.21 sec
 ARIMA(2,1,2)(1,0,1)[12] intercept   : AIC=59128.408, Time=3.22 sec
 ARIMA(0,1,0)(0,0,0)[12] intercept   : AIC=60161.884, Time=0.02 sec
 ARIMA(0,1,0)(0,0,0)[12] intercept   : AIC=59198.627, Time=0.02 sec
 ARIMA(2,1,2)(1,0,0)[12] intercept   : AIC=177652.357, Time=0.73 sec
 ARIMA(1,1,0)(1,0,0)[12] intercept   : AIC=

In [12]:
# Keep only the serializable fields of each result (the 'model' entry is left out)
serializable_params = {
    feature: {key: params[key] for key in ('column', 'order', 'seasonal_order')}
    for feature, params in sarima_params.items()
}


Serializable SARIMA parameters exported to sarima_params.json


In [15]:
# Keep only the serializable fields of each sarima_params2 result
# (the 'model' entry is left out)
new_serializable_params = {
    feature: {key: params[key] for key in ('column', 'order', 'seasonal_order')}
    for feature, params in sarima_params2.items()
}

# Merge the new entries into serializable_params; entries with the same key are replaced
serializable_params.update(new_serializable_params)


# Write the merged parameters to disk as indented JSON
with open('../results/metrics/sarima_params.json', 'w') as f:
    json.dump(serializable_params, f, indent=4)
print("Serializable SARIMA parameters exported to sarima_params.json")

Serializable SARIMA parameters exported to sarima_params.json
