In [1]:
from statsmodels.tsa.ar_model import AutoReg
import numpy as np
from sklearn.metrics import mean_squared_error
from sklearn.preprocessing import StandardScaler
import pandas as pd


In [2]:
# Load the merged dataset; the 'Date' column is parsed as day-first dates.
# NOTE(review): this is an absolute, machine-specific path — consider a configurable data directory.
csv_path = '/Users/gadimg/Library/Mobile Documents/com~apple~CloudDocs/PhD dis/third/Data/Merged_data/Full_with_otherCrypto+mothly.csv'
df = pd.read_csv(csv_path, parse_dates=['Date'], dayfirst=True)

# First-difference every non-date column, then drop every row that contains a NaN
# (at minimum the first row, which has nothing to be differenced against).
numeric_levels = df.drop(columns=['Date'])
df_diff = numeric_levels.diff().dropna()

# Standardise each differenced column with StandardScaler.
# NOTE(review): the scaler is fitted on all rows, including any later hold-out window,
# and the rebuilt frame gets a fresh 0..n-1 index rather than df_diff's index — confirm both are intended.
scaler = StandardScaler()
scaled_values = scaler.fit_transform(df_diff)
df_normalized = pd.DataFrame(scaled_values, columns=df_diff.columns)

# Target series (ETH_Price) and all remaining columns as NumPy arrays
Y = df_normalized['ETH_Price'].values
X = df_normalized.drop(columns=['ETH_Price']).values

# Number of observations after differencing
n = df_normalized.shape[0]

In [8]:
# Expanding-window (recursive) one-step-ahead AR forecasts over the last 100 observations.
# Y and n come from the data-preparation cell; `start` is defined right here.
# NOTE(review): previous comments in this cell mentioned 7 lags, but the code passes lags=1 —
# confirm the intended lag order.
start = n - 100

ar_predictions = []

for t in range(start, n):
    # Training sample: everything observed before position t
    history = Y[:t]

    # Refit the AR(1) model on the grown sample
    ar_model = AutoReg(history, lags=1, old_names=False).fit()

    # For a model fitted on t points, position t is the first out-of-sample step
    next_value = ar_model.predict(start=t, end=t)
    ar_predictions.append(next_value[0])

# Root-mean-squared error of the forecasts against the realised values
holdout_actuals = Y[start:]
ar_mse = mean_squared_error(holdout_actuals, ar_predictions)
ar_rmse = np.sqrt(ar_mse)
print(f"AR Model RMSE for the last 100 values: {ar_rmse}")


AR Model RMSE for the last 100 values: 0.2758212041945253


In [4]:
#determining the appropriate lags 

from statsmodels.tsa.stattools import adfuller

# Check stationarity first: the ADF null hypothesis is a unit root, so a p-value
# below 0.05 is taken here as evidence that Y is stationary.
result = adfuller(Y)
if result[1] < 0.05:
    print("The series is stationary.")
    optimal_lag = None
    best_aic = np.inf
    max_lag = 7  # largest AR order considered; raise it to widen the search

    # Determine the optimal number of lags using AIC on the full dataset.
    # hold_back=max_lag makes every candidate skip the same initial observations,
    # so all models are estimated on an identical sample and their AICs are
    # comparable. Without it, AutoReg holds back only `lag` observations and each
    # candidate is fitted on a different number of points.
    # NOTE(review): selection uses all of Y, including any hold-out window that
    # other cells evaluate on — confirm this look-ahead is acceptable.
    for lag in range(1, max_lag + 1):
        model = AutoReg(Y, lags=lag, hold_back=max_lag, old_names=False).fit()
        if model.aic < best_aic:
            best_aic = model.aic
            optimal_lag = lag

    print(f"Optimal number of lags based on full data AIC: {optimal_lag}")
else:
    # optimal_lag is not assigned on this branch; cells that use it will raise
    # NameError (or pick up a stale value left over from an earlier run).
    print("The series is not stationary. Make it stationary before applying AR model.")


The series is stationary.
Optimal number of lags based on full data AIC: 6


In [5]:
# Expanding-window one-step-ahead AR forecasts using the AIC-selected lag order.
# `start`, `n`, `Y` and `optimal_lag` are taken from earlier cells.
ar_predictions = []
for t in range(start, n):
    history = Y[:t]  # everything observed before position t
    ar_model = AutoReg(history, lags=optimal_lag, old_names=False).fit()
    # Position t is the first out-of-sample step for a model fitted on t points
    next_value = ar_model.predict(start=t, end=t)
    ar_predictions.append(next_value[0])

# Root-mean-squared error of the forecasts against the realised values
holdout_actuals = Y[start:]
ar_mse = mean_squared_error(holdout_actuals, ar_predictions)
ar_rmse = np.sqrt(ar_mse)
print(f"AR Model RMSE for the last 100 values: {ar_rmse}")


AR Model RMSE for the last 100 values: 0.28135957450308663


In [6]:
# Testing for stationarity
from statsmodels.tsa.stattools import adfuller

def adf_test(series, column_name):
    """Run an Augmented Dickey-Fuller test on `series` and print a short report.

    Parameters
    ----------
    series : array-like
        Observations, passed unchanged to ``adfuller`` (with ``autolag='AIC'``).
    column_name : str
        Label shown in the report header.

    Prints the test statistic, the p-value and every critical value, followed
    by one blank line. Returns None.
    """
    outcome = adfuller(series, autolag='AIC')
    statistic, p_value, critical_values = outcome[0], outcome[1], outcome[4]

    report = [
        f'Stationarity test for {column_name}',
        f'ADF Statistic: {statistic}',
        f'p-value: {p_value}',
    ]
    report.extend(
        f'Critical Value ({level}): {threshold}'
        for level, threshold in critical_values.items()
    )
    report.append('')  # trailing blank line separates consecutive reports
    print('\n'.join(report))

# ADF report for every column of the original dataframe (before differencing
# and normalization), skipping the 'Date' column
raw_columns = [name for name in df.columns if name != 'Date']
for name in raw_columns:
    adf_test(df[name], name)

# ADF report for every column of the normalized dataframe
for name in df_normalized.columns:
    adf_test(df_normalized[name], name)


Stationarity test for ETH_Price
ADF Statistic: -2.0876050751855173
p-value: 0.2495277831325552
Critical Value (1%): -3.4360194465416387
Critical Value (5%): -2.8640434537995523
Critical Value (10%): -2.5681028978640104

Stationarity test for ETH_Volume
ADF Statistic: -2.803682634596565
p-value: 0.05774965237684188
Critical Value (1%): -3.4360941478268767
Critical Value (5%): -2.864076408006588
Critical Value (10%): -2.568120448620112

Stationarity test for BTC_Price
ADF Statistic: -1.9093981994300466
p-value: 0.32765289963049593
Critical Value (1%): -3.4359901072390353
Critical Value (5%): -2.86403051064684
Critical Value (10%): -2.5680960046332366

Stationarity test for BTC_Volume
ADF Statistic: -2.8876368527291216
p-value: 0.04680466753474846
Critical Value (1%): -3.4360540638753405
Critical Value (5%): -2.864058725215095
Critical Value (10%): -2.568111031100567

Stationarity test for LINK_number_of_transfers
ADF Statistic: -4.014484254400848
p-value: 0.0013365378855441038
Critical V

Stationarity test for new_tokens_monthly_x
ADF Statistic: -1.5414771984093532
p-value: 0.5129025752082446
Critical Value (1%): -3.4359901072390353
Critical Value (5%): -2.86403051064684
Critical Value (10%): -2.5680960046332366

Stationarity test for CPILFESL
ADF Statistic: -0.1981267656140812
p-value: 0.9387445339387863
Critical Value (1%): -3.4359901072390353
Critical Value (5%): -2.86403051064684
Critical Value (10%): -2.5680960046332366

Stationarity test for EUEPUINDXM
ADF Statistic: -2.4283294371230313
p-value: 0.13389973709844627
Critical Value (1%): -3.4359901072390353
Critical Value (5%): -2.86403051064684
Critical Value (10%): -2.5680960046332366

Stationarity test for CHNMAINLANDEPU
ADF Statistic: -3.312743035204589
p-value: 0.014318381190617126
Critical Value (1%): -3.4359901072390353
Critical Value (5%): -2.86403051064684
Critical Value (10%): -2.5680960046332366

Stationarity test for NASDAQ_Close
ADF Statistic: -1.8679388577868377
p-value: 0.347290680554707
Critical Valu

Stationarity test for WBTC_Volume
ADF Statistic: -13.846954699130494
p-value: 7.120330040051577e-26
Critical Value (1%): -3.4360590437486405
Critical Value (5%): -2.864060922068716
Critical Value (10%): -2.5681122011011426

Stationarity test for USDT_number_of_transfers
ADF Statistic: -11.771571064103341
p-value: 1.0868051591121041e-21
Critical Value (1%): -3.4360941478268767
Critical Value (5%): -2.864076408006588
Critical Value (10%): -2.568120448620112

Stationarity test for USDT_on_chain_volume
ADF Statistic: -12.3262444619694
p-value: 6.579448484936728e-23
Critical Value (1%): -3.436089106551049
Critical Value (5%): -2.864074184091338
Critical Value (10%): -2.568119264202968

Stationarity test for USDT_Price
ADF Statistic: -11.196645235026583
p-value: 2.2976739078421686e-20
Critical Value (1%): -3.4360941478268767
Critical Value (5%): -2.864076408006588
Critical Value (10%): -2.568120448620112

Stationarity test for USDT_Volume
ADF Statistic: -14.207096349546338
p-value: 1.7471944

Stationarity test for XRP_Close
ADF Statistic: -7.123820852458182
p-value: 3.6638959242780043e-10
Critical Value (1%): -3.4361042569951805
Critical Value (5%): -2.8640808675675826
Critical Value (10%): -2.568122823703617

Stationarity test for XRP_Volume
ADF Statistic: -11.774074779860522
p-value: 1.0727868158362064e-21
Critical Value (1%): -3.436089106551049
Critical Value (5%): -2.864074184091338
Critical Value (10%): -2.568119264202968

Stationarity test for DOT_Close
ADF Statistic: -11.003079908991978
p-value: 6.606997194493412e-20
Critical Value (1%): -3.4360342309571053
Critical Value (5%): -2.864049975956835
Critical Value (10%): -2.568106371425197

Stationarity test for DOT_Volume
ADF Statistic: -14.578465485559919
p-value: 4.5087448644258444e-27
Critical Value (1%): -3.4360490926821727
Critical Value (5%): -2.8640565321870635
Critical Value (10%): -2.5681098631379964

Stationarity test for ADA_Close
ADF Statistic: -6.564481423726867
p-value: 8.224236568114506e-09
Critical Valu

In [7]:
# Expanding-window one-step-ahead AR forecasts with the lag order fixed at 1.
# NOTE(review): the previous comment here said "optimal lag", but the code passes lags=1,
# not optimal_lag — confirm which is intended.
# `start`, `n` and `Y` are taken from earlier cells.
ar_predictions = []
for t in range(start, n):
    history = Y[:t]  # everything observed before position t
    ar_model = AutoReg(history, lags=1, old_names=False).fit()
    # Position t is the first out-of-sample step for a model fitted on t points
    next_value = ar_model.predict(start=t, end=t)
    ar_predictions.append(next_value[0])

# Root-mean-squared error of the forecasts against the realised values
holdout_actuals = Y[start:]
ar_mse = mean_squared_error(holdout_actuals, ar_predictions)
ar_rmse = np.sqrt(ar_mse)
print(f"AR Model RMSE for the last 100 values: {ar_rmse}")


AR Model RMSE for the last 100 values: 0.2758212041945253
