In [1]:
# === Setup & Imports ===
import warnings
# Blanket filter: with no category/module argument this suppresses every
# warning for the rest of the kernel session, not just cosmetic ones.
# NOTE(review): this presumably also hides any convergence warnings raised
# while fitting the ARIMA models further down -- confirm that is intended,
# or narrow the filter to the specific categories that are pure noise.
warnings.filterwarnings("ignore")

import numpy as np
import pandas as pd
from pathlib import Path
import matplotlib.pyplot as plt
import seaborn as sns

from sklearn.metrics import mean_absolute_error, mean_squared_error, root_mean_squared_error
from sklearn.model_selection import TimeSeriesSplit


In [2]:
# === Load processed dataset ===
# Read the processed feature table from disk (parsing "Date" as datetimes) and
# order the rows by coin name, then chronologically within each coin.
processed_path = Path("./data/processed/crypto_features.csv")
df = (
    pd.read_csv(processed_path, parse_dates=["Date"])
    .sort_values(by=["Name", "Date"])
)
# Quick sanity check: overall shape plus the first three rows.
print(" Loaded processed dataset:", df.shape)
print(df.head(n=3))


 Loaded processed dataset: (8142, 17)
      Name                Date  LogReturn  Volatility_7d  Volatility_30d  \
0  Bitcoin 2013-04-29 23:59:59  -0.003573      -0.471133       -0.518136   
1  Bitcoin 2013-04-30 23:59:59  -0.729296      -0.471133       -0.518136   
2  Bitcoin 2013-05-01 23:59:59  -3.090928       1.453074        1.575664   

   Momentum_7d  Momentum_30d  Close_lag1  Close_lag7  Close_lag30  \
0    -0.032620     -0.068549   -0.039989   -0.043431    -0.077756   
1    -1.836824     -3.170161   -0.323441   -0.043431    -0.077756   
2    -4.813778     -8.306402   -0.324176   -0.043431    -0.077756   

   Volume_lag1  Volume_lag7  Volume_lag30  Bollinger_Width  DayOfWeek  \
0    -0.402534    -0.401335     -0.401902        -0.412876  -1.498926   
1    -0.472872    -0.401335     -0.401902        -0.831533  -0.999079   
2    -0.472872    -0.401335     -0.401902         0.315612  -0.499233   

      Month       Close  
0 -0.730178  144.539993  
1 -0.730178  139.000000  
2 -0.4385

In [None]:
# === Imports ===
import matplotlib.pyplot as plt
import plotly.graph_objects as go
from statsmodels.tsa.arima.model import ARIMA
from sklearn.metrics import mean_absolute_error, mean_squared_error
import numpy as np

# === ARIMA Baseline ===
# One-step-ahead walk-forward evaluation of an ARIMA model on each coin's
# "Close" series: fit on everything observed so far, forecast the next point,
# then append the true value to the history and repeat. The model is refit
# from scratch at every step, so runtime grows with the number of test points.
N_DEMO_COINS = 3          # only the first few coins, to keep the demonstration tractable
TRAIN_FRACTION = 0.8      # chronological share of each series used for the initial fit
ARIMA_ORDER = (1, 1, 1)   # (p, d, q) passed to statsmodels' ARIMA
# Built from ARIMA_ORDER so the label in the metrics table cannot drift from
# the order that is actually fitted; yields "ARIMA(1,1,1)" for the default.
ARIMA_LABEL = "ARIMA(" + ",".join(str(k) for k in ARIMA_ORDER) + ")"

coins = df["Name"].unique()[:N_DEMO_COINS]   # Taking first coins for demonstration
results_arima = []

for coin in coins:
    # Per-coin frame: drop rows without a Close value, index by date, sort by time.
    sub = df[df["Name"] == coin].dropna(subset=["Close"]).copy()
    sub["Date"] = pd.to_datetime(sub["Date"])
    sub = sub.set_index("Date").sort_index()

    # --- Train-Test Split ---
    # Chronological split (no shuffling). .iloc makes the positional slicing
    # explicit instead of relying on how Series[...] treats integer slices on
    # a datetime index.
    train_size = int(len(sub) * TRAIN_FRACTION)
    train, test = sub["Close"].iloc[:train_size], sub["Close"].iloc[train_size:]

    # With very few usable rows one side of the split is empty; the ARIMA fit
    # or the metric calls would then raise and abort the loop for every
    # remaining coin. Skip such coins explicitly instead.
    if train.empty or test.empty:
        print(f"Skipping {coin}: only {len(sub)} usable rows, cannot form a train/test split")
        continue

    # --- Walk-forward ARIMA forecasting ---
    preds, history = [], list(train)
    for t in range(len(test)):
        model = ARIMA(history, order=ARIMA_ORDER)
        model_fit = model.fit()
        yhat = model_fit.forecast()[0]   # default horizon: the single next step
        preds.append(yhat)
        history.append(test.iloc[t])     # reveal the true value before the next refit

    # --- Metrics ---
    mae = mean_absolute_error(test, preds)
    rmse = np.sqrt(mean_squared_error(test, preds))
    results_arima.append({"Coin": coin, "Model": ARIMA_LABEL, "MAE": mae, "RMSE": rmse})

    # --- Plot Actual vs Predicted ---
    fig = go.Figure()
    fig.add_trace(go.Scatter(x=test.index, y=test.values,
                             mode='lines', name='Actual', line=dict(color='blue')))
    fig.add_trace(go.Scatter(x=test.index, y=preds,
                             mode='lines', name='Predicted', line=dict(color='orange')))
    fig.update_layout(title=f'ARIMA Forecast - {coin}',
                      xaxis_title='Date', yaxis_title='Close Price',
                      template='plotly_white')
    fig.show()

# === Combine Metrics ===
# One row per evaluated coin; empty if every coin was skipped.
arima_df = pd.DataFrame(results_arima)
print("ARIMA Model Complete")
print(arima_df)
