# Statsforecast Models

In [None]:
# Import
import pandas as pd
import numpy as np
import hvplot.pandas
from pathlib import Path
from finta import TA
from statsforecast import StatsForecast
from statsforecast.models import (
    AutoARIMA,
    HoltWinters,
    CrostonClassic as Croston, 
    HistoricAverage,
    DynamicOptimizedTheta as DOT,
    SeasonalNaive
)
from datasetsforecast.losses import mse, mae, rmse
%matplotlib inline

### Set Parameters & Import the OHLCV dataset into a Pandas DataFrame.

In [None]:
# Market selection.
# coin of interest: BTC, ETH, XRP, DOGE, ADA
# pair of interest: USD, USDT
coin = 'BTC'
pair = 'USD'

# exchange of interest: Bitfinex, Binance, Coinbase
exchange = 'Bitfinex'

# data interval of interest: 1h, 1d
time = '1h'

# Load the saved historical csv data from the Data folder, which was downloaded from cryptoDataOnline.com.
# `infer_datetime_format` is intentionally not passed: it is deprecated since pandas 2.0,
# where `parse_dates` already infers a single consistent format by default.
df = pd.read_csv(
    Path(f"./Data/Formatted_Data/{exchange}/{pair}/{exchange}_{coin}_{time}.csv"),
    index_col="Datetime",
    parse_dates=True,
)
display(df.head())

In [None]:
# Put the rows in chronological order (ascending is the default sort direction).
df = df.sort_index()

In [None]:
# Plot the Close column of the full dataset with hvplot.
df['Close'].hvplot()

## Format Dataframe for StatsForecast configuration

In [None]:
# Return = percentage change of Close relative to the previous row.
df["Return"] = df["Close"].pct_change()

# Boundaries of the training/testing timeframe (label-based slice on the Datetime index).
start = '2019-01-01 00:00:00'
end = '2020-01-10 00:00:00'

# Independent copy of the rows inside that timeframe.
slice_df = df.loc[start:end, :].copy()
slice_df.head(5)

In [None]:
# Reset the index, moving Datetime to a regular column, and store the result as Y_df.
Y_df = slice_df.reset_index()

# Drop every column except Datetime, Symbol_Pair and Return.
# The quote-currency volume column is named after the pair (Volume_USD for USD,
# Volume_USDT for tether pairs), so its name is built from `pair` instead of being
# hard-coded; no manual edit is needed when switching pairs.
Y_df = Y_df.drop(columns=['Exchange', 'Open', 'High', 'Low', 'Close',
                          f'Volume_{pair}', f'Volume_in_{coin}'])

# Rename columns to the long format StatsForecast expects:
# unique_id (series identifier), ds (timestamp), y (target value).
Y_df = Y_df.rename({'Symbol_Pair': 'unique_id', 'Return': 'y', 'Datetime': 'ds'}, axis=1)

# Drop all rows containing null values.
Y_df = Y_df.dropna()
Y_df

In [None]:
# Plot the series stored in Y_df (the returns, column y, for the sliced date range).
StatsForecast.plot(Y_df)

## Statsforecast model instantiation and predictions

In [None]:
# Models to fit, with their instantiation parameters.
# The data is hourly, so every seasonal model uses a 24-observation (daily) season.
# HoltWinters is given the same season explicitly: its default season_length is 1,
# which would leave it without the seasonality the other seasonal models use.
models = [
    AutoARIMA(season_length=24),
    HoltWinters(season_length=24),
    Croston(),
    SeasonalNaive(season_length=24),
    HistoricAverage(),
    DOT(season_length=24),
]

In [None]:
# Instantiate the StatsForecast class as sf.
#   freq='H'        -> hourly data (matches time = '1h'; adjust if a daily interval is loaded)
#   n_jobs=-1       -> use all available cores
#   fallback_model  -> used in place of a model that fails to fit; it uses the same
#                      24-hour season as the main models so it stays consistent with hourly data
sf = StatsForecast(
    df=Y_df,
    models=models,
    freq='H',
    n_jobs=-1,
    fallback_model=SeasonalNaive(season_length=24),
)

In [None]:
# Forecast 48 steps ahead with every model, including 90% prediction intervals.
forecasts_df = sf.forecast(
    h=48,
    level=[90],
)
forecasts_df.head(5)

In [None]:
# Plot the historical series in Y_df together with the forecasts of all models.
sf.plot(Y_df,forecasts_df)

In [None]:
# Plot selected models with their 90% prediction intervals.
# The series ids are read from Y_df instead of being hard-coded, so the cell works
# unchanged for any coin/pair (including tether pairs).
sf.plot(
    Y_df,
    forecasts_df,
    models=["SeasonalNaive", "DynamicOptimizedTheta"],
    unique_ids=Y_df['unique_id'].unique().tolist(),
    level=[90],
)

In [None]:
# Explore other models: AutoARIMA with its 90% prediction interval.
# The series ids are read from Y_df instead of being hard-coded, so the cell works
# unchanged for any coin/pair (including tether pairs).
sf.plot(
    Y_df,
    forecasts_df,
    models=["AutoARIMA"],
    unique_ids=Y_df['unique_id'].unique().tolist(),
    level=[90],
)

In [None]:
# Cross validation over 2 windows; each window forecasts 24 steps and the
# windows are spaced 24 steps apart.
crossvaldation_df = sf.cross_validation(df=Y_df, h=24, step_size=24, n_windows=2)

In [None]:
# Show the last rows of the cross-validation results.
crossvaldation_df.tail()

In [None]:
# Cross validation for evaluation statistics
def evaluate_cross_validation(df, metric):
    """Score every model in a cross-validation frame and pick the best one per series.

    Parameters
    ----------
    df : pandas.DataFrame
        Cross-validation results with the columns ``ds``, ``cutoff``, ``y`` and one
        column per model. ``unique_id`` may be either the index or a regular column.
    metric : callable
        Loss function called as ``metric(y_true_values, y_pred_values)`` with two
        array-like arguments; it must return a single number (lower is better).

    Returns
    -------
    pandas.DataFrame
        Indexed by ``unique_id``, with one column per model holding the loss averaged
        over all cutoffs, plus a ``best_model`` column naming the model with the
        lowest average loss.
    """
    # Normalise the layout: work with unique_id as a column regardless of how the
    # caller supplied it, so it is never mistaken for a model column below.
    if 'unique_id' not in df.columns and 'unique_id' in df.index.names:
        df = df.reset_index(level='unique_id')

    reserved = ('unique_id', 'ds', 'cutoff', 'y')
    models = [col for col in df.columns if col not in reserved]

    windows = df.groupby(['unique_id', 'cutoff'])
    evals = []
    for model in models:
        # Loss for every (unique_id, cutoff) window of this model. Only the two
        # columns the metric needs are passed on, which also keeps the grouping
        # columns out of the applied frame.
        scores = windows[['y', model]].apply(
            lambda window: metric(window['y'].values, window[model].values)
        )
        evals.append(scores.rename(model))
    evals = pd.concat(evals, axis=1)
    # Average the error metrics over all cutoffs for every model / unique_id pair.
    evals = evals.groupby('unique_id').mean(numeric_only=True)
    evals['best_model'] = evals.idxmin(axis=1)
    return evals

In [None]:
# Average RMSE per model and series over the cross-validation windows.
evaluation_df = evaluate_cross_validation(df=crossvaldation_df, metric=rmse)

print("Dataframe of crossvalidation results Root Mean Squared Error")
evaluation_df.head(5)

In [None]:
# Count, for each model, the number of series for which it was the best model.
summary_df = evaluation_df.groupby('best_model').size().sort_values().to_frame()

# reset_index() returns a new DataFrame, so it is assigned back before the columns
# are renamed; renaming the columns of the temporary result would be lost.
summary_df = summary_df.reset_index()
summary_df.columns = ["Model", "Nr. of unique_ids"]

In [None]:
# Series whose best cross-validation model is AutoARIMA.
is_autoarima_best = evaluation_df['best_model'] == "AutoARIMA"
seasonal_ids = evaluation_df[is_autoarima_best].index

# Compare AutoARIMA and DynamicOptimizedTheta forecasts for those series.
sf.plot(
    Y_df,
    forecasts_df,
    models=["AutoARIMA", "DynamicOptimizedTheta"],
    unique_ids=seasonal_ids,
)

In [None]:
# Print a summary (columns, dtypes, non-null counts) of the forecast DataFrame.
forecasts_df.info()

# Evaluate Model Predictions

In [None]:
# Create a new DataFrame holding the forecast results, indexed by timestamp.
# The frame is built from forecasts_df first (it cannot be renamed before it exists),
# and 'ds' is renamed to 'Datetime' before that column is used as the index.
eval_df = forecasts_df.rename({'ds': 'Datetime', 'y': 'Predicted Returns'}, axis=1)
eval_df = eval_df.set_index('Datetime').copy()

eval_df.head()

In [None]:
# Join pred_df and forecasts2_df column-wise and keep the rows labelled
# 2022-12-27 through 2023-01-04.
# NOTE(review): pred_df and forecasts2_df are not defined in the cells visible here —
# confirm they are created before this cell runs.
# NOTE(review): this window differs from the 2019-01-01..2020-01-10 slice used to build
# Y_df above — presumably the forecasts come from a different run; verify.
backtest_df = pd.concat([pred_df, forecasts2_df], axis=1).loc['2022-12-27':'2023-01-04']
backtest_df

In [None]:
# Remove the 'close' column, then discard every row that still has a missing value.
backtest_df = backtest_df.drop(columns='close').dropna()

In [None]:
# Display the cleaned backtest DataFrame.
backtest_df

In [None]:
# Plot every column of the backtest DataFrame with hvplot.
backtest_df.hvplot()

In [None]:
import math
from sklearn.metrics import mean_squared_error

# Actual returns versus the AutoARIMA predictions over the backtest window.
y_test = backtest_df['Returns']
pred = backtest_df['AutoARIMA']

# Compute the MSE once and reuse it for the RMSE.
autoarima_mse = mean_squared_error(y_test, pred)

print("The MSE of the y_test and model predicted value was:")
print(autoarima_mse)
print("The square-root of the MSE of the y_test and model predicted value was:")
print(math.sqrt(autoarima_mse))

In [None]:

# Signed sum of (prediction - actual return) for each model over the backtest window.
# Positive and negative errors offset each other in this total.
actual_returns = backtest_df['Returns']

AA_Returns_Difference = (backtest_df['AutoARIMA'] - actual_returns).sum().sum()
SN_Returns_Difference = (backtest_df['SeasonalNaive'] - actual_returns).sum().sum()
DOT_Returns_Difference = (backtest_df['DynamicOptimizedTheta'] - actual_returns).sum().sum()

for message, total in (
    ("The sum of the differences for AutoARIMA is:", AA_Returns_Difference),
    ("The sum of the differences for SeasonalNaive is:", SN_Returns_Difference),
    ("The sum of the differences for DynamicOptimizedTheta is:", DOT_Returns_Difference),
):
    print(message, total)