In [18]:
import xgboost as xgb
import pandas as pd
import matplotlib.pyplot as plt
from sklearn.model_selection import train_test_split

In [19]:
FORWARD_STEPS = [1, 3, 5, 10]

In [25]:
df = pd.read_csv("Data/intermediate_data.csv")

In [22]:
df_clean = df.dropna()

In [21]:
def get_model():

    xgbparams = {
        'eta': 0.01,
        'booster': 'gbtree',
        'subsample': 0.76,
        'colsample_bytree': 0.6,
        'min_child_weight': 1,
        'n_estimators': 1000,
        'lambda': 3,
        'alpha': 0,
        'gamma': 0,
        'max_depth': 6,
        'booster': 'gbtree',
        'tree_method': 'gpu_hist',
        'objective': 'reg:squarederror',
    }

    model = xgb.XGBRegressor(**xgbparams)

    return model

In [23]:
for forward_step in FORWARD_STEPS:

    X = df_clean[["Bitcoin % Change", "Gold % Change",
                  f"Bitcoin % Naive Prediction ({forward_step} Days)",
                  "30-day Bitcoin rolling standard deviation",
                  "Bitcoin PSY",
                  "Bitcoin RSI"]].iloc[:-forward_step, :].copy()
    y = df_clean["Value"][forward_step:].values / df_clean["Value"][:-forward_step].values - 1

    X_train, X_val, y_train, y_val = train_test_split(X, y, test_size=0.25)

    model = get_model()

    model.fit(X_train, y_train,
              eval_metric="rmse", 
              eval_set=[(X_val, y_val)],
              early_stopping_rounds=300,
              verbose=50)

    df[f"Bitcoin % Final Prediction ({forward_step} Days)"] = pd.Series(model.predict(df_clean[X.columns]), index=df_clean.index)

[0]	validation_0-rmse:0.49524
[50]	validation_0-rmse:0.30301
[100]	validation_0-rmse:0.18784
[150]	validation_0-rmse:0.12021
[200]	validation_0-rmse:0.08207
[250]	validation_0-rmse:0.06212
[300]	validation_0-rmse:0.05281
[350]	validation_0-rmse:0.04865
[400]	validation_0-rmse:0.04699
[450]	validation_0-rmse:0.04627
[500]	validation_0-rmse:0.04600
[550]	validation_0-rmse:0.04592
[600]	validation_0-rmse:0.04590
[650]	validation_0-rmse:0.04591
[700]	validation_0-rmse:0.04591
[750]	validation_0-rmse:0.04595
[800]	validation_0-rmse:0.04600
[850]	validation_0-rmse:0.04606
[900]	validation_0-rmse:0.04611
[910]	validation_0-rmse:0.04611
[0]	validation_0-rmse:0.49290
[50]	validation_0-rmse:0.30537
[100]	validation_0-rmse:0.19495
[150]	validation_0-rmse:0.13235
[200]	validation_0-rmse:0.09958
[250]	validation_0-rmse:0.08339
[300]	validation_0-rmse:0.07586
[350]	validation_0-rmse:0.07242
[400]	validation_0-rmse:0.07069
[450]	validation_0-rmse:0.06991
[500]	validation_0-rmse:0.06954
[550]	validati

In [24]:
df.to_csv("Data/final_prediction.csv", index=False)