# Setting up our Notebook

In [10]:
# Things to import

# Standard data, plotting, and mathematical tools
import numpy as np
import pandas as pd
import math as math
import matplotlib as mpl
import matplotlib.pyplot as plt

# XGBoost
import xgboost as xgb

# Training and Evaluation Tools
from sklearn.metrics import accuracy_score, mean_squared_error
from sklearn.model_selection import train_test_split
from sklearn.metrics import confusion_matrix
from sklearn.model_selection import GridSearchCV

# Importing the data

In [11]:
# Importing the data for BTC

dfs=['PCA 24 Hourly BTC.csv', 'PCA 24 Hourly ETH.csv', 'PCA 24 Hourly ADA.csv']

# XGBoost for BTC

## Training and testing data

In [12]:
# Train and test splitting and scaling
X=pd.read_csv(dfs[0])
y=X['Label'].values
X=X.drop('Label', axis=1)
X=X.drop('Unnamed: 0', axis=1)

# split into train and test
X_train, X_test, y_train, y_test = train_test_split(X, y,test_size=0.2, random_state=100, shuffle=True)

In [13]:
buy=list(y).count(2)
sell=list(y).count(0)
hold=list(y).count(1)
tot=len(y)

print(hold/tot)

0.3486034397005314


## Grid Search XGB

In [14]:
# Setting up our grid

param_grid = {
    "max_depth": [3, 4, 5, 7],
    "learning_rate": [0.1, 0.01, 0.05],
    "gamma": [0, 0.25, 1],
    "reg_lambda": [0, 1, 10],
    "scale_pos_weight": [1, 3, 5],
    "subsample": [0.8],
    "colsample_bytree": [0.5],
}

# Fitting our XGBoost with out grid

# Init classifier
xgb_cl = xgb.XGBClassifier()

# Init Grid Search
grid_cv = GridSearchCV(xgb_cl, param_grid, n_jobs=-1, cv=5)

# Fit
grid_cv.fit(X_train, y_train)

# Getting the score and best parameters from our grid search

print('Score ',grid_cv.best_score_)
print('Params ', grid_cv.best_params_)



Parameters: { "scale_pos_weight" } might not be used.

  This may not be accurate due to some parameters are only used in language bindings but
  passed down to XGBoost core.  Or some parameters are not used but slip through this
  verification. Please open an issue if you find above cases.


Score  0.8052356020942408
Params  {'colsample_bytree': 0.5, 'gamma': 1, 'learning_rate': 0.1, 'max_depth': 7, 'reg_lambda': 1, 'scale_pos_weight': 1, 'subsample': 0.8}


In [15]:
# Training our XGBoost with our best params

# Setting up our XGBoost with our best params
best_xgb = xgb.XGBClassifier(
    grid_cv.best_params_,
    colsample_bytree=0.5,
    subsample=0.8)

# Fitting the model
best_xgb.fit(X_train, y_train)

# Performance

# Predict
preds = best_xgb.predict(X_test)
print(preds)
# Score
print(accuracy_score(y_test, preds))
confusion_matrix(y_test, preds)





[1. 1. 0. ... 0. 2. 0.]
0.8087946603847664


array([[1890,  354,   48],
       [ 269, 2026,  351],
       [  47,  392, 2264]], dtype=int64)

In [16]:
# save to JSON
best_xgb.save_model("Models/XGB BTC.json")

# Loading the model
xgb_model_BTC = xgb.XGBClassifier()
xgb_model_BTC.load_model("Models/XGB BTC.json")

# XGBoost for ETH

## Training and testing data

In [17]:
# Train and test splitting and scaling
X=pd.read_csv(dfs[1])
y=X['Label'].values
X=X.drop('Label', axis=1)
X=X.drop('Unnamed: 0', axis=1)

# split into train and test
X_train, X_test, y_train, y_test = train_test_split(X, y,test_size=0.2, random_state=100, shuffle=True)

In [18]:
buy=list(y).count(2)
sell=list(y).count(0)
hold=list(y).count(1)
tot=len(y)

print(hold/tot)

0.25784665322897304


## Grid Search XGB

In [19]:
# Setting up our grid

param_grid = {
    "max_depth": [3, 4, 5, 7],
    "learning_rate": [0.1, 0.01, 0.05],
    "gamma": [0, 0.25, 1],
    "reg_lambda": [0, 1, 10],
    "scale_pos_weight": [1, 3, 5],
    "subsample": [0.8],
    "colsample_bytree": [0.5],
}

# Fitting our XGBoost with out grid

# Init classifier
xgb_cl = xgb.XGBClassifier()

# Init Grid Search
grid_cv = GridSearchCV(xgb_cl, param_grid, n_jobs=-1, cv=5)

# Fit
grid_cv.fit(X_train, y_train)

# Getting the score and best parameters from our grid search

print('Score ',grid_cv.best_score_)
print('Params ', grid_cv.best_params_)



Parameters: { "scale_pos_weight" } might not be used.

  This may not be accurate due to some parameters are only used in language bindings but
  passed down to XGBoost core.  Or some parameters are not used but slip through this
  verification. Please open an issue if you find above cases.


Score  0.786812827225131
Params  {'colsample_bytree': 0.5, 'gamma': 0.25, 'learning_rate': 0.1, 'max_depth': 7, 'reg_lambda': 1, 'scale_pos_weight': 1, 'subsample': 0.8}


In [20]:
# Training our XGBoost with our best params

# Setting up our XGBoost with our best params
best_xgb = xgb.XGBClassifier(
    grid_cv.best_params_,
    colsample_bytree=0.5,
    subsample=0.8)

# Fitting the model
best_xgb.fit(X_train, y_train)

# Performance

# Predict
preds = best_xgb.predict(X_test)
print(preds)
# Score
print(accuracy_score(y_test, preds))
confusion_matrix(y_test, preds)





[1. 2. 0. ... 0. 2. 0.]
0.7970160973694542


array([[2235,  326,   81],
       [ 319, 1234,  386],
       [ 105,  334, 2621]], dtype=int64)

In [21]:
# save to JSON
best_xgb.save_model("Models/XGB ETH.json")

# Loading the model
xgb_model_ETH = xgb.XGBClassifier()
xgb_model_ETH.load_model("Models/XGB ETH.json")

# XGBoost for ADA

## Training and testing data

In [22]:
# Train and test splitting and scaling
X=pd.read_csv(dfs[2])
y=X['Label'].values
X=X.drop('Label', axis=1)
X=X.drop('Unnamed: 0', axis=1)

# split into train and test
X_train, X_test, y_train, y_test = train_test_split(X, y,test_size=0.2, random_state=100, shuffle=True)

## Grid Search XGB

In [23]:
# Setting up our grid

param_grid = {
    "max_depth": [3, 4, 5, 7],
    "learning_rate": [0.1, 0.01, 0.05],
    "gamma": [0, 0.25, 1],
    "reg_lambda": [0, 1, 10],
    "scale_pos_weight": [1, 3, 5],
    "subsample": [0.8],
    "colsample_bytree": [0.5],
}

# Fitting our XGBoost with out grid

# Init classifier
xgb_cl = xgb.XGBClassifier()

# Init Grid Search
grid_cv = GridSearchCV(xgb_cl, param_grid, n_jobs=-1, cv=5)

# Fit
grid_cv.fit(X_train, y_train)

# Getting the score and best parameters from our grid search

print('Score ',grid_cv.best_score_)
print('Params ', grid_cv.best_params_)



Parameters: { "scale_pos_weight" } might not be used.

  This may not be accurate due to some parameters are only used in language bindings but
  passed down to XGBoost core.  Or some parameters are not used but slip through this
  verification. Please open an issue if you find above cases.


Score  0.7891861450572925
Params  {'colsample_bytree': 0.5, 'gamma': 1, 'learning_rate': 0.1, 'max_depth': 7, 'reg_lambda': 1, 'scale_pos_weight': 1, 'subsample': 0.8}


In [24]:
# Training our XGBoost with our best params

# Setting up our XGBoost with our best params
best_xgb = xgb.XGBClassifier(
    grid_cv.best_params_,
    colsample_bytree=0.5,
    subsample=0.8)

# Fitting the model
best_xgb.fit(X_train, y_train)

# Performance

# Predict
preds = best_xgb.predict(X_test)
print(preds)
# Score
print(accuracy_score(y_test, preds))
confusion_matrix(y_test, preds)





[2. 0. 2. ... 0. 0. 2.]
0.7941787941787942


array([[640,  55,  40],
       [ 93, 109, 102],
       [ 41,  65, 779]], dtype=int64)

In [25]:
# save to JSON
best_xgb.save_model("Models/XGB ADA.json")

# Loading the model
xgb_model_ADA = xgb.XGBClassifier()
xgb_model_ADA.load_model("Models/XGB ADA.json")