# Mantis Research Pipeline

In [7]:
# Initial Imports
import pandas as pd
import numpy as np
import math
from pathlib import Path
from dotenv import load_dotenv
import time
from joblib import dump,load # Save Models
from numpy import random
import os
from datetime import date, datetime, timedelta
import matplotlib.pyplot as plt
from sklearn.metrics import make_scorer
from sklearn.metrics import accuracy_score
from sklearn.metrics import roc_auc_score
from sklearn import preprocessing
from sklearn import svm
import xgboost as xgb
from xgboost import XGBClassifier
from sklearn.pipeline import Pipeline
from sklearn.metrics import confusion_matrix
from sklearn.metrics import classification_report
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.pipeline import Pipeline
from sklearn.model_selection import cross_val_predict
from sklearn.metrics import confusion_matrix
from sklearn.metrics import classification_report

In [62]:
# Read CSV into dataframes
btcusd_6h_historical_csv = Path('df_candles_kraken_btcusd_6h_append_9102.csv')
btcusd_6h_historical = pd.read_csv(btcusd_6h_historical_csv,index_col="time")
btcusd_6h_historical.reset_index(inplace=True)

ethbtc_6h_historical_csv = Path('df_prices_kraken_ethbtc_6h_historical_20181003_20200925.csv')
ethbtc_6h_historical = pd.read_csv(ethbtc_6h_historical_csv, index_col="time")

btcusd_6h_historical.head()

Unnamed: 0,time,close
0,2013-10-06 18:00:00+00:00,122.0
1,2013-10-07 18:00:00+00:00,123.61
2,2013-10-08 00:00:00+00:00,124.18
3,2013-10-09 06:00:00+00:00,123.84
4,2013-10-10 18:00:00+00:00,125.86


In [3]:
# Creating date_strings dataframe for upcoming merge
date_list = pd.date_range(start='10/06/2013', end='09/29/2020',freq='6H')
date_strings = [str(x) for x in date_list][3:]
date_strings_df = pd.DataFrame(date_strings, columns = ['time'])
date_strings_df['time'] = [(x+"+00:00") for x in date_strings_df['time']]
date_strings_df['time'].head()

0    2013-10-06 18:00:00+00:00
1    2013-10-07 00:00:00+00:00
2    2013-10-07 06:00:00+00:00
3    2013-10-07 12:00:00+00:00
4    2013-10-07 18:00:00+00:00
Name: time, dtype: object

In [4]:
# Merges historical data with date_strings dataframe
# makes sure rows are every 6hrs, if there is no row-then make one and forward fill data (shrimpy doesn't print candle if there is no tick)
btcusd_6h_historical_full = pd.merge(date_strings_df, btcusd_6h_historical, on='time', how='left')
btcusd_6h_historical_ffill = btcusd_6h_historical_full.fillna(method="ffill")
btcusd_6h_historical_ffill = btcusd_6h_historical_ffill[0:-19]
btcusd_6h_historical_ffill.head()

Unnamed: 0,time,close
0,2013-10-06 18:00:00+00:00,122.0
1,2013-10-07 00:00:00+00:00,122.0
2,2013-10-07 06:00:00+00:00,122.0
3,2013-10-07 12:00:00+00:00,122.0
4,2013-10-07 18:00:00+00:00,123.61


In [11]:
# Function that generates Feature DataFrame to feed into Machine Learning Algorithm
# Takes historical dataframe as input
def calc_feature_dataframe(prices_df):
    df_features = prices_df.copy()
    # calculate real returns
    df_features['returns'] = (df_features['close'] - df_features['close'].shift(-1))/df_features['close'].shift(-1)
    # calculates log prices
    df_features['log_price'] = np.log(df_features['close'])
    # Contruct log returns
    df_features['log_returns'] = df_features['log_price'] - df_features['log_price'].shift(-1)
    # Construct cum returns
    #df_features['cum_returns'] = math.exp(df_features['log_returns'])-1
    # Construct dependent variables
    df_features['log_returns_shifted'] = df_features['log_returns'].shift(-1)
    # ----------------------- Price Dynamics --------------------------------
    # price dynamics as a one Dimensional particle problem in physics
    # Calculates price velocity by subtracting the previous time's log price from the currrent time's log price
    # This yields the log returns which is one way of representing velocity. See appendix[1]
    df_features['price_velocity_lr_2'] = df_features['log_price'] - df_features['log_price'].shift(-2) 
    df_features['price_velocity_lr_3'] =df_features['log_price'] - df_features['log_price'].shift(-3)
    df_features['price_velocity_lr_4'] = df_features['log_price'] - df_features['log_price'].shift(-4)
    df_features['price_velocity_lr_7'] = df_features['log_price'] - df_features['log_price'].shift(-7)
    df_features['price_velocity_lr_30'] = df_features['log_price'] - df_features['log_price'].shift(-30)
    # calculates rolling mean velocity
    df_features['rolling_mean_velocity_lr_2'] = df_features['log_returns'].rolling(window=2).mean()
    df_features['rolling_mean_velocity_lr_3'] = df_features['log_returns'].rolling(window=3).mean()
    df_features['rolling_mean_velocity_lr_4'] = df_features['log_returns'].rolling(window=4).mean()
    df_features['rolling_mean_velocity_lr_7'] = df_features['log_returns'].rolling(window=7).mean()
    df_features['rolling_mean_velocit_lr_14'] = df_features['log_returns'].rolling(window=14).mean()
    df_features['rolling_mean_velocity_lr_30'] = df_features['log_returns'].rolling(window=30).mean()
    # calculates rolling std of velocity
    df_features['rolling_std_velocity_lr_2'] = df_features['log_returns'].rolling(window=2).std()
    df_features['rolling_std_velocity_lr_3'] = df_features['log_returns'].rolling(window=3).std()
    df_features['rolling_std_velocity_lr_4'] = df_features['log_returns'].rolling(window=4).std()
    df_features['rolling_std_velocity_lr_7'] = df_features['log_returns'].rolling(window=7).std()
    df_features['rolling_std_velocit_lr_14'] = df_features['log_returns'].rolling(window=14).std()
    df_features['rolling_std_velocity_lr_30'] = df_features['log_returns'].rolling(window=30).std()
    df_features.dropna(inplace=True)
    # Create Classification Dataframe to predict trend. We are interested in very bullish cases for our trading strategy
    class_df = df_features.copy()
    # Calculate Very Bullish Threshold- Assumes 1 log returns that are 1 std deviation from mean indicates a very bullish scenario
    vbullish_threshhold = df_features['log_returns'].mean() + df_features['log_returns'].std()
    class_df['trend'] = np.where(class_df['log_returns']>vbullish_threshhold, 1.0,0.0)
    class_df['trend_shifted'] = class_df.trend.shift(-1)
    class_df.dropna(inplace=True)
    class_df['trend_shifted'] = class_df.trend_shifted.astype('int64')
    class_df['trend'] = class_df.trend.astype('int64')
    return class_df

In [12]:
# generates Feature DataFrame to feed into Machine Learning Algorithm
class_df = calc_feature_dataframe(btcusd_6h_historical_ffill)
class_df.head()

Unnamed: 0,time,close,returns,log_price,log_returns,log_returns_shifted,price_velocity_lr_2,price_velocity_lr_3,price_velocity_lr_4,price_velocity_lr_7,...,rolling_mean_velocit_lr_14,rolling_mean_velocity_lr_30,rolling_std_velocity_lr_2,rolling_std_velocity_lr_3,rolling_std_velocity_lr_4,rolling_std_velocity_lr_7,rolling_std_velocit_lr_14,rolling_std_velocity_lr_30,trend,trend_shifted
29,2013-10-14 00:00:00+00:00,133.36726,0.0,4.893107,0.0,-0.016529,-0.016529,-0.016529,-0.014388,-0.003835,...,-0.004138,-0.00297,0.00814,0.019159,0.017641,0.014083,0.010279,0.007902,0,0
30,2013-10-14 06:00:00+00:00,133.36726,-0.016393,4.893107,-0.016529,0.0,-0.016529,-0.014388,-0.014388,-0.066734,...,-0.005319,-0.00352,0.011688,0.008474,0.015644,0.014102,0.010707,0.008257,0,0
31,2013-10-14 12:00:00+00:00,135.59,0.0,4.909636,0.0,0.002141,0.002141,0.002141,0.012694,-0.069715,...,-0.005319,-0.00352,0.011688,0.009543,0.00835,0.014102,0.010707,0.008257,0,0
32,2013-10-14 18:00:00+00:00,135.59,0.002143,4.909636,0.002141,0.0,0.002141,0.012694,0.012694,-0.040801,...,-0.005166,-0.003449,0.001514,0.010217,0.00868,0.014359,0.010804,0.008297,0,0
33,2013-10-15 00:00:00+00:00,135.3,0.0,4.907495,0.0,0.010553,0.010553,0.010553,-0.052346,-0.020411,...,-0.005166,-0.003012,0.001514,0.001236,0.00868,0.014359,0.010804,0.008114,0,0


In [14]:
class_df.tail()

Unnamed: 0,time,close,returns,log_price,log_returns,log_returns_shifted,price_velocity_lr_2,price_velocity_lr_3,price_velocity_lr_4,price_velocity_lr_7,...,rolling_mean_velocit_lr_14,rolling_mean_velocity_lr_30,rolling_std_velocity_lr_2,rolling_std_velocity_lr_3,rolling_std_velocity_lr_4,rolling_std_velocity_lr_7,rolling_std_velocit_lr_14,rolling_std_velocity_lr_30,trend,trend_shifted
10143,2020-09-15 12:00:00+00:00,10810.9,0.002383,9.28831,0.00238,4.6e-05,0.002426,-0.009125,-0.020562,-0.006371,...,-0.002985,-0.002575,0.000257,0.00652,0.005986,0.008583,0.009311,0.007922,0,0
10144,2020-09-15 18:00:00+00:00,10785.2,4.6e-05,9.28593,4.6e-05,-0.011551,-0.011505,-0.022943,-0.015265,-0.007556,...,-0.002742,-0.002012,0.00165,0.001464,0.00536,0.00828,0.009345,0.007459,0,0
10145,2020-09-16 00:00:00+00:00,10784.7,-0.011485,9.285884,-0.011551,-0.011438,-0.022989,-0.015311,-0.012422,-0.014736,...,-0.003094,-0.002495,0.008201,0.007461,0.006744,0.005869,0.009591,0.007596,0,0
10146,2020-09-16 06:00:00+00:00,10910.0,-0.011372,9.297435,-0.011438,0.007677,-0.00376,-0.00087,0.002754,-0.001896,...,-0.003295,-0.003009,8e-05,0.006663,0.007399,0.006331,0.009744,0.007663,0,0
10147,2020-09-16 12:00:00+00:00,11035.5,0.007707,9.308873,0.007677,0.00289,0.010567,0.014191,0.015387,0.005379,...,-0.004158,-0.002327,0.013516,0.011069,0.009398,0.007764,0.007907,0.007674,0,0


In [13]:
slice_df=class_df[['log_returns','trend', 'trend_shifted']]
slice_df.shape

(10119, 3)

In [15]:
class_df.trend.value_counts()

0    9277
1     842
Name: trend, dtype: int64

In [16]:
class_df.columns

Index(['time', 'close', 'returns', 'log_price', 'log_returns',
       'log_returns_shifted', 'price_velocity_lr_2', 'price_velocity_lr_3',
       'price_velocity_lr_4', 'price_velocity_lr_7', 'price_velocity_lr_30',
       'rolling_mean_velocity_lr_2', 'rolling_mean_velocity_lr_3',
       'rolling_mean_velocity_lr_4', 'rolling_mean_velocity_lr_7',
       'rolling_mean_velocit_lr_14', 'rolling_mean_velocity_lr_30',
       'rolling_std_velocity_lr_2', 'rolling_std_velocity_lr_3',
       'rolling_std_velocity_lr_4', 'rolling_std_velocity_lr_7',
       'rolling_std_velocit_lr_14', 'rolling_std_velocity_lr_30', 'trend',
       'trend_shifted'],
      dtype='object')

In [17]:
class_df.isna().sum()

time                           0
close                          0
returns                        0
log_price                      0
log_returns                    0
log_returns_shifted            0
price_velocity_lr_2            0
price_velocity_lr_3            0
price_velocity_lr_4            0
price_velocity_lr_7            0
price_velocity_lr_30           0
rolling_mean_velocity_lr_2     0
rolling_mean_velocity_lr_3     0
rolling_mean_velocity_lr_4     0
rolling_mean_velocity_lr_7     0
rolling_mean_velocit_lr_14     0
rolling_mean_velocity_lr_30    0
rolling_std_velocity_lr_2      0
rolling_std_velocity_lr_3      0
rolling_std_velocity_lr_4      0
rolling_std_velocity_lr_7      0
rolling_std_velocit_lr_14      0
rolling_std_velocity_lr_30     0
trend                          0
trend_shifted                  0
dtype: int64

In [22]:
class_df.set_index('time',inplace=True)
class_df.head()

Unnamed: 0_level_0,close,returns,log_price,log_returns,log_returns_shifted,price_velocity_lr_2,price_velocity_lr_3,price_velocity_lr_4,price_velocity_lr_7,price_velocity_lr_30,...,rolling_mean_velocit_lr_14,rolling_mean_velocity_lr_30,rolling_std_velocity_lr_2,rolling_std_velocity_lr_3,rolling_std_velocity_lr_4,rolling_std_velocity_lr_7,rolling_std_velocit_lr_14,rolling_std_velocity_lr_30,trend,trend_shifted
time,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
2013-10-14 00:00:00+00:00,133.36726,0.0,4.893107,0.0,-0.016529,-0.016529,-0.016529,-0.014388,-0.003835,-0.279218,...,-0.004138,-0.00297,0.00814,0.019159,0.017641,0.014083,0.010279,0.007902,0,0
2013-10-14 06:00:00+00:00,133.36726,-0.016393,4.893107,-0.016529,0.0,-0.016529,-0.014388,-0.014388,-0.066734,-0.279218,...,-0.005319,-0.00352,0.011688,0.008474,0.015644,0.014102,0.010707,0.008257,0,0
2013-10-14 12:00:00+00:00,135.59,0.0,4.909636,0.0,0.002141,0.002141,0.002141,0.012694,-0.069715,-0.262689,...,-0.005319,-0.00352,0.011688,0.009543,0.00835,0.014102,0.010707,0.008257,0,0
2013-10-14 18:00:00+00:00,135.59,0.002143,4.909636,0.002141,0.0,0.002141,0.012694,0.012694,-0.040801,-0.294206,...,-0.005166,-0.003449,0.001514,0.010217,0.00868,0.014359,0.010804,0.008297,0,0
2013-10-15 00:00:00+00:00,135.3,0.0,4.907495,0.0,0.010553,0.010553,0.010553,-0.052346,-0.020411,-0.360364,...,-0.005166,-0.003012,0.001514,0.001236,0.00868,0.014359,0.010804,0.008114,0,0


## XG Boost Classifier

In [23]:
# Import necessary libraries for XG Boost Classifier
from sklearn.metrics import make_scorer
from sklearn.metrics import roc_auc_score
from sklearn import preprocessing
from sklearn import svm
import xgboost as xgb
from xgboost import XGBClassifier
from sklearn.pipeline import Pipeline
from sklearn.metrics import confusion_matrix
from sklearn.metrics import classification_report
from sklearn.model_selection import train_test_split

In [24]:
# Set X,y and train/test
# Create independent variable dataframe
X = class_df.copy()
X.drop(["trend_shifted", "trend", "returns", "log_returns_shifted"], axis=1, inplace=True)
# Create dependent variable dataframe
y = class_df["trend_shifted"].values.reshape(-1, 1)

In [25]:
# Split into Training and testing data (CHANGE THIS to sequential split (70/30)) Areas to improve (cross validation)
X_train, X_test, y_train, y_test = train_test_split(X, y, random_state=78, train_size=.7)

In [50]:
len(X_test)

3036

In [26]:
from sklearn.preprocessing import MinMaxScaler, StandardScaler
scaler = StandardScaler()
X_scaler = scaler.fit(X_train)
X_train = X_scaler.transform(X_train)
X_test = X_scaler.transform(X_test)
# Fit the StandardScaler object with the target data Y
scaler.fit(y)
# Scale the target training and testing sets
y_train = scaler.transform(y_train)
y_test = scaler.transform(y_test)

In [39]:
y_test

array([[-0.30126749],
       [-0.30126749],
       [-0.30126749],
       ...,
       [-0.30126749],
       [-0.30126749],
       [-0.30126749]])

In [27]:
# Train Model on training dataset
base_model = xgb.XGBClassifier(objective='binary:logistic').fit(X_train,y_train)

  return f(**kwargs)


In [28]:
# Test model on out of sample testing dataset
predictions = base_model.predict(X_test)
predicted_returns = scaler.inverse_transform(predictions.reshape(-1,1))
real_returns = scaler.inverse_transform(y_test.reshape(-1, 1))
# Create Prediction Dataframe
prediction_xgb = pd.DataFrame({
    "Real": real_returns.ravel(),
    "Predicted": predicted_returns.ravel()
})#, index=X_test


In [41]:
# Create accuracy test column to identify errors in prediction
prediction_xgb['Accuracy_Test'] = np.where(prediction_xgb['Real'] == prediction_xgb['Predicted'], True, False)
vbullish_threshhold = class_df['log_returns'].mean() + class_df['log_returns'].std()
prediction_xgb['trend_predicted'] = np.where(prediction_xgb['Predicted']>vbullish_threshhold, 1.0,0.0)
prediction_xgb.head()

Unnamed: 0,Real,Predicted,Accuracy_Test,trend_predicted
0,1.387779e-17,1.387779e-17,True,0.0
1,1.387779e-17,1.387779e-17,True,0.0
2,1.387779e-17,1.387779e-17,True,0.0
3,1.387779e-17,1.387779e-17,True,0.0
4,1.387779e-17,1.387779e-17,True,0.0


In [43]:
prediction_xgb['trend_predicted'].value_counts()

0.0    2817
1.0     219
Name: trend_predicted, dtype: int64

In [42]:
# Print Value Counts of True and false predictions
print(prediction_xgb['Accuracy_Test'].value_counts())

True     3007
False      29
Name: Accuracy_Test, dtype: int64


In [31]:
from sklearn.preprocessing import StandardScaler
from sklearn.pipeline import Pipeline
from sklearn.model_selection import cross_val_predict
from sklearn.metrics import confusion_matrix
from sklearn.metrics import classification_report

In [32]:
# Print Classification Report
steps = [('scaler', StandardScaler()),
         ('model',XGBClassifier())]         
pipeline = Pipeline(steps)        
y_pred = cross_val_predict(pipeline, X, y)          
print(confusion_matrix(y, y_pred))        
print(classification_report(y, y_pred, digits=4))

  return f(**kwargs)
  return f(**kwargs)
  return f(**kwargs)
  return f(**kwargs)
  return f(**kwargs)


[[9243   34]
 [  61  781]]
              precision    recall  f1-score   support

           0     0.9934    0.9963    0.9949      9277
           1     0.9583    0.9276    0.9427       842

    accuracy                         0.9906     10119
   macro avg     0.9759    0.9619    0.9688     10119
weighted avg     0.9905    0.9906    0.9905     10119



In [69]:
dump(base_model, 'base_xgb_model.joblib')

['base_xgb_model.joblib']

# Backtest

In [48]:
# Identify Length of Prediction Dataframe
len(prediction_xgb)

3036

In [63]:
backtest_df = class_df.iloc[-3036:]
backtest_df.reset_index(inplace=True)
test_df = pd.merge(backtest_df, prediction_xgb, left_index=True, right_index=True)
test_df.set_index('time', inplace=True)
test_df = pd.merge(test_df, btcusd_6h_historical, how='left', on='time')
test_df = pd.merge(test_df, ethbtc_6h_historical, how='left', on='time')
test_df['btcusd_price'] = test_df['close_x']
test_df['ethbtc_price'] = test_df['close_y']
test_df.drop(columns=['close_x', 'close_y'], inplace=True)
test_df['ethbtc_price'].fillna(method='ffill', inplace=True)

In [68]:
test_df.columns

Index(['time', 'returns', 'log_price', 'log_returns', 'log_returns_shifted',
       'price_velocity_lr_2', 'price_velocity_lr_3', 'price_velocity_lr_4',
       'price_velocity_lr_7', 'price_velocity_lr_30',
       'rolling_mean_velocity_lr_2', 'rolling_mean_velocity_lr_3',
       'rolling_mean_velocity_lr_4', 'rolling_mean_velocity_lr_7',
       'rolling_mean_velocit_lr_14', 'rolling_mean_velocity_lr_30',
       'rolling_std_velocity_lr_2', 'rolling_std_velocity_lr_3',
       'rolling_std_velocity_lr_4', 'rolling_std_velocity_lr_7',
       'rolling_std_velocit_lr_14', 'rolling_std_velocity_lr_30', 'trend',
       'trend_shifted', 'Real', 'Predicted', 'Accuracy_Test',
       'trend_predicted', 'close', 'btcusd_price', 'ethbtc_price'],
      dtype='object')

In [None]:
time_list = []
btc_position = []
eth_position = []
usd_position = []
#bar = '6h'# bar length: adjest for testing and deployment
lags = 10 # number of lags for features data
min_bars = lags + 1 # minimum length for resampled data
initial_aum = 1000000 # initial assets under management
universe = ['XBT_USD', 'ETH_USD', 'ETH_BTC']
btc_threshold =  .6
vbull_wt = .2 #80% ethereum
neutral_wt = 1

In [66]:
# Set up inputs for backtest
test_df = test_df
initial_capital = 1000000
exchange='kraken'
rebalance_freq = '6h'
start = '2018-10-03'
end = '2019-09-16'
#fee_perc
#tax_fee

In [None]:
def return_analysis(test_df, initial_capital, exchange, rebalance_freq, start, end):
    name_of_model = 'XGB Multi-Classifier'
    initial_capital = float(initial_capital)
    def btc_accumulator(test_df):#(freq=6, threshold=.6):
        if test_df['trend_predicted'] == 1:
            wt_optimized = vbull_wt
        if test_df['trend_predicted'] == 0:
            wt_optimized = neutral_wt        
        # Calculate current balances
        usd_balance = balances_df[balances_df['symbol']=='USD']['usd_balance'].sum()
        xbt_balance = balances_df[balances_df['symbol']=='XBT']['usd_balance'].sum()
        alt_balances_df = balances_df[balances_df['symbol']!='USD']
        alt_balances_df = alt_balances_df[alt_balances_df['symbol']!='XBT']
        alt_balance = balances_df['usd_balance'].sum()
        trading_balance_wt = usd_balance + alt_balance
        # Allocation Logic
        if trading_balance_wt > 1-btc_threshold: # if trading balance weight is greater than .4 collect profits
            btc_wt = btc_threshold # accumulating bitcoin from trading profits
            usd_wt = (1-btc_threshold)*(wt_optimized) # allocating .4(1- optimized eth wt)
            eth_wt = (1-btc_threshold)*(1-wt_optimized) #  allocating .4 (optimized eth wt)
        else:
            btc_wt = current_btc_wt
            usd_wt = (1-current_btc_wt)*wt_optimized-1
            eth_wt = (1-current_btc_wt)*wt_optimized
        meta_level_wts = [btc_wt, usd_wt, eth_wt]
        return meta_level_wts

In [None]:
C:\Users\gdepa\Desktop\btc_accumulator.py

In [78]:
def load_model(features_df):
    X_test = class_df.drop(["trend_shifted", "trend", "returns", "log_returns_shifted"], axis=1)
    nameOfModel = 'XG BOOST CLASSIFIER'
    model = load(r'C:\Users\gdepa\base_xgb_model.joblib')
    # Test model on out of sample testing dataset
    predictions = model.predict(X_test)
    predicted_returns = scaler.inverse_transform(predictions.reshape(-1,1))
    real_returns = scaler.inverse_transform(y_test.reshape(-1, 1))
    # Create Prediction Dataframe
    prediction_xgb = pd.DataFrame({
        "Real": real_returns.ravel(),
        "Predicted": predicted_returns.ravel()
    })#, index=X_test
    # Create accuracy test column to identify errors in prediction
    prediction_xgb['Accuracy_Test'] = np.where(prediction_xgb['Real'] == prediction_xgb['Predicted'], True, False)
    vbullish_threshhold = class_df['log_returns'].mean() + class_df['log_returns'].std()
    prediction_xgb['trend_predicted'] = np.where(prediction_xgb['Predicted']>vbullish_threshhold, 1.0,0.0)
    return predictions_df

In [79]:
load_model(class_df)

ValueError: feature_names mismatch: ['f0', 'f1', 'f2', 'f3', 'f4', 'f5', 'f6', 'f7', 'f8', 'f9', 'f10', 'f11', 'f12', 'f13', 'f14', 'f15', 'f16', 'f17', 'f18', 'f19'] ['close', 'log_price', 'log_returns', 'price_velocity_lr_2', 'price_velocity_lr_3', 'price_velocity_lr_4', 'price_velocity_lr_7', 'price_velocity_lr_30', 'rolling_mean_velocity_lr_2', 'rolling_mean_velocity_lr_3', 'rolling_mean_velocity_lr_4', 'rolling_mean_velocity_lr_7', 'rolling_mean_velocit_lr_14', 'rolling_mean_velocity_lr_30', 'rolling_std_velocity_lr_2', 'rolling_std_velocity_lr_3', 'rolling_std_velocity_lr_4', 'rolling_std_velocity_lr_7', 'rolling_std_velocit_lr_14', 'rolling_std_velocity_lr_30']
expected f5, f1, f19, f11, f14, f15, f8, f12, f17, f13, f9, f0, f2, f7, f16, f6, f10, f18, f3, f4 in input data
training data did not have the following fields: rolling_mean_velocit_lr_14, price_velocity_lr_4, rolling_mean_velocity_lr_3, rolling_std_velocity_lr_30, price_velocity_lr_3, price_velocity_lr_2, price_velocity_lr_7, rolling_mean_velocity_lr_7, rolling_std_velocity_lr_3, rolling_std_velocity_lr_7, log_returns, rolling_std_velocity_lr_4, log_price, rolling_mean_velocity_lr_4, rolling_std_velocit_lr_14, rolling_mean_velocity_lr_2, rolling_mean_velocity_lr_30, close, price_velocity_lr_30, rolling_std_velocity_lr_2