# Classifiers

In [80]:
# Important packages
import pandas as pd
import numpy as np
import ta
from scipy.stats import linregress
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score
from sklearn.feature_selection import SelectFromModel

from sklearn.ensemble import RandomForestClassifier
from sklearn.svm import SVC
from xgboost import XGBClassifier

In [81]:
df = pd.read_parquet('../Dataframes/df.parquet.gzip')

In [82]:
# Clean null data
df.dropna(inplace=True)

# Make date the index
df.index = df["date"]
df.drop(["date"], axis=1, inplace=True)

In [83]:
# Make df['delta'] the expected output
target = df['delta']
df.drop(['delta'], axis = 1, inplace = True)

# Train and Test samples
X_train, X_test, y_train, y_test = train_test_split(df, target, test_size = 0.30)

## Random Forest Classifier

In [72]:
rf_model = RandomForestClassifier(n_estimators=20, max_depth=2, random_state=0)

rf_model.fit(X_train, y_train)

print("RandomForest accuracy : ",accuracy_score(y_test, rf_model.predict(X_test), normalize = True))

RandomForest accuracy :  0.7732293697205977


In [73]:
predict_lst = list(rf_model.predict(df.values.tolist()))
predict_lst

[0.0,
 1.0,
 0.0,
 0.0,
 0.0,
 0.0,
 0.0,
 1.0,
 1.0,
 1.0,
 1.0,
 1.0,
 1.0,
 1.0,
 1.0,
 1.0,
 1.0,
 0.0,
 0.0,
 0.0,
 0.0,
 1.0,
 1.0,
 1.0,
 1.0,
 0.0,
 0.0,
 0.0,
 0.0,
 0.0,
 0.0,
 1.0,
 0.0,
 0.0,
 0.0,
 0.0,
 0.0,
 1.0,
 0.0,
 0.0,
 0.0,
 0.0,
 0.0,
 0.0,
 1.0,
 0.0,
 0.0,
 0.0,
 0.0,
 0.0,
 1.0,
 0.0,
 0.0,
 0.0,
 1.0,
 1.0,
 1.0,
 1.0,
 0.0,
 1.0,
 1.0,
 1.0,
 1.0,
 0.0,
 0.0,
 0.0,
 0.0,
 0.0,
 1.0,
 1.0,
 1.0,
 1.0,
 0.0,
 1.0,
 0.0,
 0.0,
 1.0,
 1.0,
 1.0,
 1.0,
 0.0,
 0.0,
 0.0,
 0.0,
 1.0,
 1.0,
 1.0,
 0.0,
 0.0,
 0.0,
 0.0,
 0.0,
 0.0,
 1.0,
 1.0,
 1.0,
 1.0,
 1.0,
 0.0,
 0.0,
 0.0,
 0.0,
 0.0,
 1.0,
 1.0,
 1.0,
 1.0,
 0.0,
 0.0,
 1.0,
 1.0,
 1.0,
 0.0,
 0.0,
 0.0,
 0.0,
 0.0,
 0.0,
 0.0,
 0.0,
 0.0,
 0.0,
 0.0,
 0.0,
 1.0,
 1.0,
 0.0,
 0.0,
 0.0,
 0.0,
 0.0,
 0.0,
 0.0,
 0.0,
 0.0,
 0.0,
 1.0,
 1.0,
 1.0,
 1.0,
 1.0,
 1.0,
 1.0,
 1.0,
 1.0,
 1.0,
 0.0,
 0.0,
 0.0,
 0.0,
 0.0,
 1.0,
 1.0,
 1.0,
 0.0,
 0.0,
 0.0,
 0.0,
 0.0,
 1.0,
 1.0,
 1.0,
 0.0,
 0.0,
 1.0,
 1.0,
 1.0

## Support Vector Machine Classifier

In [74]:
svc_model = SVC(random_state=0, gamma='auto')

svc_model.fit(X_train, y_train)

print("LinearSVC accuracy : ",accuracy_score(y_test, svc_model.predict(X_test), normalize = True))

LinearSVC accuracy :  0.49902534113060426


In [75]:
predict_lst = list(svc_model.predict(df.values.tolist()))
predict_lst

[0.0,
 0.0,
 0.0,
 0.0,
 0.0,
 0.0,
 1.0,
 0.0,
 1.0,
 0.0,
 0.0,
 0.0,
 1.0,
 1.0,
 0.0,
 1.0,
 0.0,
 0.0,
 0.0,
 0.0,
 0.0,
 0.0,
 1.0,
 1.0,
 0.0,
 0.0,
 0.0,
 0.0,
 0.0,
 0.0,
 0.0,
 0.0,
 1.0,
 0.0,
 0.0,
 0.0,
 0.0,
 0.0,
 0.0,
 0.0,
 0.0,
 1.0,
 0.0,
 0.0,
 1.0,
 0.0,
 0.0,
 0.0,
 0.0,
 0.0,
 0.0,
 0.0,
 0.0,
 0.0,
 1.0,
 1.0,
 1.0,
 0.0,
 1.0,
 1.0,
 1.0,
 1.0,
 0.0,
 0.0,
 0.0,
 1.0,
 1.0,
 0.0,
 0.0,
 1.0,
 0.0,
 1.0,
 0.0,
 0.0,
 0.0,
 0.0,
 0.0,
 1.0,
 0.0,
 1.0,
 0.0,
 0.0,
 0.0,
 0.0,
 1.0,
 1.0,
 0.0,
 1.0,
 0.0,
 0.0,
 0.0,
 0.0,
 0.0,
 1.0,
 0.0,
 0.0,
 1.0,
 0.0,
 0.0,
 0.0,
 0.0,
 0.0,
 0.0,
 1.0,
 1.0,
 1.0,
 0.0,
 0.0,
 0.0,
 1.0,
 0.0,
 1.0,
 0.0,
 0.0,
 0.0,
 0.0,
 0.0,
 0.0,
 0.0,
 0.0,
 0.0,
 0.0,
 0.0,
 0.0,
 0.0,
 1.0,
 0.0,
 0.0,
 0.0,
 0.0,
 0.0,
 0.0,
 0.0,
 1.0,
 0.0,
 0.0,
 0.0,
 0.0,
 0.0,
 0.0,
 1.0,
 0.0,
 1.0,
 0.0,
 1.0,
 0.0,
 0.0,
 0.0,
 0.0,
 0.0,
 0.0,
 1.0,
 0.0,
 1.0,
 1.0,
 0.0,
 0.0,
 0.0,
 0.0,
 1.0,
 1.0,
 0.0,
 0.0,
 0.0,
 1.0,
 0.0,
 1.0

In [76]:
estm.decision_function(df.values.tolist())

array([-0.99937739, -0.99937739, -1.00035395, ..., -0.99937739,
       -1.00035395, -0.04039302])

In [77]:
df['pred_level'] = predict_lst
df.head()

Unnamed: 0_level_0,open,high,low,close,wclose,so_3,so_4,so_5,so_8,so_9,...,lo_avg_2,hilo_avg_2,hilo_avg,slope_3,slope_4,slope_5,slope_10,slope_20,slope_30,pred_level
date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
13/3/2000,1.5773,1.5829,1.5761,1.5786,1.57905,34.042553,29.078014,47.368421,47.368421,39.647577,...,1.57575,1.57955,1.5795,-1051.660517,-480.169784,-124.55239,-374.380286,-405.042029,-553.768912,0.0
14/3/2000,1.5786,1.5795,1.5682,1.5787,1.576275,67.307692,63.253012,51.470588,51.470588,51.470588,...,1.57215,1.576675,1.57385,-418.01685,-528.634361,-464.546547,-528.940484,-418.450089,-548.468327,0.0
15/3/2000,1.5787,1.5787,1.5699,1.5733,1.5738,34.693878,32.692308,30.722892,25.0,25.0,...,1.56905,1.574075,1.5743,-422.252011,-497.671324,-568.730181,-608.062,-416.686639,-543.624365,0.0
16/3/2000,1.5733,1.5775,1.5692,1.5765,1.574925,73.451327,56.462585,53.205128,40.686275,40.686275,...,1.56955,1.573825,1.57335,-986.842105,-527.622595,-562.098501,-598.90398,-423.601117,-539.882002,0.0
17/3/2000,1.5765,1.5766,1.5702,1.5753,1.57435,64.210526,62.831858,48.29932,34.803922,34.803922,...,1.5697,1.573375,1.5734,-945.945946,-1004.56621,-619.904891,-605.513402,-425.768716,-543.211462,0.0


## XGBoost

In [84]:
boost_model = XGBClassifier()

boost_model.fit(X_train, y_train)

print("XGBoost accuracy : ",accuracy_score(y_test, boost_model.predict(X_test), normalize = True))

XGBoost accuracy :  0.816114359974009


In [86]:
predict_lst = list(boost_model.predict(df.values.to))

ValueError: feature_names mismatch: ['open', 'high', 'low', 'close', 'wclose', 'so_3', 'so_4', 'so_5', 'so_8', 'so_9', 'so_10', 'wr_6', 'wr_7', 'wr_8', 'wr_9', 'wr_10', 'roc_12', 'roc_13', 'roc_14', 'roc_15', 'wcp_15', 'macd_15_30', 'cci_15', 'hi_avg_2', 'lo_avg_2', 'hilo_avg_2', 'hilo_avg', 'slope_3', 'slope_4', 'slope_5', 'slope_10', 'slope_20', 'slope_30'] ['f0', 'f1', 'f2', 'f3', 'f4', 'f5', 'f6', 'f7', 'f8', 'f9', 'f10', 'f11', 'f12', 'f13', 'f14', 'f15', 'f16', 'f17', 'f18', 'f19', 'f20', 'f21', 'f22', 'f23', 'f24', 'f25', 'f26', 'f27', 'f28', 'f29', 'f30', 'f31', 'f32']
expected so_9, hi_avg_2, high, so_3, wr_7, hilo_avg, open, hilo_avg_2, roc_13, roc_12, roc_14, slope_20, low, slope_3, wr_8, slope_10, so_8, macd_15_30, wr_10, wr_6, roc_15, so_10, wcp_15, slope_30, lo_avg_2, close, wclose, so_4, cci_15, slope_5, wr_9, so_5, slope_4 in input data
training data did not have the following fields: f28, f32, f20, f10, f3, f13, f21, f6, f27, f12, f15, f9, f4, f0, f1, f2, f11, f16, f7, f19, f22, f24, f29, f5, f30, f23, f14, f31, f8, f18, f25, f26, f17

[[1.5773,
  1.5829,
  1.5761,
  1.5786,
  1.57905,
  34.042553191490065,
  29.07801418439711,
  47.36842105263127,
  47.36842105263127,
  39.64757709251066,
  30.100334448160375,
  -52.63157894736873,
  -52.63157894736873,
  -52.63157894736873,
  -60.35242290748934,
  -69.89966555183962,
  -1.281971108748669,
  -1.5958109961351492,
  -2.169062964799202,
  -1.3559957507967288,
  1.5861190744233407,
  -0.00995053187156092,
  -59.16104146576781,
  1.58335,
  1.57575,
  1.57955,
  1.5795,
  -1051.6605166051636,
  -480.16978379095474,
  -124.55238984898497,
  -374.38028579761186,
  -405.04202877878157,
  -553.7689123495255],
 [1.5786,
  1.5795,
  1.5682,
  1.5787,
  1.576275,
  67.30769230769177,
  63.25301204819269,
  51.47058823529396,
  51.47058823529396,
  51.47058823529396,
  43.568464730290245,
  -48.52941176470604,
  -48.52941176470604,
  -48.52941176470604,
  -48.52941176470604,
  -56.431535269709755,
  -0.9038980603854101,
  -1.2757175911450176,
  -1.589577359431497,
  -2.162865642