# **1. Importing Necessary Libraries**

In [1]:
import pandas as pd
import sklearn
import xgboost as xgb
from sklearn.metrics import accuracy_score
from extractFeatures import ExtractFeatures
import matplotlib.pyplot as plt
import seaborn as sns


# **2. Processing the Data**

## **2.1 Transferring data into dataframe**

In [2]:
model = ExtractFeatures()
model.data.head()

Unnamed: 0_level_0,open,high,low,close,volume
timestamp,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
2022-12-30 09:00:00+00:00,16462.0,16494.5,16445.5,16494.0,5892400.0
2022-12-30 08:00:00+00:00,16539.0,16539.0,16424.0,16462.0,19083200.0
2022-12-30 07:00:00+00:00,16540.5,16562.0,16523.0,16539.0,5373000.0
2022-12-30 06:00:00+00:00,16596.5,16596.5,16505.5,16540.5,11444700.0
2022-12-30 05:00:00+00:00,16587.5,16596.5,16580.0,16596.5,2362600.0


## **2.2 Adding Featues to the dataframe**
### 2.2.1 Exponential Moving Average

In [3]:
# No. of days for taking the exponential moving average
days = [20, 80, 150, 200]
for day in days:
    model.EWMA(day)

model.data.head()

Unnamed: 0_level_0,open,high,low,close,volume,EWMA_20,EWMA_80,EWMA_150,EWMA_200
timestamp,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1
2022-12-30 09:00:00+00:00,16462.0,16494.5,16445.5,16494.0,5892400.0,16494.0,16494.0,16494.0,16494.0
2022-12-30 08:00:00+00:00,16539.0,16539.0,16424.0,16462.0,19083200.0,16477.2,16477.8,16477.893333,16477.92
2022-12-30 07:00:00+00:00,16540.5,16562.0,16523.0,16539.0,5373000.0,16499.89259,16498.712098,16498.534407,16498.483938
2022-12-30 06:00:00+00:00,16596.5,16596.5,16505.5,16540.5,11444700.0,16511.615321,16509.554059,16509.236559,16509.146036
2022-12-30 05:00:00+00:00,16587.5,16596.5,16580.0,16596.5,2362600.0,16532.148203,16527.823436,16527.157734,16526.967983


#### 2.2.2 Bolinger Bands

In [4]:
days = 100
model.bbands(days)
model.data.tail()


Unnamed: 0_level_0,open,high,low,close,volume,EWMA_20,EWMA_80,EWMA_150,EWMA_200,UpperBB,LowerBB
timestamp,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1
2022-11-18 22:00:00+00:00,16629.0,16660.0,16621.5,16647.0,6596400.0,16616.600075,16475.311673,16434.387316,16447.912668,16974.795373,15601.064627
2022-11-18 21:00:00+00:00,16645.0,16672.5,16602.5,16629.0,9903600.0,16617.78102,16479.106446,16436.964972,16449.714617,16982.370731,15603.029269
2022-11-18 20:00:00+00:00,16556.0,16659.5,16555.0,16645.0,10884300.0,16620.373304,16483.202583,16439.720408,16451.657845,16990.151387,15604.208613
2022-11-18 19:00:00+00:00,16580.0,16598.5,16537.5,16556.0,10692500.0,16614.242513,16485.00005,16441.260537,16452.696123,16995.466797,15606.793203
2022-11-18 18:00:00+00:00,16628.5,16633.0,16541.0,16580.0,18371100.0,16610.981321,16487.345728,16443.098149,16453.962886,17001.317199,15609.942801


### 2.2.3 Commodity Channel Index

In [5]:
days = 20
model.CCI(days)
model.data.tail()


Unnamed: 0_level_0,open,high,low,close,volume,EWMA_20,EWMA_80,EWMA_150,EWMA_200,UpperBB,LowerBB,CCI
timestamp,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1
2022-11-18 22:00:00+00:00,16629.0,16660.0,16621.5,16647.0,6596400.0,16616.600075,16475.311673,16434.387316,16447.912668,16974.795373,15601.064627,72.141578
2022-11-18 21:00:00+00:00,16645.0,16672.5,16602.5,16629.0,9903600.0,16617.78102,16479.106446,16436.964972,16449.714617,16982.370731,15603.029269,48.453337
2022-11-18 20:00:00+00:00,16556.0,16659.5,16555.0,16645.0,10884300.0,16620.373304,16483.202583,16439.720408,16451.657845,16990.151387,15604.208613,11.595434
2022-11-18 19:00:00+00:00,16580.0,16598.5,16537.5,16556.0,10692500.0,16614.242513,16485.00005,16441.260537,16452.696123,16995.466797,15606.793203,-116.259275
2022-11-18 18:00:00+00:00,16628.5,16633.0,16541.0,16580.0,18371100.0,16610.981321,16487.345728,16443.098149,16453.962886,17001.317199,15609.942801,-60.413916


### 2.2.4 Ease of Movement

In [6]:
days = 14
model.EVM(days)
model.data.tail()

Unnamed: 0_level_0,open,high,low,close,volume,EWMA_20,EWMA_80,EWMA_150,EWMA_200,UpperBB,LowerBB,CCI,EVM
timestamp,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1
2022-11-18 22:00:00+00:00,16629.0,16660.0,16621.5,16647.0,6596400.0,16616.600075,16475.311673,16434.387316,16447.912668,16974.795373,15601.064627,72.141578,-1.2e-05
2022-11-18 21:00:00+00:00,16645.0,16672.5,16602.5,16629.0,9903600.0,16617.78102,16479.106446,16436.964972,16449.714617,16982.370731,15603.029269,48.453337,-5e-06
2022-11-18 20:00:00+00:00,16556.0,16659.5,16555.0,16645.0,10884300.0,16620.373304,16483.202583,16439.720408,16451.657845,16990.151387,15604.208613,11.595434,-2.7e-05
2022-11-18 19:00:00+00:00,16580.0,16598.5,16537.5,16556.0,10692500.0,16614.242513,16485.00005,16441.260537,16452.696123,16995.466797,15606.793203,-116.259275,-3.6e-05
2022-11-18 18:00:00+00:00,16628.5,16633.0,16541.0,16580.0,18371100.0,16610.981321,16487.345728,16443.098149,16453.962886,17001.317199,15609.942801,-60.413916,-2.2e-05


### 2.2.5 Rate of Change

In [7]:
days = 11
model.ROC(days)
model.data.tail()

Unnamed: 0_level_0,open,high,low,close,volume,EWMA_20,EWMA_80,EWMA_150,EWMA_200,UpperBB,LowerBB,CCI,EVM,ROW
timestamp,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1
2022-11-18 22:00:00+00:00,16629.0,16660.0,16621.5,16647.0,6596400.0,16616.600075,16475.311673,16434.387316,16447.912668,16974.795373,15601.064627,72.141578,-1.2e-05,0.001233
2022-11-18 21:00:00+00:00,16645.0,16672.5,16602.5,16629.0,9903600.0,16617.78102,16479.106446,16436.964972,16449.714617,16982.370731,15603.029269,48.453337,-5e-06,0.002683
2022-11-18 20:00:00+00:00,16556.0,16659.5,16555.0,16645.0,10884300.0,16620.373304,16483.202583,16439.720408,16451.657845,16990.151387,15604.208613,11.595434,-2.7e-05,0.003739
2022-11-18 19:00:00+00:00,16580.0,16598.5,16537.5,16556.0,10692500.0,16614.242513,16485.00005,16441.260537,16452.696123,16995.466797,15606.793203,-116.259275,-3.6e-05,-0.000875
2022-11-18 18:00:00+00:00,16628.5,16633.0,16541.0,16580.0,18371100.0,16610.981321,16487.345728,16443.098149,16453.962886,17001.317199,15609.942801,-60.413916,-2.2e-05,-0.000693


### 2.2.6 Force Index

In [8]:
days = 10
model.ForceIndex(days)
model.data.tail()

Unnamed: 0_level_0,open,high,low,close,volume,EWMA_20,EWMA_80,EWMA_150,EWMA_200,UpperBB,LowerBB,CCI,EVM,ROW,ForceIndex
timestamp,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1
2022-11-18 22:00:00+00:00,16629.0,16660.0,16621.5,16647.0,6596400.0,16616.600075,16475.311673,16434.387316,16447.912668,16974.795373,15601.064627,72.141578,-1.2e-05,0.001233,412275000.0
2022-11-18 21:00:00+00:00,16645.0,16672.5,16602.5,16629.0,9903600.0,16617.78102,16479.106446,16436.964972,16449.714617,16982.370731,15603.029269,48.453337,-5e-06,0.002683,455565600.0
2022-11-18 20:00:00+00:00,16556.0,16659.5,16555.0,16645.0,10884300.0,16620.373304,16483.202583,16439.720408,16451.657845,16990.151387,15604.208613,11.595434,-2.7e-05,0.003739,810880350.0
2022-11-18 19:00:00+00:00,16580.0,16598.5,16537.5,16556.0,10692500.0,16614.242513,16485.00005,16441.260537,16452.696123,16995.466797,15606.793203,-116.259275,-3.6e-05,-0.000875,-379583750.0
2022-11-18 18:00:00+00:00,16628.5,16633.0,16541.0,16580.0,18371100.0,16610.981321,16487.345728,16443.098149,16453.962886,17001.317199,15609.942801,-60.413916,-2.2e-05,-0.000693,-349050900.0


### 2.2.7 Simple Moving Average

In [9]:
days = [20,]
for day in days:
    model.SMA(day)

model.data.tail()

Unnamed: 0_level_0,open,high,low,close,volume,EWMA_20,EWMA_80,EWMA_150,EWMA_200,UpperBB,LowerBB,CCI,EVM,ROW,ForceIndex,SMA_20
timestamp,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1
2022-11-18 22:00:00+00:00,16629.0,16660.0,16621.5,16647.0,6596400.0,16616.600075,16475.311673,16434.387316,16447.912668,16974.795373,15601.064627,72.141578,-1.2e-05,0.001233,412275000.0,16616.475
2022-11-18 21:00:00+00:00,16645.0,16672.5,16602.5,16629.0,9903600.0,16617.78102,16479.106446,16436.964972,16449.714617,16982.370731,15603.029269,48.453337,-5e-06,0.002683,455565600.0,16616.975
2022-11-18 20:00:00+00:00,16556.0,16659.5,16555.0,16645.0,10884300.0,16620.373304,16483.202583,16439.720408,16451.657845,16990.151387,15604.208613,11.595434,-2.7e-05,0.003739,810880350.0,16617.9
2022-11-18 19:00:00+00:00,16580.0,16598.5,16537.5,16556.0,10692500.0,16614.242513,16485.00005,16441.260537,16452.696123,16995.466797,15606.793203,-116.259275,-3.6e-05,-0.000875,-379583750.0,16614.375
2022-11-18 18:00:00+00:00,16628.5,16633.0,16541.0,16580.0,18371100.0,16610.981321,16487.345728,16443.098149,16453.962886,17001.317199,15609.942801,-60.413916,-2.2e-05,-0.000693,-349050900.0,16612.05


### 2.2.8 Normalize the features

In [10]:
model.normalize()
model.data.tail()

Unnamed: 0,open,high,low,close,volume,EWMA_20,EWMA_80,EWMA_150,EWMA_200,UpperBB,LowerBB,CCI,EVM,ROW,ForceIndex,SMA_20
984,-0.484503,-0.503409,-0.402419,-0.437284,-0.259329,-0.543551,-1.11367,-1.49288,-1.616181,-0.465577,-2.493464,0.794781,-0.002761,0.110413,0.182715,-0.533125
985,-0.442634,-0.470532,-0.45148,-0.484385,0.088,-0.540296,-1.101375,-1.483145,-1.608679,-0.44783,-2.487797,0.532602,0.052194,0.250289,0.202188,-0.531774
986,-0.675532,-0.504724,-0.574131,-0.442518,0.190995,-0.533152,-1.088104,-1.472738,-1.60059,-0.429602,-2.484395,0.124663,-0.1248,0.352095,0.362014,-0.529275
987,-0.612728,-0.665161,-0.619319,-0.675405,0.170851,-0.550048,-1.08228,-1.466922,-1.596267,-0.417149,-2.476939,-1.290419,-0.192225,-0.092903,-0.173475,-0.5388
988,-0.485811,-0.574422,-0.610281,-0.612604,0.977273,-0.559036,-1.07468,-1.459982,-1.590994,-0.403443,-2.467854,-0.672328,-0.079193,-0.075357,-0.159741,-0.545083


## **2.3 Getting the true values - getting Bullish or Bearish trend**
To convert the problem into a classifying problem, consider the following definition:
$$
y_n = o_{n+1} - o_{n}
$$
    
where $o_n$ is the opening price of the $n_{th}$ bucket

In [11]:
X, Y = model.true_values()

### 2.3.2 Splitting the Train and Test data

In [12]:
X_train, X_test, y_train, y_test = model.split_train_test(X,Y)

(790, 16) (790,)
(198, 16) (198,)


### 2.3.3 Training the XGBoost Classifier

In [13]:
XGBmodel = xgb.XGBClassifier()
XGBmodel.fit(X_train, y_train)

## **2.4 Checking the Accuracy of the classifier**

In [14]:
y_pred = XGBmodel.predict(X_test)
accuracy = accuracy_score(y_test, y_pred)
print("Accuracy: %.2f%%" % (accuracy * 100.0))

Accuracy: 49.49%
