In [1]:
import pandas as pd
import tensorflow as tf
import numpy as np
import random
import matplotlib.pyplot as plt
from sklearn.preprocessing import OneHotEncoder
import seaborn as sb
from sklearn.model_selection import train_test_split
from tensorflow.keras.losses import BinaryCrossentropy
from tensorflow.keras.optimizers import Adam, SGD, Adagrad, RMSprop


## Getting the Data

In [2]:
data = pd.read_csv("data_model.csv")

In [3]:
data.head()

Unnamed: 0.1,Unnamed: 0,orderType,AsianSessionMode,AsianSessionOptionLive,FibRetrPct,distToClosestOrder,openSpread,slPoints,adjSLPoints,ratioMaxProfitToAdjSL,...,Rsi_H1,Rsi_H4,Rsi_D1,setTimeyear,setTimemonth,setTimeweek,setTimeday,setTimehour,setTimeminutes,setTimedayofweek
0,0,0,1,2,87.5,10.0,50,90,97,0.0,...,62.85,67.7,78.04,2001,1,1,1,0,45,0
1,1,0,1,2,87.5,10.0,50,90,97,0.0,...,62.85,67.7,78.04,2001,1,1,1,0,45,0
2,2,0,1,2,87.5,10.0,50,90,97,0.0,...,62.85,67.7,78.04,2001,1,1,1,0,45,0
3,3,0,1,3,90.3,162.0,50,192,199,1.447236,...,61.99,67.19,77.85,2001,1,1,1,6,55,0
4,4,1,2,1,61.3,10.0,50,180,187,0.0,...,41.46,33.39,21.16,2001,1,1,2,36,27,1


In [4]:
data.isna().sum()

Unnamed: 0                0
orderType                 0
AsianSessionMode          0
AsianSessionOptionLive    0
FibRetrPct                0
                         ..
setTimeweek               0
setTimeday                0
setTimehour               0
setTimeminutes            0
setTimedayofweek          0
Length: 323, dtype: int64

In [5]:
data.isna().sum().sum()

0

In [6]:
# NO NANs

## Change columns to one hot encoding

In [7]:
data["AsianSessionOptionLive"].unique()

array([2, 3, 1, 4, 5], dtype=int64)

In [8]:
data["AsianSessionMode"].unique()

array([1, 2, 4, 3], dtype=int64)

In [9]:
Encoder = OneHotEncoder()
for colName in data.columns:
    unique = data[colName].unique()
    if len(unique) <= 10 and len(unique)>2 and colName != "ratioMaxProfitToAdjSL" :
        df = pd.DataFrame(Encoder.fit_transform(data[[colName]]).toarray()).add_prefix(colName+'_')
        data = data.join(df)
        data.drop(colName, axis = 1, inplace = True)


In [10]:
data

Unnamed: 0.1,Unnamed: 0,orderType,FibRetrPct,distToClosestOrder,openSpread,slPoints,adjSLPoints,ratioMaxProfitToAdjSL,minAfterLPEvent,minBeforeLPEvent,...,ClrdPivots:D1_2,ClrdPivots:D1_3,ClrdPivots:W1_0,ClrdPivots:W1_1,ClrdPivots:W1_2,setTimedayofweek_0,setTimedayofweek_1,setTimedayofweek_2,setTimedayofweek_3,setTimedayofweek_4
0,0,0,87.5,10.0,50,90,97,0.000000,0.0,3162525,...,0.0,0.0,1.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0
1,1,0,87.5,10.0,50,90,97,0.000000,0.0,3162525,...,0.0,0.0,1.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0
2,2,0,87.5,10.0,50,90,97,0.000000,0.0,3162525,...,0.0,0.0,1.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0
3,3,0,90.3,162.0,50,192,199,1.447236,0.0,3162514,...,0.0,0.0,1.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0
4,4,1,61.3,10.0,50,180,187,0.000000,0.0,3161702,...,0.0,0.0,1.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
281167,281654,0,37.4,27.0,50,353,360,209.797222,0.0,2890454,...,0.0,0.0,1.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0
281168,281655,0,37.4,27.0,50,353,360,209.797222,0.0,2890454,...,0.0,0.0,1.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0
281169,281656,0,37.4,27.0,50,393,400,188.817500,0.0,2890454,...,0.0,0.0,1.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0
281170,281657,0,37.4,27.0,50,403,410,184.212195,0.0,2890454,...,0.0,0.0,1.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0


## Ranking based on highest correlation to the ouptut

In [11]:
data.corr()

Unnamed: 0.1,Unnamed: 0,orderType,FibRetrPct,distToClosestOrder,openSpread,slPoints,adjSLPoints,ratioMaxProfitToAdjSL,minAfterLPEvent,minBeforeLPEvent,...,ClrdPivots:D1_2,ClrdPivots:D1_3,ClrdPivots:W1_0,ClrdPivots:W1_1,ClrdPivots:W1_2,setTimedayofweek_0,setTimedayofweek_1,setTimedayofweek_2,setTimedayofweek_3,setTimedayofweek_4
Unnamed: 0,1.000000,0.010051,0.106243,-0.181076,-0.802449,-0.396445,-0.396445,0.054770,-0.147688,-0.427674,...,-0.008966,-0.011250,-0.014788,0.012555,0.015096,0.002141,-0.009095,0.007282,-0.002324,0.002069
orderType,0.010051,1.000000,-0.066200,0.019589,-0.004505,-0.010079,-0.010079,0.002317,-0.008018,-0.010999,...,-0.008289,-0.010231,0.013955,-0.016086,0.012193,-0.001051,0.001850,-0.001328,0.000945,-0.000461
FibRetrPct,0.106243,-0.066200,1.000000,-0.062458,-0.153144,-0.214750,-0.214750,-0.000289,0.000051,-0.150420,...,-0.000588,-0.000813,0.004757,-0.005328,0.003185,-0.013414,0.004612,0.000296,0.011447,-0.003660
distToClosestOrder,-0.181076,0.019589,-0.062458,1.000000,0.204044,0.062126,0.062126,-0.003701,-0.027001,0.169987,...,-0.001792,0.013230,-0.006541,0.005944,0.004239,0.004937,-0.000695,-0.005014,-0.001441,0.002544
openSpread,-0.802449,-0.004505,-0.153144,0.204044,1.000000,0.466112,0.466112,0.008453,0.009647,0.740462,...,0.006489,0.006578,-0.004902,0.004263,0.004372,-0.003437,0.002978,-0.010225,0.002140,0.008788
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
setTimedayofweek_0,0.002141,-0.001051,-0.013414,0.004937,-0.003437,-0.010123,-0.010123,-0.002157,0.384251,-0.003979,...,0.000889,0.021644,-0.159414,0.157370,0.025303,1.000000,-0.241810,-0.247010,-0.246374,-0.230053
setTimedayofweek_1,-0.009095,0.001850,0.004612,-0.000695,0.002978,-0.008469,-0.008469,0.002416,-0.022768,0.003935,...,0.017257,-0.005234,0.038751,-0.038260,-0.006119,-0.241810,1.000000,-0.262216,-0.261541,-0.244215
setTimedayofweek_2,0.007282,-0.001328,0.000296,-0.005014,-0.010225,-0.006661,-0.006661,0.005932,-0.094140,-0.002701,...,-0.014217,-0.005346,0.039703,-0.039202,-0.006250,-0.247010,-0.262216,1.000000,-0.267165,-0.249466
setTimedayofweek_3,-0.002324,0.000945,0.011447,-0.001441,0.002140,0.012503,0.012503,-0.007868,-0.118132,-0.004547,...,-0.014180,-0.005332,0.038685,-0.038174,-0.006234,-0.246374,-0.261541,-0.267165,1.000000,-0.248824


In [12]:
ranking = data.corr()["ratioMaxProfitToAdjSL"]
ranking

Unnamed: 0            0.054770
orderType             0.002317
FibRetrPct           -0.000289
distToClosestOrder   -0.003701
openSpread            0.008453
                        ...   
setTimedayofweek_0   -0.002157
setTimedayofweek_1    0.002416
setTimedayofweek_2    0.005932
setTimedayofweek_3   -0.007868
setTimedayofweek_4    0.001661
Name: ratioMaxProfitToAdjSL, Length: 469, dtype: float64

In [13]:
type(ranking)

pandas.core.series.Series

In [14]:
ranking = ranking.sort_values(ascending = False)
ranking = ranking[2:]
ranking

signalLeg:H6_1      0.020779
sentLeg:H6_1        0.020671
sentLeg:H12_1       0.019092
signalLeg:H12_1     0.018840
InsidePOI:MN1       0.017566
                      ...   
sentATR:D1         -0.027702
sentATR:H8         -0.027914
pivotLengthLeft          NaN
pivotLengthRight         NaN
isExtreme                NaN
Name: ratioMaxProfitToAdjSL, Length: 467, dtype: float64

## Spliting the output column into 0s and 1s. 0 if the output is <15 and 1 if the output is >15

In [15]:
data.loc[data["ratioMaxProfitToAdjSL"] < 15, "ratioMaxProfitToAdjSL"] = 0
data.loc[data["ratioMaxProfitToAdjSL"] >= 15,"ratioMaxProfitToAdjSL"] = 1

In [16]:
data[data["ratioMaxProfitToAdjSL"] == 1]

Unnamed: 0.1,Unnamed: 0,orderType,FibRetrPct,distToClosestOrder,openSpread,slPoints,adjSLPoints,ratioMaxProfitToAdjSL,minAfterLPEvent,minBeforeLPEvent,...,ClrdPivots:D1_2,ClrdPivots:D1_3,ClrdPivots:W1_0,ClrdPivots:W1_1,ClrdPivots:W1_2,setTimedayofweek_0,setTimedayofweek_1,setTimedayofweek_2,setTimedayofweek_3,setTimedayofweek_4
643,643,0,75.0,60.0,50,130,137,1.0,0.0,3159350,...,0.0,0.0,1.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0
856,857,1,100.0,130.0,50,140,147,1.0,0.0,3130043,...,0.0,0.0,1.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0
1044,1049,0,83.3,330.0,50,120,127,1.0,0.0,3120862,...,0.0,0.0,1.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0
1455,1462,1,75.0,180.0,50,130,137,1.0,0.0,3092314,...,0.0,0.0,1.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0
1472,1479,1,76.2,0.0,50,190,197,1.0,0.0,3101723,...,0.0,0.0,1.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
281167,281654,0,37.4,27.0,50,353,360,1.0,0.0,2890454,...,0.0,0.0,1.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0
281168,281655,0,37.4,27.0,50,353,360,1.0,0.0,2890454,...,0.0,0.0,1.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0
281169,281656,0,37.4,27.0,50,393,400,1.0,0.0,2890454,...,0.0,0.0,1.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0
281170,281657,0,37.4,27.0,50,403,410,1.0,0.0,2890454,...,0.0,0.0,1.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0


In [17]:
data[data["ratioMaxProfitToAdjSL"] == 0]

Unnamed: 0.1,Unnamed: 0,orderType,FibRetrPct,distToClosestOrder,openSpread,slPoints,adjSLPoints,ratioMaxProfitToAdjSL,minAfterLPEvent,minBeforeLPEvent,...,ClrdPivots:D1_2,ClrdPivots:D1_3,ClrdPivots:W1_0,ClrdPivots:W1_1,ClrdPivots:W1_2,setTimedayofweek_0,setTimedayofweek_1,setTimedayofweek_2,setTimedayofweek_3,setTimedayofweek_4
0,0,0,87.5,10.0,50,90,97,0.0,0.0,3162525,...,0.0,0.0,1.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0
1,1,0,87.5,10.0,50,90,97,0.0,0.0,3162525,...,0.0,0.0,1.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0
2,2,0,87.5,10.0,50,90,97,0.0,0.0,3162525,...,0.0,0.0,1.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0
3,3,0,90.3,162.0,50,192,199,0.0,0.0,3162514,...,0.0,0.0,1.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0
4,4,1,61.3,10.0,50,180,187,0.0,0.0,3161702,...,0.0,0.0,1.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
280567,281054,1,42.5,29.0,0,1734,1741,0.0,817.0,472,...,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0
280586,281073,1,53.3,1280.0,1,1882,1889,0.0,43.0,276,...,0.0,0.0,1.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0
280594,281081,0,24.6,4.0,10,1472,1479,0.0,30.0,59,...,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0
280638,281125,1,52.5,16.0,5,1455,1462,0.0,105.0,14,...,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0


In [18]:
data.drop("Unnamed: 0", axis = 1, inplace = True)
data

Unnamed: 0,orderType,FibRetrPct,distToClosestOrder,openSpread,slPoints,adjSLPoints,ratioMaxProfitToAdjSL,minAfterLPEvent,minBeforeLPEvent,minAfterMPEvent,...,ClrdPivots:D1_2,ClrdPivots:D1_3,ClrdPivots:W1_0,ClrdPivots:W1_1,ClrdPivots:W1_2,setTimedayofweek_0,setTimedayofweek_1,setTimedayofweek_2,setTimedayofweek_3,setTimedayofweek_4
0,0,87.5,10.0,50,90,97,0.0,0.0,3162525,0.0,...,0.0,0.0,1.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0
1,0,87.5,10.0,50,90,97,0.0,0.0,3162525,0.0,...,0.0,0.0,1.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0
2,0,87.5,10.0,50,90,97,0.0,0.0,3162525,0.0,...,0.0,0.0,1.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0
3,0,90.3,162.0,50,192,199,0.0,0.0,3162514,0.0,...,0.0,0.0,1.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0
4,1,61.3,10.0,50,180,187,0.0,0.0,3161702,0.0,...,0.0,0.0,1.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
281167,0,37.4,27.0,50,353,360,1.0,0.0,2890454,0.0,...,0.0,0.0,1.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0
281168,0,37.4,27.0,50,353,360,1.0,0.0,2890454,0.0,...,0.0,0.0,1.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0
281169,0,37.4,27.0,50,393,400,1.0,0.0,2890454,0.0,...,0.0,0.0,1.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0
281170,0,37.4,27.0,50,403,410,1.0,0.0,2890454,0.0,...,0.0,0.0,1.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0


In [19]:
X = data.loc[:,data.columns!='ratioMaxProfitToAdjSL'].values
Y = data.loc[:, 'ratioMaxProfitToAdjSL'].values

In [20]:
train_X, temp_X, train_y, temp_y = train_test_split(X, Y, train_size=0.75, random_state=0)
val_X, test_X, val_y, test_y = train_test_split(temp_X, temp_y, train_size=0.5, random_state=0)

In [21]:
train_X.shape

(210879, 467)

In [22]:
train_X=np.asarray(train_X).astype(int)
train_y=np.asarray(train_y).astype(int)
test_X=np.asarray(test_X).astype(int)
test_y=np.asarray(test_y).astype(int)
val_X=np.asarray(val_X).astype(int)
val_y=np.asarray(val_y).astype(int)

In [23]:
activation = 'sigmoid'

model = tf.keras.Sequential()
model.add(tf.keras.layers.Dense(1, input_shape=(train_X.shape[1],), activation=activation))
model.add(tf.keras.layers.Dense(1, input_shape=(8,), activation=activation))


In [24]:
model.compile(loss=BinaryCrossentropy(), metrics=['accuracy'], optimizer=Adagrad())
model.summary()

Model: "sequential"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 dense (Dense)               (None, 1)                 468       
                                                                 
 dense_1 (Dense)             (None, 1)                 2         
                                                                 
Total params: 470
Trainable params: 470
Non-trainable params: 0
_________________________________________________________________


In [25]:
hist = model.fit(train_X, train_y, validation_data=(val_X, val_y), batch_size=16, epochs=20)

Epoch 1/20
Epoch 2/20
Epoch 3/20
Epoch 4/20
Epoch 5/20
Epoch 6/20
Epoch 7/20
Epoch 8/20
Epoch 9/20
Epoch 10/20
Epoch 11/20
Epoch 12/20
Epoch 13/20
Epoch 14/20
Epoch 15/20
Epoch 16/20
Epoch 17/20
Epoch 18/20
Epoch 19/20
Epoch 20/20


In [28]:
score, accuracy = model.evaluate(test_X, test_y, batch_size=16, verbose=0)
print(score)
print(accuracy)

0.22616086900234222
0.950607419013977
