## Necessary Imports

In [1]:
%matplotlib qt

import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import time
import glob
import pickle

from sklearn import tree
from sklearn import svm
from sklearn.linear_model import LogisticRegression, SGDClassifier
from sklearn.preprocessing import normalize
from sklearn.ensemble import RandomForestClassifier


from plotly.offline import download_plotlyjs, init_notebook_mode, plot, iplot
import plotly.graph_objs as go
init_notebook_mode(connected=True)

## Object creation

In [2]:
from datagen import DataGen
test=DataGen()

from visualize.visualize import Visualizer
vl=Visualizer()

from mlmodel.performanceMetrics import Metrics
met_ob = Metrics()

#model selection
from mlmodel.split import Split
from mlmodel.performanceMetrics import Metrics
from mlmodel.mlclassfier import MLClassifier
from mlmodel.sequential_bootstrap import sequentialBootstrap
from mlmodel.analysis import Analyser
from mlmodel.validation import Validation

# For ML
split_ob = Split()
metrics_ob = Metrics()
model_ob = MLClassifier()
sb_ob = sequentialBootstrap()
an_ob = Analyser()
val_ob = Validation()

Using TensorFlow backend.


## Inputs

In [3]:
folder_name='data/historical_price_data/ADABTC'
bar_type='time'             #type of bars possible_values: dollar,time,ticks,volume
threshold=300               #threshold for the given type of bar

#normalization
before=True                #flag that denotes normalizing before/after bars creation
normalize=True             #flag that specifies whether normalization should be done
norm_method='multiply'     #method for nomalization include 'multiply','min_max'
norm_val=100               #threshold for the above mentioned method


# Labels
volatility_threshold=20    #threshold in bars for volatility which is standard deviation of returns
sampling=False             #flag to control downsampling
v_bars_duration=20           #threshold in bars for vertical_bars which denotes a dataframe in triple-barrier method
barrier_conf=[2,4]          #stop loss and profit taking limits [0]denotes stop loss and [1]denotes profit taking
min_return=0                #minimum values for return in triple-barrier method
risk=0                      #risk for calculating sharp_ration
sign_label=True             #flag to determine labels of vertical bars t1b

# Features
sma_period = [10, 20] # [10, 15, 20]
ema_period = [10, 20] # [10, 15, 20]
BB_period  = [15]
rsi_period = [15]
williamsr_period = [15]
roc_period = [15]
adl_period = [15]
vpt_period = [0] # 0:  period is not required
emv_period = [0] # 0:  period is not required

feature_list = ['sma',      'ema',    'BB',       'rsi',     'williamsr',        'roc', 
                'adl',     'vpt',   'emv']   #feature list 
period_all =[sma_period, ema_period, BB_period, rsi_period, williamsr_period, roc_period, 
             adl_period, vpt_period, emv_period ]  # feature list period (change this if feature_list_changed)



## Data Generator

In [4]:
raw_data,labels,labels_features=test.create_data(folder_name,feature_list,period_all,before,normalize,norm_val,norm_method,bar_type,threshold,sampling,volatility_threshold,v_bars_duration,
                            barrier_conf,min_return,risk,sign_label)

Labels:  -1.0    29387
 1.0    15168
 0.0     9389
Name: label, dtype: int64


In [5]:
vl.marker_plot(labels)

(<matplotlib.axes._subplots.AxesSubplot at 0x7f6145923438>,
 <matplotlib.axes._subplots.AxesSubplot at 0x7f6145ced7b8>,
 <matplotlib.axes._subplots.AxesSubplot at 0x7f6145d91be0>,
 <matplotlib.axes._subplots.AxesSubplot at 0x7f6145975048>)

In [6]:
df=labels_features
df = df.dropna()
df.head()

Unnamed: 0,Close,sma_10,sma_20,ema_10,ema_20,BB_15,rsi_15,williamsr_15,roc_15,adl_15,vpt_0,emv_0,label
0,35.225378,35.050326,47.394563,40.36601,70.802651,36.611685,25.995316,-91.666667,-5.674342,-2830.288721,1.338067,-1230.599991,0.0
1,35.286799,35.010402,35.492562,39.442517,67.420189,35.836215,28.908795,-82.258065,1.952085,-13608.212277,0.324658,-636.689638,0.0
2,35.870306,35.108676,35.535558,38.793024,64.415438,35.972352,38.905181,-58.870968,1.476977,-3531.642879,8.091334,-372.756434,0.0
3,36.760922,35.222307,35.561662,38.423551,61.781675,36.413014,55.849057,-51.612903,4.450262,-12249.044671,19.691735,-353.532492,0.0
4,37.03732,35.48642,35.552449,38.171509,59.425069,36.853999,57.29927,-40.322581,4.778454,-8053.932527,3.781199,-299.066815,0.0


## ML

In [7]:
X = df.iloc[:, :-1]
y = df.iloc[:, -1]
train_X, train_y, test_X, test_y = split_ob.train_test_split(X, y, 0.7) # split training-testing data

In [68]:
from keras.models import Sequential
from keras.layers.core import Dense, Dropout, Activation
from keras.layers.recurrent import LSTM
from keras.layers import Bidirectional,Convolution1D
from keras.utils.np_utils import to_categorical
import time

In [69]:
X_=train_X.values
y_=train_y.values

In [70]:
y_=to_categorical(y_,num_classes=3)

In [71]:
print(X_.shape)
print(y_.shape)

(37748, 12)
(37748, 3)


In [44]:
print(X_.shape)
print(y_.shape)

(37748, 1, 12)
(37748, 1, 3)


In [63]:
model = Sequential()
model.add(Convolution1D(input_shape = (WINDOW, EMB_SIZE),
                        nb_filter=16,
                        filter_length=4,
                        border_mode='same'))
model.add(MaxPooling1D(2))
model.add(LeakyReLU())
model.add(Convolution1D(nb_filter=32,
                        filter_length=4,
                        border_mode='same'))
model.add(MaxPooling1D(2))
model.add(LeakyReLU())
model.add(Flatten())
model.add(Dense(16))
model.add(LeakyReLU())
model.add(Dense(1))
model.add(Activation('linear'))
opt = Nadam(lr=0.002)
reduce_lr = ReduceLROnPlateau(monitor='val_loss', factor=0.9, patience=25, min_lr=0.000001, verbose=1)
checkpointer = ModelCheckpoint(filepath="lolkekr.hdf5", verbose=1, save_best_only=True)
model.compile(optimizer=opt, 
              loss='mae')
history = model.fit(X_train, Y_train, 
          nb_epoch = 100, 
          batch_size = 128, 
          verbose=1, 
          validation_data=(X_test, Y_test),
          callbacks=[reduce_lr, checkpointer],
          shuffle=True)


NameError: name 'Convolution1D' is not defined

_________________________________________________________________
Layer (type)                 Output Shape              Param #   
bidirectional_11 (Bidirectio (None, 1, 12)             2400      
_________________________________________________________________
dense_18 (Dense)             (None, 1, 3)              39        
Total params: 2,439
Trainable params: 2,439
Non-trainable params: 0
_________________________________________________________________



The `nb_epoch` argument in `fit` has been renamed `epochs`.



Epoch 1/250
Epoch 2/250
Epoch 3/250
Epoch 4/250
Epoch 5/250
Epoch 6/250
Epoch 7/250
Epoch 8/250
Epoch 9/250
Epoch 10/250
Epoch 11/250

KeyboardInterrupt: 

In [18]:
# create model
model = Sequential()
model.add(Dense(24, input_dim=12, activation='relu'))
model.add(Dense(12, activation='relu'))
model.add(Dense(6, activation='relu'))
model.add(Dense(3, activation='softmax'))
model.compile(loss='categorical_crossentropy', optimizer='adam', metrics=['accuracy'])
model.summary()

_________________________________________________________________
Layer (type)                 Output Shape              Param #   
dense_4 (Dense)              (None, 24)                312       
_________________________________________________________________
dense_5 (Dense)              (None, 12)                300       
_________________________________________________________________
dense_6 (Dense)              (None, 6)                 78        
_________________________________________________________________
dense_7 (Dense)              (None, 3)                 21        
Total params: 711
Trainable params: 711
Non-trainable params: 0
_________________________________________________________________


In [19]:
model.fit(X_, y_, epochs=150, batch_size=10)

Epoch 1/150
Epoch 2/150
Epoch 3/150
Epoch 4/150
 7810/37748 [=====>........................] - ETA: 2s - loss: 12.5020 - acc: 0.1924

KeyboardInterrupt: 