## Necessary Imports

In [1]:
%matplotlib qt

import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import time
import glob
import pickle

from sklearn import tree
from sklearn import svm
from sklearn.linear_model import LogisticRegression, SGDClassifier
from sklearn.preprocessing import normalize
from sklearn.ensemble import RandomForestClassifier
from sklearn.externals import joblib
from sklearn.preprocessing import StandardScaler

from plotly.offline import download_plotlyjs, init_notebook_mode, plot, iplot
import plotly.graph_objs as go
init_notebook_mode(connected=True)

In [2]:
from mlmodel.analysis import Analyser
from mlmodel.cross_validation import Purged_validation, HyperParameterTuning
from mlmodel.validation import Validation

## Object creation

In [3]:
from datagen import DataGen
dg=DataGen()

from visualize.visualize import Visualizer
vl=Visualizer()

from mlmodel.performanceMetrics import Metrics
met_ob = Metrics()

#model selection
from mlmodel.split import Split
from mlmodel.performanceMetrics import Metrics
from mlmodel.mlclassfier import MLClassifier
from mlmodel.sequential_bootstrap import sequentialBootstrap
from mlmodel.analysis import Analyser
from mlmodel.validation import Validation

# For ML
split_ob = Split()
metrics_ob = Metrics()
model_ob = MLClassifier()
sb_ob = sequentialBootstrap()
an_ob = Analyser()
val_ob = Validation()

val_ob = Validation()
hpt_ob = HyperParameterTuning()
pv_ob = Purged_validation()

Using TensorFlow backend.


## Inputs

In [4]:
folder_name='data/historical_price_data/BTCUSDT'
bar_type='time'             #type of bars possible_values: dollar,time,ticks,volume
threshold=300               #threshold for the given type of bar

#normalization
before=True                #flag that denotes normalizing before/after bars creation
normalize=True             #flag that specifies whether normalization should be done
norm_method='multiply'     #method for nomalization include 'multiply','min_max'
norm_val=100               #threshold for the above mentioned method


# Labels
volatility_threshold=20    #threshold in bars for volatility which is standard deviation of returns
sampling=False             #flag to control downsampling
v_bars_duration=20           #threshold in bars for vertical_bars which denotes a dataframe in triple-barrier method
barrier_conf=[2,4]          #stop loss and profit taking limits [0]denotes stop loss and [1]denotes profit taking
min_return=0                #minimum values for return in triple-barrier method
risk=0                      #risk for calculating sharp_ration
sign_label=True             #flag to determine labels of vertical bars t1b

# Features
sma_period = [10, 20] # [10, 15, 20]
ema_period = [10, 20] # [10, 15, 20]
BB_period  = [15]
rsi_period = [15]
williamsr_period = [15]
roc_period = [15]
adl_period = [15]
vpt_period = [0] # 0:  period is not required
emv_period = [0] # 0:  period is not required

feature_list = ['sma',      'ema',    'BB',       'rsi',     'williamsr',        'roc', 
                'adl',     'vpt',   'emv']   #feature list 
period_all =[sma_period, ema_period, BB_period, rsi_period, williamsr_period, roc_period, 
             adl_period, vpt_period, emv_period ]  # feature list period (change this if feature_list_changed)



## Data Generator

In [5]:
raw_data,labels,labels_features,train,test=dg.create_data(folder_name,feature_list,period_all,before,normalize,norm_val,norm_method,bar_type,threshold,sampling,volatility_threshold,v_bars_duration,
                            barrier_conf,min_return,risk,sign_label)

Labels:  -1.0    28143
 1.0    16019
 0.0    12291
Name: label, dtype: int64


In [None]:
#vl.marker_plot(labels)

In [6]:
ml_normalize = True

if ml_normalize:
    # Normalize Data
    mean_df = labels_features.iloc[:, :-2].mean()
    std_df = labels_features.iloc[:, :-2].std()

    train_X_unNom,train_y = dg.preprocess(train)
    test_X_unNom ,test_y  = dg.preprocess(test)
    
    train_X = pd.DataFrame((train_X_unNom-mean_df)/std_df)
    test_X  = pd.DataFrame((test_X_unNom -mean_df)/std_df)
    train_cl, test_cl = train_X['close'],test_X['close']
else:
    # Un-Normalized train_X, test_X
    train_X,train_y = dg.preprocess(train)
    test_X ,test_y  = dg.preprocess(test)

In [7]:
_,sr,_,_=met_ob.sharpe_ratio(labels_features)
_,train_sr,_,_=met_ob.sharpe_ratio(train)
_,test_sr,_,_=met_ob.sharpe_ratio(test)

print("Train Sharpe Ratio :",train_sr)
print("Test Sharpe Ratio :",test_sr)

std =  0.0048121036644998595
std =  0.004605328050648633
std =  0.005261909774670239
Train Sharpe Ratio : 0.24374659930821482
Test Sharpe Ratio : 0.2397809183742253


In [8]:
print("Total Sharpe Ratio :",sr)

Total Sharpe Ratio : 0.24196861695468128


In [12]:
X=pd.concat([train_X, test_X], ignore_index=True)
y=pd.concat([train_y, test_y], ignore_index=True)

In [18]:
df=X.copy(deep=True)
df['label']=y

In [19]:
df.head()

Unnamed: 0,close,sma_10,sma_20,ema_10,ema_20,BB_15,rsi_15,williamsr_15,roc_15,adl_15,vpt_0,emv_0,label
0,-1.063775,-1.058465,-1.062891,-1.059808,-1.062179,-1.060097,1.521476,-1.645563,0.376723,-0.324739,-0.004463,-0.028596,-1.0
1,-1.063775,-1.059012,-1.062523,-1.060549,-1.062348,-1.061815,1.297496,-1.645563,-0.085834,-0.324581,-0.004312,-0.029496,1.0
2,-1.06238,-1.059302,-1.062085,-1.060901,-1.062368,-1.061822,0.248142,-0.387169,-0.060727,-0.324403,-0.004312,-0.029496,1.0
3,-1.057882,-1.059031,-1.061423,-1.060372,-1.061957,-1.061615,-0.172879,-1.095989,0.051178,-0.324225,-0.004311,2.659457,0.0
4,-1.057882,-1.05936,-1.06076,-1.059938,-1.061586,-1.061479,0.031677,0.28672,0.287521,-0.3244,-0.004312,2.659925,-1.0


In [60]:
for i in df.columns[:-1]:
    for j in df.columns[:-1]:
        if i != j:
            print(i,j)

close sma_10
close sma_20
close ema_10
close ema_20
close BB_15
close rsi_15
close williamsr_15
close roc_15
close adl_15
close vpt_0
close emv_0
sma_10 close
sma_10 sma_20
sma_10 ema_10
sma_10 ema_20
sma_10 BB_15
sma_10 rsi_15
sma_10 williamsr_15
sma_10 roc_15
sma_10 adl_15
sma_10 vpt_0
sma_10 emv_0
sma_20 close
sma_20 sma_10
sma_20 ema_10
sma_20 ema_20
sma_20 BB_15
sma_20 rsi_15
sma_20 williamsr_15
sma_20 roc_15
sma_20 adl_15
sma_20 vpt_0
sma_20 emv_0
ema_10 close
ema_10 sma_10
ema_10 sma_20
ema_10 ema_20
ema_10 BB_15
ema_10 rsi_15
ema_10 williamsr_15
ema_10 roc_15
ema_10 adl_15
ema_10 vpt_0
ema_10 emv_0
ema_20 close
ema_20 sma_10
ema_20 sma_20
ema_20 ema_10
ema_20 BB_15
ema_20 rsi_15
ema_20 williamsr_15
ema_20 roc_15
ema_20 adl_15
ema_20 vpt_0
ema_20 emv_0
BB_15 close
BB_15 sma_10
BB_15 sma_20
BB_15 ema_10
BB_15 ema_20
BB_15 rsi_15
BB_15 williamsr_15
BB_15 roc_15
BB_15 adl_15
BB_15 vpt_0
BB_15 emv_0
rsi_15 close
rsi_15 sma_10
rsi_15 sma_20
rsi_15 ema_10
rsi_15 ema_20
rsi_15 BB_15
rs

In [61]:
buy=df[df.label==1.0].index
hold=df[df.label==0.0].index
sell=df[df.label==-1.0].index

e_1='close'
e_2='sma_10'
plot_name='plots/'+e_1+'_vs_'+e_2+'.png' 
plt.scatter(df[e_1].ix[buy],df[e_2].ix[buy],color='g',label='buy')
plt.scatter(df[e_1].ix[hold],df[e_2].ix[hold],color='b',label='hold')
plt.scatter(df[e_1].ix[sell],df[e_2].ix[sell],color='r',label='sell')
plt.legend()
plt.savefig(plot_name)



.ix is deprecated. Please use
.loc for label based indexing or
.iloc for positional indexing

See the documentation here:
http://pandas.pydata.org/pandas-docs/stable/indexing.html#ix-indexer-is-deprecated



.ix is deprecated. Please use
.loc for label based indexing or
.iloc for positional indexing

See the documentation here:
http://pandas.pydata.org/pandas-docs/stable/indexing.html#ix-indexer-is-deprecated



.ix is deprecated. Please use
.loc for label based indexing or
.iloc for positional indexing

See the documentation here:
http://pandas.pydata.org/pandas-docs/stable/indexing.html#ix-indexer-is-deprecated

