## Necessary Imports

In [32]:
%matplotlib qt

import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import time
import glob
import pickle

from sklearn import tree
from sklearn import svm
from sklearn.linear_model import LogisticRegression, SGDClassifier
from sklearn.preprocessing import normalize
from sklearn.ensemble import RandomForestClassifier
from sklearn.externals import joblib
from sklearn.preprocessing import StandardScaler

from plotly.offline import download_plotlyjs, init_notebook_mode, plot, iplot
import plotly.graph_objs as go
init_notebook_mode(connected=True)

In [33]:
from mlmodel.analysis import Analyser
from mlmodel.cross_validation import Purged_validation, HyperParameterTuning
from mlmodel.validation import Validation

## Object creation

In [34]:
from datagen import DataGen
dg=DataGen()

from visualize.visualize import Visualizer
vl=Visualizer()

from mlmodel.performanceMetrics import Metrics
met_ob = Metrics()

#model selection
from mlmodel.split import Split
from mlmodel.performanceMetrics import Metrics
from mlmodel.mlclassfier import MLClassifier
from mlmodel.sequential_bootstrap import sequentialBootstrap
from mlmodel.analysis import Analyser
from mlmodel.validation import Validation

# For ML
split_ob = Split()
metrics_ob = Metrics()
model_ob = MLClassifier()
sb_ob = sequentialBootstrap()
an_ob = Analyser()
val_ob = Validation()

val_ob = Validation()
hpt_ob = HyperParameterTuning()
pv_ob = Purged_validation()

## Inputs

In [9]:
folder_name='data/historical_price_data/BTCUSDT'
bar_type='time'             #type of bars possible_values: dollar,time,ticks,volume
threshold=300               #threshold for the given type of bar

#normalization
before=True                #flag that denotes normalizing before/after bars creation
normalize=True             #flag that specifies whether normalization should be done
norm_method='multiply'     #method for nomalization include 'multiply','min_max'
norm_val=100               #threshold for the above mentioned method


# Labels
volatility_threshold=20    #threshold in bars for volatility which is standard deviation of returns
sampling=False             #flag to control downsampling
v_bars_duration=20           #threshold in bars for vertical_bars which denotes a dataframe in triple-barrier method
barrier_conf=[2,4]          #stop loss and profit taking limits [0]denotes stop loss and [1]denotes profit taking
min_return=0                #minimum values for return in triple-barrier method
risk=0                      #risk for calculating sharp_ration
sign_label=True             #flag to determine labels of vertical bars t1b

# Features
sma_period = [10, 20] # [10, 15, 20]
ema_period = [10, 20] # [10, 15, 20]
BB_period  = [15]
rsi_period = [15]
williamsr_period = [15]
roc_period = [15]
adl_period = [15]
vpt_period = [0] # 0:  period is not required
emv_period = [0] # 0:  period is not required

feature_list = ['sma',      'ema',    'BB',       'rsi',     'williamsr',        'roc', 
                'adl',     'vpt',   'emv']   #feature list 
period_all =[sma_period, ema_period, BB_period, rsi_period, williamsr_period, roc_period, 
             adl_period, vpt_period, emv_period ]  # feature list period (change this if feature_list_changed)



## Data Generator

In [10]:
raw_data,labels,labels_features,train,test=dg.create_data(folder_name,feature_list,period_all,before,normalize,norm_val,norm_method,bar_type,threshold,sampling,volatility_threshold,v_bars_duration,
                            barrier_conf,min_return,risk,sign_label)

Labels:  -1.0    28143
 1.0    16019
 0.0    12291
Name: label, dtype: int64


In [7]:
#vl.marker_plot(labels)

In [11]:
train_X,train_y=dg.preprocess(train)
test_X,test_y=dg.preprocess(test)

In [12]:
_,sr,_,_=met_ob.sharpe_ratio(labels_features)
_,train_sr,_,_=met_ob.sharpe_ratio(train)
_,test_sr,_,_=met_ob.sharpe_ratio(test)

print("Train Sharpe Ratio :",train_sr)
print("Test Sharpe Ratio :",test_sr)

std =  0.004812103664500019
std =  0.004605328050648738
std =  0.00526190977467035
Train Sharpe Ratio : 0.24374659930820866
Test Sharpe Ratio : 0.23978091837422003


In [13]:
print("Total Sharpe Ratio :",sr)

Total Sharpe Ratio : 0.24196861695467414


In [14]:
def find_defective(temp_df,df):
    
    labels=temp_df.label
    
    pos_labs=labels[df.label==1.0]
    pos_labs[pos_labs==-1.0]=-2.0
    pos_labs[pos_labs==0.0]=10.0
    labels[pos_labs.index]=pos_labs
    
    neg_labs=labels[df.label==-1.0]
    neg_labs[neg_labs==1.0]=2.0
    neg_labs[neg_labs==0.0]=10.0
    labels[neg_labs.index]=neg_labs
    
    zero_labs=labels[df.label==0.0]
    zero_labs[zero_labs==1.0]=2.0
    zero_labs[zero_labs==-1.0]=-2.0
    labels[zero_labs.index]=zero_labs
    
    temp_df['label']=labels
    
    return temp_df

def calc_sharp_ratio(clf,df):
    preds=clf.predict(df)
    temp_df = df.copy(deep=True)
    temp_df['label']=preds
    _,sr,_,_=met_ob.sharpe_ratio(temp_df)
    
    return sr,temp_df

## ML

In [None]:
filename = 'svm_no_balance.sav'
try: 
    clf = joblib.load(filename)
except (OSError, IOError) as e:
    print(e)
    clf = svm.SVC(kernel='rbf')
    clf.fit(train_X, train_y,)
    joblib.dump(clf,filename)
    
acc_train = clf.score(train_X, train_y)
acc_test = clf.score(test_X, test_y)

sr,temp_df=calc_sharp_ratio(clf,test_X)

print( ' train: ',  acc_train, ' test: ',  acc_test, 's_r: ',sr)


In [None]:
sr,temp_df=calc_sharp_ratio(clf,test_X)
temp_df=find_defective(temp_df,test)
vl.compare_labels(test,temp_df,True)
print("Test Sharp Ratio:",sr)

In [14]:
filename = 'svm_balanced_classes.sav'
try: 
    clf = joblib.load(filename)
except (OSError, IOError) as e:
    
    print(e)
    clf = svm.SVC(kernel='rbf',class_weight='balanced', C=1.0, random_state=0)
    clf.fit(train_X, train_y)
    joblib.dump(clf,filename)
    
acc_train = clf.score(train_X, train_y)
acc_test = clf.score(test_X, test_y)

sr,temp_df=calc_sharp_ratio(clf,test_X)

print( ' train: ',  acc_train, ' test: ',  acc_test, 's_r: ',sr)


 train:  1.0  test:  0.5344634103124446 s_r:  0.004933737960265892


In [15]:
sr,temp_df=calc_sharp_ratio(clf,test_X)
temp_df=find_defective(temp_df,test)
vl.compare_labels(test,temp_df,True)
print("Test Sharp Ratio:",sr)

Test Sharp Ratio: 0.004933737960265892


In [19]:
scaler = StandardScaler()
X_train = pd.DataFrame(scaler.fit_transform(train_X),columns=train_X.columns)
X_test = pd.DataFrame(scaler.fit_transform(test_X),columns=test_X.columns)

X_train['label']=train.label
train_scaled=X_train.copy()
del X_train['label']

X_test['label']=test.label
test_scaled=X_test.copy()
del X_test['label']

In [37]:
X_train

Unnamed: 0,close,sma_10,sma_20,ema_10,ema_20,BB_15,rsi_15,williamsr_15,roc_15,adl_15,vpt_0,emv_0
0,-0.803359,-0.798308,-0.802303,-0.799532,-0.801596,-0.799861,1.511174,-1.585620,0.390466,-0.400328,-0.012513,-0.032132
1,-0.803359,-0.798816,-0.801961,-0.800221,-0.801753,-0.801457,1.285101,-1.585620,-0.111914,-0.399700,-0.000730,-0.032885
2,-0.802062,-0.799086,-0.801554,-0.800548,-0.801772,-0.801463,0.225949,-0.353700,-0.084645,-0.398998,-0.000730,-0.032885
3,-0.797883,-0.798834,-0.800938,-0.800056,-0.801391,-0.801270,-0.199004,-1.047608,0.036893,-0.398290,-0.000672,2.217117
4,-0.797883,-0.799140,-0.800323,-0.799653,-0.801045,-0.801144,0.007462,0.306012,0.293585,-0.398985,-0.000730,2.217509
5,-0.794460,-0.798545,-0.799591,-0.798701,-0.800407,-0.800531,0.138540,-0.770012,0.198422,-0.399614,-0.000598,2.369777
6,-0.797181,-0.798582,-0.798939,-0.798417,-0.800088,-0.800412,0.051629,0.279789,0.097861,-0.400158,-0.000774,2.369790
7,-0.794526,-0.798354,-0.798557,-0.797702,-0.799547,-0.799755,0.259620,0.279789,0.326329,-0.399673,-0.000478,2.838289
8,-0.794526,-0.798125,-0.798219,-0.797117,-0.799058,-0.799213,0.441258,-0.366809,-0.018884,-0.399278,-0.000730,2.837570
9,-0.798738,-0.798317,-0.798201,-0.797404,-0.799016,-0.799687,-0.054065,-0.400598,0.065587,-0.398908,-0.000730,2.838759


In [20]:
filename = 'svm_no_balance_scaled.sav'
try: 
    clf = joblib.load(filename)
except (OSError, IOError) as e:
    print(e)
    clf = svm.SVC(kernel='rbf')
    clf.fit(X_train, train_y,)
    joblib.dump(clf,filename)
    
acc_train = clf.score(X_train, train_y)
acc_test = clf.score(X_test, test_y)

sr,temp_df=calc_sharp_ratio(clf,X_test)

print( ' train: ',  acc_train, ' test: ',  acc_test, 's_r: ',sr)


KeyboardInterrupt: 

In [18]:
sr,temp_df=calc_sharp_ratio(clf,X_test)
temp_df=find_defective(temp_df,test_scaled)
vl.compare_labels(test_scaled,temp_df,True)
print("Test Sharp Ratio:",sr)

Test Sharp Ratio: 0.004816968433623166


In [19]:
filename = 'svm_balanced_classes_scaled.sav'
try: 
    clf = joblib.load(filename)
except (OSError, IOError) as e:
    print(e)
    clf = svm.SVC(kernel='rbf',class_weight='balanced', C=1.0, random_state=0)
    clf.fit(X_train, train_y)
    joblib.dump(clf,filename)
    
acc_train = clf.score(X_train, train_y)
acc_test = clf.score(X_test, test_y)

sr,temp_df=calc_sharp_ratio(clf,X_test)

print( ' train: ',  acc_train, ' test: ',  acc_test, 's_r: ',sr)


 train:  0.6264935196435804  test:  0.5762801960900124 s_r:  0.0037417165950409613


In [20]:
sr,temp_df=calc_sharp_ratio(clf,X_test)
temp_df=find_defective(temp_df,test_scaled)
vl.compare_labels(test_scaled,temp_df,True)
print("Test Sharp Ratio:",sr)

Test Sharp Ratio: 0.0037417165950409613


In [21]:
temp_df.label.value_counts()

-1.0     6144
 10.0    3979
 1.0     2361
 2.0     2267
 0.0     1252
-2.0      928
Name: label, dtype: int64

In [22]:
test_scaled.label.value_counts()

-1.0    9049
 1.0    4755
 0.0    3127
Name: label, dtype: int64

In [None]:
X=pd.concat([X_train,X_test]).reset_index()
del X['index']
y=pd.concat([train_y,test_y]).reset_index()
del y['index']

In [22]:
metric = 'f1_micro'    
#an_ob.check_overfitting(clf, X, y, scoring=metric)

In [None]:
an_frame=X.copy()
an_frame['label']=y
an_frame['label_pred']=clf.predict(X)

In [None]:
an_ob.check_dataset_correlation(an_frame)

In [23]:
clf = RandomForestClassifier(n_estimators=1, bootstrap=False,class_weight='balanced_subsample')
an_ob.check_overfitting(clf, X_train, train_y, scoring=metric)

The model is overfitting the training set


In [28]:
tree_depth = 10
num_estimator = [10, 20, 50, 100, 200, 300]
avgU = 1.

for i in num_estimator:
    clf = tree.DecisionTreeClassifier(max_depth = tree_depth)
    clf = RandomForestClassifier(n_estimators=i, criterion='entropy', max_depth = tree_depth,  max_features=None) 
    clf_RF = clf.fit(X_train, train_y)
    acc_train_RF = clf_RF.score(X_train, train_y)
    acc_test_RF = clf_RF.score(X_test, test_y)
    
    sr,temp_df=calc_sharp_ratio(clf,X)
    
    print(i, ' train: ',  acc_train_RF, ' test: ',  acc_test_RF,' sharp_ratio:',sr)

10  train:  0.7134214256784123  test:  0.6454432697418936  sharp_ratio: 0.004216038512927852
20  train:  0.7162312677197246  test:  0.6451479534581537  sharp_ratio: 0.004216769750532545
50  train:  0.716054070473876  test:  0.6615675388340914  sharp_ratio: 0.004161099603382443
100  train:  0.7179019846091536  test:  0.6519402279841711  sharp_ratio: 0.004216029674898771
200  train:  0.7188892264074525  test:  0.6581418699427086  sharp_ratio: 0.004214580341479044
300  train:  0.7176994734710409  test:  0.65459807453783  sharp_ratio: 0.0042180087820229365


In [29]:
print("Features sorted by their score:")
print(sorted(zip(map(lambda x: round(x, 4), clf_RF.feature_importances_), X.columns), 
             reverse=True))

Features sorted by their score:
[(0.6167, 'williamsr_15'), (0.0607, 'adl_15'), (0.0605, 'vpt_0'), (0.0488, 'rsi_15'), (0.0486, 'emv_0'), (0.0363, 'roc_15'), (0.0312, 'close'), (0.0285, 'BB_15'), (0.0189, 'sma_20'), (0.0184, 'ema_20'), (0.0169, 'sma_10'), (0.0146, 'ema_10')]


In [32]:
an_frame=X.copy()
an_frame['label']=y
an_frame['label_pred']=clf_RF.predict(X)

In [36]:
an_ob.check_dataset_correlation(an_frame)

In [31]:
sr,temp_df=calc_sharp_ratio(clf_RF,X_test)
temp_df=find_defective(temp_df,test_scaled)
vl.compare_labels(test_scaled,temp_df,True)
print("Test Sharp Ratio:",sr)

Test Sharp Ratio: 0.003343365908095097


In [24]:
X_rf_train=X_train[['close','sma_10','sma_20','ema_10','ema_20','BB_15']].copy()
X_rf_test=X_test[['close','sma_10','sma_20','ema_10','ema_20','BB_15']].copy()

In [26]:
clf = tree.DecisionTreeClassifier(max_depth = 10)
clf = RandomForestClassifier(n_estimators=200, criterion='entropy', max_depth = 10,  max_features=None) 
clf_RF = clf.fit(X_rf_train, train_y)
acc_train_RF = clf_RF.score(X_rf_train, train_y)
acc_test_RF = clf_RF.score(X_rf_test, test_y)

sr,temp_df=calc_sharp_ratio(clf,X_rf_train)

print( ' train: ',  acc_train_RF, ' test: ',  acc_test_RF,' sharp_ratio:',sr)

std =  180.36351756318416
 train:  0.574448157149  test:  0.437068099935  sharp_ratio: 0.004943361391847213


In [42]:
X_rf_train=X_train[['close','williamsr_15','rsi_15']].copy()
X_rf_test=X_test[['close','williamsr_15','rsi_15']].copy()

In [45]:
clf = tree.DecisionTreeClassifier(max_depth = tree_depth)
clf = RandomForestClassifier(n_estimators=100, criterion='entropy', max_depth = tree_depth,  max_features=None) 
clf_RF = clf.fit(X_rf_train, train_y)
acc_train_RF = clf_RF.score(X_rf_train, train_y)
acc_test_RF = clf_RF.score(X_rf_test, test_y)

sr,temp_df=calc_sharp_ratio(clf,X_rf_train)

print(i, ' train: ',  acc_train_RF, ' test: ',  acc_test_RF,' sharp_ratio:',sr)

300  train:  0.6826903604698258  test:  0.6805268442501919  sharp_ratio: 0.00502461436987799


In [46]:
X_rf_train=X_train[['close','williamsr_15','rsi_15','adl_15','vpt_0']].copy()
X_rf_test=X_test[['close','williamsr_15','rsi_15','adl_15','vpt_0']].copy()

In [47]:
clf = tree.DecisionTreeClassifier(max_depth = tree_depth)
clf = RandomForestClassifier(n_estimators=100, criterion='entropy', max_depth = tree_depth,  max_features=None) 
clf_RF = clf.fit(X_rf_train, train_y)
acc_train_RF = clf_RF.score(X_rf_train, train_y)
acc_test_RF = clf_RF.score(X_rf_test, test_y)

sr,temp_df=calc_sharp_ratio(clf,X_rf_train)

print(i, ' train: ',  acc_train_RF, ' test: ',  acc_test_RF,' sharp_ratio:',sr)

300  train:  0.7076751721344674  test:  0.667414801252141  sharp_ratio: 0.005026369533196405


In [51]:
X_rf_train=X_train[['close','williamsr_15']].copy()
X_rf_test=X_test[['close','williamsr_15']].copy()

In [53]:
clf = tree.DecisionTreeClassifier(max_depth = tree_depth)
clf = RandomForestClassifier(n_estimators=100, criterion='entropy', max_depth = tree_depth,  max_features=None) 
clf_RF = clf.fit(X_rf_train, train_y)
acc_train_RF = clf_RF.score(X_rf_train, train_y)
acc_test_RF = clf_RF.score(X_rf_test, test_y)

sr,temp_df=calc_sharp_ratio(clf,X_rf_train)

print(i, ' train: ',  acc_train_RF, ' test: ',  acc_test_RF,' sharp_ratio:',sr)

300  train:  0.6775263264479546  test:  0.6768649223318174  sharp_ratio: 0.004949019632714875


In [56]:
from sklearn.ensemble import RandomForestClassifier
from sklearn.ensemble import BaggingClassifier

pctEmbargo=0.01
t1=labels_features['date']

rf = RandomForestClassifier(n_estimators=1, bootstrap=False,class_weight='balanced_subsample')
rf = BaggingClassifier(base_estimator=rf)
pv_ob.learning_curve(rf, X, y, t1=t1, cv=5, pctEmbargo=pctEmbargo)

TypeError: Cannot compare type 'Timestamp' with type 'int'

In [57]:
labels_features.head()

Unnamed: 0,close,sma_10,sma_20,ema_10,ema_20,BB_15,rsi_15,williamsr_15,roc_15,adl_15,vpt_0,emv_0,label,date
0,49.984049,50.237279,50.028577,50.176865,50.06802,50.544568,79.38564,-100.0,0.702108,22.501367,-0.04696,-545.854467,-1.0,2017-08-17 05:35:00
1,49.984049,50.211386,50.045985,50.141808,50.060023,50.461791,75.256945,-100.0,-0.122422,429.364805,0.0,-629.216694,1.0,2017-08-17 05:40:00
2,50.050091,50.197654,50.066694,50.125132,50.059077,50.461474,55.913938,-62.239207,-0.077668,884.84878,0.0,-629.216694,1.0,2017-08-17 05:45:00
3,50.263008,50.210478,50.098049,50.1502,50.078499,50.47147,48.153145,-83.508869,0.121808,1344.075855,0.000231,248301.056236,0.0,2017-08-17 05:50:00
4,50.263008,50.194905,50.129405,50.170711,50.096071,50.477996,51.923787,-42.017738,0.543102,893.12863,0.0,248344.433768,-1.0,2017-08-17 05:55:00


In [58]:
raw_data.head()

Unnamed: 0,DateStop,Open,High,Low,Close,Volume,Price,date
0,2017-08-17 04:04:00,49.635903,49.858139,49.635903,49.635903,15.098019,49.68035,2017-08-17 04:00:00
1,2017-08-17 04:09:00,49.635903,49.635903,49.635903,49.635903,0.0,49.635903,2017-08-17 04:05:00
2,2017-08-17 04:14:00,49.635903,49.635903,49.635903,49.635903,0.0,49.635903,2017-08-17 04:10:00
3,2017-08-17 04:19:00,49.635903,49.675505,49.635903,49.635903,3.342756,49.651744,2017-08-17 04:15:00
4,2017-08-17 04:24:00,49.675505,49.691928,49.675505,49.691928,16.060217,49.688643,2017-08-17 04:20:00


In [30]:
## Necessary Imports

# %matplotlib qt

import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import time
import glob
import pickle

from sklearn import tree
from sklearn import svm
from sklearn.linear_model import LogisticRegression, SGDClassifier
from sklearn.preprocessing import normalize
from sklearn.ensemble import RandomForestClassifier


from plotly.offline import download_plotlyjs, init_notebook_mode, plot, iplot
import plotly.graph_objs as go
init_notebook_mode(connected=True)


## Object creation

from datagen import DataGen
dg=DataGen()

from visualize.visualize import Visualizer
vl=Visualizer()

from mlmodel.performanceMetrics import Metrics
met_ob = Metrics()

#model selection
from mlmodel.split import Split
from mlmodel.performanceMetrics import Metrics
from mlmodel.mlclassfier import MLClassifier
from mlmodel.sequential_bootstrap import sequentialBootstrap
from mlmodel.analysis import Analyser
from mlmodel.validation import Validation

# For ML
split_ob = Split()
metrics_ob = Metrics()
model_ob = MLClassifier()
sb_ob = sequentialBootstrap()
an_ob = Analyser()
val_ob = Validation()

## Inputs

folder_name='data/historical_price_data/BTCUSDT'
bar_type='time'             #type of bars possible_values: dollar,time,ticks,volume
threshold=300               #threshold for the given type of bar

#normalization
before=True                #flag that denotes normalizing before/after bars creation
normalize=True             #flag that specifies whether normalization should be done
norm_method='multiply'     #method for nomalization include 'multiply','min_max'
norm_val=100               #threshold for the above mentioned method

#split
split=0.7 


# Labels
volatility_threshold=20    #threshold in bars for volatility which is standard deviation of returns
sampling=False             #flag to control downsampling
v_bars_duration=20           #threshold in bars for vertical_bars which denotes a dataframe in triple-barrier method
barrier_conf=[2,4]          #stop loss and profit taking limits [0]denotes stop loss and [1]denotes profit taking
min_return=0                #minimum values for return in triple-barrier method
risk=0                      #risk for calculating sharp_ration
sign_label=True             #flag to determine labels of vertical bars t1b

# Features
sma_period = [10,20,30] # [10, 15, 20]
ema_period = [10,30] # [10, 15, 20]
BB_period  = [15]
rsi_period = [15]
williamsr_period = [15]
roc_period = [15]
adl_period = [15]
vpt_period = [0] # 0:  period is not required
emv_period = [0] # 0:  period is not required"""

"""
# Features
sma_period = [] # [10, 15, 20]
ema_period = [] # [10, 15, 20]
BB_period  = []
rsi_period = []
williamsr_period = []
roc_period = []
adl_period = []
diff_ma_period = [10,15,20]

vpt_period = [] # 0:  period is not required
emv_period = [] # 0:  period is not required
"""

feature_list = ['sma',      'ema',    'BB',       'rsi',     'williamsr',        'roc', 
                'adl',     'vpt',   'emv', ]   #feature list 
period_all =[sma_period, ema_period, BB_period, rsi_period, williamsr_period, roc_period, 
             adl_period, vpt_period, emv_period ]  # feature list period (change this if feature_list_changed)



## Data Generator

raw_data,labels,labels_features,train,test=dg.create_data(folder_name,feature_list,period_all,before,normalize,norm_val,norm_method,bar_type,threshold,sampling,volatility_threshold,v_bars_duration,barrier_conf,min_return,risk,sign_label)

labels_features

[Errno 2] No such file or directory: "saved_runs/features_USDT_True_True_100_multiply_['sma', 'ema', 'BB', 'rsi', 'williamsr', 'roc', 'adl', 'vpt', 'emv']_[[10, 20, 30], [10, 30], [15], [15], [15], [15], [15], [0], [0]]"



invalid value encountered in double_scalars



Labels:  -1.0    28143
 1.0    16019
 0.0    12291
Name: label, dtype: int64


Unnamed: 0,close,sma_10,sma_20,sma_30,ema_10,ema_30,BB_15,rsi_15,williamsr_15,roc_15,adl_15,vpt_0,emv_0,label,date
0,50.212924,50.259665,50.248472,50.105607,50.272110,50.123474,50.552799,53.533637,-47.383022,-0.207870,1.150155e+03,-2.853703e-03,317055.927862,-1.0,2017-08-17 06:25:00
1,50.315539,50.292814,50.252100,50.128261,50.280006,50.135865,50.552553,44.930155,-0.000000,-0.003935,1.310858e+03,4.443870e-03,317230.335547,-1.0,2017-08-17 06:30:00
2,50.613950,50.349200,50.273427,50.160863,50.340723,50.166709,50.624701,49.912560,-0.000000,0.589120,1.468231e+03,4.961351e-02,317426.197829,-1.0,2017-08-17 06:35:00
3,50.613950,50.384294,50.297386,50.193464,50.390400,50.195564,50.685669,58.743536,-26.412819,0.589120,1.350033e+03,0.000000e+00,317426.197829,-1.0,2017-08-17 06:40:00
4,50.435392,50.401533,50.298219,50.220114,50.398581,50.211036,50.700078,60.584233,-31.751358,0.902975,1.205618e+03,-0.000000e+00,316191.639977,-1.0,2017-08-17 06:45:00
5,50.435392,50.401335,50.313250,50.244896,50.405274,50.225511,50.692827,60.380330,-59.155241,0.902975,1.205550e+03,0.000000e+00,316191.639977,-1.0,2017-08-17 06:50:00
6,50.368186,50.408277,50.315783,50.269317,50.398531,50.234716,50.661719,68.120008,-59.155241,0.635554,1.317945e+03,-5.891095e-05,316191.639977,-1.0,2017-08-17 06:55:00
7,50.368186,50.401696,50.318317,50.280080,50.393013,50.243327,50.629764,62.758582,-67.702344,0.209254,1.381720e+03,0.000000e+00,67253.475663,-1.0,2017-08-17 07:00:00
8,50.278150,50.386111,50.316348,50.286385,50.372129,50.245573,50.628840,58.506319,-67.702344,0.030125,1.324379e+03,-0.000000e+00,40013.473246,-1.0,2017-08-17 07:05:00
9,50.278150,50.391982,50.314380,50.288928,50.355042,50.247675,50.627883,53.232867,-67.702344,-0.315683,1.274773e+03,0.000000e+00,23155.940268,-1.0,2017-08-17 07:10:00


In [35]:
from sklearn.ensemble import BaggingClassifier

In [36]:
clf2 = tree.DecisionTreeClassifier(max_depth=10,random_state=1000)
bag2 = BaggingClassifier(base_estimator=clf2,n_estimators=30,random_state=1300)

# Class balancing is off
if True:
    clf = tree.DecisionTreeClassifier(max_depth=10,random_state=1000)
    bag = BaggingClassifier(base_estimator=clf,n_estimators=30,random_state=1300)

In [None]:
clf.