In [1]:
import numpy as np # linear algebra
import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)
import matplotlib.pyplot as plt

In [2]:
import numpy as np
import os
home_path = os.path.expanduser("~") + '/'
from info_params import get_default_hparams
from utils.load_data import *

OpenCV is built with OpenMP support. This usually results in poor performance. For details, see https://github.com/tensorpack/benchmarks/blob/master/ImageNet/benchmark-opencv-resize.py


In [3]:
def get_data(hps):
    dfX, df_next_deltaClose = load_data_seq(hps)

    segment, next_segment, target_one_hot = segment_seq(dfX, df_next_deltaClose, hps)

    train_segment, test_segment, _, _, train_target_one_hot, test_target_one_hot = \
        train_test_split(segment, next_segment, target_one_hot, hps)

    return train_segment, test_segment, train_target_one_hot, test_target_one_hot

In [4]:
hps = get_default_hparams()
hps.data_file_name = home_path + "data/cryptodatadownload/moving_average_240h.csv"
hps.attributes_normalize_mean = ['Close', 'Volume BTC', 'Spread High-Low', 'Spread Close-Open', "MA_Close","MA_V_BTC"]
hps.is_concat = True
hps.Tau = 1
hps.T = 120
hps.C = 2
hps.D = (1+hps.is_concat) * len(hps.attributes_normalize_mean)
hps.is_differencing = True
hps.lag_time = 1
hps.N_train_seq = 10000
hps.normalize_data = 'default'
hps.normalize_data_idx = True

In [5]:
X_train, X_test, train_target_one_hot, test_target_one_hot = get_data(hps)
X_train = np.reshape(X_train, newshape=[-1, hps.T * hps.D])
X_test = np.reshape(X_test, newshape=[-1, hps.T * hps.D])

y_train = np.argmax(train_target_one_hot, axis=-1)
y_train = np.reshape(y_train, newshape=[-1, hps.Tau])

y_test = np.argmax(test_target_one_hot, axis=-1)
y_test = np.reshape(y_test, newshape=[-1, hps.Tau])

Missing Normalization


In [6]:
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import confusion_matrix
from sklearn.metrics import classification_report

In [7]:
clf = RandomForestClassifier(random_state=2302, n_estimators=128, warm_start = True)

In [8]:
clf.fit(X_train, y_train)

  """Entry point for launching an IPython kernel.


RandomForestClassifier(bootstrap=True, class_weight=None, criterion='gini',
            max_depth=None, max_features='auto', max_leaf_nodes=None,
            min_impurity_decrease=0.0, min_impurity_split=None,
            min_samples_leaf=1, min_samples_split=2,
            min_weight_fraction_leaf=0.0, n_estimators=128, n_jobs=None,
            oob_score=False, random_state=2302, verbose=0, warm_start=True)

# Train metrics

In [14]:
y_train_hat = clf.predict(X_train)
from sklearn.metrics import classification_report, confusion_matrix

cm = confusion_matrix(y_train, y_train_hat)
rp = classification_report(y_train, y_train_hat)

print(cm)
print(rp)

[[4766    0]
 [   0 5115]]
              precision    recall  f1-score   support

           0       1.00      1.00      1.00      4766
           1       1.00      1.00      1.00      5115

   micro avg       1.00      1.00      1.00      9881
   macro avg       1.00      1.00      1.00      9881
weighted avg       1.00      1.00      1.00      9881



# Test metrics

In [15]:
y_pred = clf.predict(X_test)
from sklearn.metrics import classification_report, confusion_matrix

cm = confusion_matrix(y_test, y_pred)
rp = classification_report(y_test, y_pred)

print(cm)
print(rp)

[[ 791 1547]
 [ 734 1699]]
              precision    recall  f1-score   support

           0       0.52      0.34      0.41      2338
           1       0.52      0.70      0.60      2433

   micro avg       0.52      0.52      0.52      4771
   macro avg       0.52      0.52      0.50      4771
weighted avg       0.52      0.52      0.51      4771



# Test 1000 first

In [42]:
N = 1000
y_pred = clf.predict(X_test[0:N])
from sklearn.metrics import classification_report, confusion_matrix

cm = confusion_matrix(y_test[0:N], y_pred)
rp = classification_report(y_test[0:N], y_pred)

print(cm)
print(rp)

[[225 264]
 [209 302]]
              precision    recall  f1-score   support

           0       0.52      0.46      0.49       489
           1       0.53      0.59      0.56       511

   micro avg       0.53      0.53      0.53      1000
   macro avg       0.53      0.53      0.52      1000
weighted avg       0.53      0.53      0.52      1000



# Save Model and predict:
- File model: 1.65Gb

In [23]:
#Saving model
from sklearn.externals import joblib
joblib.dump(clf, 'checkpoint/Random_Forest_sklearn.joblib')

['checkpoint/Random_Forest_sklearn.joblib']