In [1]:
import numpy as np
import os
home_path = os.path.expanduser("~") + '/'
from utils.load_data import *
from utils.info_params import *

OpenCV is built with OpenMP support. This usually results in poor performance. For details, see https://github.com/tensorpack/benchmarks/blob/master/ImageNet/benchmark-opencv-resize.py


In [2]:
def get_data(hps):
    dfX, df_next_deltaClose = load_data_seq(hps)

    segment, next_segment, target_one_hot = segment_seq(dfX, df_next_deltaClose, hps)

    train_segment, test_segment, _, _, train_target_one_hot, test_target_one_hot = \
        train_test_split(segment, next_segment, target_one_hot, hps)

    return train_segment, test_segment, train_target_one_hot, test_target_one_hot

In [3]:
hps = get_default_hparams()
hps.data_file_name = home_path + "data/ccxt/extra/BTC_USDT_binance_1h.csv"

hps.attributes_normalize_mean = ['Open','High','Low','Close','Volume','N_buy','buy_amount_avg','sell_amount_avg','buy_amount_std','sell_amount_std','price_avg','cost_avg','cost_std','Spread_Open_Close','Spread_High_Low','MA_Close_24','MA_Close_240']


hps.is_concat = False
hps.Tau = 1
hps.T = 1
hps.C = 2
hps.D = (1+hps.is_concat) * len(hps.attributes_normalize_mean)
hps.is_differencing = True
hps.lag_time = 1
hps.N_train_seq = 15340 + hps.T - 1
hps.normalize_data = 'min_max'
hps.normalize_data_idx = True

In [4]:
X_train, X_test, train_target_one_hot, test_target_one_hot = get_data(hps)
X_train = np.reshape(X_train, newshape=[-1, hps.T * hps.D])
X_test = np.reshape(X_test, newshape=[-1, hps.T * hps.D])

y_train = np.argmax(train_target_one_hot, axis=-1)
y_train = np.reshape(y_train, newshape=[-1, hps.Tau])

y_test = np.argmax(test_target_one_hot, axis=-1)
y_test = np.reshape(y_test, newshape=[-1, hps.Tau])

Normalize: Min Max


In [5]:
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import classification_report, confusion_matrix

In [6]:
clf = LogisticRegression()

In [7]:
clf.fit(X_train, y_train)

  y = column_or_1d(y, warn=True)


LogisticRegression(C=1.0, class_weight=None, dual=False, fit_intercept=True,
          intercept_scaling=1, max_iter=100, multi_class='warn',
          n_jobs=None, penalty='l2', random_state=None, solver='warn',
          tol=0.0001, verbose=0, warm_start=False)

In [8]:
from sklearn.metrics import classification_report, confusion_matrix, accuracy_score
y_train_hat = clf.predict(X_train)
cm = confusion_matrix(y_train, y_train_hat)
rp = classification_report(y_train, y_train_hat)
score = accuracy_score(y_train, y_train_hat)

print(cm)
print(rp)
print(score)

[[1338 6064]
 [ 991 6947]]
              precision    recall  f1-score   support

           0       0.57      0.18      0.27      7402
           1       0.53      0.88      0.66      7938

   micro avg       0.54      0.54      0.54     15340
   macro avg       0.55      0.53      0.47     15340
weighted avg       0.55      0.54      0.48     15340

0.5400912646675359


In [9]:
y_pred = clf.predict(X_test)
cm = confusion_matrix(y_test, y_pred)
rp = classification_report(y_test, y_pred)
score = accuracy_score(y_test, y_pred)

print(cm)
print(rp)
print(score)

[[258 790]
 [198 914]]
              precision    recall  f1-score   support

           0       0.57      0.25      0.34      1048
           1       0.54      0.82      0.65      1112

   micro avg       0.54      0.54      0.54      2160
   macro avg       0.55      0.53      0.50      2160
weighted avg       0.55      0.54      0.50      2160

0.5425925925925926
