In [5]:
import numpy as np
import pandas as pd
from sklearn.ensemble import RandomForestClassifier, ExtraTreesClassifier
from sklearn.metrics import accuracy_score
from datasets import get_dataset, gauss_convolve_instance

['coin_index', 'open', 'high', 'low', 'close', 'volume', 'up', 'down', 'RSI', 'MA_5min', 'macd', 'macds', 'macdo']

In [6]:
# Load the feature and label arrays from disk. get_dataset hands back three
# items: a (features, labels) pair for training, the same pair for validation,
# and a third value stored as `indicate` (its meaning is defined in the
# project's datasets module, not in this notebook).
(train_x, train_y), (val_x, val_y), indicate = get_dataset(
    './data/train_x_15_feature.npy',
    './data/train_y.npy',
)

In [7]:
# Sanity check: show index 0 of the second axis for the first two samples,
# followed by the full shape of the training array.
for preview in (train_x[:2, 0], train_x.shape):
    print(preview)

[[ 4.00000000e+00  9.93490994e-01  9.94522214e-01  9.92459893e-01
   9.93265510e-01  7.72771189e+04  0.00000000e+00 -2.25484371e-04
   5.04044854e+01  9.92144132e-01  2.94286659e-04 -6.61673080e-04
   9.55959739e-04]
 [ 7.00000000e+00  1.05138278e+00  1.05228949e+00  1.04594231e+00
   1.04654682e+00  6.17918199e+05  0.00000000e+00 -4.83596325e-03
   5.88984000e+01  1.05017381e+00 -3.87614483e-06  5.24975266e-04
  -5.28851410e-04]]
(6128, 78, 13)


In [8]:
# Run every sample of both splits through gauss_convolve_instance (presumably
# Gaussian smoothing -- the meaning of the [1, 5] and 0.5 arguments is defined
# by that helper, not here) and store the result back in the same slot.
# NOTE: the arrays are overwritten in place, so executing this cell a second
# time applies the transform to already-transformed data.
for split in (train_x, val_x):
    for i in range(split.shape[0]):
        split[i] = gauss_convolve_instance(split[i], [1, 5], 0.5)

In [9]:
# Two independent, initially empty frames; the following cells add one
# hand-crafted feature column at a time to each of them.
train_df, val_df = pd.DataFrame(), pd.DataFrame()

In [10]:
# Binary target per sample: 1 when the mean of label column 1 over the second
# axis is at least 1, otherwise 0. The same rule is applied to both splits.
train_isup, val_isup = (
    np.where(np.mean(labels[:, :, 1], axis=1) >= 1, 1, 0)
    for labels in (train_y, val_y)
)

# Show the label vector sizes and a glimpse of the training targets.
print(train_isup.shape, val_isup.shape)
print(train_isup)

(6128,) (1533,)
[1 0 0 ... 1 1 1]


In [11]:
# Per-sample price summaries, reduced over the second axis of the input array.
# Going by the feature-name list shown near the top of the notebook, columns
# 1/2/3 appear to be open/high/low -- confirm against the dataset builder.
for feats, seqs in ((train_df, train_x), (val_df, val_x)):
    feats['mean_price'] = np.mean(seqs[:, :, 1], axis=1)
    feats['max_price'] = np.max(seqs[:, :, 2], axis=1)
    feats['min_price'] = np.min(seqs[:, :, 3], axis=1)

In [12]:
# Range and volume features for both splits.
for feats, seqs in ((train_df, train_x), (val_df, val_x)):
    # Element-wise difference between column 2 and column 3 (presumably
    # high - low, per the feature-name list earlier in the notebook).
    spread = seqs[:, :, 2] - seqs[:, :, 3]
    volume = seqs[:, :, 5]

    feats['max_diff_inc'] = np.max(spread, axis=1)
    # NOTE(review): despite the name, this is the *minimum* of the same
    # column-2-minus-column-3 spread -- confirm this is the intended feature.
    feats['max_diff_dec'] = np.min(spread, axis=1)

    # Ratio of the largest value in column 5 to its median; a median of zero
    # would yield inf/NaN here.
    feats['vol_div'] = np.max(volume, axis=1) / np.median(volume, axis=1)

In [13]:
# Summary statistics of column 8 (listed as 'RSI' in the feature-name list
# earlier in the notebook) over the whole second axis, for both splits.
for feats, seqs in ((train_df, train_x), (val_df, val_x)):
    rsi = seqs[:, :, 8]
    feats['mean_rsi'] = np.mean(rsi, axis=1)
    feats['max_rsi'] = np.max(rsi, axis=1)
    feats['min_rsi'] = np.min(rsi, axis=1)
    feats['median_rsi'] = np.median(rsi, axis=1)

In [14]:
# Extremes of column 8 restricted to the last ten positions of the second
# axis, for both splits.
for feats, seqs in ((train_df, train_x), (val_df, val_x)):
    rsi_tail = seqs[:, -10:, 8]
    feats['max_rsi_later'] = np.max(rsi_tail, axis=1)
    feats['min_rsi_later'] = np.min(rsi_tail, axis=1)

In [15]:
# Number of nonzero entries in column 6 for each sample.
# NOTE(review): train_x / val_x were overwritten by gauss_convolve_instance in
# an earlier cell; if that transform also touches column 6, this counts
# nonzero transformed values rather than the original entries -- confirm.
for feats, seqs in ((train_df, train_x), (val_df, val_x)):
    feats['up_count'] = np.count_nonzero(seqs[:, :, 6], axis=1)

In [16]:
# Statistics of column 9 (listed as 'MA_5min' in the feature-name list earlier
# in the notebook): four over the full second axis, then max/min over its
# last ten positions. Column insertion order matches the original cell.
for feats, seqs in ((train_df, train_x), (val_df, val_x)):
    ma_full = seqs[:, :, 9]
    ma_tail = seqs[:, -10:, 9]

    feats['mean_ma'] = np.mean(ma_full, axis=1)
    feats['max_ma'] = np.max(ma_full, axis=1)
    feats['min_ma'] = np.min(ma_full, axis=1)
    feats['median_ma'] = np.median(ma_full, axis=1)

    feats['max_ma_later'] = np.max(ma_tail, axis=1)
    feats['min_ma_later'] = np.min(ma_tail, axis=1)

In [17]:
# Statistics of column 10 (listed as 'macd' in the feature-name list earlier
# in the notebook): four over the full second axis, then max/min over its
# last ten positions. Column insertion order matches the original cell.
for feats, seqs in ((train_df, train_x), (val_df, val_x)):
    macd_full = seqs[:, :, 10]
    macd_tail = seqs[:, -10:, 10]

    feats['mean_macd'] = np.mean(macd_full, axis=1)
    feats['max_macd'] = np.max(macd_full, axis=1)
    feats['min_macd'] = np.min(macd_full, axis=1)
    feats['median_macd'] = np.median(macd_full, axis=1)

    feats['max_macd_later'] = np.max(macd_tail, axis=1)
    feats['min_macd_later'] = np.min(macd_tail, axis=1)

In [18]:
# Plain-text preview of the first ten rows of the engineered training features.
first_rows = train_df.head(10)
print(first_rows)

   mean_price  max_price  min_price  max_diff_inc  max_diff_dec    vol_div  \
0    0.996453   1.005993   0.987369      0.012631      0.000483   2.751671   
1    0.999886   1.054708   0.942119      0.040351      0.001662  13.682935   
2    0.984641   1.006193   0.960857      0.014995      0.003065   1.997325   
3    1.042109   1.151013   0.951197      0.078269      0.000000  17.665445   
4    0.999822   1.018654   0.973989      0.020749      0.001569   6.368645   
5    1.048264   1.091436   0.994925      0.037606      0.006139   5.160394   
6    1.004550   1.019916   0.981486      0.017023      0.001831   3.715223   
7    0.971797   1.057162   0.845185      0.077138      0.005608   3.462215   
8    0.996380   1.018927   0.971609      0.031546      0.000000  19.302077   
9    0.983371   1.017694   0.950723      0.031655      0.003015   4.328784   

    mean_rsi    max_rsi    min_rsi  median_rsi  ...    min_ma  median_ma  \
0  55.810513  80.777827  29.401896   55.657287  ...  0.992144   0

In [19]:
from statsmodels.tsa.arima.model import ARIMA
from fbprophet import Prophet

In [20]:
# Random-forest baseline on the engineered features. fit() returns the fitted
# estimator, so construction and training are chained into one assignment.
rf = RandomForestClassifier(
    n_estimators=100,
    oob_score=True,
    random_state=123456,
).fit(train_df, train_isup)
pred = rf.predict(val_df)

# Report accuracy on the training split first, then on the validation split.
for truth, guess in ((train_isup, rf.predict(train_df)), (val_isup, pred)):
    print(accuracy_score(truth, guess))

1.0
0.5303326810176126


In [21]:
# Extra-trees baseline trained on the same engineered features and targets as
# the random forest, with the same seed for reproducibility.
et = ExtraTreesClassifier(n_estimators=100, random_state=123456)
et.fit(train_df, train_isup)
pred2 = et.predict(val_df)

# Training accuracy must be measured with `et` itself. The previous version
# called rf.predict here (copy-paste from the random-forest cell), so the
# number printed was the random forest's training score, not this model's.
print(accuracy_score(train_isup, et.predict(train_df)))
# Validation accuracy of the extra-trees model.
print(accuracy_score(val_isup, pred2))

1.0
0.5355512067840835


In [24]:
import pickle

# Serialize both fitted classifiers and the validation feature frame, each to
# its own file, written in binary mode.
for target, payload in (
    ('./model/isup_rf', rf),
    ('./model/isup_et', et),
    ('./data/val_df_feature', val_df),
):
    with open(target, 'wb') as f:
        pickle.dump(payload, f)