# Importari

In [1]:
import pandas as pd

from sklearn import preprocessing
from collections import deque
import numpy as np
import random
import time

import tensorflow as tf
from tensorflow.keras.models import Sequential 
from tensorflow.keras.layers import Dense, Dropout, LSTM, BatchNormalization
from tensorflow.keras.callbacks import TensorBoard, ModelCheckpoint, LearningRateScheduler
from tensorflow.keras.optimizers import SGD, Adam
from tensorflow.keras.optimizers.schedules import ExponentialDecay
from tensorflow.keras.regularizers import l1
from tensorflow.keras.regularizers import l2

from tensorflow.compat.v1 import ConfigProto
from tensorflow.compat.v1 import InteractiveSession

# Crearea si pregatirea setului de date

In [2]:
main_df = pd.DataFrame()

ratios = ["BCHUSDT-binance", "BTCUSDT-binance", "ETHUSDT-binance", "LTCUSDT-binance"]

for ratio in ratios:
    dataset = pd.read_csv(
        f"crypto_data/{ratio}.csv")
    dataset.rename(
        columns={"close": f"{ratio}-close", "volume": f"{ratio}-volume"}, inplace=True
    )
    
    dataset.set_index("time", inplace=True)

    dataset.drop(["low", "high", "open"], axis=1, inplace=True)

    if main_df.empty:
        main_df = dataset
    else:
        main_df = main_df.join(dataset)

In [3]:
main_df.head()

Unnamed: 0_level_0,BCHUSDT-binance-close,BCHUSDT-binance-volume,BTCUSDT-binance-close,BTCUSDT-binance-volume,ETHUSDT-binance-close,ETHUSDT-binance-volume,LTCUSDT-binance-close,LTCUSDT-binance-volume
time,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1
1577836800,203.95,409.1,7180.97,202.94,128.91,1883.58,41.19,1355.11
1577837700,203.95,158.42,7178.45,128.24,128.78,3686.07,41.2,649.58
1577838600,204.54,245.08,7179.56,83.49,128.86,1141.18,41.26,478.91
1577839500,204.43,60.74,7177.02,97.14,128.87,1058.34,41.28,344.47
1577840400,205.48,547.23,7190.86,103.52,129.13,1369.77,41.38,613.06


In [4]:
main_df.columns.values

array(['BCHUSDT-binance-close', 'BCHUSDT-binance-volume',
       'BTCUSDT-binance-close', 'BTCUSDT-binance-volume',
       'ETHUSDT-binance-close', 'ETHUSDT-binance-volume',
       'LTCUSDT-binance-close', 'LTCUSDT-binance-volume'], dtype=object)

# Parametri

In [5]:
SEQ_LEN = 16
FUTURE_PRED = 4
COIN = ratios[1] #BTC-USDT
VAL_PCT = 0.15

In [6]:
def classify(current, future):
    if float(current) <= float(future):
        return 1 # when the price is higher we buy
    else:
        return 0 # when the price is lower we sell

In [7]:
main_df["future"] = main_df[f"{COIN}-close"].shift(-FUTURE_PRED)
main_df[[f'{COIN}-close', 'future']].head()

Unnamed: 0_level_0,BTCUSDT-binance-close,future
time,Unnamed: 1_level_1,Unnamed: 2_level_1
1577836800,7180.97,7190.86
1577837700,7178.45,7212.1
1577838600,7179.56,7211.87
1577839500,7177.02,7216.27
1577840400,7190.86,7211.97


In [8]:
main_df["target"] = list(
    map(classify, main_df[f"{COIN}-close"], main_df["future"])
)
main_df.head()

Unnamed: 0_level_0,BCHUSDT-binance-close,BCHUSDT-binance-volume,BTCUSDT-binance-close,BTCUSDT-binance-volume,ETHUSDT-binance-close,ETHUSDT-binance-volume,LTCUSDT-binance-close,LTCUSDT-binance-volume,future,target
time,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1
1577836800,203.95,409.1,7180.97,202.94,128.91,1883.58,41.19,1355.11,7190.86,1
1577837700,203.95,158.42,7178.45,128.24,128.78,3686.07,41.2,649.58,7212.1,1
1577838600,204.54,245.08,7179.56,83.49,128.86,1141.18,41.26,478.91,7211.87,1
1577839500,204.43,60.74,7177.02,97.14,128.87,1058.34,41.28,344.47,7216.27,1
1577840400,205.48,547.23,7190.86,103.52,129.13,1369.77,41.38,613.06,7211.97,1


# Impartirea setului de date initial in antrenare si validare

In [9]:
times = main_df.index.values
last_x_pct = main_df.index.values[-int(VAL_PCT * len(times))]

validation_main_df = main_df[main_df.index >= last_x_pct]
main_df = main_df[main_df.index < last_x_pct]

main_df.loc[main_df.index == last_x_pct] #the splitting place

Unnamed: 0_level_0,BCHUSDT-binance-close,BCHUSDT-binance-volume,BTCUSDT-binance-close,BTCUSDT-binance-volume,ETHUSDT-binance-close,ETHUSDT-binance-volume,LTCUSDT-binance-close,LTCUSDT-binance-volume,future,target
time,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1


# Verificare valori nule

In [10]:
main_df.fillna(method="ffill", inplace=True)
validation_main_df.fillna(method="ffill", inplace=True)

main_df.dropna(inplace=True)
validation_main_df.dropna(inplace=True)
main_df.isna().sum(), validation_main_df.isna().sum()

(BCHUSDT-binance-close     0
 BCHUSDT-binance-volume    0
 BTCUSDT-binance-close     0
 BTCUSDT-binance-volume    0
 ETHUSDT-binance-close     0
 ETHUSDT-binance-volume    0
 LTCUSDT-binance-close     0
 LTCUSDT-binance-volume    0
 future                    0
 target                    0
 dtype: int64,
 BCHUSDT-binance-close     0
 BCHUSDT-binance-volume    0
 BTCUSDT-binance-close     0
 BTCUSDT-binance-volume    0
 ETHUSDT-binance-close     0
 ETHUSDT-binance-volume    0
 LTCUSDT-binance-close     0
 LTCUSDT-binance-volume    0
 future                    0
 target                    0
 dtype: int64)

# Preprocesare prin normalizare, scalare si balansare

In [11]:
def preprocessing_df(df):
    df.drop("future", axis=1) 

    for col in df.columns:
        if col != "target":
            df[col] = df[col].pct_change()
            df.dropna(inplace=True)
            
            df.replace([np.inf, -np.inf], np.nan, inplace=True)
            
            
            df.dropna(inplace=True)

            df[col] = preprocessing.scale(df[col])
    df.dropna(inplace=True)
    sequencial_data = []
    prev_days = deque(maxlen=SEQ_LEN)
    for row in df.values:
        prev_days.append([x for x in row[:-1]])
        if len(prev_days) == SEQ_LEN:
            #separate the label/target
            sequencial_data.append([np.array(prev_days), row[-1]])

    random.shuffle(sequencial_data)

    buys = []
    sells = []

    for seq, target in sequencial_data:
        if target == 0: # you need to sell
            sells.append([seq, target])
        elif target == 1: # you need to buy
            buys.append([seq, target])

    random.shuffle(buys)
    random.shuffle(sells)

    lower_nr = min(len(buys), len(sells))

    buys = buys[:lower_nr]
    sells = sells[:lower_nr]

    sequencial_data = buys + sells

    random.shuffle(sequencial_data)

    x = []
    Y = []

    for seq, target in sequencial_data:
        x.append(seq)
        Y.append(target)
    return np.array(x), np.array(Y)

In [12]:
train_x, train_Y = preprocessing_df(main_df)
test_x, test_Y = preprocessing_df(validation_main_df)

In [13]:
print(f"Training data: {len(train_x)}, Validation data: {len(test_x)}")
print(f" Training set Sells: {(train_Y.astype('int32') == 0).sum()}, Buys: {(train_Y.astype('int32') == 1).sum()}")
print(f" Validation set Sells: {(test_Y.astype('int32') == 0).sum()}, Buys: {(test_Y.astype('int32') == 1).sum()}")

Training data: 38766, Validation data: 6888
 Training set Sells: 19383, Buys: 19383
 Validation set Sells: 3444, Buys: 3444


# Arhitectura modelului

In [14]:
config = ConfigProto()
config.gpu_options.allow_growth = True
session = InteractiveSession(config=config)

model = Sequential()
model.add(LSTM(128, kernel_regularizer=l2(1e-6), recurrent_regularizer=l2(1e-6), bias_regularizer=l2(1e-6), return_sequences=True))
model.add(Dropout(0.2))
model.add(BatchNormalization())

model.add(LSTM(128, kernel_regularizer=l2(1e-6), recurrent_regularizer=l2(1e-6), bias_regularizer=l2(1e-6), return_sequences=True))
model.add(Dropout(0.2))
model.add(BatchNormalization())

model.add(LSTM(64, kernel_regularizer=l2(1e-6), recurrent_regularizer=l2(1e-6), bias_regularizer=l2(1e-6), return_sequences=True))
model.add(Dropout(0.2))
model.add(BatchNormalization())

model.add(LSTM(64, kernel_regularizer=l2(1e-6), recurrent_regularizer=l2(1e-6), bias_regularizer=l2(1e-6), return_sequences=True))
model.add(Dropout(0.2))
model.add(BatchNormalization())

model.add(LSTM(32, kernel_regularizer=l2(1e-6), recurrent_regularizer=l2(1e-6), bias_regularizer=l2(1e-6)))
model.add(Dropout(0.2))
model.add(BatchNormalization())

model.add(Dense(10, activation='relu'))
model.add(Dropout(0.2))

model.add(Dense(2, activation='softmax'))

In [15]:
opt = Adam(learning_rate=1e-4, decay=1e-7)

LOSS='sparse_categorical_crossentropy'

model.compile(loss=LOSS, optimizer=opt, metrics=['accuracy'])

## Hyperparameters, tensorboard and checkpoint

In [16]:
EPOCHS = 10
BATCH_SIZE = 32

NAME = f"Model-final-arhitectura-mare"

In [17]:
tensorboard = TensorBoard(log_dir=f'logs/{NAME}')
filepath = "Model-final-RNN-{epoch:02d}-{val_accuracy:.3f}"
checkpoint = ModelCheckpoint("models/{}.model".format(filepath, monitor='val_accuracy', verbose=1, save_best_only=True, mode='max')) # saves only the best ones

# Fitting the model

In [18]:
history = model.fit(train_x, train_Y, 
        batch_size=BATCH_SIZE, 
        epochs=EPOCHS,
        validation_data=(test_x, test_Y),
        callbacks=[tensorboard, checkpoint])

Epoch 1/10




INFO:tensorflow:Assets written to: models\Model-final-RNN-01-0.597.model\assets


INFO:tensorflow:Assets written to: models\Model-final-RNN-01-0.597.model\assets


Epoch 2/10




INFO:tensorflow:Assets written to: models\Model-final-RNN-02-0.768.model\assets


INFO:tensorflow:Assets written to: models\Model-final-RNN-02-0.768.model\assets


Epoch 3/10




INFO:tensorflow:Assets written to: models\Model-final-RNN-03-0.938.model\assets


INFO:tensorflow:Assets written to: models\Model-final-RNN-03-0.938.model\assets


Epoch 4/10




INFO:tensorflow:Assets written to: models\Model-final-RNN-04-0.954.model\assets


INFO:tensorflow:Assets written to: models\Model-final-RNN-04-0.954.model\assets


Epoch 5/10




INFO:tensorflow:Assets written to: models\Model-final-RNN-05-0.967.model\assets


INFO:tensorflow:Assets written to: models\Model-final-RNN-05-0.967.model\assets


Epoch 6/10




INFO:tensorflow:Assets written to: models\Model-final-RNN-06-0.973.model\assets


INFO:tensorflow:Assets written to: models\Model-final-RNN-06-0.973.model\assets


Epoch 7/10




INFO:tensorflow:Assets written to: models\Model-final-RNN-07-0.970.model\assets


INFO:tensorflow:Assets written to: models\Model-final-RNN-07-0.970.model\assets


Epoch 8/10




INFO:tensorflow:Assets written to: models\Model-final-RNN-08-0.974.model\assets


INFO:tensorflow:Assets written to: models\Model-final-RNN-08-0.974.model\assets


Epoch 9/10




INFO:tensorflow:Assets written to: models\Model-final-RNN-09-0.960.model\assets


INFO:tensorflow:Assets written to: models\Model-final-RNN-09-0.960.model\assets


Epoch 10/10




INFO:tensorflow:Assets written to: models\Model-final-RNN-10-0.962.model\assets


INFO:tensorflow:Assets written to: models\Model-final-RNN-10-0.962.model\assets


# Evaluarea si salvarea modelului

In [19]:
# Scoring
score = model.evaluate(test_x, test_Y, verbose=0)
print('Test loss:', score[0])
print('Test accuracy:', score[1])

# Saving
model.save(f"models/{NAME}")

Test loss: 0.07825560867786407
Test accuracy: 0.9619628190994263




INFO:tensorflow:Assets written to: models/Model-final-arhitectura-mare\assets


INFO:tensorflow:Assets written to: models/Model-final-arhitectura-mare\assets


In [20]:
model.summary()

Model: "sequential"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
lstm (LSTM)                  (None, 16, 128)           70656     
_________________________________________________________________
dropout (Dropout)            (None, 16, 128)           0         
_________________________________________________________________
batch_normalization (BatchNo (None, 16, 128)           512       
_________________________________________________________________
lstm_1 (LSTM)                (None, 16, 128)           131584    
_________________________________________________________________
dropout_1 (Dropout)          (None, 16, 128)           0         
_________________________________________________________________
batch_normalization_1 (Batch (None, 16, 128)           512       
_________________________________________________________________
lstm_2 (LSTM)                (None, 16, 64)            4