In [2]:
import keras

from keras.models import Sequential
from keras.layers import Dense, Activation
from keras.layers import LSTM
from keras.layers import Dropout
from keras.models import model_from_json

import pandas as pd
import numpy as np
import pandas_ta as ta

from sklearn.preprocessing import MinMaxScaler

import matplotlib.pyplot as plt

import os
import sys

In [3]:
# Relative location of the pre-processed parquet file (presumably minute-level BTC data,
# judging by the directory name -- confirm against the data pipeline).
path_parts = ("..", "ProcessingData", "BTC Minute CSVs", "BTCUSER_all_processed.parquet")
path_to_data = os.path.join(*path_parts)

# Load the whole file into a DataFrame using the fastparquet engine.
df = pd.read_parquet(path_to_data, engine='fastparquet')

Processing the data

In [4]:
# Row positions 0, 30, 60, ... -- every 30th row of the loaded frame.
indexes = list(range(0, len(df), 30))

In [5]:
# Down-sample to the row positions listed in `indexes`.
temp = df.iloc[indexes]
# Keep only rows dated after 2019-06-01. The mask is built from the down-sampled
# frame itself, so it has exactly the same index as the rows being filtered
# (the previous mask came from the full frame and relied on implicit index alignment).
# .copy() makes the result an independent frame, so adding indicator columns to it
# later does not write into a derived slice (avoids pandas' SettingWithCopyWarning).
temp = temp.loc[temp['Date'] > '2019-06-01 00:00:00'].copy()
df = temp

Adding technical indicators: RSI, SMA and MACD

In [6]:
def calculate_rsi(df: pd.DataFrame, length: int = 14):
    """
    Wrapper function for RSI.

    Runs ``ta.rsi`` on the ``Close`` column and stores the result in ``df`` as a
    new column named ``RSI_<length>``. The frame is modified in place; nothing is
    returned.

    :param df: frame containing a ``Close`` column; receives the new column
    :param length: value forwarded to ``ta.rsi`` as its ``length`` argument
    :return: None
    :raises TypeError: if ``length`` is not an ``int``
    """
    if not isinstance(length, int):
        # TypeError is a subclass of Exception, so callers catching Exception still work.
        raise TypeError("length must be an integer!")
    df[f'RSI_{length}'] = ta.rsi(df['Close'], length=length)

def calculate_sma(df: pd.DataFrame, length: int = 50):
    """
    Wrapper function for sma.

    Runs ``ta.sma`` on the ``Close`` column and stores the result in ``df`` as a
    new column named ``SMA_<length>``. The frame is modified in place; nothing is
    returned.

    :param df: frame containing a ``Close`` column; receives the new column
    :param length: value forwarded to ``ta.sma`` as its ``length`` argument
    :return: None
    :raises TypeError: if ``length`` is not an ``int``
    """
    if not isinstance(length, int):
        # TypeError is a subclass of Exception, so callers catching Exception still work.
        raise TypeError("length must be an integer!")
    df[f'SMA_{length}'] = ta.sma(df['Close'], length=length)

def calculate_macd(df: pd.DataFrame, fast: int = 12, slow: int = 26, signal: int = 9):
    """
    Wrapper function for MACD.

    Calls ``ta.macd`` on the ``Close`` column, selects the
    ``MACD_<fast>_<slow>_<signal>`` column from its result and stores it in ``df``
    under that same name. The frame is modified in place; nothing is returned.

    :param df: frame containing a ``Close`` column; receives the new column
    :param fast: value forwarded to ``ta.macd`` as ``fast``
    :param slow: value forwarded to ``ta.macd`` as ``slow``
    :param signal: value forwarded to ``ta.macd`` as ``signal``
    :return: None
    :raises TypeError: if ``fast``, ``slow`` or ``signal`` is not an ``int``
    """
    # Validate in the order fast -> slow -> signal so the first offending argument is reported.
    for name, value in (("fast", fast), ("slow", slow), ("signal", signal)):
        if not isinstance(value, int):
            # TypeError is a subclass of Exception, so callers catching Exception still work.
            raise TypeError(f"{name} must be an integer!")
    # The same name is used to pick the column from ta.macd's result and to store it.
    macd_column = f'MACD_{fast}_{slow}_{signal}'
    df[macd_column] = ta.macd(close=df['Close'], fast=fast, slow=slow, signal=signal)[macd_column]

In [7]:
# Indicator settings, reused below to build the indicator column names.
rsi_length = 14
sma_length = 50
macd_length_fast = 12
macd_length_slow = 26
macd_length_signal = 9

# Each helper adds its indicator column to df in place.
calculate_rsi(df, length=rsi_length)
calculate_sma(df, length=sma_length)
calculate_macd(
    df,
    fast=macd_length_fast,
    slow=macd_length_slow,
    signal=macd_length_signal,
)

In [10]:
# Drop as many leading rows as the largest indicator setting.
# NOTE(review): presumably this removes the indicators' NaN warm-up rows -- confirm.
# Re-running this cell trims the frame again, because df is reassigned from itself.
indicator_lengths = (
    rsi_length,
    sma_length,
    macd_length_slow,
    macd_length_fast,
    macd_length_signal,
)
non_nan = max(indicator_lengths)
df = df[non_nan:]


In [11]:
# Columns fed into scaling/windowing; names are derived from the indicator settings.
# Reduced alternative kept for reference:
# feature_columns = ['Close', f'RSI_{rsi_length}']
feature_columns = [
    'Close',
    f'RSI_{rsi_length}',
    f'SMA_{sma_length}',
    f'MACD_{macd_length_fast}_{macd_length_slow}_{macd_length_signal}',
]

In [12]:
def create_scaler_for_feature(df, feature, train_fraction: float = 0.80):
    """
    Fit a ``MinMaxScaler`` on the leading part of one column.

    The column is cast to float32 and only its first ``train_fraction`` share of
    rows (by position) is used for fitting, so later rows do not influence the
    scaler's range.

    :param df: frame holding the column
    :param feature: name of the column to fit on
    :param train_fraction: share of leading rows used for fitting, in (0, 1];
        defaults to 0.80, the previously hard-coded value
    :return: the fitted ``MinMaxScaler``
    :raises ValueError: if ``train_fraction`` is outside (0, 1] or the resulting
        training slice is empty
    """
    if not 0 < train_fraction <= 1:
        raise ValueError("train_fraction must be in the interval (0, 1]")

    data_values = df[feature].astype('float32')
    train_rows = int(len(data_values) * train_fraction)
    if train_rows == 0:
        # Fail with a clear message instead of handing an empty array to the scaler.
        raise ValueError(f"no rows available to fit a scaler for {feature!r}")

    scl = MinMaxScaler()
    # reshape(-1, 1): the scaler is fitted on a single-column 2-D array.
    scl.fit(data_values.values[:train_rows].reshape(-1, 1))
    return scl

In [13]:
# One fitted scaler per feature column, keyed by column name.
scalers = {
    column: create_scaler_for_feature(df, column)
    for column in feature_columns
}

In [14]:
# Full-range slice of the current frame bound to a second name.
# NOTE(review): `data` is not read by any later cell visible here -- confirm it is still needed.
data = df[:]

In [15]:
# Build one (n_rows, 1) float32 array per feature, in feature_columns order.
features_values = []
for feature in feature_columns:
    column_values = df[feature].astype('float32').values.reshape(-1, 1)
    if 'RSI' in feature:
        # RSI columns are kept on their original scale. The earlier code applied
        # transform() followed by inverse_transform(), which cancels out and only
        # added float rounding, so the raw values are used directly.
        # NOTE(review): confirm RSI is meant to stay unscaled while the other
        # features are min-max scaled.
        feature_values = column_values
    else:
        feature_values = scalers[feature].transform(column_values)
    # Copy so the stored array never shares memory with an intermediate.
    features_values.append(np.copy(feature_values))

In [16]:
# Display the per-feature arrays (same order as feature_columns).
features_values

[array([[0.07184964],
        [0.07139012],
        [0.07160842],
        ...,
        [0.7389783 ],
        [0.7459916 ],
        [0.74105555]], dtype=float32),
 array([[58.397766],
        [52.51088 ],
        [54.839935],
        ...,
        [52.930786],
        [57.422066],
        [53.54923 ]], dtype=float32),
 array([[0.06072222],
        [0.06074119],
        [0.06076469],
        ...,
        [0.7575417 ],
        [0.75611436],
        [0.7545878 ]], dtype=float32),
 array([[0.5457716 ],
        [0.5454984 ],
        [0.5455901 ],
        ...,
        [0.5506796 ],
        [0.56746703],
        [0.57238805]], dtype=float32)]

In [23]:
def process_data(data, lb, column=0):
    """
    Turn one column of ``data`` into sliding windows and next-step targets.

    For every start position ``i`` with ``i + lb < len(data)``, ``X[i]`` holds the
    ``lb`` consecutive values ``data[i:i+lb, column]`` and ``Y[i]`` holds the value
    right after that window, ``data[i+lb, column]``.

    :param data: 2-D array-like (rows x columns); a 1-D input is treated as a
        single column
    :param lb: window length (look-back), must be >= 1
    :param column: index of the column to window; defaults to 0, the previously
        hard-coded column
    :return: tuple ``(X, Y)`` with shapes ``(n, lb)`` and ``(n,)`` where
        ``n = max(len(data) - lb, 0)``
    :raises ValueError: if ``lb`` is smaller than 1
    """
    if lb < 1:
        raise ValueError("lb must be a positive integer")

    values = np.asarray(data)
    if values.ndim == 1:
        values = values.reshape(-1, 1)
    series = values[:, column]

    # Too-short input yields zero windows, with X still shaped (0, lb).
    n_windows = max(len(series) - lb, 0)
    # Row i of `positions` is [i, i+1, ..., i+lb-1]; indexing with it builds all
    # windows at once instead of appending in a Python loop.
    positions = np.arange(n_windows)[:, None] + np.arange(lb)[None, :]
    X = series[positions]
    # Copy so the targets do not alias the caller's array.
    Y = series[lb:lb + n_windows].copy()
    return X, Y

# Look-back window length shared by every feature.
lb = 50

# Window each feature array separately; X_data[k] / Y_data[k] line up with features_values[k].
X_data, Y_data = [], []
for feature in features_values:
    X, Y = (np.copy(part) for part in process_data(feature, lb))
    X_data.append(X)
    Y_data.append(Y)
feature_columns

['Close', 'RSI_14', 'SMA_50', 'MACD_12_26_9']

In [24]:
# Only the first feature's windows and targets are used for the model at the moment.
# Multi-feature variant (windows stacked side by side) kept for reference:
# X_full, Y_full = np.hstack(X_data), Y_data[0]
X_full = X_data[0]
Y_full = Y_data[0]

In [25]:
# Ordered hold-out: the leading `split` share of rows is the training set,
# the remainder is the test set (no shuffling).
split = 0.8
x_cut = int(X_full.shape[0] * split)
y_cut = int(Y_full.shape[0] * split)

X_train = X_full[:x_cut]
X_test = X_full[x_cut:]
Y_train = Y_full[:y_cut]
Y_test = Y_full[y_cut:]
X_train.shape

(33592, 50)

In [630]:
def _split_head_tail(windows, fraction):
    """Return (leading ``fraction`` of rows, remaining rows) of ``windows``."""
    cut = int(windows.shape[0] * fraction)
    return windows[:cut], windows[cut:]


# Windowed arrays per feature, in feature_columns order.
prices, rsi, sma, macd = (X_data[k] for k in range(4))

# Same ordered train/test split applied to every feature.
X_train_prices, X_test_prices = _split_head_tail(prices, split)
X_train_rsi, X_test_rsi = _split_head_tail(rsi, split)
X_train_sma, X_test_sma = _split_head_tail(sma, split)
X_train_macd, X_test_macd = _split_head_tail(macd, split)


Creating and training the model

In [26]:
# Earlier idea, kept for reference: input_length = lb * len(feature_columns)
#
# Four stacked LSTM layers (96 units each), each followed by 20% dropout, ending
# in a single-unit Dense output. All but the last LSTM return full sequences so
# the next LSTM receives one vector per time step.
model = Sequential([
    LSTM(units=96, return_sequences=True, input_shape=(lb, 1)),
    Dropout(0.2),
    LSTM(units=96, return_sequences=True),
    Dropout(0.2),
    LSTM(units=96, return_sequences=True),
    Dropout(0.2),
    LSTM(units=96),
    Dropout(0.2),
    Dense(units=1),
])
model.compile(loss='mean_squared_error', optimizer='adam')

In [632]:
# from keras import Input
# from keras.models import Model
# from keras.layers import Concatenate
#
# #using functional API
#
# input_prices = Input(shape=(lb, 1), dtype='float32')
# prices_layer_1 = LSTM(units=96, return_sequences=True)(input_prices)
# prices_layer_2 = Dropout(0.2)(prices_layer_1)
# prices_layer_3 = LSTM(units=96)(prices_layer_2)
# prices_layer_4 = Dropout(0.2)(prices_layer_3)
#
#
#
# input_rsi = Input(shape=(lb, 1), dtype='float32')
# rsi_layer_1 = LSTM(units=96, return_sequences=True)(input_rsi)
# rsi_layer_2 = Dropout(0.2)(rsi_layer_1)
# rsi_layer_3 = LSTM(units=96)(rsi_layer_2)
# rsi_layer_4 = Dropout(0.2)(rsi_layer_3)
#
#
# input_sma = Input(shape=(lb, 1), dtype='float32')
# sma_layer_1 = LSTM(units=96, return_sequences=True)(input_sma)
# sma_layer_2 = Dropout(0.2)(sma_layer_1)
# sma_layer_3 = LSTM(units=96)(sma_layer_2)
# sma_layer_4 = Dropout(0.2)(sma_layer_3)
#
#
#
# input_macd = Input(shape=(lb, 1), dtype='float32')
# macd_layer_1 = LSTM(units=96, return_sequences=True)(input_macd)
# macd_layer_2 = Dropout(0.2)(macd_layer_1)
# macd_layer_3 = LSTM(units=96)(macd_layer_2)
# macd_layer_4 = Dropout(0.2)(macd_layer_3)
#
#
#
# input = Concatenate()([prices_layer_4, rsi_layer_4, sma_layer_4, macd_layer_4])
#
#
#
# x = Dense(32, activation='relu', kernel_initializer='he_uniform')(input)
# x = Dense(1)(x)
#
# model = Model(inputs=[input_prices, input_rsi, input_sma, input_macd], outputs=x)
# model.compile(optimizer='adam', loss='mse')

In [633]:
# X_train = X_train.reshape((X_train.shape[0],X_train.shape[1],1))

In [28]:
# train_set = [X_train_prices, X_train_rsi, X_train_sma, X_train_macd]
# test_set = [X_test_prices, X_test_rsi, X_test_sma, X_test_macd]

# Make sure the checkpoint directory exists before the first save.
os.makedirs("Models", exist_ok=True)

# 100 rounds of 10 epochs each, saving a checkpoint after every round.
# The test split doubles as validation data; shuffle=False keeps samples in order.
for i in range(0, 100):
    model.fit(X_train,Y_train,epochs=10,validation_data=(X_test,Y_test),shuffle=False)
    # os.path.join instead of a hard-coded backslash: the same path on Windows,
    # and a real sub-directory (not a file name containing '\') elsewhere.
    model.save(os.path.join("Models", f"model_price_{i}"))

model.summary()

Epoch 1/10
Epoch 2/10
 165/1050 [===>..........................] - ETA: 1:55 - loss: 0.0303

KeyboardInterrupt: 

In [None]:
plt.figure(figsize=(12,8))

# Model output for the held-out windows (still in scaled units).
Xt = model.predict(X_test)
# Map targets and predictions back through the 'Close' scaler before plotting.
plt.plot(scalers['Close'].inverse_transform(Y_test.reshape(-1,1)), label="Actual")
plt.plot(scalers['Close'].inverse_transform(Xt.reshape(-1, 1)), label="Predicted")
plt.xlabel("Test sample index")
plt.ylabel("Close")
plt.legend()
# Both curves come from X_test / Y_test, so the figure shows the test split
# (the previous title said "Train Dataset").
plt.title("Test Dataset")

In [None]:
# Show the dimensions of the test inputs.
X_test.shape