In [28]:
# Total number of past observations from the original dataset to consider.
# NOTE(review): not referenced by any of the visible cells - confirm it is still needed.
n_past_total = 1200

# Length of the input window: how many consecutive past rows form one sample.
# It is also reused as the unit count of the first LSTM layer in the model cell.
n_past = 30

# Number of future datapoints to predict per sample (a value above 1 makes the
# model multi-step: the output layer gets one unit per predicted step).
n_future = 10

# Activation function used by the LSTM layers (softsign, relu, sigmoid).
activation = 'softsign'

# Dropout rate applied before each hidden LSTM layer.
dropout = 0.2

# Number of (Dropout, LSTM) pairs added in the hidden-layer loop. The model also
# has an input LSTM and a final LSTM, so it ends up with n_layers + 2 LSTM layers.
n_layers = 8

# Number of units in each hidden LSTM layer.
n_neurons = 20

# Features used for training (a single feature means univariate, several mean multivariate).
# NOTE(review): these names do not match the columns actually selected later
# ('close', 'Volume USD'); in the visible cells only len(features) is used.
features = ['Close', 'Volume']
#features = ['Close']

# Number of input features per timestep (last dimension of the LSTM input shape).
n_features = len(features)

# Early-stopping patience: epochs without val_loss improvement before training stops.
patience = 25

# Optimizer passed to compile() (adam, RMSprop).
optimizer='adam'

In [29]:
import numpy as np
import pandas as pd
import datetime
from google.colab import drive
import glob
import os
from sklearn.preprocessing import MinMaxScaler


In [2]:
from google.colab import drive
# Mounting is left disabled here; `path` below only resolves once Google Drive
# is mounted at /content/drive (uncomment the next line on a fresh runtime).
# drive.mount("/content/drive")
# Folder whose *.csv files are read by dataset().
path = '/content/drive/My Drive/BTC'


In [3]:
def dataset(data_dir=None):
  """Load every ``*.csv`` file of a folder into one frame ordered by ``date``.

  The first line of each file is skipped (``skiprows=1``) and malformed rows
  are dropped instead of aborting the load.

  Parameters
  ----------
  data_dir : str, optional
      Folder to scan. Defaults to the notebook-level ``path``.

  Returns
  -------
  pandas.DataFrame
      All rows of all files, sorted ascending by the ``date`` column. The index
      holds each row's position in the concatenated (pre-sort) frame, so it is
      unique but not monotonic. An empty frame is returned when the folder
      contains no CSV file.
  """
  if data_dir is None:
    data_dir = path

  frames = []
  # glob order is unspecified; sorting keeps the index labels reproducible.
  for filename in sorted(glob.glob(os.path.join(data_dir, '*.csv'))):
    try:
      frame = pd.read_csv(filename, on_bad_lines='skip', skiprows=1)
    except TypeError:
      # pandas < 1.3 has no `on_bad_lines`; use the older spelling there.
      frame = pd.read_csv(filename, error_bad_lines=False, skiprows=1)
    frames.append(frame)

  if not frames:
    return pd.DataFrame()

  # Concatenate and sort once, after the loop, instead of redoing both for
  # every file (the per-file version did quadratic work for the same result).
  combined = pd.concat(frames, ignore_index=True)
  return combined.sort_values(['date'], ascending=True)

In [4]:
# Load all CSV files found under `path` into one date-sorted frame.
BTC= dataset()
# Bare expression: renders the frame as the cell output.
BTC

Unnamed: 0,unix,date,symbol,open,high,low,close,Volume BTC,Volume USD
525598,1483228860,2017-01-01 00:01:00,BTC/USD,966.34,966.34,966.34,966.34,7.610000,7353.847400
525597,1483228920,2017-01-01 00:02:00,BTC/USD,966.16,966.37,966.16,966.37,8.087376,7815.397864
525596,1483228980,2017-01-01 00:03:00,BTC/USD,966.37,966.37,966.37,966.37,0.000000,0.000000
525595,1483229040,2017-01-01 00:04:00,BTC/USD,966.37,966.37,966.37,966.37,0.000000,0.000000
525594,1483229100,2017-01-01 00:05:00,BTC/USD,966.43,966.43,966.43,966.43,0.107000,103.408010
...,...,...,...,...,...,...,...,...,...
2064523,1633934340,2021-10-11 06:39:00,BTC/USD,56651.24,56679.20,56651.24,56679.20,0.332015,18818.317382
2064522,1633934400,2021-10-11 06:40:00,BTC/USD,56664.83,56683.60,56649.00,56683.60,0.241577,13693.444401
2064521,1633934460,2021-10-11 06:41:00,BTC/USD,56683.49,56683.49,56667.74,56676.79,0.022537,1277.327650
2064520,1633934520,2021-10-11 06:42:00,BTC/USD,56671.71,56680.74,56670.00,56680.74,3.151051,178603.886587


In [5]:
#split Test and train
def train_test_split(data=None, split_at=2348820):
  """Split a frame positionally into a leading train part and a trailing test part.

  No shuffling is done, so for a date-sorted frame the test rows are the most
  recent ones.

  NOTE: the model cell later runs
  ``from sklearn.model_selection import train_test_split``, which rebinds this
  name; call this helper before that cell (or re-run this definition first).

  Parameters
  ----------
  data : pandas.DataFrame, optional
      Frame to split. Defaults to the notebook-level ``BTC`` frame.
  split_at : int, optional
      Row position of the first test row. Defaults to 2348820, the value this
      notebook was originally run with.

  Returns
  -------
  (pandas.DataFrame, pandas.DataFrame)
      ``(train, test)`` where ``train`` is ``data.iloc[:split_at]`` and
      ``test`` is ``data.iloc[split_at:]``.
  """
  if data is None:
    data = BTC
  train = data.iloc[:split_at, :]
  test = data.iloc[split_at:, :]
  return train, test

In [8]:
# Positional split of BTC into the leading (train) and trailing (test) rows.
train, test= train_test_split()
# Preview of the first five training rows.
train.head(5)

Unnamed: 0,unix,date,symbol,open,high,low,close,Volume BTC,Volume USD
525598,1483228860,2017-01-01 00:01:00,BTC/USD,966.34,966.34,966.34,966.34,7.61,7353.8474
525597,1483228920,2017-01-01 00:02:00,BTC/USD,966.16,966.37,966.16,966.37,8.087376,7815.397864
525596,1483228980,2017-01-01 00:03:00,BTC/USD,966.37,966.37,966.37,966.37,0.0,0.0
525595,1483229040,2017-01-01 00:04:00,BTC/USD,966.37,966.37,966.37,966.37,0.0,0.0
525594,1483229100,2017-01-01 00:05:00,BTC/USD,966.43,966.43,966.43,966.43,0.107,103.40801


In [21]:
# Keep only the two model inputs: closing price and USD volume.
# (No correlation is computed here; this is just a column selection.)
final_train=train[['close','Volume USD']]
final_test= test[['close','Volume USD']]

In [22]:
#normalizing dataset with minmax scaler


from sklearn.preprocessing import MinMaxScaler
def _fit_train_scaler():
  """Return a MinMaxScaler fitted on ``final_train`` only."""
  sc = MinMaxScaler(feature_range=(0,1))
  sc.fit(final_train)
  return sc

def minmax_train():
  """Scale ``final_train`` column-wise to the [0, 1] range.

  Returns
  -------
  numpy.ndarray
      Array with one row per training row and one column per selected feature.
  """
  return _fit_train_scaler().transform(final_train)

def minmax_test():
  """Scale ``final_test`` with the min/max learned from the training data.

  The scaler is fitted on ``final_train`` and only applied to ``final_test``.
  Fitting a separate scaler on the test set would put train and test on
  different scales and leak test-set statistics into preprocessing. As a
  consequence, test values outside the training range map outside [0, 1].

  Returns
  -------
  numpy.ndarray
      Array with one row per test row and one column per selected feature.
  """
  return _fit_train_scaler().transform(final_test)

In [23]:
# Scaled copy of final_train (column 0: close, column 1: Volume USD).
normalized_train= minmax_train()

In [24]:
normalized_train

array([[3.34283512e-03, 5.26481095e-04],
       [3.34330308e-03, 5.59524695e-04],
       [3.34330308e-03, 0.00000000e+00],
       ...,
       [4.76748811e-01, 1.41017372e-05],
       [4.77138783e-01, 7.06637620e-04],
       [4.77408175e-01, 2.69407517e-04]])

In [25]:
# Scaled copy of final_test (column 0: close, column 1: Volume USD).
normalized_test= minmax_test()

In [26]:
normalized_train[0]

array([0.00334284, 0.00052648])

In [31]:
# Every position that has n_past rows behind it and n_future rows from it onward.
_window_ends = range(n_past, len(normalized_train) - n_future + 1)

# Inputs: the n_past rows (all features) that precede each position.
X_train = np.array([normalized_train[end - n_past:end, :] for end in _window_ends])

# Targets: the next n_future values of column 0 (the scaled close).
y_train = np.array([normalized_train[end:end + n_future, 0] for end in _window_ends])

X_train.shape, y_train.shape

((2348781, 30, 2), (2348781, 10))

In [32]:
import tensorflow as tf
import keras

In [33]:
### Model cell: imports and display options for the recurrent model.
### (GRU is imported here, but the layers added below are all LSTM.)
# Goal: forecast the next n_future one-minute BTC closes from the previous n_past rows.
# NOTE(review): look_back is not referenced in the visible cells.
look_back=4
# NOTE(review): `import datetime` further down rebinds this name to the module.
from datetime import datetime
from keras.models import Sequential
from keras.layers import Dense, LSTM, Dropout, GRU
# NOTE(review): wildcard import - hides where layer names come from.
from keras.layers import *
from sklearn.preprocessing import MinMaxScaler
from sklearn.metrics import mean_squared_error, mean_absolute_error
# NOTE(review): this rebinds `train_test_split`, shadowing the notebook's own
# helper of the same name; re-running the split cell after this one will call
# scikit-learn's function instead.
from sklearn.model_selection import train_test_split
from keras.callbacks import EarlyStopping
from keras.optimizers import adam_v2
import os
import numpy as np
# Print numpy floats in fixed-point rather than scientific notation.
np.set_printoptions(suppress=True)
import pandas as pd
pd.set_option('display.float_format', lambda x: '%.3f' % x) # show floats with 3 decimals, no scientific notation
import datetime
import math
from matplotlib import pyplot as plt
# NOTE(review): the tensorflow.keras imports below rebind Sequential, Dense,
# LSTM, Dropout and EarlyStopping imported from `keras` above, so the model is
# built from the tensorflow.keras classes. Presumably only one of the two
# packages is intended - confirm and keep a single source.
from tensorflow.keras.models import Sequential, load_model
from tensorflow.keras.layers import Dense, LSTM, Dropout
from tensorflow.keras.callbacks import EarlyStopping
from sklearn.preprocessing import MinMaxScaler
from sklearn.metrics import mean_squared_error, mean_absolute_error, explained_variance_score
import plotly.express as px
import plotly.graph_objects as go
from plotly.subplots import make_subplots
from IPython.display import Image

# Assemble the stacked-LSTM regressor as an ordered list of layers.

# First recurrent layer: declares the input shape (window length, feature count).
# For a univariate model the last dimension of input_shape would be 1.
_layer_stack = [
    LSTM(
        units=n_past,
        return_sequences=True,
        activation=activation,
        input_shape=(X_train.shape[1], n_features),
    )
]

# n_layers intermediate (Dropout, LSTM) pairs, each passing the full sequence on.
for _ in range(n_layers):
    _layer_stack.append(Dropout(dropout))
    _layer_stack.append(LSTM(units=n_neurons, return_sequences=True, activation=activation))

# Last recurrent layer: return_sequences=False, so only the final step is emitted.
_layer_stack.append(Dropout(dropout))
_layer_stack.append(LSTM(units=n_neurons, return_sequences=False, activation=activation))

# Output layer: one unit per forecast step.
_layer_stack.append(Dense(units=n_future))

# Build, compile with mean-squared-error loss, and print the architecture.
regressor = Sequential(_layer_stack)
regressor.compile(optimizer=optimizer, loss='mse')
regressor.summary()

Model: "sequential_1"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
lstm (LSTM)                  (None, 30, 30)            3960      
_________________________________________________________________
dropout (Dropout)            (None, 30, 30)            0         
_________________________________________________________________
lstm_1 (LSTM)                (None, 30, 20)            4080      
_________________________________________________________________
dropout_1 (Dropout)          (None, 30, 20)            0         
_________________________________________________________________
lstm_2 (LSTM)                (None, 30, 20)            3280      
_________________________________________________________________
dropout_2 (Dropout)          (None, 30, 20)            0         
_________________________________________________________________
lstm_3 (LSTM)                (None, 30, 20)           

In [35]:
# Early stopping: end training once val_loss has not improved (decreased) for
# `patience` consecutive epochs. val_loss exists because fit() is called with
# validation_split.
early_stop = EarlyStopping(monitor='val_loss', mode='min', verbose=1, patience=patience)

In [36]:
# Train the regressor on the windowed training set. A tenth of the samples is
# held out for validation, which is what the early-stopping callback monitors.
res = regressor.fit(
    X_train,
    y_train,
    batch_size=32,
    epochs=750,
    validation_split=0.1,
    callbacks=[early_stop],
)

Epoch 1/750
 1991/66060 [..............................] - ETA: 14:16:06 - loss: 5.3733e-04

KeyboardInterrupt: ignored