# PREDICTING OPEN AND CLOSE PRICES OF BITCOIN WITH A RECURRENT NEURAL NETWORK

For this project, I will use the High, Low, Adj Close, and Volume columns to predict the Open and Close prices for BTC-USD.

In [1]:
# import libraries
import pandas as pd 
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
import math
from sklearn.preprocessing import MinMaxScaler
from sklearn.model_selection import train_test_split
import random
import tensorflow as tf
import keras
# seed TensorFlow's global random generator so repeated runs are comparable
tf.random.set_seed(34)
from sklearn import metrics 

In [2]:
# load the BTC-USD price data from CSV and preview the first 10 rows
df = pd.read_csv('BTC-USD.csv')
df.head(10)

Unnamed: 0,Date,Open,High,Low,Close,Adj Close,Volume
0,2014-09-17,465.864014,468.174011,452.421997,457.334015,457.334015,21056800
1,2014-09-18,456.859985,456.859985,413.104004,424.440002,424.440002,34483200
2,2014-09-19,424.102997,427.834991,384.532013,394.79599,394.79599,37919700
3,2014-09-20,394.673004,423.29599,389.882996,408.903992,408.903992,36863600
4,2014-09-21,408.084991,412.425995,393.181,398.821014,398.821014,26580100
5,2014-09-22,399.100006,406.915985,397.130005,402.152008,402.152008,24127600
6,2014-09-23,402.09201,441.557007,396.196991,435.790985,435.790985,45099500
7,2014-09-24,435.751007,436.112,421.131989,423.204987,423.204987,30627700
8,2014-09-25,423.156006,423.519989,409.467987,411.574005,411.574005,26814400
9,2014-09-26,411.428986,414.937988,400.009003,404.424988,404.424988,21460800


In [3]:
# data information: column dtypes, non-null counts and memory usage
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 2747 entries, 0 to 2746
Data columns (total 7 columns):
 #   Column     Non-Null Count  Dtype  
---  ------     --------------  -----  
 0   Date       2747 non-null   object 
 1   Open       2747 non-null   float64
 2   High       2747 non-null   float64
 3   Low        2747 non-null   float64
 4   Close      2747 non-null   float64
 5   Adj Close  2747 non-null   float64
 6   Volume     2747 non-null   int64  
dtypes: float64(5), int64(1), object(1)
memory usage: 150.4+ KB


# DATA PROCESSING 

In [4]:
# sanity-check for missing values: count NaNs per column
df.isna().sum()

Date         0
Open         0
High         0
Low          0
Close        0
Adj Close    0
Volume       0
dtype: int64

In [5]:
# drop date column: it has object dtype, while the scaler applied afterwards
# needs numeric input.
# Rebinding `df` (rather than inplace=True) together with errors='ignore'
# keeps this cell safe to re-run once Date has already been removed.
df = df.drop(columns='Date', errors='ignore')

In [6]:
# min-max scale every remaining column (features and targets alike) into [0, 1]
# NOTE(review): the scaler is fitted on the full dataset before the train/test
# split, so the test rows influence the scaling of the training rows —
# consider fitting on the training rows only.
scaler = MinMaxScaler(feature_range=(0, 1))
scaled_df = pd.DataFrame(
    scaler.fit_transform(df.to_numpy()),
    columns=df.columns.tolist(),
)

In [7]:
# select model inputs and targets
# (despite their names, trainData holds the four feature columns and testData
# the two target columns; they are not the train/test partitions)
# NOTE(review): in the rows previewed by df.head(10), Adj Close is identical
# to Close, so this feature may leak the Close target — verify on the full data.
trainData = scaled_df.loc[:, ['High', 'Low', 'Adj Close', 'Volume']]
testData = scaled_df.loc[:, ['Open', 'Close']]


# 80:20 shuffled split with a fixed seed
xTrain, xTest, yTrain, yTest = train_test_split(
    trainData,
    testData,
    train_size=0.8,
    shuffle=True,
    random_state=130,
)

In [8]:
# size of splits: training features first, then test targets
print(*(part.shape for part in (xTrain, yTest)))

(2197, 4) (550, 2)


In [11]:
# convert the four splits to NumPy arrays so they are suitable for modelling
xTrain, xTest, yTrain, yTest = map(np.array, (xTrain, xTest, yTrain, yTest))

In [25]:
# inspect the test-feature array's dimensions
xTest.shape

(550, 4)

In [28]:
# reshape data to 3-D (samples, 1, features), matching the RNN's
# input_shape=(1, 4): every row becomes a one-step sequence.
# -1 lets NumPy infer the sample count instead of hard-coding 2197, and the
# test features are reshaped as well so that reg.predict(xTest) receives the
# same rank as the training input. Re-running this cell is a no-op.
xTrain = xTrain.reshape((-1, 1, xTrain.shape[-1]))
xTest = xTest.reshape((-1, 1, xTest.shape[-1]))

# Modelling 

In [22]:
# rnn: SimpleRNN -> dropout -> 2-unit linear output (one unit per target column)
reg = keras.Sequential([
    # recurrent layer; each sample is one timestep with 4 features
    keras.layers.SimpleRNN(units=50, input_shape=(1, 4), activation='relu'),
    # dropout
    keras.layers.Dropout(0.2),
    # output
    keras.layers.Dense(2, activation='linear'),
])

# compile with MSE as the loss and MAE as an extra reported metric;
# no optimizer is passed, so Keras falls back to its default
reg.compile(loss='MeanSquaredError', metrics=['MAE'])
reg.summary()

Model: "sequential_5"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 simple_rnn_5 (SimpleRNN)    (None, 50)                2750      
                                                                 
 dropout_3 (Dropout)         (None, 50)                0         
                                                                 
 dense_2 (Dense)             (None, 2)                 102       
                                                                 
Total params: 2,852
Trainable params: 2,852
Non-trainable params: 0
_________________________________________________________________


In [29]:
# fit model: 10 passes over the training data in mini-batches of 32
reg.fit(x=xTrain, y=yTrain, batch_size=32, epochs=10)

Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10


<keras.callbacks.History at 0x264d3557610>

Training finished with a loss (MSE) of 0.0012 and an MAE of 0.0195, both measured on the min-max-scaled values, which is pretty good.

In [31]:
# make prediction on the held-out test features
# NOTE(review): the model was built with input_shape=(1, 4), so xTest presumably
# has to be 3-D (samples, 1, 4) at this point — confirm it has been reshaped
# the same way as xTrain.
prediction = reg.predict(xTest)



In [33]:
# mse on the test split; yTest comes from the min-max-scaled frame, so the
# error is in scaled units rather than USD
mse = metrics.mean_squared_error(y_true=yTest, y_pred=prediction)
print(f'Mean Squared Error on the test is: {mse}')

Mean Squared Error on the test is: 0.00015290043913546083


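The model demonstrated strong performance in predicting Bitcoin's open and close prices, achieving a Mean Squared Error (MSE) of 0.00015 on the held-out test set. This low MSE indicates that the predictions closely align with the actual prices, which points to an accurate model; precise price estimates of this kind are particularly valuable to traders and investors who depend on them for their decision-making. Two caveats apply when interpreting the figure: the MSE is computed on min-max-scaled values rather than in USD, and the inputs are the same day's High, Low, Adj Close, and Volume (in the sample rows shown above, Adj Close is identical to Close), so the result reflects how well the two prices can be reconstructed from same-day data rather than how well future prices can be forecast.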