# Bitcoin close price prediction using RNN & LSTM

In [None]:
# This Python 3 environment comes with many helpful analytics libraries installed
# It is defined by the kaggle/python Docker image: https://github.com/kaggle/docker-python
# For example, here's several helpful packages to load

import numpy as np # linear algebra
import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)

# Input data files are available in the read-only "../input/" directory
# For example, running this (by clicking run or pressing Shift+Enter) will list all files under the input directory

import os
# Lazily enumerate every file beneath the Kaggle input directory, then print
# each full path on its own line.
input_files = (
    os.path.join(folder, name)
    for folder, _, names in os.walk('/kaggle/input')
    for name in names
)
for path in input_files:
    print(path)

# You can write up to 20GB to the current directory (/kaggle/working/) that gets preserved as output when you create a version using "Save & Run All" 
# You can also write temporary files to /kaggle/temp/, but they won't be saved outside of the current session

**In an RNN, as the gradient of the training samples is propagated backward through the network, it gets weaker and weaker; by the time it reaches the neurons that represent older data points in the time series, it is too small to adjust them properly. This problem is called the vanishing gradient. An LSTM cell is a type of RNN cell that stores important information about the past and forgets the unimportant pieces. In this way, when the gradient back-propagates, it is not consumed by unnecessary information.**

In [None]:
# Load the raw CSV into a DataFrame. The file name indicates Bitstamp USD
# 1-minute data covering 2012-01-01 to 2020-09-14 (relative Kaggle input path).
df=pd.read_csv('../input/bitcoin-historical-data/bitstampUSD_1-min_data_2012-01-01_to_2020-09-14.csv')

In [None]:
# Preview the first rows of the raw frame.
df.head()

In [None]:
# Preview the last rows of the raw frame.
df.tail()

In [None]:
# Inspect the column dtypes (the Timestamp column is converted to a date below).
df.dtypes

In [None]:
# Box plot of the Close column to eyeball its spread and outliers.
df.boxplot(column=['Close'])

**Because the `Timestamp` field is stored as an integer (Unix time in seconds), we convert it to a calendar date.**

In [None]:
# Interpret Timestamp as seconds since the Unix epoch, then keep only the
# calendar-date part so that rows can later be grouped per day.
parsed_ts = pd.to_datetime(df['Timestamp'], unit='s')
df['Date'] = parsed_ts.dt.date
df.head()

In [None]:
# Bucket the rows by calendar day.
group = df.groupby(by='Date')
# Note: GroupBy.head returns the first n rows of *each* group, not of the frame.
group.head(n=5)

In [None]:
# Daily mean of the Close column. mean() skips NaN rows inside a day, but a
# day whose rows are all NaN still produces NaN; left in place, that NaN would
# pass through the scaler into the training windows and make the MSE loss NaN.
data = group['Close'].mean()
# Fill gaps by linear interpolation between neighbouring days, then drop any
# NaN remaining at the start of the series (nothing earlier to interpolate from).
data = data.interpolate().dropna()
data.head()

In [None]:
# Number of daily observations after aggregation.
data.shape

In [None]:
# Count days whose aggregated Close is NaN. A non-zero count here would feed
# NaNs into scaling and training.
data.isnull().sum()

In [None]:
# Preview of the first daily means (same view as shown right after aggregation).
data.head()

# Train test split

In [None]:
# Total number of daily records available for the train/test split.
len(data)

**Taking last 50 records for test & remaining for train set**

In [None]:
# Chronological split: everything before the final 50 daily records is used
# for training, and those final 50 records are held out for testing.
split_at = len(data) - 50
x_train = data.iloc[:split_at]
x_test = data.iloc[split_at:]

In [None]:
# Size of the training slice.
x_train.shape

In [None]:
# Size of the held-out test slice.
x_test.shape

# Feature scaling

In [None]:
# Convert the training Series to a NumPy array (1-D at this point) and show
# its shape.
x_train=np.array(x_train)
x_train.shape

In [None]:
# Turn the 1-D array into a single-column 2-D array, the layout the scaler
# is fitted on in the next step.
x_train = x_train.reshape(-1, 1)
x_train.shape

In [None]:
from sklearn.preprocessing import MinMaxScaler
# Learn the min/max from the training data only, then map it into [0, 1].
# The same fitted scaler is reused later to transform the test inputs and to
# invert the model predictions.
scaler = MinMaxScaler(feature_range=(0, 1))
scaler.fit(x_train)
xtrain_scaled = scaler.transform(x_train)

In [None]:
# Check the container type returned by the scaler.
type(xtrain_scaled)

In [None]:
# Shape of the scaled training data.
xtrain_scaled.shape

In [None]:
# Sliding-window supervision: each sample is the `timestep` scaled values that
# precede position `end`, and its target is the scaled value at `end` itself.
timestep = 50
n_rows = xtrain_scaled.shape[0]

x_train = [xtrain_scaled[end - timestep:end, 0] for end in range(timestep, n_rows)]
y_train = [xtrain_scaled[end, 0] for end in range(timestep, n_rows)]

**In the above loop, each training sample `x` holds the previous 50 scaled values, and the target `y` is the value that immediately follows them. These pairs are used to train the RNN.**

In [None]:
# Number of training windows produced by the loop above.
len(x_train)

In [None]:
# x_train is still a plain Python list with one array per window, so echoing
# it whole floods the cell output; show only the first few windows.
x_train[:3]

In [None]:
# Stack the per-window lists into NumPy arrays for Keras.
x_train = np.array(x_train)
y_train = np.array(y_train)

In [None]:
# Shape of the stacked training windows (samples x window length).
x_train.shape

In [None]:
# Display the scaled training targets.
y_train

In [None]:
# NOTE(review): this cell repeats the previous display of y_train unchanged.
y_train

In [None]:
# Append a trailing feature axis of size 1 so the windows have the
# (samples, timesteps, features) layout passed to the recurrent layers.
n_samples, n_steps = x_train.shape[0], x_train.shape[1]
x_train = x_train.reshape(n_samples, n_steps, 1)

# Report the final shapes of the inputs and targets.
for label, arr in (("x_train shape= ", x_train), ("y_train shape= ", y_train)):
    print(label, arr.shape)

# RNN model

In [None]:
import keras
from keras.models import Sequential
from keras.layers import Dense,SimpleRNN,Dropout,Flatten

In [None]:
# Stacked SimpleRNN regressor. Every recurrent layer (including the last one)
# returns its full output sequence, so the result is flattened before the
# single-unit Dense head that emits the predicted scaled value.
reg = Sequential([
    SimpleRNN(128, activation='relu', return_sequences=True,
              input_shape=(x_train.shape[1], 1)),
    Dropout(0.25),
    SimpleRNN(256, activation='relu', return_sequences=True),
    Dropout(0.25),
    SimpleRNN(512, activation='relu', return_sequences=True),
    Dropout(0.35),
    SimpleRNN(256, activation='relu', return_sequences=True),
    Dropout(0.25),
    SimpleRNN(128, activation='relu', return_sequences=True),
    Dropout(0.25),
    Flatten(),
    Dense(1),
])

# Mean squared error on the scaled targets, optimised with Adam.
reg.compile(loss='mean_squared_error', optimizer='adam')
reg.fit(x_train, y_train, batch_size=64, epochs=100)


# Processing test data for prediction

In [None]:
# Take the test period plus the `timestep` days before it, so that the first
# test prediction has a full window of history. Scale with the scaler that was
# fitted on the training data.
first_pos = len(data) - len(x_test) - timestep
inputs = data.iloc[first_pos:].to_numpy().reshape(-1, 1)
inputs = scaler.transform(inputs)

In [None]:
# Build one `timestep`-long window per test day, then add the trailing
# feature axis of size 1.
xtest = np.array(
    [inputs[stop - timestep:stop, 0] for stop in range(timestep, inputs.shape[0])]
)
n_windows, window_len = xtest.shape[0], xtest.shape[1]
xtest = xtest.reshape(n_windows, window_len, 1)

In [None]:
# Predict in scaled space with the SimpleRNN model, then map the outputs back
# to the original value range via the fitted scaler.
scaled_predictions = reg.predict(xtest)
predicted_data = scaler.inverse_transform(scaled_predictions)

In [None]:
# Ground-truth test values as a single-column array, matching the layout of
# the predictions for plotting.
data_test = np.array(x_test).reshape(-1, 1)

In [None]:
import matplotlib.pyplot as plt

In [None]:
# Plot the held-out values against the SimpleRNN predictions on one axis.
fig, ax = plt.subplots(figsize=(8, 4), dpi=80, facecolor='w', edgecolor='k')
ax.plot(data_test, color="r", label="true result")
ax.plot(predicted_data, color="b", label="predicted result")
# Name the model in the title: the LSTM section draws a figure with the same
# axes and legend, so without a title the two plots cannot be told apart.
ax.set_title("SimpleRNN: true vs. predicted close")
ax.set_xlabel("Time(50 days)")
ax.set_ylabel("Close Values")
ax.legend()
ax.grid(True)
plt.show()

# Using LSTM

In [None]:
from sklearn.metrics import mean_absolute_error
from keras.models import Sequential
from keras.layers import Dense, LSTM, Dropout,Flatten

# Single-layer LSTM regressor: 10 units with ReLU activation, accepting
# sequences of any length with one feature per step, followed by a
# single-unit Dense head.
model = Sequential([
    LSTM(10, activation="relu", input_shape=(None, 1)),
    Dense(1),
])

# Same loss and optimiser as the SimpleRNN model; batch size is 32 here.
model.compile(optimizer="adam", loss="mean_squared_error")

model.fit(x_train, y_train, batch_size=32, epochs=100)

In [None]:
# Rebuild the scaled test inputs: the test period plus the `timestep` days
# that precede it, transformed with the scaler fitted on the training data.
# (Same computation as in the SimpleRNN section.)
window_start = len(data) - len(x_test) - timestep
raw_inputs = data.iloc[window_start:].to_numpy()
inputs = scaler.transform(raw_inputs.reshape(-1, 1))

In [None]:
# One `timestep`-long window per test day, reshaped to carry a trailing
# feature axis of size 1. (Same computation as in the SimpleRNN section.)
window_ends = range(timestep, inputs.shape[0])
xtest = np.array([inputs[end - timestep:end, 0] for end in window_ends])
xtest = xtest.reshape(xtest.shape[0], xtest.shape[1], 1)

In [None]:
# Predict in scaled space with the LSTM model and invert the scaling in one
# step to recover values in the original range.
predicted_data = scaler.inverse_transform(model.predict(xtest))

In [None]:
# Ground-truth test values as a single-column array for plotting.
true_values = np.array(x_test)
data_test = true_values.reshape(true_values.shape[0], 1)

In [None]:
# Plot the held-out values against the LSTM predictions on one axis.
fig, ax = plt.subplots(figsize=(8, 4), dpi=80, facecolor='w', edgecolor='k')
ax.plot(data_test, color="r", label="true result")
ax.plot(predicted_data, color="b", label="predicted result")
# Name the model in the title: the SimpleRNN section draws a figure with the
# same axes and legend, so without a title the two plots cannot be told apart.
ax.set_title("LSTM: true vs. predicted close")
ax.set_xlabel("Time(50 days)")
ax.set_ylabel("Close Values")
ax.legend()
ax.grid(True)
plt.show()

**From the above two plots, we can see that the LSTM model performs better than the RNN model.**