# Cryptocurrency price prediction with a recurrent neural network

##### The value of a cryptocurrency, like the behaviour of an actively traded stock market, can be modelled as a time series. Here, we use Bitcoin's daily weighted price to build our series. The purpose of this study is to predict the next value based on the most recent cryptocurrency values. To do so, I will use an LSTM network, because it has memory, which is important when dealing with sequential data.


# Import the required libraries

In [None]:
from sklearn.metrics import mean_absolute_error, mean_squared_error, r2_score
from sklearn.preprocessing import MinMaxScaler
from keras.layers import Dense , LSTM , Dropout , Bidirectional
from keras.models import Sequential
from datetime import datetime
from numpy import sqrt
import matplotlib.pyplot as plt
import missingno as msno
import warnings

import pandas as pd
import numpy as np
import seaborn as sns

warnings.filterwarnings('ignore')
%matplotlib inline


In [None]:
# Load the Bitstamp BTC/USD history (1-minute file) and preview the first rows
csv_path = '../input/bitcoin-historical-data/bitstampUSD_1-min_data_2012-01-01_to_2021-03-31.csv'
data = pd.read_csv(csv_path)
data.head()

Our dataset contains the opening, closing, highest and lowest prices and the traded volume of Bitcoin for each period, as well as the price weighted by the purchase/sale values of that period. The raw file is sampled every minute; it is aggregated into daily values further below.

In [None]:
# Print a summary of the dataframe: its columns, their dtypes and non-null counts
data.info()

In [None]:
# Descriptive statistics for every column (include="all" does not restrict
# the summary to numeric columns)
data.describe(include="all")

In [None]:
# Keep the original number of rows so the share of dropped rows can be
# computed after cleaning, then show the dataframe dimensions
tam1 = len(data)
data.shape

# Display the missing values

In [None]:
# Visualise the missing-value pattern of the dataframe with missingno
import missingno as msno
%matplotlib inline
msno.matrix(data, color=(.07,.03,.5));

# Count the missing values in each column
print("Amount of missing value by columns: ")
data.isnull().sum() 

In [None]:
# Drop every row that contains a missing value and renumber the index
data = data.dropna()
data = data.reset_index(drop=True)
# Confirm that no missing values remain in any column
print("Amount of missing value by columns: ")
data.isna().sum()

In [None]:
# Percentage of rows lost when the incomplete records were dropped
tam2 = len(data)
p_remove = 100 - 100 * tam2 / tam1
print(p_remove,"% of data were removed from the dataset.")

## Creating a new column

In [None]:
# Convert the Unix timestamps (in seconds) to datetimes, then derive a
# calendar-date column from them
data['Timestamp'] = pd.to_datetime(data['Timestamp'], unit='s')
data['date'] = data['Timestamp'].dt.date
data.head()

In [None]:
# Collapse the rows into one row per calendar date by averaging each column
data = data.groupby(data['date']).mean()
data.head()

# Visualization

In [None]:
# Box plot of the Open/High/Low/Close columns of the aggregated data
ohlc_columns = ['Open', 'High', 'Low', 'Close']
df = pd.DataFrame(data, columns=ohlc_columns)
df.plot.box()

In [None]:
# plot the weighted price series
plt.plot(data['Weighted_Price'])

In [None]:
# Pairwise correlation between the columns, drawn as an annotated heatmap
corr_matrix = data.corr()
plt.figure(figsize=(10, 5))
sns.heatmap(corr_matrix, annot=True)


# Data pre-processing

In [None]:
# Some points are outliers recorded as a price of 0. Mark them as NaN and
# forward-fill each gap with the last valid price.
# The result is assigned back explicitly: calling replace/fillna with
# inplace=True on a selected column is deprecated chained assignment (it does
# not update `data` under pandas Copy-on-Write), and fillna(method='ffill')
# is deprecated in favour of .ffill().
data['Weighted_Price'] = data['Weighted_Price'].replace(0, np.nan).ffill()

In [None]:
# The weighted price is the only input fed to the recurrent network.
# NOTE(review): the scaler is fitted on the whole series, i.e. before the
# train/test split, so test-period values influence the scaling. Confirm
# this is intended.
values = data['Weighted_Price'].to_numpy().reshape(-1, 1).astype('float32')
# Rescale the prices to the [0, 1] range
scaler = MinMaxScaler(feature_range=(0, 1))
scaled = scaler.fit(values).transform(values)

## Split data into train and test sets

In [None]:
# Split in order: the first 70% of the series is used for training and the
# remaining 30% for testing.
train_size = int(len(scaled) * 0.7)
test_size = len(scaled) - train_size
train = scaled[:train_size]
test = scaled[train_size:]
print(len(train), len(test))

In [None]:
# Helper that builds the supervised (input window -> next value) datasets
def create_dataset(dataset, look_back=1):
    """Turn a series into sliding-window inputs and next-step targets.

    Args:
        dataset: 2-D array-like of shape (n_samples, n_columns); only
            column 0 is used.
        look_back: Number of consecutive values in each input window.
            Must be at least 1.

    Returns:
        A tuple ``(dataX, dataY)`` of NumPy arrays. ``dataX[i]`` holds
        ``dataset[i:i + look_back, 0]`` and ``dataY[i]`` is the value that
        immediately follows that window, ``dataset[i + look_back, 0]``.
        ``dataX`` always has shape ``(n_windows, look_back)``, even when the
        series is too short to produce a single window.

    Raises:
        ValueError: If ``look_back`` is smaller than 1.
    """
    if look_back < 1:
        raise ValueError("look_back must be a positive integer")

    series = np.asarray(dataset)[:, 0]
    n_windows = max(len(series) - look_back, 0)

    if n_windows:
        dataX = np.array([series[i:i + look_back] for i in range(n_windows)])
    else:
        # Keep the result 2-D so code that reads dataX.shape[1] still works
        # when there is nothing to train on.
        dataX = np.empty((0, look_back), dtype=series.dtype)
    # Every value from position look_back onwards is the target of one window.
    dataY = series[look_back:].copy()

    # Report how many samples were produced.
    print(len(dataY))
    return dataX, dataY

### Let's use a single-day window to make a prediction. To do this, call the function create_dataset(..) with the parameter look_back=1. The look_back parameter controls how many values make up the input window fed to the network. Study the function to understand what it does.

In [None]:
# Each input window holds a single value (just one day)
look_back = 1
trainX, trainY = create_dataset(train, look_back=look_back)
testX, testY = create_dataset(test, look_back=look_back)

In [None]:
# Reshape to the 3-D layout used as network input:
# (instances, 1 time step, look_back values per step)
trainX = trainX.reshape(trainX.shape[0], 1, trainX.shape[1])
testX = testX.reshape(testX.shape[0], 1, testX.shape[1])

# LSTM Model

In [None]:
# Bidirectional LSTM followed by two dense ReLU layers (each with 50% dropout)
# and a single linear output unit for the predicted price.
model = Sequential()
model.add(Bidirectional(LSTM(32)))
model.add(Dense(units=128, activation='relu'))
model.add(Dropout(0.5))
model.add(Dense(units=64, activation='relu'))
model.add(Dropout(0.5))
model.add(Dense(units=1))
# Leave the batch dimension as None: the model must accept any batch size
# (training uses mini-batches, prediction uses the whole test set), so it
# must not be pinned to the number of training samples.
model.build(input_shape=(None, trainX.shape[1], trainX.shape[2]))

model.summary()

## Function for training 

In [None]:
def train_model(model, loss, optimizer, trainX, trainY,
                validation_data=None, epochs=30, batch_size=200):
    """Compile and fit ``model``, then plot the training and validation loss.

    Args:
        model: Keras model to train; it is compiled in place.
        loss: Loss passed to ``model.compile``.
        optimizer: Optimizer passed to ``model.compile``.
        trainX: Training inputs.
        trainY: Training targets.
        validation_data: Optional ``(inputs, targets)`` tuple passed to
            ``model.fit``. When omitted, the notebook-level
            ``(testX, testY)`` pair is used, which is what this function
            did before the parameter existed.
        epochs: Number of training epochs (default 30).
        batch_size: Mini-batch size (default 200).
    """
    if validation_data is None:
        # Backward-compatible default: fall back to the notebook's test split.
        validation_data = (testX, testY)

    model.compile(loss=loss, optimizer=optimizer)
    # train model
    history = model.fit(
        trainX,
        trainY,
        epochs=epochs,
        batch_size=batch_size,
        validation_data=validation_data,
    )

    # plot the loss recorded for each epoch on both sets
    plt.plot(history.history['loss'], label='train')
    plt.plot(history.history['val_loss'], label='test')
    plt.legend()
    plt.show()



## Function to evaluate model


In [None]:
def evaluate_model(model, testX, testY):
    """Plot predictions against the targets and print the test RMSE.

    Two charts are drawn: one on the scale used in training and one after
    undoing the scaling with the notebook-level ``scaler``. The RMSE is
    computed on the inverse-transformed values.

    Args:
        model: Trained model exposing ``predict``.
        testX: Test inputs passed to ``model.predict``.
        testY: Scaled test targets.
    """
    predictions = model.predict(testX)

    # Comparison on the scale the network was trained on.
    plt.title('Curve of actual value and predicted value on the scale used in training')
    plt.plot(predictions, label='predict')
    plt.plot(testY, label='true')
    plt.legend()
    plt.show()

    # Undo the scaling so that the error is expressed in price units.
    predictions_inverse = scaler.inverse_transform(predictions.reshape(-1, 1))
    targets_inverse = scaler.inverse_transform(testY.reshape(-1, 1))

    mse = mean_squared_error(targets_inverse, predictions_inverse)
    print('Test RMSE: %.3f' % sqrt(mse))

    # Same comparison after the inverse transform.
    plt.title('Actual value curve and predicted value in US dollars')
    plt.plot(predictions_inverse, label='predict')
    plt.plot(targets_inverse, label='actual', alpha=0.5)
    plt.legend()
    plt.show()

# Training and evaluating model

In [None]:
# Train with mean squared error loss and the Adam optimizer
train_model(model, 'mse', 'adam', trainX, trainY)

In [None]:
# Evaluate the trained model on the test windows
evaluate_model(model, testX, testY)

# Predicting the next day

In [None]:
def create_time_steps(length):
    """Return the offsets ``[-length, ..., -1]`` as a list.

    Args:
        length: Number of steps to generate.

    Returns:
        A list of consecutive negative integers ending at -1; empty when
        ``length`` is not positive.
    """
    return list(range(-length, 0))

def baseline(history):
    """Return the mean of ``history`` as computed by ``np.mean``."""
    mean_value = np.mean(history)
    return mean_value
  
def show_plot(plot_data, delta, title):
    """Draw a history curve followed by single-point markers.

    Args:
        plot_data: Sequence of arrays. Element 0 is drawn as the history
            curve; elements 1 and 2 are drawn as single markers labelled
            'True Future' and 'Model Prediction', in that order.
        delta: x position of the markers; a falsy value places them at 0.
        title: Title of the figure.

    Returns:
        The ``matplotlib.pyplot`` module, so the caller can continue to
        customise or show the figure.
    """
    labels = ['History', 'True Future', 'Model Prediction']
    marker = ['.-', 'rx', 'go']
    time_steps = create_time_steps(plot_data[0].shape[0])
    future = delta if delta else 0

    plt.title(title)
    for idx, series in enumerate(plot_data):
        if idx == 0:
            # The history is drawn as a line over the negative time steps.
            plt.plot(time_steps, series.flatten(), marker[idx],
                     label=labels[idx])
        else:
            # Every other entry is a single marker at the future position.
            plt.plot(future, series, marker[idx], markersize=10,
                     label=labels[idx])
    plt.legend()
    plt.xlim([time_steps[0], (future + 5) * 2])
    plt.xlabel('Time-Step')
    return plt

In [None]:
def predict_next_day(model, testX, testY, day=201):
    """Plot and print the model's prediction for one test day next to the truth.

    Predictions and targets are converted back to price units with the
    notebook-level ``scaler`` before plotting and printing.

    Args:
        model: Trained model exposing ``predict``.
        testX: Test inputs passed to ``model.predict``.
        testY: Scaled test targets.
        day: Zero-based index, within the test targets, of the day to
            inspect. The targets at indices ``0 .. day - 2`` are drawn as
            history. Defaults to 201.

    Raises:
        ValueError: If ``day`` leaves no history to draw or lies beyond the
            available test targets.
    """
    yhat_inverse = scaler.inverse_transform(model.predict(testX).reshape(-1, 1))
    testY_inverse = scaler.inverse_transform(testY.reshape(-1, 1))

    if not 2 <= day < len(testY_inverse):
        raise ValueError(
            f"day must be between 2 and {len(testY_inverse) - 1}, got {day}"
        )

    # The history stops at index day - 2 and is drawn up to x = -1; the
    # markers are drawn at x = 1 (delta=1), so x = 0 (index day - 1) is left
    # empty on the chart.
    history = testY_inverse[0:day - 1]

    # show_plot labels its entries as History, True Future, Model Prediction,
    # in that order, so the actual value must come before the prediction.
    show_plot([history, testY_inverse[day], yhat_inverse[day]], 1,
              'Next day prediction')

    print(f'predicted value of day {day}: ', yhat_inverse[day])
    print(f'Actual value of day {day}: ', testY_inverse[day])

In [None]:
# Plot and print the prediction for a single day of the test set
predict_next_day(model, testX, testY)