[View in Colaboratory](https://colab.research.google.com/github/brynmwangy/predicting-bitcoin-prices-using-LSTM/blob/master/btc.ipynb)

In [None]:
#install all the required dependency libraries
!pip install tensorflow #for prediction
!pip install numpy  #for matrix multiplication
!pip install pandas #define the data structures
!pip install matplotlib #for visualization
!pip install scikit-learn #for normalizing our data(scaling)


In [None]:
#importing the libraries
import tensorflow as tf
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from sklearn.preprocessing import StandardScaler
import io
%matplotlib inline
from sklearn.model_selection import GridSearchCV
from tensorflow.keras.wrappers.scikit_learn import KerasRegressor
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import LSTM, Dense
from tensorflow.keras.optimizers import Adam
from tensorflow.keras.losses import MeanSquaredError
from sklearn.metrics import mean_squared_error, mean_absolute_error
from scipy.spatial.distance import cosine

In [None]:
#loading the dataset through the Colab upload widget (google.colab is only importable inside Google Colab)
from google.colab import files
#the following cell looks up the 'btc.csv' key of `uploaded`, so that is the file name expected here
uploaded = files.upload()

In [None]:
#`uploaded` is a dictionary keyed by file name whose values are the encoded file contents:
#decode the CSV payload to text first, then let pandas parse it from an in-memory buffer
csv_text = uploaded['btc.csv'].decode('utf-8')
btc = pd.read_csv(io.StringIO(csv_text))
btc.head()

In [None]:
#reads btc.csv from the current working directory instead of the upload dictionary;
#this reassigns `btc`, replacing the frame built in the previous cell when both cells are run
btc = pd.read_csv('btc.csv', encoding='utf-8')
btc.head()

In [None]:
#the model is univariate: only the 'Close' column is used in the prediction process
close_column = btc['Close']
data_to_use = close_column.values
data_to_use

In [None]:
#data preprocessing(scaling)
#The scaler's mean/variance are learned from the training span only, so statistics of the
#held-out test prices never leak into the features the model is trained on.
#The split cell further down keeps the first 1018 windows of length 7 for training, which
#covers the first 1018 + 7 closing prices; keep these two numbers in sync with that cell.
n_train_prices = 1018 + 7
scaler = StandardScaler()
scaler.fit(data_to_use[:n_train_prices].reshape(-1, 1))
#the whole series is then transformed with those training-only statistics
scaled_data = scaler.transform(data_to_use.reshape(-1, 1))

#plotting the data
plt.figure(figsize=(12,7), frameon=False, facecolor='brown', edgecolor='blue')
plt.title('Bitcoin prices from December 2014 to May 2018')
plt.xlabel('Days')
plt.ylabel('Scaled price of Bitcoin')
plt.plot(scaled_data, label='Price')
plt.legend()
plt.show()

In [None]:
# This function creates the feature and label datasets by windowing the data.
#Inputs: data - dataset used in the project
#window_size - how many data points we are going to use to predict the next datapoint in the sequence
#[Example: if window_size = 1 we are going to use only the previous day to predict today's price]
#Outputs: X - features split into windows of datapoints (if window_size = 1, X = [len(data)-1, 1])
#y - 'labels', i.e. the next number in the sequence, which is the number we are trying to predict

def window_data(data, window_size):
    """Slide a fixed-length window over ``data`` to build features and labels.

    Every feature is the slice ``data[i:i + window_size]`` and its label is the
    element that immediately follows that slice, ``data[i + window_size]``.

    Args:
        data: Sliceable sequence that supports ``len()`` (list, array, ...).
        window_size: Number of consecutive items in each feature window.

    Returns:
        Tuple ``(X, y)`` of two equally long lists: the windows and the item
        following each window. Both lists are empty when ``data`` holds no
        more than ``window_size`` items.
    """
    # A window starting at `start` needs data[start + window_size] to exist,
    # so valid starts are 0 .. len(data) - window_size - 1.
    n_windows = len(data) - window_size
    X = [data[start:start + window_size] for start in range(n_windows)]
    y = [data[start + window_size] for start in range(n_windows)]
    assert len(X) == len(y)
    return X, y
#window the scaled series: 7 consecutive values form one sample, the value after them is its label
window_len = 7
X, y = window_data(scaled_data, window_len)


#chronological split: the first 1018 samples are the training set, the remainder is the test set
n_train = 1018
X_train, y_train = np.array(X[:n_train]), np.array(y[:n_train])
X_test, y_test = np.array(X[n_train:]), np.array(y[n_train:])

#report the resulting array shapes
for shape_report in (
    "X_train size: {}".format(X_train.shape),
    "y_train size: {}".format(y_train.shape),
    "X_test size: {}".format(X_test.shape),
    "y_test size: {}".format(y_test.shape),
):
    print(shape_report)

In [None]:


def build_model(batch_size, window_size, hidden_layer, learning_rate):
    """Build and compile a single-layer LSTM regressor.

    Args:
        batch_size: Kept in the signature for backward compatibility with
            existing callers; it is no longer baked into the network's input
            shape, so the model accepts batches of any size.
        window_size: Number of time steps in each input sample.
        hidden_layer: Number of units in the LSTM layer.
        learning_rate: Learning rate passed to the Adam optimizer.

    Returns:
        A compiled ``Sequential`` model (LSTM followed by ``Dense(1)``) using
        mean squared error as its loss.
    """
    model = Sequential()
    # Declare only the per-sample shape (time steps, 1 feature). This LSTM is not
    # stateful, so the batch dimension does not need to be fixed; pinning it with
    # batch_input_shape would tie the model to one batch size even though the last
    # (partial) training batch and prediction calls can feed a different number of rows.
    model.add(LSTM(hidden_layer, input_shape=(window_size, 1)))
    model.add(Dense(1))

    optimizer = Adam(learning_rate=learning_rate)
    model.compile(loss=MeanSquaredError(), optimizer=optimizer)

    return model


# Time-ordered cross-validation splitter used by the grid search below
from sklearn.model_selection import TimeSeriesSplit

# NOTE(review): KerasRegressor is imported from tensorflow.keras.wrappers.scikit_learn, which
# recent TensorFlow releases no longer ship (removed in 2.12, to my knowledge) - confirm the
# installed version still provides it before running this cell.

# Parameters for the grid search
param_grid = {
    'batch_size': [32, 64, 128],
    # X_train was windowed once with a fixed length, so the network's window_size has to
    # equal that length; any other value builds an input layer that rejects X_train and
    # the corresponding fits fail.
    'window_size': [X_train.shape[1]],
    'hidden_layer': [128, 256, 512],
    'learning_rate': [0.001, 0.01, 0.1],
    'epochs': [100, 200, 300]
}

# Use the Keras wrapper for scikit-learn
model = KerasRegressor(build_fn=build_model, verbose=1)

# Grid search. The samples are in chronological order, so each validation fold must come
# after the data it is validated against; TimeSeriesSplit guarantees that, whereas plain
# 3-fold CV would also train on later prices and validate on earlier ones.
grid = GridSearchCV(estimator=model, param_grid=param_grid, cv=TimeSeriesSplit(n_splits=3))
grid_result = grid.fit(X_train, y_train)

# Show the results
print(f"Best: {grid_result.best_score_} using {grid_result.best_params_}")


In [None]:
# Display the hyper-parameter combination selected by the grid search
grid_result.best_params_

In [None]:
#PRINT METRICS

In [None]:


# Predict the test-set values with the trained model (flattened to a 1-D array)
y_pred = grid_result.predict(X_test).flatten()

# Make sure y_test is 1-D so it lines up element-wise with y_pred
# (y_test was cut from scaled_data, so the errors below are in scaled units, not in prices)
y_test_1d = y_test.flatten()

# Compute and print the MSE
mse = mean_squared_error(y_test_1d, y_pred)
print(f"MSE: {mse}")

# Compute and print the RMSE (square root of the MSE)
rmse = np.sqrt(mse)
print(f"RMSE: {rmse}")

# Compute and print the cosine similarity: scipy's cosine() is a distance, hence 1 - distance
cosine_similarity = 1 - cosine(y_test_1d, y_pred)
print(f"COSINE: {cosine_similarity}")



In [None]:
# Prepare the data to plot: NaN-filled series with the same shape as the full dataset,
# so every curve is drawn at the day index it actually belongs to.
n_lags = X_train.shape[1]  # prices consumed by a window before its label
train_end = n_lags + len(y_train)  # index just past the last training label

# y_train[k] is the price at index k + n_lags
train_plot = np.empty_like(scaled_data)
train_plot[:, :] = np.nan
train_plot[n_lags:train_end, :] = scaler.inverse_transform(y_train.reshape(-1, 1))

# y_pred[k] forecasts the price at index train_end + k, so the predictions start right
# after the last training label. Starting one index earlier would draw every forecast a
# day before the day it predicts and overwrite the slot of the last training label.
test_plot = np.empty_like(scaled_data)
test_plot[:, :] = np.nan
test_plot[train_end:train_end + len(y_pred), :] = scaler.inverse_transform(y_pred.reshape(-1, 1))

# Plot the original, training and testing data on the original price scale
plt.figure(figsize=(15,7))
plt.plot(scaler.inverse_transform(scaled_data), label='Original data')
plt.plot(train_plot, label='Training data')
plt.plot(test_plot, label='Testing data')
plt.title('Bitcoin prices with Training and Testing Data')
plt.xlabel('Days')
plt.ylabel('Price of Bitcoin')
plt.legend()
plt.show()
