# BTC Price Prediction<br>LSTM Model
## Import Section

In [None]:
import pandas as pd
import numpy as np 
import os
import matplotlib.pyplot as plt
import time
import yfinance as yf

In [None]:
# Download the BTC-USD price history from Yahoo Finance, starting 2017-01-01.
df = yf.download('BTC-USD', start='2017-01-01')

# Recent yfinance releases can return two-level (field, ticker) columns even
# for a single ticker. Flatten them to the field names so that df["Close"] and
# df.Close below yield plain columns. No-op when the columns are already flat.
if isinstance(df.columns, pd.MultiIndex):
    df.columns = df.columns.get_level_values(0)

# Fail early with a clear message instead of an obscure error further down.
if df.empty:
    raise ValueError("yfinance returned no rows for BTC-USD")

## EDA

In [None]:
# Print a summary of the downloaded frame (index, columns, dtypes, non-null counts)
df.info()

In [None]:
df.head() # Looking at what the dataset contains

In [None]:
# Keep only the closing price (the target variable). Take an explicit copy so
# the columns added later are written to an independent frame rather than to a
# slice of the downloaded data (avoids pandas' SettingWithCopyWarning).
df = df[["Close"]].copy()

In [None]:
df.describe() # Summary statistics of the target variable

In [None]:
# Line chart of the target variable (closing price) over the downloaded period
plt.figure(num=1, figsize=(16, 6))
plt.plot(df["Close"])

## Feature Engineering

In [None]:
# Return of each row: fractional change of Close relative to the previous row
df["Returns"] = df["Close"].pct_change()

In [None]:
# Log return: natural log of (1 + simple return)
df["Log_Returns"] = np.log(df["Returns"] + 1)

In [None]:
# Plot the log returns to see whether their mean stays consistent throughout the data
plt.figure(num=1, figsize=(16, 4))
plt.plot(df["Log_Returns"])

In [None]:
# Drop rows containing missing values (in place)
df.dropna(axis=0, how="any", inplace=True)
# Feature matrix used by the model: columns Close and Log_Returns, in that order
X = df.loc[:, ["Close", "Log_Returns"]].values

## Train/Test Split and Scaling

In [None]:
from sklearn.preprocessing import MinMaxScaler

In [None]:
# Fit the scaler on the training portion only (the first 80% of rows, the same
# cut-off used for the train/test split below) so that the min/max of the
# held-out rows does not leak into the data the model is trained on.
# Held-out values may therefore fall outside the [0, 1] range.
scaler = MinMaxScaler(feature_range=(0, 1)).fit(X[: int(len(X) * 0.8)])
X_scaled = scaler.transform(X)  # Apply the fitted scaling to every row

In [None]:
X_scaled[0] # Inspect the first scaled row to check the values and the datatype.

In [None]:
# Labels for the model: the scaled Close value (column 0) of every row
y = list(X_scaled[:, 0])

In [None]:
# Index of the first testing row: the leading 80% of the rows go to training.
# Printed to show how many rows end up in the training data.
split = int(0.8 * X_scaled.shape[0])
print(split)

In [None]:
# Ordered split (no shuffling): rows before `split` train the model, the rest test it
X_train, X_test = X_scaled[:split], X_scaled[split:]  # feature sets
y_train, y_test = y[:split], y[split:]  # label sets

In [None]:
# Features and labels must have the same length in both the training and the testing set
assert (len(X_train), len(X_test)) == (len(y_train), len(y_test))

In [None]:
# Build the sliding-window samples for the LSTM.
# Each sample holds the n feature rows that precede position `end`;
# its label is the label at position `end` (i.e. the next record).
n = 60  # window length: number of past rows per sample

Xtrain = [X_train[end - n : end] for end in range(n, len(X_train))]
ytrain = [y_train[end] for end in range(n, len(X_train))]

Xtest = [X_test[end - n : end] for end in range(n, len(X_test))]
ytest = [y_test[end] for end in range(n, len(X_test))]

In [None]:
# Inspect the first training window (the n feature rows that precede its label)
Xtrain[0]

In [None]:
# Inspect the (scaled) label that belongs to the first training window
ytrain[0]

In [None]:
# Build a one-row check input for the scaler (not a validation set):
# take the first training label (a scaled Close value) and pair it with a zero
# placeholder, because the scaler was fitted on two feature columns.
val = np.array(ytrain[0])
val = np.c_[val, np.zeros(val.shape)]

In [None]:
# Map `val` back to the original units with the fitted scaler (this undoes the scaling).
# Only the first column (Close) is meaningful; the second column is a placeholder.
scaler.inverse_transform(val)

In [None]:
# Convert the window lists into arrays for the LSTM model.
# Stacking equally sized (n, n_features) windows already yields the
# (samples, n, n_features) layout, so no reshape is needed.
Xtrain, ytrain = (np.array(Xtrain), np.array(ytrain))
Xtest, ytest = (np.array(Xtest), np.array(ytest))

# An empty window list (fewer rows than the window length) does not stack into
# a 3-D array; report that clearly instead of failing on a shape index later.
if Xtrain.ndim != 3 or Xtest.ndim != 3:
    raise ValueError(
        "Expected 3-D (samples, timesteps, features) inputs, got shapes %s and %s; "
        "not enough rows for the window length?" % (Xtrain.shape, Xtest.shape)
    )

In [None]:
# Show the shapes of the feature and label arrays, training first, then testing
print(Xtrain.shape, ytrain.shape, "---", Xtest.shape, ytest.shape, sep="\n")

## Model Building

In [None]:
from keras.models import Sequential
from keras.layers import LSTM, Dense

In [None]:
# Sequential model: one LSTM layer with 4 units followed by a single-unit Dense layer
model = Sequential()
model.add(LSTM(4, input_shape=Xtrain.shape[1:]))  # per-sample shape: (timesteps, features)
model.add(Dense(1))  # one output value per window
model.compile(optimizer='adam', loss="mean_squared_error")  # MSE loss, Adam optimizer

# Train the model.
# NOTE(review): the testing windows are also passed as validation data here, so the
# reported validation loss is not independent of the final test evaluation.
model.fit(
    Xtrain,
    ytrain,
    batch_size=8,
    epochs=250,
    validation_data=(Xtest, ytest),
    verbose=1,
)

In [None]:
model.summary() # Checking what the model produced.

In [None]:
# Run the fitted model over the training windows, then over the testing windows
trainPredict, testPredict = model.predict(Xtrain), model.predict(Xtest)

In [None]:
# Append a column of zeros to each prediction array so it has the two columns
# the scaler was fitted on (needed before calling inverse_transform).
trainPredict = np.c_[trainPredict, np.zeros(trainPredict.shape, dtype=np.float64)]
testPredict = np.c_[testPredict, np.zeros(testPredict.shape, dtype=np.float64)]

In [None]:
# Undo the scaling and keep only column 0 (Close) of every row
trainPredict = list(scaler.inverse_transform(trainPredict)[:, 0])
testPredict = list(scaler.inverse_transform(testPredict)[:, 0])

In [None]:
# Show the first five predictions for the training and the testing set
print(trainPredict[:5], testPredict[:5], sep="\n")

## Model Performance

In [None]:
from sklearn.metrics import mean_squared_error

In [None]:
# Grade the model with RMSE, expressed in the original price units.
#
# The predictions were already mapped back through the scaler, so the true
# labels (ytrain / ytest, still scaled) have to be inverse-transformed the same
# way before comparing. Each label is padded with a zero second column because
# the scaler was fitted on two features; only column 0 (Close) is kept.
trainActual = scaler.inverse_transform(np.c_[ytrain, np.zeros(len(ytrain))])[:, 0]
testActual = scaler.inverse_transform(np.c_[ytest, np.zeros(len(ytest))])[:, 0]

# Take the square root explicitly: the `squared=False` argument is no longer
# accepted by newer scikit-learn releases.
trainScore = np.sqrt(mean_squared_error(trainActual, trainPredict))
print("Train Score: %.2f RMSE" % (trainScore))

testScore = np.sqrt(mean_squared_error(testActual, testPredict))
print("Test Score: %.2f RMSE" % (testScore))