In [1]:

# Library Imports
import numpy as np
import matplotlib.pyplot as plt
import pandas as pd
from sklearn.preprocessing import MinMaxScaler
plt.style.use("ggplot")

from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import LSTM, Dense, Dropout

In [2]:
# Load the BTC-USD price history and discard rows with missing values.
# The shape is printed before and after so the number of dropped rows is visible.
data = pd.read_csv('/content/BTC-USD.csv')
print(data.shape)

data = data.dropna()
print(data.shape)

# Last expression of the cell: show the closing-price column.
data["Close"]

(366, 7)
(363, 7)


0       8912.654297
1       9003.070313
2       9268.761719
3       9951.518555
4       9842.666016
           ...     
361    57750.175781
362    57828.050781
363    56631.078125
364    57200.292969
365    55419.718750
Name: Close, Length: 363, dtype: float64

In [3]:
# Data preprocessing
# Index the frame by date, keep only the 'Close' column and at most the last
# 1000 rows (tail returns everything when fewer rows are available).
df = data.set_index("Date")[["Close"]].tail(1000)
df.index = pd.to_datetime(df.index)

# Scale the closing prices with MinMaxScaler; `scaler` is kept so that later
# cells can map predictions back to prices with inverse_transform.
scaler = MinMaxScaler().fit(df)
df = pd.DataFrame(scaler.transform(df), index=df.index, columns=df.columns)
df

Unnamed: 0_level_0,Close
Date,Unnamed: 1_level_1
2020-05-04,0.005662
2020-05-05,0.007309
2020-05-06,0.012148
2020-05-07,0.024584
2020-05-08,0.022602
...,...
2021-04-30,0.895208
2021-05-01,0.896626
2021-05-02,0.874824
2021-05-03,0.885192


In [4]:

# Window configuration used by the sequence-building, model and prediction cells.

# Look-back window: number of past periods fed to the model for each sample
n_per_in  = 30

# Forecast horizon: number of future periods predicted for each sample
n_per_out = 10

# Features per time step (1 here because the only feature is the closing price)
n_features = 1

# The sliding-window split driven by these settings is performed inline in the
# next cell; no split_sequence helper is defined in the visible notebook.

In [5]:
# Slide a window over the scaled closes: every sample pairs n_per_in consecutive
# values (input) with the n_per_out values that immediately follow them (target).
seq = list(df.Close)
window = n_per_in + n_per_out

# One start position per complete window; the range is empty when the series
# is shorter than a single window.
starts = range(len(seq) - window + 1)

X = [seq[start:start + n_per_in] for start in starts]
y = [seq[start + n_per_in:start + window] for start in starts]

pd.DataFrame({'input': X, 'output': y})


Unnamed: 0,input,output
0,"[0.005662094599413664, 0.007308966736505607, 0...","[0.019214753310367733, 0.02183614841991377, 0...."
1,"[0.007308966736505607, 0.012148372330379326, 0...","[0.02183614841991377, 0.019375320947449703, 0...."
2,"[0.012148372330379326, 0.02458436828951821, 0....","[0.019375320947449703, 0.01915941646012853, 0...."
3,"[0.02458436828951821, 0.022601686222097628, 0....","[0.01915941646012853, 0.021075075273169125, 0...."
4,"[0.022601686222097628, 0.01807050215833403, 0....","[0.021075075273169125, 0.021305245339779455, 0..."
...,...,...
319,"[0.8365384158730717, 0.8403598081164094, 0.804...","[0.8251898578487411, 0.7861415601878271, 0.773..."
320,"[0.8403598081164094, 0.8045743756275721, 0.785...","[0.7861415601878271, 0.7739630380775352, 0.754..."
321,"[0.8045743756275721, 0.785083062488473, 0.8476...","[0.7739630380775352, 0.7549693476412142, 0.735..."
322,"[0.785083062488473, 0.8476158214824705, 0.8628...","[0.7549693476412142, 0.7359059302436136, 0.827..."


In [8]:
# Convert the window lists to arrays and give X the 3D layout passed to the
# first LSTM layer: (samples, n_per_in, n_features).
X = np.array(X)
y = np.array(y)

# On a fresh run this prints the 2D shape (samples, n_per_in); re-running the
# cell is harmless because the conversion and the reshape are idempotent.
print(X.shape)

# Use the configured window settings rather than literal 30 / 1 so that this
# cell keeps working when n_per_in or n_features is changed.
X = X.reshape(-1, n_per_in, n_features)
print(X.shape)

(324, 30, 1)
(324, 30, 1)


In [10]:

def layer_maker(n_layers, n_nodes, activation, drop=None, d_rate=.5):
    """
    Append a stack of hidden LSTM layers to the notebook-level ``model``.

    Every layer is created with ``return_sequences=True``. Optionally a
    Dropout layer is inserted after every ``drop``-th LSTM layer as a
    regularization measure against overfitting.

    Parameters
    ----------
    n_layers : int
        Number of LSTM layers to add.
    n_nodes : int
        Number of units in each added LSTM layer.
    activation : str
        Activation passed to every added LSTM layer.
    drop : int, optional
        Add a Dropout layer after every ``drop``-th LSTM layer. ``None``
        (the default) or ``0`` disables dropout entirely.
    d_rate : float
        Rate passed to each Dropout layer.

    Returns
    -------
    None
        The global ``model`` is modified in place, so it must exist before
        this function is called.
    """
    for layer_idx in range(1, n_layers + 1):
        model.add(LSTM(n_nodes, activation=activation, return_sequences=True))

        # Explicit guard instead of a bare try/except: "no dropout" (None or 0)
        # is handled here, while genuine errors such as an invalid d_rate are
        # no longer silently swallowed.
        if drop and layer_idx % drop == 0:
            model.add(Dropout(d_rate))


# Instantiating the model. layer_maker() appends to this same global `model`,
# so the order of the statements below is the order of the layers.
model = Sequential()

# Activation shared by every LSTM layer in the network
activ = 'relu'  # alternative kept for reference: "softsign"

# Input layer: 30-unit LSTM that receives windows of shape (n_per_in, n_features)
# and returns the full sequence so that further LSTM layers can be stacked on it
model.add(LSTM(30, activation=activ, return_sequences=True, input_shape=(n_per_in, n_features)))

# Hidden layers: two 12-unit LSTM layers, no dropout (drop is left at its default)
layer_maker(n_layers=2, n_nodes=12, activation=activ)

# Final hidden layer: 10-unit LSTM created without return_sequences
model.add(LSTM(10, activation=activ))

# Output layer: one value per forecast period
model.add(Dense(n_per_out))

# Model summary (disabled)
# model.summary()





In [None]:
# Training the model: Adam optimizer with mean-squared-error loss, 10 epochs in
# batches of 10, with validation_split=0.1 reserving a tenth of the samples.
model.compile(loss='mean_squared_error', optimizer='adam')

res = model.fit(
    X,
    np.asarray(y),
    epochs=10,
    batch_size=10,
    validation_split=0.1,
)


In [None]:

def visualize_training_results(results):
    """
    Plot the validation and training loss curves of a finished training run.

    `results` must expose a `history` mapping containing the keys
    'val_loss' and 'loss'. Only the loss is plotted; the model in this
    notebook is compiled without metrics, so no accuracy curves exist.
    """
    curves = results.history
    series_names = ['val_loss', 'loss']

    plt.figure(figsize=(12,4))
    for series in series_names:
        plt.plot(curves[series])

    plt.legend(series_names)
    plt.title('Loss')
    plt.xlabel('Epochs')
    plt.ylabel('Loss')
    plt.show()
# Plot the loss curves recorded in `res`, the object returned by model.fit above.
visualize_training_results(res)

In [None]:
# Compare the model's forecast for the last available input window with the
# target values that belong to that window. matplotlib is already imported in
# the first cell, so it is not re-imported here.

# Getting predictions by predicting from the last available X variable
yhat = model.predict(X[-1].reshape(1, n_per_in, n_features)).tolist()[0]

# Transforming values back to their normal prices
yhat = scaler.inverse_transform(np.array(yhat).reshape(-1,1)).tolist()

# Getting the actual values from the last available y variable which correspond to its respective X variable
actual = scaler.inverse_transform(np.array(y[-1]).reshape(-1,1))

# Printing the predictions and the actual values
print("Predicted Prices:\n", yhat)
print("\nActual Prices:\n", actual.tolist())

# Plotting both series on an explicit figure/axes pair instead of relying on
# pyplot's implicit "current figure" state
fig, ax = plt.subplots(figsize=(12, 5))
ax.plot(yhat, label='Predicted')
ax.plot(actual.tolist(), label='Actual')

ax.set_title("Predicted vs Actual Closing Prices")
ax.set_xlabel("Forecast step")
ax.set_ylabel("Price")
ax.legend()
fig.savefig("BTC_validation.png")
plt.show()

In [None]:
# Forecasting from the most recent n_per_in scaled closing prices in df
yhat = model.predict(np.array(df.tail(n_per_in)).reshape(1, n_per_in, n_features)).tolist()[0]

# Transforming the predicted values back to their original prices
yhat = scaler.inverse_transform(np.array(yhat).reshape(-1,1)).tolist()

# Creating a DF of the predicted prices. The input window ends on the last
# observed date, so the first forecast belongs to the following day; starting
# the range on df.index[-1] itself would duplicate a date that already has an
# actual price.
forecast_index = pd.date_range(start=df.index[-1] + pd.Timedelta(days=1), periods=len(yhat), freq="D")
preds = pd.DataFrame(yhat, index=forecast_index, columns=df.columns)

# Printing the predicted prices
print(preds)

# Number of periods back to visualize the actual values
pers = 10

# Transforming the actual values to their original price. The first predicted
# point is added at the end so the two plotted lines connect. pd.concat is
# used because DataFrame.append was removed in pandas 2.0.
recent_actual = pd.DataFrame(scaler.inverse_transform(df[["Close"]].tail(pers)), index=df.Close.tail(pers).index, columns=df.columns)
actual = pd.concat([recent_actual, preds.head(1)])

# Plotting
plt.figure(figsize=(16,6))
plt.plot(actual, label="Actual Prices")
plt.plot(preds, label="Predicted Prices")
plt.ylabel("Price")
plt.xlabel("Dates")
plt.title(f"Forecasting the next {len(yhat)} days")
plt.legend()
plt.savefig("BTC_predictions.png")
plt.show()