In [None]:
# Change Keras backend
# Keras picks its backend from the KERAS_BACKEND environment variable while the
# package is being imported, so the variable has to be set BEFORE the first
# `keras` import; setting it afterwards has no effect on the loaded backend.
import os
os.environ['KERAS_BACKEND'] = "cntk"

from keras import backend as K

In [None]:
# import dependencies
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import math
from keras.layers.core import Dense, Activation, Dropout
from keras.layers.recurrent import LSTM
from keras.models import Sequential
from sklearn.model_selection import  train_test_split
from sklearn.preprocessing import MinMaxScaler
from sklearn.metrics import mean_squared_error

In [None]:
# Load the per-year 1-minute CSV files (2015 through 2021) and stack them, in
# year order, into a single frame with a fresh 0..n-1 index.
# All files are read first and concatenated once; concatenating inside the loop
# would copy the ever-growing frame again for every additional year.
ori_df = pd.concat(
    [pd.read_csv(f"./gemini_BTCUSD_{year}_1min.csv") for year in range(2015, 2022)],
    ignore_index=True,
)

In [None]:
# Shorten the "Unix Timestamp" column label to "Timestamp" (modifies ori_df in place).
ori_df.rename(columns={"Unix Timestamp": "Timestamp"}, inplace=True)

In [None]:
# `combine_df` is a second name for the same DataFrame object as `ori_df`
# (plain assignment, no copy), so the in-place operations applied to
# `combine_df` in later cells change `ori_df` as well.
combine_df = ori_df

In [None]:
# When several rows share a Timestamp, keep only the first one encountered
# (modifies combine_df in place).
combine_df.drop_duplicates(subset="Timestamp", keep="first", inplace=True)

In [None]:
# Preview the first rows of the combined frame.
combine_df.head()

In [None]:
# Preview the last rows of the combined frame.
combine_df.tail()

In [None]:
# List the column labels of the combined frame.
combine_df.columns

In [None]:
# (rows, columns) of the combined frame after duplicate removal.
combine_df.shape

In [None]:
# Summary statistics for the frame (pandas' default describe() column selection).
combine_df.describe()

In [None]:
# Data type of each column.
combine_df.dtypes

In [None]:
# Use the Timestamp column as the row index (modifies combine_df in place),
# then print the frame's structure summary.
# NOTE(review): once this has run, 'Timestamp' is the index rather than a
# column, so running the cell a second time presumably fails in set_index —
# confirm, and restart the kernel before re-running from here.
combine_df.set_index('Timestamp', inplace=True)
combine_df.info()

In [None]:
# Sort the rows by index value in ascending order (in place); the index is the
# Timestamp column once the set_index cell has run.
combine_df.sort_index(inplace=True)

In [None]:
# Pull the 'Close' values out as an (n_rows, 1) column array and plot the raw series.
data_close = combine_df['Close'].values.reshape(-1, 1)
plt.plot(data_close)
plt.show()

In [None]:
# Scaler configured to map the data it is fitted on into the range [0, 1].
scaler = MinMaxScaler(feature_range=(0,1))

In [None]:
# Fit the scaler on the close prices and replace `data_close` with the scaled
# values; the bare name on the last line displays the result.
# NOTE(review): the scaler is fitted on the whole series before the train/test
# split in the next cell, so the test portion influences the scaling — confirm
# this is intended.
# NOTE(review): `data_close` is overwritten, so re-running this cell refits the
# scaler on already-scaled values.
data_close = scaler.fit_transform(data_close)
data_close

In [None]:
# Positional split: the first 70% of the rows form the training set and the
# remaining rows form the test set (no shuffling).
train_size = int(len(data_close) * 0.7)
test_size = len(data_close) - train_size

data_train = data_close[:train_size, :]
data_test = data_close[train_size:, :]
print('Split data into train and test: ', len(data_train), len(data_test))

In [None]:
# Convert the series into supervised samples: each input is a window of
# `series` consecutive values and its target is the value right after the window.
def create_ts(ds, series):
    """Build sliding look-back windows and their next-step targets.

    Args:
        ds: 2-D array-like of shape (n_rows, n_columns); only column 0 is read.
        series: number of consecutive rows in each input window.

    Returns:
        Tuple (X, Y). X has shape (n_samples, series) and Y has shape
        (n_samples,), with n_samples = max(len(ds) - series - 1, 0).
        X[i] equals ds[i:i + series, 0] and Y[i] equals ds[i + series, 0].
        When there are too few rows for a single sample, X is still 2-D with
        shape (0, series), so code that reads X.shape[1] keeps working.
    """
    ds = np.asarray(ds)
    # The "- 1" leaves out the last possible window. It is kept on purpose:
    # the plotting cell computes its slice offsets from this sample count.
    n_samples = max(len(ds) - series - 1, 0)

    # Preallocate so the result always has shape (n_samples, series),
    # including the empty case.
    X = np.empty((n_samples, series), dtype=ds.dtype)
    for i in range(n_samples):
        X[i] = ds[i:i + series, 0]

    # Targets are the column-0 values directly following each window.
    Y = ds[series:series + n_samples, 0].copy()
    return X, Y

# Look-back window length: 7 days of 1-minute rows (7 * 24 * 60 = 10080).
# NOTE(review): every sample stores `series` values, so the windowed arrays hold
# roughly len(data) * series numbers — confirm this fits in memory.
series = 7 * 24 * 60

trainX, trainY = create_ts(ds=data_train, series=series)
testX, testY = create_ts(ds=data_test, series=series)

In [None]:
# Display the training windows array (one row per sample).
trainX

In [None]:
# Display the first training window.
trainX[0]

In [None]:
# Append a trailing axis of length 1 so the arrays are laid out as
# (samples, steps, features), the LSTM input format.
trainX = trainX.reshape((trainX.shape[0], trainX.shape[1], 1))
testX = testX.reshape((testX.shape[0], testX.shape[1], 1))

In [None]:
# Network: one LSTM layer with 4 units reading (series, 1)-shaped windows,
# followed by a single-unit dense output; compiled with MSE loss and Adam.
model = Sequential([
    LSTM(4, input_shape=(series, 1)),
    Dense(1),
])
model.compile(optimizer='adam', loss='mse')
# Train for 100 epochs with mini-batches of 32 samples.
model.fit(x=trainX, y=trainY, batch_size=32, epochs=100)

In [None]:
# Run the fitted model on the training and test windows.
trainPredictions = model.predict(trainX)
testPredictions = model.predict(testX)

# Undo the [0, 1] scaling so predictions and targets are back on the original
# price scale.
trainPredictions = scaler.inverse_transform(trainPredictions)
testPredictions = scaler.inverse_transform(testPredictions)
# Wrapping the 1-D targets in a list hands the scaler a single row of shape
# (1, n_samples); the scoring cell reads that row back as trainY[0] / testY[0].
# NOTE(review): the scaler was fitted on one column, so this relies on
# inverse_transform accepting a different column count — confirm with the
# installed scikit-learn version.
# NOTE(review): trainY / testY are overwritten, so this cell should not be run
# twice without re-creating them.
trainY = scaler.inverse_transform([trainY])
testY = scaler.inverse_transform([testY])

In [None]:
# Root-mean-squared error between the unscaled targets and the unscaled
# predictions, for the training and the test split.
trainScore = math.sqrt(mean_squared_error(trainY[0], trainPredictions[:, 0]))
testScore = math.sqrt(mean_squared_error(testY[0], testPredictions[:, 0]))
# Apply the format with the % operator; passing the score as a second print()
# argument would print the literal "%.2f" followed by the unformatted number.
print('Train score: %.2f rmse' % trainScore)
print('Test score: %.2f rmse' % testScore)

In [None]:
# Overlay the train and test predictions on the full close-price series.
# Each overlay is an all-NaN array shaped like `data_close`, so only the slice
# that receives predictions is drawn.
train_plot = np.full_like(data_close, np.nan)
# The first train prediction belongs to the row right after the first window.
train_plot[series:series + len(trainPredictions), :] = trainPredictions

test_plot = np.full_like(data_close, np.nan)
# Start after the train predictions plus both look-back windows; the +1 / -1
# mirror the sample count produced by create_ts.
test_plot[len(trainPredictions) + 2 * series + 1:len(data_close) - 1, :] = testPredictions

# Actual prices first, then the two prediction overlays.
for curve in (scaler.inverse_transform(data_close), train_plot, test_plot):
    plt.plot(curve)
plt.show()