In [1]:
from google.cloud import bigquery
from google.colab import auth
from copy import deepcopy
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
# Packages for ML
from sklearn.model_selection import train_test_split
import tensorflow as tf
from tensorflow.keras.layers import Conv1D, LSTM, Dense, Dropout, Bidirectional, TimeDistributed
from tensorflow.keras.layers import MaxPooling1D, Flatten
from tensorflow.keras.regularizers import L1, L2
from tensorflow.keras.metrics import Accuracy
from tensorflow.keras.metrics import RootMeanSquaredError
from tensorflow.keras.utils import plot_model

In [None]:
# Authenticate through google.colab's auth helper before any Google Cloud client is created.
# NOTE(review): presumably this makes the user's credentials available to the BigQuery
# client built in the next cell -- confirm against the Colab documentation.
auth.authenticate_user()

In [None]:
# BigQuery client bound to the project that the price table is queried from.
project_id = 'nft-dashboard-381202'
client = bigquery.Client(project=project_id)

In [None]:
# Fetch every column of the ETH price table, oldest row first, and materialise
# the result as a pandas DataFrame. The SQL has no placeholders, so a plain
# triple-quoted string is sufficient.
Query = """
    SELECT *
    FROM `nft-dashboard-381202.crypto_pipeline.crypto_eth_prices`
    ORDER BY Date ASC
"""
query_job = client.query(Query)
data = query_job.to_dataframe()

In [None]:
# Preview the first rows of the query result to sanity-check the columns and values.
data.head()

### Data Preprocessing and EDA

In [None]:
# Target dtype for each column, listed in the order the casts are applied.
# Every column is converted in place on `data`.
column_dtypes = {
    'Open': float,
    'High': float,
    'Low': float,
    'Close': float,
    'Adj_Close': float,
    'Volumn': int,  # column name spelled as in the source table
    'Prev_Close': float,
    'Simple_Return': float,
    'Log_Return': float,
}
for column_name, target_dtype in column_dtypes.items():
    data[column_name] = data[column_name].astype(target_dtype)

In [None]:
# Summary statistics of the columns after the dtype conversion above.
data.describe()

In [None]:


# One scatter-style subplot per price column (markers only, no connecting line).
cols_plot = ['Open', 'High', 'Low', 'Close', 'Adj_Close']
graphs = data[cols_plot].plot(
    subplots=True,
    figsize=(11, 9),
    linestyle='None',
    marker='.',
    alpha=0.5,
)
# Give every subplot the same y-axis label.
for graph in graphs:
    graph.set_ylabel('Daily trade')

In [None]:
# Build sliding-window samples of relative price changes.
#
# For every start row i, the window holds `window_size` consecutive values of
# column 4 and the label is the value of column 4 right after the window.
# Both are expressed relative to `first`, the value of column 0 at row i:
#     (value - first) / first
# NOTE(review): the de-normalisation cell further down describes column 0 as the
# Open price; which price column index 4 refers to depends on the table schema
# -- confirm both against data.columns.
X = []
Y = []
window_size = 10
for i in range(1, len(data) - window_size - 1):
    first = data.iloc[i, 0]
    features = [(data.iloc[i + j, 4] - first) / first for j in range(window_size)]
    label = [(data.iloc[i + window_size, 4] - first) / first]
    X.append(np.array(features).reshape(window_size, 1))
    Y.append(np.array(label).reshape(1, 1))

# Chronological split: the first 80% of the windows train the model and the
# last 20% are held out. Shuffling must stay off here because
#   * consecutive windows overlap in all but one value, so a random split puts
#     near-duplicates of every test window into the training set (leakage), and
#   * the later cells map the position of a test sample back to a row of `data`
#     and plot the test predictions against time, which only makes sense when
#     the samples keep their original order.
x_train, x_test, y_train, y_test = train_test_split(X, Y, test_size=0.2, shuffle=False)

train_X = np.array(x_train)
test_X = np.array(x_test)
train_Y = np.array(y_train)
test_Y = np.array(y_test)

# Add a length-1 axis after the batch axis so each sample has the 4-D layout
# (samples, 1, window_size, 1) that the TimeDistributed layers are fed with.
train_X = train_X.reshape(train_X.shape[0], 1, window_size, 1)
test_X = test_X.reshape(test_X.shape[0], 1, window_size, 1)

print(train_X.shape)
print(test_X.shape)

In [None]:
def construct_model():
  """Build and compile the CNN + bidirectional-LSTM regression network.

  The layers are stacked in a ``tf.keras.Sequential`` in this order:

  1. TimeDistributed Conv1D(64, kernel 3, relu) followed by MaxPooling1D(2)
  2. TimeDistributed Conv1D(128, kernel 3, tanh) followed by MaxPooling1D(2)
  3. TimeDistributed Flatten
  4. Bidirectional LSTM(50) that returns the full sequence, then Dropout(0.5)
  5. Dense(1) with a linear activation

  The first convolution reads the module-level ``window_size`` for its
  ``input_shape``. Alternatives that were tried and left disabled in the
  original code: a third Conv1D(32)/MaxPooling1D stage, an L2-regularised
  Dense(5) after the flatten, and a second Bidirectional LSTM with dropout.

  Returns:
    The model compiled with the Adam optimizer, MSE loss, and the 'mse' and
    'mae' metrics.
  """
  model = tf.keras.Sequential()

  # Convolutional feature extractor, applied through TimeDistributed wrappers.
  cnn_layers = [
      TimeDistributed(Conv1D(64, kernel_size=3, activation='relu', input_shape=(None, window_size, 1))),
      TimeDistributed(MaxPooling1D(2)),
      TimeDistributed(Conv1D(128, kernel_size=3, activation='tanh')),
      TimeDistributed(MaxPooling1D(2)),
      TimeDistributed(Flatten()),
  ]

  # Recurrent part: one bidirectional LSTM followed by dropout.
  lstm_layers = [
      Bidirectional(LSTM(50, return_sequences=True)),
      Dropout(0.5),
  ]

  # Regression head: a single linear unit.
  output_layers = [Dense(1, activation='linear')]

  for layer in cnn_layers + lstm_layers + output_layers:
    model.add(layer)

  model.compile(optimizer='adam', loss='mse', metrics=['mse', 'mae'])
  return model

# Build a fresh network and fit it on the training windows. The held-out
# windows are passed as validation data, so their loss is tracked every epoch.
model = construct_model()
results = model.fit(
    train_X,
    train_Y,
    validation_data=(test_X, test_Y),
    epochs=50,
    batch_size=50,
    shuffle=True,
    verbose=1,
)

In [None]:
# Training and validation loss per epoch, taken from the fit() history.
loss_curves = (
    ('loss', 'train loss'),
    ('val_loss', 'val loss'),
)
for history_key, legend_text in loss_curves:
    plt.plot(results.history[history_key], label=legend_text)
plt.ylabel("Loss")
plt.xlabel("epoch")
plt.legend()

In [None]:
# Show the layer-by-layer architecture. summary() prints the table itself and
# returns None, so wrapping it in print() only added a stray "None" line.
model.summary()
# Write a diagram of the layer graph (with shapes and layer names) to model.png.
# As the last expression of the cell, the returned object is what the notebook displays.
plot_model(model, to_file='model.png', show_shapes=True, show_layer_names=True)

In [None]:
# Score the trained model on the held-out windows. With the compile() settings used in
# construct_model this reports the MSE loss plus the 'mse' and 'mae' metrics.
# Note that the same test arrays were already used as validation data during fit().
model.evaluate(test_X, test_Y)

In [None]:
from sklearn.metrics import explained_variance_score, mean_poisson_deviance, mean_gamma_deviance
from sklearn.metrics import r2_score
from sklearn.metrics import max_error

# Regression metrics on the held-out windows. The targets are relative price
# changes, so all scores below are in those normalised units, not in prices.
# (The name `yhat_probs` is historical: these are regression outputs, not probabilities.)
yhat_probs = model.predict(test_X, verbose=0)
# Keep index 0 along the second axis of the prediction array.
# NOTE(review): presumably this leaves one column per sample, matching the
# (n, 1) shape of test_label below -- confirm with yhat_probs.shape.
yhat_probs = yhat_probs[:, 0]
test_label = test_Y.reshape(-1,1)

var = explained_variance_score(test_label, yhat_probs)
print('Variance: %f' % var)

# Report the R2 value itself; this line previously printed `var` a second time.
r2 = r2_score(test_label, yhat_probs)
print('R2 Score: %f' % r2)

var2 = max_error(test_label, yhat_probs)
print('Max Error: %f' % var2)

In [None]:
# Convert the normalised test targets and predictions back to price levels.
#
# A sample was normalised as (value - first) / first, where `first` is column 0
# of `data` at the row where its window starts (the original note here calls
# this the Open price). Inverting that gives value = normalised * first + first.
#
# The windowing loop starts at data row 1, so sample k belongs to data row
# k + 1. The test samples are samples len_t .. len_t + len(test_X) - 1, hence
# their base prices sit one row further down than the sample index.
# NOTE: this position-to-row mapping is only valid when the train/test split
# kept the samples in chronological order (train_test_split(..., shuffle=False)).
# After a shuffled split the base price of a test sample cannot be recovered
# from its position.
predicted = model.predict(test_X)
predicted = np.array(predicted[:, 0]).reshape(-1, 1)
len_t = len(train_X)

first_window_row = 1  # first value of i in the loop that builds X and Y
base_rows = np.arange(len_t, len_t + len(test_X)) + first_window_row
base_prices = data.iloc[base_rows, 0].to_numpy(dtype=float).reshape(-1, 1)

# New arrays are created here, so test_label itself is left untouched.
real_prices = test_label * base_prices + base_prices
predicted = predicted * base_prices + base_prices

In [None]:
# Overlay the reconstructed predictions on the reconstructed real prices.
# Predictions are drawn first, so the real series is layered on top of them.
price_series = (
    (predicted, 'green', 'Predicted ETH Price', 0.8),
    (real_prices, 'red', 'Real ETH Price', 0.5),
)
for series_values, line_color, legend_text, opacity in price_series:
    plt.plot(series_values, color = line_color, label = legend_text, alpha = opacity)
plt.xlabel('Time')
plt.ylabel(' ETH Price')
plt.title(' ETH Price Prediction')
plt.legend()
plt.show()