In [None]:
#Code was originally run on Google Colab using a GPU provided by Google

#Importing packages
import numpy as np
import pandas as pd
#import tensorflow as tf
import matplotlib.pyplot as plt
import seaborn as sns
from tensorflow import keras
from sklearn.preprocessing import MinMaxScaler

In [None]:
#Setting random seed
# Seeds NumPy's global random generator with a fixed value so NumPy-based randomness is repeatable.
RANDOM_SEED = 2020
np.random.seed(RANDOM_SEED)
# NOTE(review): the TensorFlow seed is not set; the line below is commented out, as is the tensorflow import.
#tf.random.set_seed(RANDOM_SEED)

In [None]:
#Connecting to Google Drive
# Mounts Google Drive at /content/gdrive (requires the google.colab package, i.e. a Colab runtime).
# The CSV files and saved models used further down are read from paths under this mount point.
from google.colab import drive
drive.mount("/content/gdrive")

Drive already mounted at /content/gdrive; to attempt to forcibly remount, call drive.mount("/content/gdrive", force_remount=True).


In [None]:
def _load_price_frame(csv_path):
  """Read one CSV, index it by its parsed "date" column in ascending order and drop the "Unnamed: 0" column."""
  frame = pd.read_csv(csv_path, parse_dates = ["date"], index_col = "date")
  frame = frame.sort_index()
  return frame.drop(["Unnamed: 0"], axis=1)

# One raw frame per coin, all loaded the same way from the mounted Drive.
Bitcoin = _load_price_frame("/content/gdrive/My Drive/Bitcoin.csv")
Ethereum = _load_price_frame("/content/gdrive/My Drive/Ethereum.csv")
Ripple = _load_price_frame("/content/gdrive/My Drive/Ripple.csv")


In [None]:
#Creating a function that will return the small or large dataset
# featureSelector(dataset, "Bitcoin"/"Ethereum"/"Ripple", 0/1) where 0 is the small dataset and 1 the large one
# in case of error: reread from csv 

def featureSelector(dataset, crypto, size):
  """Return the small or large feature subset of a crypto DataFrame.

  Parameters
  ----------
  dataset : pandas.DataFrame
      Frame containing the columns named below.
  crypto : str
      Suffix of the coin-specific columns, e.g. "Bitcoin" selects
      "articlesBitcoin", "sentimentBitcoin", "trendBitcoin" and "wikiBitcoin".
  size : int
      0 for the small feature set (15 columns), 1 for the large one (26 columns).

  Returns
  -------
  pandas.DataFrame
      The selected columns in the order listed below, with "target" last.

  Raises
  ------
  ValueError
      If size is neither 0 nor 1.
  """
  if size not in [0, 1]:
    # The old code printed a message and fell through, returning None; callers then
    # failed later with an unrelated-looking error. Fail here instead.
    raise ValueError("wrong size specification: size must be 0 (small) or 1 (large), got %r" % (size,))

  # Coin-specific column names change with the crypto suffix.
  articles = "articles" + crypto
  sentiment = "sentiment" + crypto
  trend = "trend" + crypto
  wiki = "wiki" + crypto

  # Columns shared by both feature sets: market data first, macro indicators and target last.
  market_columns = ["open", "high", "low", "close", "volume", "marketCap"]
  macro_columns = ["wti", "brent", "EPU", "vixClose", "target"]

  if size == 0: #small dataset
    middle_columns = [articles, sentiment, trend, wiki]
  else: #big dataset
    middle_columns = [articles, sentiment, "articlesBlockchain", "sentimentBlockchain", "articlesCryptocurrency", "sentimentCryptocurrency",
                      "articlesCryptocurrencies", "sentimentCryptocurrencies", trend, "trendBlockchain", "trendCryptocurrency", "trendCryptocurrencies",
                      wiki, "wikiBlockchain", "wikiCryptocurrency"]

  return dataset.loc[:, market_columns + middle_columns + macro_columns]


In [None]:
# Build both feature-set variants per coin: size 0 gives the "small" frame, size 1 the "big" one.
bitcoin_small, bitcoin_big = [featureSelector(Bitcoin, "Bitcoin", size) for size in (0, 1)]
ethereum_small, ethereum_big = [featureSelector(Ethereum, "Ethereum", size) for size in (0, 1)]
ripple_small, ripple_big = [featureSelector(Ripple, "Ripple", size) for size in (0, 1)]

In [None]:
# Quick look at the large Ethereum feature frame; pandas abbreviates the printed rows and columns with "...".
print(ethereum_big)

              open    high     low   close  ...  brent    EPU  vixClose  target
date                                        ...                                
2017-01-01    7.98    8.47    7.98    8.17  ...  55.05   2351     12.85       1
2017-01-02    8.17    8.44    8.05    8.38  ...  55.05  24204     12.85       1
2017-01-03    8.37   10.00    8.32    9.73  ...  55.05   8985     12.85       1
2017-01-04    9.71   11.28    9.56   11.25  ...  54.57  10199     11.85       0
2017-01-05   11.29   11.89    9.40   10.25  ...  54.99  13447     11.67       0
...            ...     ...     ...     ...  ...    ...    ...       ...     ...
2020-04-26  195.41  199.34  194.77  197.32  ...  15.17  85731     33.29       0
2020-04-27  197.48  199.55  193.45  197.22  ...  15.17  63720     33.29       1
2020-04-28  197.27  198.79  194.85  198.42  ...  15.60  48139     33.57       1
2020-04-29  198.47  218.45  198.12  216.97  ...  17.86  43724     31.23       0
2020-04-30  216.91  227.53  206.44  207.

In [None]:
#Constructing train and test set. Training set size defaults to 90%
def train_test_splitter(dataset, train_fraction=0.9):
  """Split a frame by row position into a leading train part and a trailing test part.

  No shuffling is done: the first int(len(dataset) * train_fraction) rows become the
  train set and the remaining rows the test set.

  Parameters
  ----------
  dataset : pandas.DataFrame
      Frame to split; row order is preserved.
  train_fraction : float, default 0.9
      Share of rows (between 0 and 1) assigned to the train set.

  Returns
  -------
  (train, test) : tuple of pandas.DataFrame
      Positional slices of `dataset`.

  Raises
  ------
  ValueError
      If train_fraction is outside [0, 1].
  """
  if not 0 <= train_fraction <= 1:
    raise ValueError("train_fraction must be between 0 and 1, got %r" % (train_fraction,))

  train_size = int(len(dataset) * train_fraction)
  train, test = dataset.iloc[:train_size], dataset.iloc[train_size:]

  return train, test


In [None]:
def data_scaler(dataset, verbose=True):
  """Min-max scale the feature columns of the test split, fitting the scaler on the train split.

  Parameters
  ----------
  dataset : pandas.DataFrame
      Frame with feature columns and a "target" column; it is split by train_test_splitter.
  verbose : bool, default True
      When true, print the scaled test frame (the original behaviour).

  Returns
  -------
  pandas.DataFrame
      A new frame holding the test rows: every column except "target" is scaled,
      "target" is left as is. Scaled values can fall outside [0, 1] because the
      scaler's range comes from the train rows only.
  """
  features = dataset.columns.drop("target")

  train, test = train_test_splitter(dataset)

  scaler = MinMaxScaler()
  scaler.fit(train[features].to_numpy()) #fit on train data only so the test rows do not influence the scaling

  # Work on an explicit copy: `test` is a slice of `dataset`, and assigning into a slice
  # triggers SettingWithCopyWarning and can write the scaled values back into the caller's
  # frame, which made a second run rescale already-scaled data.
  scaled_test = test.copy()
  # Column assignment replaces the columns, so integer columns can become float.
  scaled_test[list(features)] = scaler.transform(test[features].to_numpy())
  if verbose:
    print(scaled_test)

  return scaled_test

In [None]:
# Function that formats the input data into the dimensions built below
def datasetCreator(X, y):
  """Convert a feature frame and a target series into NumPy arrays.

  Each row of X becomes its own one-row block, so for a non-empty X with k columns
  the first array has shape (len(X), 1, k). The second array holds y's values at
  positions 0..len(X)-1, squeezed of length-1 axes.
  """
  n_rows = len(X)
  row_blocks = [X.iloc[start:start + 1].to_numpy() for start in range(n_rows)]
  labels = [y.iloc[position] for position in range(n_rows)]

  feature_array = np.array(row_blocks)
  label_array = np.array(labels).squeeze()
  return feature_array, label_array

In [None]:
def get_test_data(dataset):
  """Return (X_test, y_test) arrays for `dataset`.

  The scaled test split comes from data_scaler; every column except "target"
  forms the features and the "target" column forms the labels, both converted
  to arrays by datasetCreator.
  """
  scaled_test = data_scaler(dataset)

  is_feature = dataset.columns != "target"  # boolean mask over the columns
  feature_frame = scaled_test.loc[:, is_feature]
  target_series = scaled_test.loc[:, "target"]

  X_test, y_test = datasetCreator(feature_frame, target_series)
  return X_test, y_test

In [None]:
#DO NOT RUN THIS CELL MORE THAN ONCE PER KERNEL SESSION: per the note at the end of the cell, a re-run may scale already-scaled data again.

X_test_bitcoin_small, y_test_bitcoin_small = get_test_data(bitcoin_small)
X_test_bitcoin_big, y_test_bitcoin_big = get_test_data(bitcoin_big)
X_test_ethereum_small, y_test_ethereum_small = get_test_data(ethereum_small)
X_test_ethereum_big, y_test_ethereum_big = get_test_data(ethereum_big)
X_test_ripple_small, y_test_ripple_small = get_test_data(ripple_small)
X_test_ripple_big, y_test_ripple_big = get_test_data(ripple_big)

#A SettingWithCopyWarning is emitted because the scaler function (data_scaler) assigns the scaled values into the test split,
# which is a slice of the 'original' pandas DataFrame rather than an independent copy.

                open      high       low  ...       EPU  vixClose  target
date                                      ...                            
2019-12-31  0.348612  0.338009  0.352064  ...  0.273001  0.164656       1
2020-01-01  0.343289  0.333807  0.352347  ...  0.481192  0.118169       0
2020-01-02  0.343698  0.331618  0.339192  ...  0.359533  0.118169       1
2020-01-03  0.332034  0.342080  0.338079  ...  0.026883  0.173172       1
2020-01-04  0.351336  0.342790  0.359734  ...  0.550564  0.167140       1
...              ...       ...       ...  ...       ...       ...     ...
2020-04-26  0.363355  0.356970  0.373560  ...  2.221309  0.856991       1
2020-04-27  0.369198  0.361902  0.380038  ...  1.650706  0.856991       1
2020-04-28  0.375484  0.362884  0.382859  ...  1.246792  0.866927       1
2020-04-29  0.376005  0.417760  0.385891  ...  1.132340  0.783889       0
2020-04-30  0.428996  0.447289  0.426905  ...  1.125548  0.173882       0

[122 rows x 15 columns]
             

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  self.obj[item] = s
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  self.obj[item] = s
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  self.obj[item] = s


                open      high       low  ...       EPU  vixClose  target
date                                      ...                            
2019-12-31  0.089694  0.087957  0.094198  ...  0.273001  0.164656       1
2020-01-01  0.087549  0.087333  0.094510  ...  0.481192  0.118169       0
2020-01-02  0.088406  0.085914  0.092755  ...  0.359533  0.118169       1
2020-01-03  0.085952  0.088533  0.092397  ...  0.026883  0.173172       1
2020-01-04  0.090817  0.089586  0.097504  ...  0.550564  0.167140       1
...              ...       ...       ...  ...       ...       ...     ...
2020-04-26  0.134890  0.134018  0.145632  ...  2.221309  0.856991       0
2020-04-27  0.136380  0.134165  0.144602  ...  1.650706  0.856991       1
2020-04-28  0.136229  0.133631  0.145694  ...  1.246792  0.866927       1
2020-04-29  0.137092  0.147433  0.148243  ...  1.132340  0.783889       0
2020-04-30  0.150363  0.153808  0.154730  ...  1.125548  0.173882       0

[122 rows x 15 columns]
             

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  self.obj[item] = s
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  self.obj[item] = s
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  self.obj[item] = s


                open      high       low  ...       EPU  vixClose  target
date                                      ...                            
2019-12-31  0.056370  0.049376  0.059268  ...  0.273001  0.164656       0
2020-01-01  0.055892  0.049242  0.059954  ...  0.481192  0.118169       0
2020-01-02  0.055831  0.048859  0.058297  ...  0.359533  0.118169       1
2020-01-03  0.054412  0.049166  0.057944  ...  0.026883  0.173172       1
2020-01-04  0.056073  0.049318  0.059867  ...  0.550564  0.167140       1
...              ...       ...       ...  ...       ...       ...     ...
2020-04-26  0.056524  0.049986  0.060724  ...  2.221309  0.856991       1
2020-04-27  0.057021  0.050523  0.061023  ...  1.650706  0.856991       1
2020-04-28  0.057422  0.055435  0.061324  ...  1.246792  0.866927       1
2020-04-29  0.062644  0.058439  0.066802  ...  1.132340  0.783889       0
2020-04-30  0.066104  0.060023  0.066205  ...  1.125548  0.173882       0

[122 rows x 26 columns]


In [None]:
# Load one saved Keras model (HDF5 file) per coin and feature-set size from the mounted Drive.
model_bitcoin_small = keras.models.load_model("/content/gdrive/My Drive/Bitcoin Small/Model6.hdf5")
model_bitcoin_big = keras.models.load_model("/content/gdrive/My Drive/Bitcoin Big/Model8.hdf5")
model_ethereum_small = keras.models.load_model("/content/gdrive/My Drive/Ethereum Small/Model6.hdf5")
model_ethereum_big = keras.models.load_model("/content/gdrive/My Drive/Ethereum Big/Model23.hdf5")
model_ripple_small = keras.models.load_model("/content/gdrive/My Drive/Ripple Small/Model29.hdf5")
model_ripple_big = keras.models.load_model("/content/gdrive/My Drive/Ripple Big/Model14.hdf5")

In [None]:
#Bitcoin small
# Unpacking into two values assumes evaluate() returns the loss plus exactly one metric (presumably accuracy; TODO confirm).
# NOTE: test_loss_score / test_acc_score are reassigned by each evaluation cell, so they hold only the latest result.
test_loss_score, test_acc_score = model_bitcoin_small.evaluate(X_test_bitcoin_small, y_test_bitcoin_small)



In [None]:
#Bitcoin big
# Unpacking into two values assumes evaluate() returns the loss plus exactly one metric (presumably accuracy; TODO confirm).
# NOTE: test_loss_score / test_acc_score are reassigned by each evaluation cell, so they hold only the latest result.
test_loss_score, test_acc_score = model_bitcoin_big.evaluate(X_test_bitcoin_big, y_test_bitcoin_big)




In [None]:
#Ethereum small
# Unpacking into two values assumes evaluate() returns the loss plus exactly one metric (presumably accuracy; TODO confirm).
# NOTE: test_loss_score / test_acc_score are reassigned by each evaluation cell, so they hold only the latest result.
test_loss_score, test_acc_score = model_ethereum_small.evaluate(X_test_ethereum_small, y_test_ethereum_small)



In [None]:
#Ethereum big
# Unpacking into two values assumes evaluate() returns the loss plus exactly one metric (presumably accuracy; TODO confirm).
# NOTE: test_loss_score / test_acc_score are reassigned by each evaluation cell, so they hold only the latest result.
test_loss_score, test_acc_score = model_ethereum_big.evaluate(X_test_ethereum_big, y_test_ethereum_big)



In [None]:
#Ripple small
# Unpacking into two values assumes evaluate() returns the loss plus exactly one metric (presumably accuracy; TODO confirm).
# NOTE: test_loss_score / test_acc_score are reassigned by each evaluation cell, so they hold only the latest result.
test_loss_score, test_acc_score = model_ripple_small.evaluate(X_test_ripple_small, y_test_ripple_small)



In [None]:
#Ripple big
# Unpacking into two values assumes evaluate() returns the loss plus exactly one metric (presumably accuracy; TODO confirm).
# NOTE: test_loss_score / test_acc_score are reassigned by each evaluation cell, so they hold only the latest result.
test_loss_score, test_acc_score = model_ripple_big.evaluate(X_test_ripple_big, y_test_ripple_big)



In [None]:
def _predict_classes(model, X):
  """Return hard class labels computed from model.predict(X).

  Replaces the deprecated `predict_classes` method, following the recipe in its
  deprecation notice: argmax over the last axis when the model emits more than
  one output value per sample, otherwise threshold the single output at 0.5
  and cast to int32.
  """
  probabilities = model.predict(X)
  if probabilities.shape[-1] > 1:
    return probabilities.argmax(axis=-1)
  return (probabilities > 0.5).astype("int32")

# Class predictions on the test split for every coin / feature-set combination.
prediction_bitcoin_small = _predict_classes(model_bitcoin_small, X_test_bitcoin_small)
prediction_bitcoin_big = _predict_classes(model_bitcoin_big, X_test_bitcoin_big)
prediction_ethereum_small = _predict_classes(model_ethereum_small, X_test_ethereum_small)
prediction_ethereum_big = _predict_classes(model_ethereum_big, X_test_ethereum_big)
prediction_ripple_small = _predict_classes(model_ripple_small, X_test_ripple_small)
prediction_ripple_big = _predict_classes(model_ripple_big, X_test_ripple_big)

Instructions for updating:
Please use instead:* `np.argmax(model.predict(x), axis=-1)`,   if your model does multi-class classification   (e.g. if it uses a `softmax` last-layer activation).* `(model.predict(x) > 0.5).astype("int32")`,   if your model does binary classification   (e.g. if it uses a `sigmoid` last-layer activation).


In [None]:
from sklearn.metrics import confusion_matrix
from sklearn.metrics import accuracy_score
from sklearn.metrics import recall_score

In [None]:
def metric_print(names):
  """Print the confusion matrix, accuracy, recall and specificity for binary 0/1 labels.

  Parameters
  ----------
  names : sequence
      Two items: names[0] holds the true labels, names[1] the predicted labels.

  Prints, in order: the 2x2 confusion matrix, "Accuracy:", "Recall:",
  "Specificity:" (tn / (tn + fp), or nan when there are no true negatives
  or false positives to divide by) and the flattened matrix [tn fp fn tp].
  """
  y_true, y_pred = names[0], names[1]

  # Compute the matrix once. labels=[0, 1] pins a 2x2 layout even when one class
  # is absent from both inputs, so the four-way unpacking below cannot fail.
  matrix = confusion_matrix(y_true, y_pred, labels=[0, 1])
  tn, fp, fn, tp = matrix.ravel()

  print(matrix)
  print("Accuracy:", accuracy_score(y_true, y_pred))
  print("Recall:", recall_score(y_true, y_pred))

  actual_negatives = tn + fp
  # Guard the division: with no actual negatives specificity is undefined.
  specificity = tn / actual_negatives if actual_negatives else float("nan")
  print("Specificity:", specificity)
  print(matrix.ravel())


In [None]:
# Bitcoin, small feature set: true test labels first, predicted classes second.
metric_print([y_test_bitcoin_small, prediction_bitcoin_small])

[[ 5 56]
 [ 1 60]]
Accuracy: 0.5327868852459017
Recall: 0.9836065573770492
Specificity: 0.08196721311475409
[ 5 56  1 60]


In [None]:
# Bitcoin, large feature set: true test labels first, predicted classes second.
metric_print([y_test_bitcoin_big, prediction_bitcoin_big])

[[24 37]
 [28 33]]
Accuracy: 0.4672131147540984
Recall: 0.5409836065573771
Specificity: 0.39344262295081966
[24 37 28 33]


In [None]:
# Ethereum, small feature set: true test labels first, predicted classes second.
metric_print([y_test_ethereum_small, prediction_ethereum_small])


[[28 24]
 [45 25]]
Accuracy: 0.4344262295081967
Recall: 0.35714285714285715
Specificity: 0.5384615384615384


In [None]:
# Ethereum, large feature set: true test labels first, predicted classes second.
metric_print([y_test_ethereum_big, prediction_ethereum_big])

[[19 33]
 [19 51]]
Accuracy: 0.5737704918032787
Recall: 0.7285714285714285
Specificity: 0.36538461538461536


In [None]:
# Ripple, small feature set: true test labels first, predicted classes second.
# The recorded output shows every sample predicted as class 0 (recall 0.0, specificity 1.0).
metric_print([y_test_ripple_small, prediction_ripple_small])

[[55  0]
 [67  0]]
Accuracy: 0.45081967213114754
Recall: 0.0
Specificity: 1.0


In [None]:
# Ripple, large feature set: true test labels first, predicted classes second.
metric_print([y_test_ripple_big, prediction_ripple_big])

[[34 21]
 [32 35]]
Accuracy: 0.5655737704918032
Recall: 0.5223880597014925
Specificity: 0.6181818181818182
