In [None]:
# REPRODUCIBILITY
from numpy.random import seed
seed(12)
from tensorflow import set_random_seed
set_random_seed(12)

# IMPORTING IMPORTANT LIBRARIES
import pandas as pd
import matplotlib.pyplot as plt
import numpy as np
from numpy import concatenate as conc
import math
from sklearn.preprocessing import MinMaxScaler, StandardScaler
from sklearn.metrics import mean_squared_error, mean_absolute_error, classification_report, precision_recall_fscore_support
from keras.optimizers import SGD, RMSprop, Adagrad, Adadelta, Adam, Nadam
from keras.models import Sequential
from keras.layers import Dense, Activation, Dropout, LSTM, Bidirectional
from keras.utils import plot_model
from keras import backend as K
import os
import time
from datetime import timedelta
os.environ["PATH"] += os.pathsep + 'C:/Program Files (x86)/Graphviz2.38/bin/'


In [None]:
def percentage_change(inp):
    """Return the relative change between consecutive elements of ``inp``.

    For a 1-D input, element ``k`` of the result is
    ``(inp[k + 1] - inp[k]) / inp[k]``, so the result is one element shorter
    than the input. The values are fractions, not multiplied by 100.
    """
    steps = np.diff(inp)   # inp[k + 1] - inp[k]
    bases = inp[:-1]       # value each step started from
    return steps / bases

def binary(inp):
    """Encode the direction of every consecutive step in ``inp``.

    Returns a numpy array with ``len(inp) - 1`` entries: 1 where an element is
    strictly greater than its predecessor, 0 otherwise (equal or lower).
    """
    # Elements are compared pairwise by index, starting from the second one.
    flags = [1 if (inp[k] - inp[k - 1]) > 0 else 0 for k in range(1, len(inp))]
    return np.array(flags)

def strategy_profit(yhat, yt, plot = False):
    """Back-test a long/short strategy driven by the direction of the predictions.

    The position for period ``k`` is +1 when ``yhat[k]`` is strictly greater
    than ``yhat[k - 1]`` and -1 otherwise; the first period has no position.
    The strategy return of a period is the position times the realised return.

    Side effect: the cumulative strategy return curve (in %) is saved to
    ``model2.npy`` in the working directory on every call.

    Parameters
    ----------
    yhat : array-like
        Predictions, one per period. Only the direction of consecutive
        differences (via ``binary``) is used.
    yt : numpy.ndarray
        Realised per-period returns; must be reshapeable to ``(yt.shape[0],)``.
        The array is not modified.
    plot : bool, optional
        When true, plot the cumulative return of the strategy against the
        buy-and-hold curve.

    Returns
    -------
    tuple
        ``(excess, sharpe, span)`` where

        * ``excess`` is the final cumulative strategy return minus the final
          buy-and-hold cumulative return, both in %;
        * ``sharpe`` is ``sqrt(365.25) * mean / std`` of the per-period
          strategy returns;
        * ``span`` is the index of the peak minus the index of the trough of
          the largest drawdown of the strategy curve (so it is <= 0).
          NOTE(review): ``error_performance`` labels this value
          "Drawdown in %", but it is a count of periods, not a percentage.
    """
    # +1 (long) when the prediction rises, -1 (short) otherwise; 0 for the first period.
    signal = np.hstack((0, np.where(binary(yhat) == 1, 1, -1)))

    # reshape() returns a view, so copy before zeroing the first return;
    # otherwise the caller's yt[0] would be overwritten as well.
    returns = yt.reshape((yt.shape[0], )).copy()
    returns[0] = 0

    df = pd.DataFrame() # Create dataframe for easier computation

    df["return_strat"] = (returns * signal) # per-period returns of the strategy
    np_return_strat = df["return_strat"].values

    # Strategy curve compounds (entire portfolio value is reinvested each period).
    df["cumulative_return_strat"] = ((1 + df['return_strat']).cumprod() - 1) * 100
    df["return"] = returns
    # Buy-and-hold curve is the plain sum of the per-period returns (no compounding).
    df["cumulative_return"] = (df["return"].cumsum())*100
    df["signal"] = signal

    xs = df["cumulative_return_strat"].values.astype("float32")

    np.save("model2.npy", xs)

    i = np.argmax(np.maximum.accumulate(xs) - xs) # trough: end of the largest drawdown
    # With no drawdown at all i is 0 and xs[:i] is empty, which argmax rejects.
    j = np.argmax(xs[:i]) if i > 0 else 0 # peak: start of the largest drawdown

    if plot:
        plt.plot(df["cumulative_return"], "r", label = "Buy and hold")
        plt.plot(df["cumulative_return_strat"], "g", label = "Strategy")
        plt.legend()
        plt.show()

    # Take the last row as scalars instead of calling float() on a one-element Series.
    excess = float(df["cumulative_return_strat"].iloc[-1] - df["cumulative_return"].iloc[-1])
    sharpe = math.sqrt(365.25) * np.mean(np_return_strat) / np.std(np_return_strat)
    return excess, sharpe, (j - i)

In [None]:
# 920 - 1009
# prediction period vdataset["EtherPrice"].iloc[-1023:-934]

In [None]:
# IMPORTING DATASET
def _read_newest_first(csv_path):
    """Read a CSV, sort it by "UnixTimeStamp" and return it with the row order reversed."""
    frame = pd.read_csv(csv_path).sort_values(by=["UnixTimeStamp"])
    # Re-indexing on the reversed index flips the sorted rows.
    return frame.reindex(index = frame.index[::-1])

dataset = _read_newest_first('ethereum_trainval_dataset.csv')
tdataset = _read_newest_first('ethereum_test_dataset.csv')

In [None]:
# Remove the unwanted columns from both frames (in place).
for unwanted in ("eth_supply", "eth_ethersupply", "eth_marketcap",
                 "Unnamed: 0", "UnixTimeStamp", "eth_ens_register"):
    del dataset[unwanted]

for unwanted in ("Date(UTC)", "UnixTimeStamp", "Unnamed: 0"):
    del tdataset[unwanted]

In [None]:
# Make price column the last one for easier use later on.
# This swaps the columns at positions 0 and 11
# (assumes the price is the first of exactly 12 columns — TODO confirm).
cols = list(dataset)
cols[0], cols[11] = cols[11], cols[0]
# DataFrame.ix was removed in pandas 1.0; .loc performs the same label-based column selection.
dataset = dataset.loc[:, cols]
# Reverse the row order of both frames.
dataset = dataset[::-1]
tdataset = tdataset[::-1]

In [None]:
# Accounting for blocktime being halved at index 809.
# Each update is a single .loc assignment on the frame itself: chained forms such as
# dataset[col].loc[rows] = ... write to an intermediate object, raise SettingWithCopyWarning
# and leave the frame unchanged when pandas copy-on-write is active.
# NOTE(review): 809 and 497 are index labels here; confirm they identify the intended rows.
dataset.loc[809:, "eth_blocktime"] = dataset.loc[809:, "eth_blocktime"] * 2
dataset.loc[809:, "eth_uncles"] = dataset.loc[809:, "eth_uncles"] / 2
dataset.loc[809:, "eth_blocks"] = dataset.loc[809:, "eth_blocks"] / 2
dataset.loc[809:, "eth_difficulty"] = dataset.loc[809:, "eth_difficulty"] * 2

# Scale down the single gas-price value at label 497 by a factor of 41.
dataset.loc[497, "eth_gasprice"] = dataset.loc[497, "eth_gasprice"] / 41



In [None]:
# Keep only these columns of the test frame, in this order (price last).
test_columns = [
    "GasUsed", "TxGrowth", "AddressCount", "NetworkHash",
    "BlockDifficulty", "BlockCountRewards", "Uncles", "BlockSize",
    "BlockTime", "AvgGasPrice", "GasLimit", "EtherPrice",
]
tdataset = tdataset[test_columns]

In [None]:
# Discard the first 13 rows of both frames.
# Note: re-running this cell drops another 13 rows each time.
ROWS_TO_DROP = 13
dataset = dataset[ROWS_TO_DROP:]
tdataset = tdataset[ROWS_TO_DROP:]

In [None]:
# Convert both frames to float32 numpy arrays (scaling is applied in a later cell).
dataset, tdataset = (frame.values.astype("float32") for frame in (dataset, tdataset))

In [None]:
# Targets: relative change of the last column from each row to the next one.
dataset_y, tdataset_y = (percentage_change(block[:, -1]) for block in (dataset, tdataset))

# Features: every column except the last, standardised with statistics fitted on
# `dataset` only and then re-used for `tdataset`.
scaler_z = StandardScaler()
dataset_X = scaler_z.fit_transform(dataset[:, :-1])
tdataset_X = scaler_z.transform(tdataset[:, :-1])

# The last row has no following row and therefore no target; drop it.
dataset_X = dataset_X[:-1, :]
tdataset_X = tdataset_X[:-1, :]

# Restrict the test data to the evaluation window (rows 921 up to, not including, 1011).
test_window = slice(921, 1011)
tdataset_X = tdataset_X[test_window]
yt = tdataset_y[test_window]

for arr in (tdataset_X, yt):
    print(arr.shape)

In [None]:
## Train/validation split: the first 90% of the rows train, the rest validates
split = int(len(dataset)*0.9)

train_X, val_X = dataset_X[:split, :], dataset_X[split:, :]
# `test_y` holds the targets of the validation rows (despite its name); `yv` is an alias.
train_y, test_y = dataset_y[:split], dataset_y[split:]
yv = test_y

# The LSTM expects (samples, timesteps, features); insert a single-timestep axis.
train_X = np.expand_dims(train_X, axis=1)
val_X = np.expand_dims(val_X, axis=1)
test_X = np.expand_dims(tdataset_X, axis=1)

for arr in (train_X, train_y, val_X, test_y):
    print(arr.shape)

In [None]:
rms = RMSprop(lr = 0.0005)

# Three stacked LSTM layers followed by a small dense head with one linear output.
model = Sequential()

# Only the first layer declares the input shape; it is taken from the training data
# ((timesteps, features) == train_X.shape[1:]) instead of being hard-coded.
model.add(LSTM(128, input_shape=train_X.shape[1:], return_sequences = True))
model.add(Activation("relu"))

# Intermediate LSTM keeps the sequence dimension for the next recurrent layer.
model.add(LSTM(128, return_sequences = True))
model.add(Activation("relu"))

# Last LSTM returns only its final output.
model.add(LSTM(128))
model.add(Activation("relu"))

model.add(Dense(8))
model.add(Activation("relu"))

model.add(Dense(4))
model.add(Dense(1))

# Linear (identity) output for regression.
model.add(Activation('linear'))

# Compile and Run
model.compile(loss = "mean_squared_error", optimizer = rms) # Try SGD, adam, adagrad and compare!!!
model.fit(train_X, train_y, epochs = 1000, batch_size = 64, verbose=2)

In [None]:
# Run the fitted model on the reshaped test window (test_X) to obtain the predictions.
yhat = model.predict(test_X)

In [None]:
# Sanity check: the predictions come from the test window, so their count must match
# the test targets `yt` they are scored against (not the validation targets `yv`).
print(len(yhat))
print(len(yt))

In [None]:
# Predicted vs. realised values over the test window; the figure is also saved to disk.
fig, ax = plt.subplots()
ax.plot(yhat, "g", label = "predicted")
ax.plot(yt, "r", label = "real")
ax.set_title("Model 1")
ax.legend()
fig.savefig("test.png")
plt.show()

In [None]:
def error_performance(yhat, yt, plot = False):
    """Collect regression, direction and trading metrics for a set of predictions.

    Parameters
    ----------
    yhat : array-like
        Predicted values.
    yt : numpy.ndarray
        Realised values the predictions are scored against. Not modified.
    plot : bool, optional
        Forwarded to ``strategy_profit``.

    Returns
    -------
    list
        Six entries, in this order: an "rmse: ..." string, a "mae: ..." string,
        the tuple returned by ``precision_recall_fscore_support`` (weighted
        average) for the up/down direction of consecutive values, and three
        strings built from the values returned by ``strategy_profit``
        ("strategy profit in %: ...", "Sharpe ratio: ...", "Drawdown in %: ...").
    """
    rmse = math.sqrt(mean_squared_error(yt, yhat))
    mae = mean_absolute_error(yt, yhat)

    # scikit-learn expects (y_true, y_pred); passing them the other way round swaps
    # precision and recall and weights the average by the predicted class counts.
    direction_scores = precision_recall_fscore_support(binary(yt), binary(yhat), average = 'weighted')

    # Hand strategy_profit its own copy of the targets so it cannot alter the caller's array.
    excess_r, sharpe, drawdown = strategy_profit(yhat, np.array(yt), plot)

    return ["rmse: " + str(rmse), "mae: " + str(mae),
            direction_scores,
            "strategy profit in %: " + str(excess_r), "Sharpe ratio: " + str(sharpe),
            "Drawdown in %: " + str(drawdown)]

# Evaluate on the test window and show the first five entries of the report.
report = error_performance(yhat, yt, plot = True)
print(report[0:5])