# Price prediction code


In [1]:
# IMPORTING IMPORTANT LIBRARIES
import pandas as pd
import matplotlib.pyplot as plt
import numpy as np
from numpy import concatenate as conc
import math
from sklearn.preprocessing import MinMaxScaler, StandardScaler
from sklearn.metrics import mean_squared_error, mean_absolute_error, classification_report, precision_recall_fscore_support
from keras.optimizers import SGD, RMSprop, Adagrad, Adadelta, Adam, Nadam
from keras.models import Sequential
from keras.layers import Dense, Activation, Dropout, LSTM, Bidirectional
from keras.utils import plot_model
from keras import backend as K
import os
import time
from datetime import timedelta
os.environ["PATH"] += os.pathsep + 'C:/Program Files (x86)/Graphviz2.38/bin/'


Using TensorFlow backend.


In [2]:
def root_mean_squared_logarithmic_error(y_true, y_pred):
    """Keras-compatible root mean squared logarithmic error loss.

    Computes ``sqrt(mean((log(1 + y_pred) - log(1 + y_true)) ** 2))`` over the
    last axis.

    The previous version passed ``axis`` to ``K.log`` (which only accepts a
    tensor) and took the logarithm of the *difference* instead of the
    difference of the logarithms.

    Args:
        y_true: tensor of ground-truth values.
        y_pred: tensor of predicted values, same shape as ``y_true``.

    Returns:
        Tensor with the last axis reduced.
    """
    # Clip to epsilon before the log so non-positive inputs do not produce NaN.
    log_pred = K.log(K.clip(y_pred, K.epsilon(), None) + 1.)
    log_true = K.log(K.clip(y_true, K.epsilon(), None) + 1.)
    return K.sqrt(K.mean(K.square(log_pred - log_true), axis=-1))
    
def percentage_change(inp):
    """Return the relative change between consecutive elements of ``inp``.

    Element ``i`` of the result is ``(inp[i + 1] - inp[i]) / inp[i]``, so the
    result is one element shorter than ``inp``.
    """
    step = np.diff(inp)
    base = inp[:-1]
    return step / base

def binary(inp):
    """Encode the direction of each step of ``inp`` as 1 (rise) or 0 (otherwise).

    Entry ``i`` of the result is 1 when ``inp[i + 1] - inp[i] > 0`` and 0 when
    the value fell or stayed flat. The result has ``len(inp) - 1`` entries.
    """
    directions = [
        1 if (inp[pos] - inp[pos - 1]) > 0 else 0
        for pos in range(1, len(inp))
    ]
    return np.array(directions)

def strategy_profit(yhat, yt):
    """Back-test a long/short strategy driven by the predictions.

    The position for step ``i`` is +1 when ``yhat[i] > yhat[i - 1]`` and -1
    otherwise; the first step carries no position. Strategy returns are the
    realised returns multiplied by that position.

    Fixes relative to the previous version:
      * ``yt`` is copied before its first element is zeroed. Previously the
        reshape returned a view, so the caller's array was silently modified.
      * The buy-and-hold benchmark is compounded with ``cumprod`` like the
        strategy. It used ``cumsum``, which is not the return of holding an
        asset and made the two figures incomparable.
      * The final values are read with ``.iloc[-1]`` instead of calling
        ``float()`` on a one-element Series.

    Args:
        yhat: predictions, one per step; flattened to 1-D before use.
        yt: realised per-step returns, reshaped to ``(len(yt),)``.

    Returns:
        Tuple ``(excess_return, sharpe)``: the strategy's final cumulative
        return minus buy-and-hold's, in percent, and
        ``sqrt(365) * mean / std`` of the per-step strategy returns.
    """
    # +1 when the prediction is above the previous one, -1 otherwise; no position on the first step.
    flat_pred = np.array(yhat).reshape(-1)
    signal = np.array([
        1 if flat_pred[i] > flat_pred[i - 1] else -1
        for i in range(1, len(flat_pred))
    ])
    signal = np.hstack((0, signal))

    # np.array() copies, so zeroing the first return cannot leak back into the caller's data.
    returns = np.array(yt).reshape((len(yt),))
    returns[0] = 0

    df = pd.DataFrame()  # DataFrame for the cumulative computations
    df["return_strat"] = returns * signal  # per-step strategy returns
    np_return_strat = df["return_strat"].values

    # Cumulative return in percent when the whole portfolio is reinvested each step.
    df["cumulative_return_strat"] = ((1 + df["return_strat"]).cumprod() - 1) * 100
    df["return"] = returns
    # Buy at t=0 and hold to the end, compounded the same way as the strategy.
    df["cumulative_return"] = ((1 + df["return"]).cumprod() - 1) * 100
    df["signal"] = signal

    excess_return = float(
        df["cumulative_return_strat"].iloc[-1] - df["cumulative_return"].iloc[-1]
    )
    sharpe = math.sqrt(365) * np.mean(np_return_strat) / np.std(np_return_strat)
    return excess_return, sharpe

In [3]:
# Seed NumPy's global random generator so NumPy-based randomness is repeatable
np.random.seed(7)

# Read the CSV and flip the row order (index labels travel with their rows)
dataset_path = 'ethereum_trainval_dataset.csv'
dataset = pd.read_csv(dataset_path)
flipped_index = dataset.index[::-1]
dataset = dataset.reindex(index = flipped_index)

In [4]:
# Remove the columns that are not used as model inputs
unwanted_columns = [
    "eth_supply",
    "eth_ethersupply",
    "eth_marketcap",
    "Unnamed: 0",
    "UnixTimeStamp",
    "eth_ens_register",
]
dataset = dataset.drop(columns=unwanted_columns)

In [5]:
# Make price column the last one for easier use later on:
# swap the first and the last column labels (price is expected to be first — TODO confirm).
cols = list(dataset)
cols[0], cols[-1] = cols[-1], cols[0]
# `.ix` was deprecated in pandas 0.20 and removed in 1.0; `.loc` does the same label-based selection.
dataset = dataset.loc[:, cols]
# Flip the row order.
dataset = dataset[::-1]

In [6]:
# Accounting for blocktime being halved at index 809
# Each column is updated through a single `.loc[rows, column]` call. The previous
# chained form (`dataset[col].loc[809:] = ...`) assigns into an intermediate
# Series, which pandas does not guarantee to write back into `dataset`.
# NOTE(review): assumes the index is the default 0..N-1 range so that label 809
# is also row position 809 (the old right-hand side sliced by position) — confirm.
HALVING_START = 809
for column, factor in (
    ("eth_blocktime", 2),
    ("eth_uncles", 0.5),
    ("eth_blocks", 0.5),
    ("eth_difficulty", 2),
):
    dataset.loc[HALVING_START:, column] = dataset.loc[HALVING_START:, column] * factor

In [7]:
# The first 13 rows carry no price values, so discard them (positional slice)
dataset = dataset.iloc[13:]

In [8]:
# Convert to numpy array and normalize data.
dataset = dataset.values
dataset = dataset.astype("float32")

# Target: relative change of the last column between consecutive rows (one element shorter than the data).
dataset_y = percentage_change(dataset[:, -1])

scaler_mm = MinMaxScaler(feature_range=(0,1))
scaler_z = StandardScaler()

# Fit the scaler on the training rows only. Fitting on every row (as before) leaks
# the mean/variance of the held-out rows into the training features.
# The 0.9 fraction must match the train/test split used further down.
n_train_rows = int(len(dataset) * 0.9)
scaler_z.fit(dataset[:n_train_rows, :-1])
dataset_X = scaler_z.transform(dataset[:, :-1])

# Drop the last feature row: it has no following row to compute a change against.
dataset_X = dataset_X[0:-1, :]

In [9]:
# Show the target and feature shapes side by side.
print(dataset_y.shape, dataset_X.shape)


(920,) (920, 11)


In [10]:
## Train/test split
# Chronological split: the first 90% of rows train the model, the remainder tests it.
split = int(len(dataset)*0.9)

train_X = dataset_X[:split, :]
test_X = dataset_X[split:, :]

train_y = dataset_y[:split]
# Was `dataset_y[:-split,]`, which returned the FIRST rows of the series (a slice of
# the training targets) instead of the targets that belong to `test_X`.
test_y = dataset_y[split:]

# Independent copy for the analysis later on, so changes to `yt` cannot alter `test_y`.
yt = test_y.copy()

# Reshape for LSTM: (samples, timesteps=1, features)
train_X = train_X.reshape((train_X.shape[0], 1, train_X.shape[1]))
test_X = test_X.reshape((test_X.shape[0], 1, test_X.shape[1]))

# Features and targets must stay aligned row for row.
assert train_X.shape[0] == train_y.shape[0]
assert test_X.shape[0] == test_y.shape[0]

print(train_X.shape)
print(train_y.shape)
print(test_X.shape)
print(test_y.shape)


(828, 1, 11)
(828,)
(92, 1, 11)
(92,)


In [17]:
def fit_model1_oleb(optimizer, loss_function, epochs = 10, batch_size = 1):
    """Train the three-LSTM network and return its predictions for ``test_X``.

    Layer sizes are fixed (LSTM 64/128/128, Dense 64/32/1); only the training
    set-up varies. Reads the module-level ``train_X``, ``train_y`` and
    ``test_X``.

    Args:
        optimizer: optimizer passed to ``compile``.
        loss_function: loss passed to ``compile``.
        epochs: number of training epochs.
        batch_size: mini-batch size.

    Returns:
        The output of ``predict(test_X)``.
    """
    network = Sequential([
        LSTM(64, input_shape=(1, 11), return_sequences=True),
        Activation("relu"),
        LSTM(128, input_shape=(1, 11), return_sequences=True),
        Activation("relu"),
        LSTM(128, input_shape=(1, 11)),
        Activation("relu"),
        Dense(64),
        Activation("relu"),
        Dense(32),
        Dense(1),
        Activation('linear'),
    ])

    # Compile, train silently, then predict on the held-out features
    network.compile(loss=loss_function, optimizer=optimizer)
    network.fit(train_X, train_y, epochs=epochs, batch_size=batch_size, verbose=0)
    return network.predict(test_X)
    

In [18]:
def fit_model1_neurons(n1, n2, n3, n4):
    """Train the three-LSTM network with configurable layer widths.

    Training is fixed at MSE loss, ``RMSprop(lr=0.0005)``, 1000 epochs and
    batch size 64. Reads the module-level ``train_X``, ``train_y`` and
    ``test_X``.

    Args:
        n1: units of the first LSTM layer.
        n2: units of both the second and the third LSTM layer.
        n3: units of the first Dense layer.
        n4: units of the second Dense layer.

    Returns:
        The output of ``predict(test_X)``.
    """
    network = Sequential([
        LSTM(n1, input_shape=(1, 11), return_sequences=True),
        Activation("relu"),
        LSTM(n2, input_shape=(1, 11), return_sequences=True),
        Activation("relu"),
        LSTM(n2, input_shape=(1, 11)),
        Activation("relu"),
        Dense(n3),
        Activation("relu"),
        Dense(n4),
        Dense(1),
        Activation('linear'),
    ])

    # Compile, train silently, then predict on the held-out features
    network.compile(loss="mean_squared_error", optimizer=RMSprop(lr=0.0005))
    network.fit(train_X, train_y, epochs=1000, batch_size=64, verbose=0)
    return network.predict(test_X)

In [19]:
def fit_model2_n(n1, n2, n3=32):
    """Train the two-LSTM network with configurable layer widths.

    Training is fixed at MSE loss, ``RMSprop(lr=0.0005)``, 2500 epochs and
    batch size 64. Reads the module-level ``train_X``, ``train_y`` and
    ``test_X``.

    The body used ``n3`` without defining it, so every call raised
    ``NameError`` unless a global ``n3`` happened to exist. It is now an
    optional parameter, which keeps ``fit_model2_n(n1, n2)`` calls working.

    Args:
        n1: units of the first LSTM layer.
        n2: units of the second LSTM layer.
        n3: units of the Dense layer before the output.
            NOTE(review): the default of 32 is an assumption — confirm the
            intended width.

    Returns:
        The output of ``predict(test_X)``.
    """
    model = Sequential()

    model.add(LSTM(n1, input_shape=(1, 11), return_sequences=True))
    model.add(LSTM(n2, input_shape=(1, 11)))

    model.add(Dense(n3))
    model.add(Dense(1))
    model.add(Activation('linear'))

    # Compile, train silently, then predict on the held-out features
    model.compile(loss="mean_squared_error", optimizer=RMSprop(lr=0.0005))
    model.fit(train_X, train_y, epochs=2500, batch_size=64, verbose=0)
    yhat = model.predict(test_X)

    return yhat

In [20]:
# plt.figure(figsize=(50,50))

# count = 1

# d = {}

# for n1 in [4, 8, 16, 32, 64, 128, 256]:
#     for n2 in [4, 8, 16, 32, 64, 128, 256]:
#         print("Starting to train model #" + str(count))
#         start_time = time.time()
        
#         yhat = fit_model2_n(n1, n2)

#         plt.subplot(15, 4, count)
#         plt.plot(yhat, "g", label = "predicted")
#         plt.plot(yt, "r", label = "real")
#         plt.title("n1: {} n2: {}".format(str(n1), str(n2))) 

#         rmse_normalized = math.sqrt(mean_squared_error(yhat, yt))
#         mae_normalized = mean_absolute_error(yhat, yt)
#         excess_r, sharpe = strategy_profit(yhat, yt)

#         d["n1: {} n2: {}".format(str(n1), str(n2))] = \
#         ["rmse: " + str(rmse_normalized), 
#             "mae: " + str(mae_normalized),
#                 precision_recall_fscore_support(binary(yhat), binary(yt), average = 'weighted'),
#                     "strategy profit in %: " + str(excess_r),
#                         "Sharpe ratio: " + str(sharpe),
#                             yhat]

#         print("Finished training model #{} with training time of: {} (h:mm:ss)".format(str(count), timedelta(seconds=round(time.time() - start_time))))
#         count += 1
#         print("------------------")

# plt.legend()
# plt.savefig("test.png")
# plt.show()

In [21]:
def fit_model2_oleb(o, l, e, b):
    """Train a single-LSTM (256 units) network and return its predictions.

    Reads the module-level ``train_X``, ``train_y`` and ``test_X``.

    Args:
        o: optimizer passed to ``compile``.
        l: loss passed to ``compile``.
        e: number of training epochs.
        b: mini-batch size.

    Returns:
        The output of ``predict(test_X)``.
    """
    network = Sequential([
        LSTM(256, input_shape=(1, 11)),
        Dense(1),
        Activation('linear'),
    ])

    # Compile, train silently, then predict on the held-out features
    network.compile(loss=l, optimizer=o)
    network.fit(train_X, train_y, epochs=e, batch_size=b, verbose=0)
    return network.predict(test_X)

In [22]:
# Gridsearch all but neurons: epochs, batch size, optimizer and loss
n_epochs = [1000, 2500, 5000, 7500, 10000] # + 7500
n_batch_size = [128, 64] # all good
n_optimizers = [RMSprop(lr = 0.0005)]
n_loss = ["mean_squared_error"] # = MAE, MSLE

plt.figure(figsize=(30,30))

count = 1

# Maps a configuration label to
# [rmse text, mae text, precision/recall/fscore/support, profit text, Sharpe text, predictions]
d = {}

for e in n_epochs:
    for b in n_batch_size:
        for o in n_optimizers:
            for l in n_loss:
                print("Starting to train model #" + str(count))
                start_time = time.time()

                yhat = fit_model1_oleb(o, l, e, b)

                # Build the label once. The class name replaces slicing the optimizer's
                # repr, which depended on the module path; "RMSprop" still shortens to "RMSpr".
                label = "e: {} bs: {} o: {} l: {}".format(str(e), str(b), type(o).__name__[:5], l)

                plt.subplot(9, 4, count)
                plt.plot(yhat, "g", label = "predicted")
                plt.plot(yt, "r", label = "real")
                plt.title(label)

                rmse_normalized = math.sqrt(mean_squared_error(yhat, yt))
                mae_normalized = mean_absolute_error(yhat, yt)
                # Pass a copy so the back-test cannot alter `yt` for the following models.
                excess_r, sharpe = strategy_profit(yhat, yt.copy())

                d[label] = [
                    "rmse: " + str(rmse_normalized),
                    "mae: " + str(mae_normalized),
                    # scikit-learn expects (y_true, y_pred); the reversed order swapped precision and recall.
                    precision_recall_fscore_support(binary(yt), binary(yhat), average = 'weighted'),
                    "strategy profit in %: " + str(excess_r),
                    "Sharpe ratio: " + str(sharpe),
                    yhat,
                ]

                print("Finished training model #{} with training time of: {}".format(str(count), timedelta(seconds=round(time.time() - start_time))))
                count += 1
                print("------------------")

plt.legend()
plt.savefig("test.png")
plt.show()

Starting to train model #1


InternalError: Blas GEMM launch failed : a.shape=(128, 64), b.shape=(64, 64), m=128, n=64, k=64
	 [[Node: lstm_4/while/MatMul_6 = MatMul[T=DT_FLOAT, _class=["loc:@training/RMSprop/gradients/lstm_4/while/MatMul_6_grad/MatMul"], transpose_a=false, transpose_b=false, _device="/job:localhost/replica:0/task:0/device:GPU:0"](lstm_4/while/Switch_3:1, lstm_4/while/MatMul_6/Enter)]]
	 [[Node: loss_1/mul/_161 = _Recv[client_terminated=false, recv_device="/job:localhost/replica:0/task:0/device:CPU:0", send_device="/job:localhost/replica:0/task:0/device:GPU:0", send_device_incarnation=1, tensor_name="edge_4287_loss_1/mul", tensor_type=DT_FLOAT, _device="/job:localhost/replica:0/task:0/device:CPU:0"]()]]

<matplotlib.figure.Figure at 0x2f3a09dea90>

In [None]:
# # Gridsearch neurons
# n1l = [256]
# n2l = [16, 32, 64, 128, 256]
# n3l = [8, 16, 32, 64]
# n4l = [2, 4, 8]

# plt.figure(figsize=(60,60))

# count = 1

# d = {}

# # FIT NEURONS IN NETWORK
# for n1 in n1l:
#     for n2 in n2l:
#         for n3 in n3l:
#             for n4 in n4l:
#                 print("Starting to train model #" + str(count))
#                 start_time = time.time()
#                 yhat = fit_model1_neurons(n1, n2, n3, n4)
                
#                 plt.subplot(15, 4, count)
#                 plt.plot(yhat, "g", label = "predicted")
#                 plt.plot(yt, "r", label = "real")
#                 plt.title("n1: {} n2: {} n3: {} n4: {}".format(str(n1), str(n2), str(n3), str(n4))) 
                
#                 rmse_normalized = math.sqrt(mean_squared_error(yhat, yt))
#                 mae_normalized = mean_absolute_error(yhat, yt)
#                 excess_r, sharpe = strategy_profit(yhat, yt)
                
#                 d["n1: {} n2: {} n3: {} n4: {}".format(str(n1), str(n2), str(n3), str(n4))] = \
#                 ["rmse: " + str(rmse_normalized), 
#                     "mae: " + str(mae_normalized),
#                         precision_recall_fscore_support(binary(yhat), binary(yt), average = 'weighted'),
#                             "strategy profit in %: " + str(excess_r),
#                                 "Sharpe ratio: " + str(sharpe),
#                                     yhat]
                  
#                 elapsed_time_secs = time.time() - start_time
#                 print("Finished training model #{} with training time of: {}".format(str(count), timedelta(seconds=round(elapsed_time_secs))))
#                 count += 1
                
#                 print("------------------")

# plt.legend()
# plt.savefig("test.png")
# plt.show()

In [None]:
# Print every configuration label with the first five entries of its result list
for label, metrics in d.items():
    summary = metrics[0:5]
    print(label, "\n", summary)
    print("\n")

In [None]:
# Keys of `d` are built as "e: {} bs: {} o: {} l: {}" (with spaces); the previous
# key had no spaces and raised KeyError. Index 4 of an entry is the Sharpe-ratio text.
print(d["e: 10000 bs: 128 o: RMSpr l: mean_squared_error"][4])

In [None]:
# Count how many subplots a 7 x 7 grid search would need
c = 0

for e in range(7):
    for b in range(7):
        c = c + 1

print(c)

In [None]:
# Write a diagram of the network to model.png.
# NOTE(review): no top-level `model` is assigned in the cells visible here (the
# fit_* helpers keep their model local and return only predictions), so this
# presumably fails with NameError on a fresh kernel — confirm.
plot_model(model, to_file='model.png')

In [None]:
# Plot the training loss per epoch.
# NOTE(review): `history` is not assigned in the cells visible here (none of the
# fit_* helpers keeps the return value of `fit`) — confirm where it should come from.
plt.plot(history.history['loss'], label='train')
plt.legend()
plt.show()

In [None]:
# make a prediction
# NOTE(review): `model` is not assigned at top level in the cells visible here —
# confirm where it should come from before relying on this cell.
yt = test_y
yhat = model.predict(test_X)

In [None]:
# Fetch the stored predictions of one grid-search run.
# Keys of `d` are built as "e: {} bs: {} o: {} l: {}" (with spaces), and the
# predictions are the last entry (index 5); index 4 is the Sharpe-ratio text.
# NOTE(review): the previous key asked for 500 epochs, which is not in `n_epochs`;
# 1000 (the smallest value searched) is assumed here — confirm the intended run.
yhat = d["e: 1000 bs: 128 o: RMSpr l: mean_squared_error"][5]

In [None]:
# Predicted (green) against realised (red) values on one axis
for series, colour, caption in ((yhat, "g", "predicted"), (yt, "r", "real")):
    plt.plot(series, colour, label = caption)
plt.legend()
plt.show()

In [None]:
# Error metrics between `yhat` and `yt` as they are (no inverse transform).
# Variants on inverse-transformed values, kept for reference:
#rmse = math.sqrt(mean_squared_error(inv_yhat, inv_yt))
#mae = mean_absolute_error(inv_yhat, inv_yt)
mse_normalized = mean_squared_error(yhat, yt)
rmse_normalized = math.sqrt(mse_normalized)
mae_normalized = mean_absolute_error(yhat, yt)
print(rmse_normalized, mae_normalized)
# print(rmse, mae, "original errors")

In [None]:
# Up/down classification quality. scikit-learn expects (y_true, y_pred);
# the reversed order reported precision and recall swapped.
print(classification_report(binary(yt), binary(yhat)))

In [None]:
# Realised returns as a 1-D array with one entry per row of `yt`
n_steps = yt.shape[0]
returns = yt.reshape((n_steps, ))

In [None]:
# +1 where the prediction rose versus the previous step, -1 otherwise
trade_sides = [1 if step == 1 else -1 for step in binary(yhat)]
# Prepend 0: no position is held on the first step
signal = np.hstack((0, np.array(trade_sides)))

In [None]:
# Empty frame that the following cell fills with the back-test columns.
df = pd.DataFrame()

In [None]:
# Per-step strategy return: realised return times the position held (+1 / -1 / 0)
df["return_strat"] = (returns * signal)
# Cumulative strategy return in percent, reinvesting the whole portfolio each step
df["cumulative_return_strat"] = ((1 + df['return_strat']).cumprod() - 1) * 100
df["return"] = returns
# Buy-and-hold benchmark, compounded the same way as the strategy. It previously
# used cumsum(), which is not the return of holding an asset and made the two
# curves incomparable.
df["cumulative_return"] = ((1 + df["return"]).cumprod() - 1) * 100
df["signal"] = signal

In [None]:
# Cumulative return of buy-and-hold (red) against the strategy (green)
for column, colour, caption in (
    ("cumulative_return", "r", "Buy and hold"),
    ("cumulative_return_strat", "g", "Strategy"),
):
    plt.plot(df[column], colour, label = caption)
plt.plot()  # argument-less call kept from the original; it draws nothing
plt.legend()
plt.show()