In [1]:
%matplotlib inline

import numpy as np
import pandas as pd
from keras.preprocessing.sequence import TimeseriesGenerator
from sklearn.metrics import mean_squared_error as mse
from sklearn.svm import SVR
from tqdm import tqdm
import matplotlib.pyplot as plt
from sklearn.preprocessing import StandardScaler
from keras.callbacks import ModelCheckpoint, EarlyStopping
from keras.optimizers import Adam
from sklearn.preprocessing import MinMaxScaler
from sklearn.metrics import mean_absolute_error as mae

  from ._conv import register_converters as _register_converters
Using TensorFlow backend.


In [2]:
# Load the returns series. The CSV has no header row, so the two column names
# are assigned by hand right after reading.
df = pd.read_csv('ready_returns.csv', header=None)
df.columns = ['date', 'returns']

# `data` is the working array that is scaled in the following cells; `data2`
# holds the same raw values (its use is not shown in this part of the notebook).
# Each one is an independent copy, so an in-place edit of one can never leak
# into the other or back into `df`.
data = df['returns'].values.copy()
data2 = df['returns'].values.copy()
assert len(data) == len(df) == len(data2)

In [3]:

# Rescale the returns into [-1, 1]. The scaler works on 2-D input, so the
# series is first viewed as a single column.
# NOTE(review): the scaler is fitted on the complete series, i.e. before the
# train/validation/test split made further down, so the test period influences
# the scaling range — confirm this is acceptable.
returns_column = data.reshape(-1, 1)
sc = MinMaxScaler(feature_range=(-1, 1))
data = sc.fit_transform(returns_column)


In [4]:
# Collapse the scaled column back into a flat 1-D vector.
data = data.ravel()

In [5]:
# Peek at the flattened, scaled series (numpy elides the middle of long arrays).
data

array([ 0.01102189,  0.08478452, -0.00066804, ..., -0.43662616,
       -0.11505437, -0.19925689])

In [6]:
# Sliding-window setup: every sample is WINDOW_LENGTH consecutive scaled
# returns, and its target is the value that immediately follows the window.
WINDOW_LENGTH = 10
TEST_SIZE = 32         # number of most recent windows held out for testing
TRAIN_FRACTION = 0.9   # share of the remaining windows used for training

# NOTE(review): a 0.0 is appended to the series before windowing. Presumably
# this pads the series so that the last real observation can serve as a target
# on Keras versions whose generator stops one step early; on versions that do
# not, the final window's target would be this artificial 0.0 — confirm.
padded = np.append(data, 0.00)
data_gen = TimeseriesGenerator(padded, padded,
                               length=WINDOW_LENGTH, sampling_rate=1,
                               batch_size=1)

# Collect features and targets from the generated batches. Iterating over
# len(data_gen) visits every batch exactly once and lets genuine errors
# surface instead of being swallowed as an end-of-data signal.
features = []
targets = []
for batch_index in range(len(data_gen)):
    batch_x, batch_y = data_gen[batch_index]
    # batch_size is 1, so each batch carries exactly one window and one target.
    features.append(batch_x[0])
    targets.append(batch_y[0])

# Convert to numpy arrays for further use.
features = np.array(features)
targets = np.array(targets)
assert len(features) == len(targets)

# Chronological split: the last TEST_SIZE windows form the test set ...
x_test, y_test = features[-TEST_SIZE:], targets[-TEST_SIZE:]
x_hold, y_hold = features[:-TEST_SIZE], targets[:-TEST_SIZE]

# ... and the earlier windows are cut into training and validation parts,
# again without shuffling so validation data always follows training data.
split_at = int(len(x_hold) * TRAIN_FRACTION)
x_train, x_valid = x_hold[:split_at], x_hold[split_at:]
y_train, y_valid = y_hold[:split_at], y_hold[split_at:]

assert len(x_train) + len(x_valid) == len(x_hold)
assert len(y_train) + len(y_valid) == len(y_hold)

In [7]:
def _add_feature_axis(windows):
    """Return `windows` reshaped to (samples, timesteps, 1)."""
    n_samples, n_steps = windows.shape[0], windows.shape[1]
    return windows.reshape(n_samples, n_steps, 1)


# The recurrent layers below take 3-D input, so each window gets a trailing
# axis of size one (a single feature per time step).
x_train = _add_feature_axis(x_train)
x_valid = _add_feature_axis(x_valid)
x_test = _add_feature_axis(x_test)

---

# LSTM modelling

In [8]:
# Sanity check: the test targets should be a flat vector with one value per test window.
y_test.shape

(32,)

In [9]:
import keras
from keras.models import Sequential
from keras.layers import Activation, Dense
from keras.layers import LSTM, GRU
from keras.layers import Dropout

In [12]:
# Three stacked 20-unit LSTM layers followed by a single linear output unit.
# The first two LSTMs return the whole sequence so the next recurrent layer
# still receives 3-D input; the third returns only its last output.
model = Sequential()
model.add(LSTM(20, return_sequences=True,
               input_shape=(x_train.shape[1], x_train.shape[2])))
model.add(LSTM(20, return_sequences=True))
model.add(LSTM(20))
model.add(Dense(1, activation='linear'))

# The network is built from LSTM layers, so the summary is labelled as such.
print('LSTM Model Summary')
model.summary()

GRU Model Summary
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
lstm_1 (LSTM)                (None, 10, 20)            1760      
_________________________________________________________________
lstm_2 (LSTM)                (None, 10, 20)            3280      
_________________________________________________________________
lstm_3 (LSTM)                (None, 20)                3280      
_________________________________________________________________
dense_1 (Dense)              (None, 1)                 21        
Total params: 8,341
Trainable params: 8,341
Non-trainable params: 0
_________________________________________________________________


In [13]:
# Optimizer: Adam with an explicitly chosen learning rate.
LEARNING_RATE = 0.005
optim = Adam(lr=LEARNING_RATE)

In [14]:
# Train on mean squared error and also report it as a metric; the metric is
# what appears as `mean_squared_error` / `val_mean_squared_error` in the
# training log.
model.compile(
    optimizer=optim,
    loss='mse',
    metrics=['mse'],
)

In [26]:
# File that receives the best weights. The path is relative to the working
# directory so the notebook does not depend on one machine's folder layout.
filepath = "model.hdf5"

# Write the model to disk only when the validation MSE reaches a new minimum.
checkpoint = [
    ModelCheckpoint(filepath, monitor='val_mean_squared_error', verbose=1,
                    save_best_only=True, mode='min')]

# Keep the History object so the per-epoch losses stay available afterwards
# instead of being shown once as an object repr.
history = model.fit(x_train, y_train,
                    batch_size=1, epochs=20,
                    validation_data=(x_valid, y_valid),
                    verbose=2, callbacks=checkpoint)

Train on 1363 samples, validate on 152 samples
Epoch 1/20
 - 1s - loss: 0.0248 - mean_squared_error: 0.0248 - val_loss: 0.0756 - val_mean_squared_error: 0.0756

Epoch 00001: val_mean_squared_error improved from inf to 0.07557, saving model to C:\Users\AURIMASSilva\Desktop\BTC\memes\model.hdf5
Epoch 2/20
 - 1s - loss: 0.0248 - mean_squared_error: 0.0248 - val_loss: 0.0753 - val_mean_squared_error: 0.0753

Epoch 00002: val_mean_squared_error improved from 0.07557 to 0.07533, saving model to C:\Users\AURIMASSilva\Desktop\BTC\memes\model.hdf5
Epoch 3/20
 - 1s - loss: 0.0247 - mean_squared_error: 0.0247 - val_loss: 0.0747 - val_mean_squared_error: 0.0747

Epoch 00003: val_mean_squared_error improved from 0.07533 to 0.07473, saving model to C:\Users\AURIMASSilva\Desktop\BTC\memes\model.hdf5
Epoch 4/20
 - 1s - loss: 0.0247 - mean_squared_error: 0.0247 - val_loss: 0.0758 - val_mean_squared_error: 0.0758

Epoch 00004: val_mean_squared_error did not improve
Epoch 5/20
 - 1s - loss: 0.0248 - mean

<keras.callbacks.History at 0x17384c270b8>

In [27]:
# Predict the (scaled) next-step return for every test window.
# NOTE(review): this uses the in-memory model as left by the last training
# epoch; the best-validation checkpoint written to `filepath` during fit is not
# reloaded here — confirm that is intended.
y_hat = model.predict(x_test)

In [28]:
# Map the scaled test targets back to the original return scale; the scaler
# expects a single column, hence the reshape.
y_test_column = y_test.reshape(-1, 1)
y_test_true = sc.inverse_transform(y_test_column)

In [29]:
# Map the scaled predictions back to the original return scale, using the
# same column layout as the targets.
y_hat_column = y_hat.reshape(-1, 1)
y_hat_true = sc.inverse_transform(y_hat_column)

In [30]:
# Root mean squared error on the original (unscaled) return scale.
test_mse_true = mse(y_test_true, y_hat_true)
np.sqrt(test_mse_true)

4.826360386422331

In [33]:
# Mean absolute error on the original (unscaled) return scale.
mae(y_true=y_test_true, y_pred=y_hat_true)

3.8901676566445405

In [32]:
# Variance of the true test returns. A constant prediction at the test mean
# has an MSE equal to this value, so it is the yardstick for the squared RMSE.
np.var(y_test_true)

19.31290675432932

In [None]:
# Mean signed error (bias) on the original scale: a positive value means the
# model under-predicts on average, a negative one that it over-predicts.
residuals = y_test_true - y_hat_true
residuals.sum() / len(y_test_true)

In [None]:
# Number of test observations behind the metrics in this section.
y_test_true.shape[0]

In [31]:
# Root mean squared error in scaled units, i.e. on the [-1, 1] range the
# model was trained on.
test_mse_scaled = mse(y_test, y_hat)
np.sqrt(test_mse_scaled)

0.208064086132727