# Time Series predictions using LSTM

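Yet another attempt: a hand-written, LSTM-style gated recurrent cell (`RNN` below) is trained on corpora built from the CSV files in `tickers/` and evaluated on held-out tickers.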

In [1]:
import pandas as pd
from pandas_datareader import data, wb
from datetime import datetime
import numpy as np
import graphviz
import random

import sys
import os

import torch
import torch.nn as nn
import torch.nn.functional as F
from torch.autograd import Variable

import bokeh.models
import bokeh.plotting as bk
bk.output_notebook()

import ts_helpers

In [2]:
%%javascript
// Runs in the browser: asks the kernel to execute `nb_name = "<notebook file name>"`,
// so that later Python cells can derive the weights file name from nb_name.
// NOTE(review): relies on the `IPython.notebook` front-end object, which presumably
// exists only in the classic Notebook UI. The assignment is sent as a separate kernel
// request, so confirm nb_name is defined before the cells that read it are run.
IPython.notebook.kernel.execute('nb_name = ' + '"' + IPython.notebook.notebook_name + '"')

<IPython.core.display.Javascript object>

In [3]:
# Seed every RNG the notebook draws from. torch is included because the
# nn.Linear layers and RNN.init_weights() are initialised from torch's RNG.
SEED = 42
np.random.seed(SEED)
random.seed(SEED)
torch.manual_seed(SEED)

# Window sizes forwarded to ts_helpers.build_corpus as wnd_in / wnd_out
# (presumably rows of history fed in / rows predicted ahead -- confirm in ts_helpers).
WINDOW_IN = 7
WINDOW_OUT = 4

# Tickers with fewer rows than this are dropped before the corpus is built.
MIN_ROWS = 364
TICKERS_DIR = 'tickers'

# os.listdir() returns entries in arbitrary order; sorting keeps the order of
# TICKERS (and therefore the train/test split and TICKERS[0]) stable across
# machines and runs.
ticker_frames = [
    pd.read_csv(os.path.join(TICKERS_DIR, fname))
    for fname in sorted(os.listdir(TICKERS_DIR))
    if fname.endswith('.csv')
]

# One corpus per sufficiently long ticker; later cells unpack each entry as (X, y).
TICKERS = [
    ts_helpers.build_corpus(
        df,
        wnd_in=WINDOW_IN,
        wnd_out=WINDOW_OUT)
    for df in ticker_frames
    if df.shape[0] >= MIN_ROWS
]

In [4]:
class RNN(nn.Module):
    """Hand-written gated recurrent cell with a linear read-out.

    One call to ``forward`` processes a single time step: it takes the input
    vector ``x`` and the previous memory ``mem`` (both 1-D) and returns the
    prediction, a sigmoid pair derived from it, and the updated memory.
    """

    def __init__(self, n_in, n_mem, days_out):
        """Create the layers.

        Args:
            n_in: length of the input vector ``x``.
            n_mem: length of the memory vector.
            days_out: length of the output vector ``o``.
        """
        super(RNN, self).__init__()
        self.n_in, self.n_mem, self.days_out = n_in, n_mem, days_out
        # The three gate layers read the concatenation of x and mem.
        # Attribute names and creation order are kept as-is so that state
        # dicts saved earlier still load.
        self.xm2f = nn.Linear(in_features=(n_mem + n_in), out_features=n_mem)
        self.xm2invent = nn.Linear(in_features=(n_mem + n_in), out_features=n_mem)
        self.xm2save = nn.Linear(in_features=(n_in + n_mem), out_features=n_mem)
        # Read-out path: project x to memory width, modulate by mem, map to output.
        self.x2mid = nn.Linear(in_features=n_in, out_features=n_mem)
        self.mid2o = nn.Linear(in_features=n_mem, out_features=days_out)

    def init_weights(self, var=.1):
        """Re-draw every layer's weight matrix uniformly from ``[-var, var]``.

        Only ``weight`` is touched; biases keep whatever ``nn.Linear`` gave them.
        Despite its name, ``var`` is the bound of the uniform range.
        """
        for tr in [
                self.xm2f,
                self.xm2invent,
                self.xm2save,
                self.x2mid,
                self.mid2o,
                ]:
            tr.weight.data.uniform_(-var, var)

    def forward(self, x, mem):
        """Advance the cell by one step.

        Args:
            x: 1-D input of length ``n_in``.
            mem: 1-D memory of length ``n_mem`` (see ``init_mem``).

        Returns:
            ``(o, sgn, mem)`` where ``o`` has length ``days_out``, ``sgn`` has
            shape ``(days_out, 2)`` holding ``sigmoid(-o)`` and ``sigmoid(o)``
            (each row sums to 1), and ``mem`` is the updated memory.
        """
        # torch.sigmoid / torch.tanh are used instead of the nn.functional
        # aliases, which several PyTorch releases flag as deprecated.
        xmem = torch.cat((x, mem))
        # Forget gate scales the old memory.
        mem = mem * torch.sigmoid(self.xm2f(xmem))
        # Candidate memory and the gate that blends it in; both are computed
        # from xmem, i.e. from the memory as it was before the forget step.
        new_mem = torch.tanh(self.xm2invent(xmem))
        mem_mask = torch.sigmoid(self.xm2save(xmem))
        mem = ((1 - mem_mask) * mem
               + mem_mask * new_mem
        )
        mid = self.x2mid(x) * mem
        o = self.mid2o(mid)
        sgn = torch.stack((torch.sigmoid(-o), torch.sigmoid(o)), dim=1)
        return (o, sgn, mem)

    def init_mem(self):
        """Return an all-zero memory vector of length ``n_mem``."""
        # Variable is kept so the cell still works on the old PyTorch releases
        # where plain tensors could not be mixed with autograd Variables.
        return Variable(torch.zeros(self.n_mem))

In [5]:
# Objective handed to ts_helpers.loss by every training loop in this notebook;
# my_objective itself is defined in ts_helpers (not shown here).
OBJECTIVE = ts_helpers.my_objective

In [6]:
# Size the network from the first corpus: the column counts of its X and y
# tables give the input and output widths; the memory has 64 units.
rnn = RNN(
    n_in=TICKERS[0][0].shape[1],
    n_mem=64,
    days_out=TICKERS[0][1].shape[1],
)
rnn.init_weights(.1)
optimizer = torch.optim.RMSprop(rnn.parameters(), lr=.001)

# Resume from weights saved by an earlier session, if present. nb_name is set
# by the %%javascript cell above.
# NOTE: torch.load unpickles the file -- only load weight files you trust.
if os.path.exists(nb_name + '.weights'):
    weights = torch.load(nb_name + '.weights')
    rnn.load_state_dict(weights)

In [7]:
from sklearn.model_selection import train_test_split

# Split at ticker level: every (X, y) corpus lands entirely in TR or in TE.
# random_state is fixed so that re-running this cell always yields the same
# partition; without it the split depends on the leftover state of NumPy's
# global RNG, and a re-run could move tickers the model was trained on into TE.
TR, TE = train_test_split(TICKERS, random_state=42)

In [8]:
def get_tr():
    """Return one randomly chosen entry of the training split ``TR``.

    Callers unpack the entry as ``X, y``. The draw uses the global ``random``
    module state.
    """
    return random.choice(TR)

def get_te():
    """Return one randomly chosen entry of the held-out split ``TE``.

    Callers unpack the entry as ``X, y``. The draw uses the global ``random``
    module state.
    """
    return random.choice(TE)

In [9]:
def train(model, epochs=2, resample_every=8):
    """Train ``model`` for ``epochs`` passes, printing the loss of each pass.

    A training ticker is drawn with ``get_tr()`` at epoch 1 and then again
    every ``resample_every`` epochs (1, 1 + k, 1 + 2k, ...); the epochs in
    between keep training on the same ticker.

    Relies on the notebook globals ``OBJECTIVE`` and ``optimizer``; the
    optimizer must have been built over ``model``'s parameters.

    Args:
        model: network passed through to ``ts_helpers.loss``.
        epochs: number of passes to run.
        resample_every: how many consecutive epochs to spend on one ticker
            before drawing a new one. Defaults to 8, the previously
            hard-coded value.

    Raises:
        ValueError: if ``resample_every`` is smaller than 1.
    """
    if resample_every < 1:
        raise ValueError('resample_every must be >= 1')
    for epoch in range(1, epochs + 1):
        # Equivalent to `epoch % 8 == 1` for the default period, but also
        # correct for a period of 1 (where `epoch % 1 == 1` would never fire
        # and X, y would stay unbound).
        if (epoch - 1) % resample_every == 0:
            X, y = get_tr()
        print(
            'Epoch #%s; Loss: %s'
            % (
                epoch,
                ts_helpers.loss(model, X, y,
                     OBJECTIVE,
                     optimizer)))

In [10]:
# train() draws a new training ticker at epochs 1, 9, 17, ...; jumps in the
# printed loss at those epochs presumably reflect the change of ticker rather
# than divergence.
train(rnn, epochs=50)

Epoch #1; Loss: 0.027661966159939766
Epoch #2; Loss: 0.025149058550596237
Epoch #3; Loss: 0.024926800280809402
Epoch #4; Loss: 0.02480660192668438
Epoch #5; Loss: 0.024691425263881683
Epoch #6; Loss: 0.024583835154771805
Epoch #7; Loss: 0.02446405589580536
Epoch #8; Loss: 0.024391502141952515
Epoch #9; Loss: 1258.66259765625
Epoch #10; Loss: 877.18994140625
Epoch #11; Loss: 775.354736328125
Epoch #12; Loss: 718.8861694335938
Epoch #13; Loss: 680.7930297851562
Epoch #14; Loss: 645.5838623046875
Epoch #15; Loss: 621.7166137695312
Epoch #16; Loss: 606.2349853515625
Epoch #17; Loss: 0.18004563450813293
Epoch #18; Loss: 0.1749957650899887
Epoch #19; Loss: 0.17182116210460663
Epoch #20; Loss: 0.169429749250412
Epoch #21; Loss: 0.16751036047935486
Epoch #22; Loss: 0.16601566970348358
Epoch #23; Loss: 0.16488394141197205
Epoch #24; Loss: 0.16403169929981232
Epoch #25; Loss: 0.1766752004623413
Epoch #26; Loss: 0.17562170326709747
Epoch #27; Loss: 0.17442253232002258
Epoch #28; Loss: 0.173380881

In [11]:
# Run ts_helpers.evaluate_model on one ticker drawn at random from the
# held-out split.
Xte, yte = get_te()
ts_helpers.evaluate_model(
    rnn, Xte, yte,
    title='Evaluation on the test set',
    include_plohi=True,
    include_tlohi=True,
)

In [12]:
def eval_signs(model, next_df):
    """Measure how well ``model`` predicts the sign of the targets on one dataset.

    The dataset is obtained by calling ``next_df()`` and is walked step by step
    with ``ts_helpers.chunk_iterator``, carrying the model memory forward.

    All three metrics are accumulated as plain Python floats, so the result
    does not depend on tensor dtype rules (integer tensor division, in-place
    division of an integer tensor) that differ between PyTorch versions.

    Args:
        model: object providing ``init_mem()`` and ``model(x, mem)`` returning
            ``(prediction, sign_pair, new_mem)``.
        next_df: zero-argument callable returning ``(X, y)``.

    Returns:
        Tuple ``(acc, nextday_acc, cont_sgn_err)`` of floats:
        ``acc`` -- count of target entries whose product with the prediction
        is positive, divided by ``y.size``;
        ``nextday_acc`` -- the same check on the first entry of every step,
        divided by ``X.shape[0]``;
        ``cont_sgn_err`` -- 1 minus the summed absolute difference between the
        model's sigmoid pair and the same pair built from the target, divided
        by ``y.size``. Both columns of the pair are summed.

    NOTE(review): the normalisers assume chunk_iterator yields one step per
    row of X / y -- confirm against ts_helpers.
    """
    Xte, yte = next_df()
    mem = model.init_mem()
    cont_sgn_err = 0.0
    acc = 0.0
    nextday_acc = 0.0
    for x, y in ts_helpers.chunk_iterator(Xte, yte):
        yp, sp, mem = model(x, mem)
        # Same (sigmoid(-v), sigmoid(v)) encoding the model uses for its own output.
        target_sgn = torch.stack((
            torch.sigmoid(-y),
            torch.sigmoid(y)),
            dim=1)
        cont_sgn_err += float(torch.abs(sp - target_sgn).data.sum())
        # A strictly positive product means prediction and target share a sign.
        acc += float((y.data * yp.data > 0).sum())
        nextday_acc += float(float(y.data[0]) * float(yp.data[0]) > 0)
    cont_sgn_err = 1 - cont_sgn_err / yte.size
    acc = acc / yte.size
    nextday_acc /= Xte.shape[0]
    return acc, nextday_acc, cont_sgn_err

In [13]:
# Ten independent eval_signs runs, each on a randomly drawn held-out ticker.
# NOTE(review): the third value returned by eval_signs is "1 - scaled error",
# so the 'average_error' label is presumably a misnomer -- confirm.
sign_errs = pd.DataFrame(
    [eval_signs(rnn, get_te) for _ in range(10)],
    columns=['accuracy', 'nextday_accuracy', 'average_error'],
)

In [14]:
# Column-wise mean over the sampled eval_signs runs.
sign_errs.mean()

accuracy            0.720105
nextday_accuracy    0.487421
average_error       0.883091
dtype: float64

In [15]:
# Per-run values that the means are computed from.
sign_errs

Unnamed: 0,accuracy,nextday_accuracy,average_error
0,0.752691,0.499119,0.940643
1,0.737459,0.514851,0.923935
2,0.686518,0.448049,0.966181
3,0.570029,0.504308,0.761085
4,0.73471,0.498226,0.770235
5,0.740628,0.461106,0.87929
6,0.73074,0.484541,0.957562
7,0.761644,0.501684,0.859195
8,0.7594,0.474747,0.830674
9,0.727234,0.487582,0.942105


In [21]:
# Continue training the same rnn object for 25 more epochs; its weights carry
# over from the earlier training cells.
train(rnn, epochs=25)

Epoch #1; Loss: 0.7610455751419067
Epoch #2; Loss: 0.741489827632904
Epoch #3; Loss: 0.7301685214042664
Epoch #4; Loss: 0.720731258392334
Epoch #5; Loss: 0.7121723294258118
Epoch #6; Loss: 0.7039424777030945
Epoch #7; Loss: 0.6954324841499329
Epoch #8; Loss: 0.686896026134491
Epoch #9; Loss: 0.044913943856954575
Epoch #10; Loss: 0.04301545023918152
Epoch #11; Loss: 0.04289786517620087
Epoch #12; Loss: 0.04280592501163483
Epoch #13; Loss: 0.04274117946624756
Epoch #14; Loss: 0.04269915446639061
Epoch #15; Loss: 0.04267071187496185
Epoch #16; Loss: 0.04264617711305618
Epoch #17; Loss: 0.1691068857908249
Epoch #18; Loss: 0.16437841951847076
Epoch #19; Loss: 0.16274744272232056
Epoch #20; Loss: 0.1608382761478424
Epoch #21; Loss: 0.1591920405626297
Epoch #22; Loss: 0.15773575007915497
Epoch #23; Loss: 0.15649420022964478
Epoch #24; Loss: 0.1551923304796219
Epoch #25; Loss: 0.9698631763458252


In [23]:
# Run ts_helpers.evaluate_model on one ticker drawn at random from the
# held-out split.
Xte, yte = get_te()
ts_helpers.evaluate_model(
    rnn, Xte, yte,
    title='Evaluation on a random test set',
    include_tlohi=True,
)

In [43]:
# Continue training the same rnn object for 10 more epochs.
train(rnn, epochs=10)

Epoch #1; Loss: 0.46933111548423767
Epoch #2; Loss: 0.46223312616348267
Epoch #3; Loss: 0.4573774039745331
Epoch #4; Loss: 0.4537244737148285
Epoch #5; Loss: 0.4502667188644409
Epoch #6; Loss: 0.4466910660266876
Epoch #7; Loss: 0.4429461658000946
Epoch #8; Loss: 0.4389020800590515
Epoch #9; Loss: 0.09699895977973938
Epoch #10; Loss: 0.08951783925294876


In [47]:
# Run ts_helpers.evaluate_model on one ticker drawn at random from the
# held-out split.
Xte, yte = get_te()
ts_helpers.evaluate_model(
    rnn, Xte, yte,
    title='Evaluation on a test set',
    include_tlohi=True,
)

In [45]:
# Persist the current parameters to '<nb_name>.weights'; the model-construction
# cell reloads this file when it exists.
torch.save(rnn.state_dict(), nb_name + '.weights')

In [46]:
# Fifteen independent eval_signs runs on randomly drawn held-out tickers,
# shown as a table (not stored).
pd.DataFrame(
    [eval_signs(rnn, get_te) for _ in range(15)],
    columns=['accuracy', 'nextday_accuracy', 'average_error'],
)

Unnamed: 0,accuracy,nextday_accuracy,average_error
0,0.699274,0.494171,0.772999
1,0.759539,0.476602,0.952743
2,0.563398,0.3852,0.969852
3,0.116464,0.123404,0.885404
4,0.675903,0.473552,0.952708
5,0.425748,0.362392,0.843033
6,0.737625,0.466802,0.878849
7,0.69568,0.487427,0.73816
8,0.737202,0.446021,0.953304
9,0.752661,0.502281,0.855414


In [None]:
# Run ts_helpers.evaluate_model on one ticker drawn at random from the
# held-out split.
Xte, yte = get_te()
ts_helpers.evaluate_model(
    rnn, Xte, yte,
    title='Evaluation on a random test set',
    include_tlohi=True,
)

In [None]:
# One more sample of the sign metrics on a randomly drawn held-out ticker.
eval_signs(rnn, get_te)

In [None]:
# Run ts_helpers.evaluate_model on one ticker drawn at random from the
# held-out split.
Xte, yte = get_te()
ts_helpers.evaluate_model(
    rnn, Xte, yte,
    title='Evaluation on a random test set',
    include_tlohi=True,
)

In [35]:
# Predictions for the most recently drawn Xte, labelled with yte's column names.
yp, sp = ts_helpers.predictions(rnn, Xte, yte.columns)

# Show target and prediction side by side for the 6th row from the end
# (assumes yp has one row per row of yte -- confirm against ts_helpers).
display(
    pd.concat([
        yte.tail(6).head(1),
        yp.tail(6).head(1),
    ]).T
)

Unnamed: 0,1967,1967.1
Close1,-0.08,0.013441
Low1,-0.12,-0.096342
High1,0.01,0.130511
Close2,0.26,0.000792
Low2,0.0,-0.108064
High2,0.28,0.10588
Close3,0.01,0.025668
Low3,-0.07,-0.088388
High3,0.12,0.120929
Close4,0.07,0.006697


In [48]:
# Draw one held-out ticker; the following cells fine-tune on all but its last
# TEST_SIZE rows.
X, y = get_te()

In [51]:
# Keep the last TEST_SIZE rows of the selected ticker aside (presumably the
# most recent ones, if rows are chronological); everything before them is
# used for fine-tuning.
TEST_SIZE = 128
Xtr = X.iloc[:-TEST_SIZE]
ytr = y.iloc[:-TEST_SIZE]
Xte = X.iloc[-TEST_SIZE:]
yte = y.iloc[-TEST_SIZE:]

In [52]:
# Fine-tune on the head of the selected ticker only (Xtr, ytr). The optimizer
# is handed to ts_helpers.loss, which presumably performs the update step as
# well as returning the loss -- confirm in ts_helpers.
EPOCHS = 20
for epoch in range(1, EPOCHS + 1):
    print('Epoch #%s; Loss: %s' % (
        epoch,
        ts_helpers.loss(rnn, Xtr, ytr, OBJECTIVE, optimizer),
    ))

Epoch #1; Loss: 0.8240823149681091
Epoch #2; Loss: 0.7823857069015503
Epoch #3; Loss: 0.7628390789031982
Epoch #4; Loss: 0.7481539845466614
Epoch #5; Loss: 0.7344274520874023
Epoch #6; Loss: 0.7198953032493591
Epoch #7; Loss: 0.7055656909942627
Epoch #8; Loss: 0.6932212114334106
Epoch #9; Loss: 0.6800518035888672
Epoch #10; Loss: 0.667789101600647
Epoch #11; Loss: 0.6553550958633423
Epoch #12; Loss: 0.6478637456893921
Epoch #13; Loss: 0.634014904499054
Epoch #14; Loss: 0.6220782995223999
Epoch #15; Loss: 0.6116430163383484
Epoch #16; Loss: 0.6003634929656982
Epoch #17; Loss: 0.6005860567092896
Epoch #18; Loss: 0.590842068195343
Epoch #19; Loss: 0.5779390335083008
Epoch #20; Loss: 0.5652198195457458


In [53]:
# NOTE(review): X, y is the full series, including the Xtr/ytr rows the model
# was just fine-tuned on. If a held-out score is intended, Xte/yte (the last
# TEST_SIZE rows) may be what should be passed here -- confirm.
ts_helpers.evaluate_model(
    rnn, X, y,
    title='Evaluation on a test set',
    include_tlohi=True,
)

In [55]:
# Another 40 fine-tuning epochs on the same Xtr, ytr; these continue from the
# weights left by the previous fine-tuning cell.
EPOCHS = 40
for epoch in range(1, EPOCHS + 1):
    print('Epoch #%s; Loss: %s' % (
        epoch,
        ts_helpers.loss(rnn, Xtr, ytr, OBJECTIVE, optimizer),
    ))

Epoch #1; Loss: 0.5554362535476685
Epoch #2; Loss: 0.5911763906478882
Epoch #3; Loss: 0.5622075796127319
Epoch #4; Loss: 0.5410158038139343
Epoch #5; Loss: 0.5305415987968445
Epoch #6; Loss: 0.5242649912834167
Epoch #7; Loss: 0.5232122540473938
Epoch #8; Loss: 0.5140835046768188
Epoch #9; Loss: 0.5024034380912781
Epoch #10; Loss: 0.49374091625213623
Epoch #11; Loss: 0.488600492477417
Epoch #12; Loss: 0.48627224564552307
Epoch #13; Loss: 0.4859564006328583
Epoch #14; Loss: 0.4812828600406647
Epoch #15; Loss: 0.46673476696014404
Epoch #16; Loss: 0.46553048491477966
Epoch #17; Loss: 0.45609015226364136
Epoch #18; Loss: 0.448078453540802
Epoch #19; Loss: 0.44342032074928284
Epoch #20; Loss: 0.44115450978279114
Epoch #21; Loss: 0.43991369009017944
Epoch #22; Loss: 0.43031078577041626
Epoch #23; Loss: 0.4215724766254425
Epoch #24; Loss: 0.41847288608551025
Epoch #25; Loss: 0.4213118851184845
Epoch #26; Loss: 0.4171472489833832
Epoch #27; Loss: 0.414334237575531
Epoch #28; Loss: 0.41278654336

In [56]:
# NOTE(review): X, y is the full series, including the Xtr/ytr rows the model
# was just fine-tuned on. If a held-out score is intended, Xte/yte (the last
# TEST_SIZE rows) may be what should be passed here -- confirm.
ts_helpers.evaluate_model(
    rnn, X, y,
    title='Evaluation on a test set',
    include_tlohi=True,
)