# Time Series predictions using LSTM

Yet another try

In [1]:
import pandas as pd
from pandas_datareader import data, wb
from datetime import datetime
import numpy as np
import graphviz

import sys
import os

import torch
import torch.nn as nn
import torch.nn.functional as F
from torch.autograd import Variable

import bokeh.models
import bokeh.plotting as bk
bk.output_notebook()

In [2]:
if not os.path.exists('GOOG.csv'):
    df = data.DataReader('GOOG', 'yahoo', datetime(2010, 1, 1), datetime(2017, 11, 1))
    df.to_csv('GOOG.csv')
else:
    df = pd.read_csv('GOOG.csv')

In [3]:
WINDOW_IN = 7
WINDOW_OUT = 4

In [4]:
df.head()

Unnamed: 0,Date,Open,High,Low,Close,Adj Close,Volume
0,2010-01-04,311.44931,312.721039,310.103088,311.349976,311.349976,3937800
1,2010-01-05,311.563568,311.891449,308.76181,309.978882,309.978882,6048500
2,2010-01-06,310.907837,310.907837,301.220856,302.164703,302.164703,8009000
3,2010-01-07,302.731018,303.029083,294.410156,295.130463,295.130463,12912000
4,2010-01-08,294.08725,299.675903,292.651581,299.06488,299.06488,9509900


In [5]:
df.Close.head()

0    311.349976
1    309.978882
2    302.164703
3    295.130463
4    299.064880
Name: Close, dtype: float64

In [6]:
def build_corpus(df, wnd_in=WINDOW_IN, wnd_out=WINDOW_OUT):
    df = df.drop('Date', axis=1)
    # df = df.div(df.max())
    
    X, X_columns = [], []
    
    base = df.loc[:, ['Close', 'Low', 'High']]
    base = base.subtract(df.Close.shift(1), axis='index')
    
    X = pd.concat(
        [base.shift(k).add_suffix('_%s_ago' % k)
         for k in range(wnd_in)],
        axis=1,
    )
    X = X.fillna(0)
    
    y = pd.concat(
        [base.shift(-k).add_suffix('%s' % k)
         for k in range(1, wnd_out + 1)],
        axis=1
    )
    y = y.fillna(0)
    return X, y

In [7]:
X, y = build_corpus(df)

In [8]:
pd.concat((df.Close, X), axis=1).head()

Unnamed: 0,Close,Close_0_ago,Low_0_ago,High_0_ago,Close_1_ago,Low_1_ago,High_1_ago,Close_2_ago,Low_2_ago,High_2_ago,...,High_3_ago,Close_4_ago,Low_4_ago,High_4_ago,Close_5_ago,Low_5_ago,High_5_ago,Close_6_ago,Low_6_ago,High_6_ago
0,311.349976,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
1,309.978882,-1.371094,-2.588166,0.541473,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
2,302.164703,-7.814179,-8.758026,0.928955,-1.371094,-2.588166,0.541473,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
3,295.130463,-7.03424,-7.754547,0.86438,-7.814179,-8.758026,0.928955,-1.371094,-2.588166,0.541473,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
4,299.06488,3.934417,-2.478882,4.54544,-7.03424,-7.754547,0.86438,-7.814179,-8.758026,0.928955,...,0.541473,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0


In [9]:
y.head()

Unnamed: 0,Close1,Low1,High1,Close2,Low2,High2,Close3,Low3,High3,Close4,Low4,High4
0,-1.371094,-2.588166,0.541473,-7.814179,-8.758026,0.928955,-7.03424,-7.754547,0.86438,3.934417,-2.478882,4.54544
1,-7.814179,-8.758026,0.928955,-7.03424,-7.754547,0.86438,3.934417,-2.478882,4.54544,-0.452057,-3.964233,1.212098
2,-7.03424,-7.754547,0.86438,3.934417,-2.478882,4.54544,-0.452057,-3.964233,1.212098,-5.28067,-6.512664,-1.465484
3,3.934417,-2.478882,4.54544,-0.452057,-3.964233,1.212098,-5.28067,-6.512664,-1.465484,-1.684051,-8.236419,-1.043213
4,-0.452057,-3.964233,1.212098,-5.28067,-6.512664,-1.465484,-1.684051,-8.236419,-1.043213,1.371094,-2.12616,3.532043


In [10]:
X.tail()

Unnamed: 0,Close_0_ago,Low_0_ago,High_0_ago,Close_1_ago,Low_1_ago,High_1_ago,Close_2_ago,Low_2_ago,High_2_ago,Close_3_ago,...,High_3_ago,Close_4_ago,Low_4_ago,High_4_ago,Close_5_ago,Low_5_ago,High_5_ago,Close_6_ago,Low_6_ago,High_6_ago
1968,-0.770019,-1.130005,14.269959,2.790039,-10.019958,5.550049,2.089966,-7.450012,3.779968,-19.75,...,1.320008,3.75,0.130005,6.549988,-8.359986,-14.419983,-3.929993,0.630005,-5.205017,4.539978
1969,46.710022,35.640014,75.830017,-0.770019,-1.130005,14.269959,2.790039,-10.019958,5.550049,2.089966,...,3.779968,-19.75,-22.080017,1.320008,3.75,0.130005,6.549988,-8.359986,-14.419983,-3.929993
1970,-2.160035,-11.77002,5.699951,46.710022,35.640014,75.830017,-0.770019,-1.130005,14.269959,2.790039,...,5.550049,2.089966,-7.450012,3.779968,-19.75,-22.080017,1.320008,3.75,0.130005,6.549988
1971,-0.46997,-6.690002,6.890015,-2.160035,-11.77002,5.699951,46.710022,35.640014,75.830017,-0.770019,...,14.269959,2.790039,-10.019958,5.550049,2.089966,-7.450012,3.779968,-19.75,-22.080017,1.320008
1972,8.859985,0.309997,13.030029,-0.46997,-6.690002,6.890015,-2.160035,-11.77002,5.699951,46.710022,...,75.830017,-0.770019,-1.130005,14.269959,2.790039,-10.019958,5.550049,2.089966,-7.450012,3.779968


In [11]:
class RNN(nn.Module):
    
    
    def __init__(self, n_in, n_mem, days_out):
        super(RNN, self).__init__()
        self.n_in, self.n_mem, self.days_out = n_in, n_mem, days_out
        self.xm2f = nn.Linear(in_features=(n_mem + n_in), out_features=n_mem)
        self.xm2invent = nn.Linear(in_features=(n_mem + n_in), out_features=n_mem)
        self.xm2save = nn.Linear(in_features=(n_in + n_mem), out_features=n_mem)
        self.x2mid = nn.Linear(in_features=n_in, out_features=n_mem)
        self.mid2o = nn.Linear(in_features=n_mem, out_features=days_out)
        
    def init_weights(self, var=.1):
        for tr in [
                self.xm2f,
                self.xm2invent,
                self.xm2save,
                self.x2mid,
                self.mid2o,
                ]:
            tr.weight.data.uniform_(-var, var)
    
    def forward(self, x, mem):
        xmem = torch.cat((x, mem))
        mem = mem * F.sigmoid(self.xm2f(xmem))
        new_mem = F.tanh(self.xm2invent(xmem))
        mem_mask = F.sigmoid(self.xm2save(xmem))
        mem = ((1 - mem_mask) * mem
               + mem_mask * new_mem
        )
        mid = self.x2mid(x) * mem
        o = self.mid2o(mid)
        sgn = torch.stack((F.sigmoid(-o), F.sigmoid(o)), dim=1)
        return (o, sgn, mem)
    
    def init_mem(self):
        return Variable(torch.zeros(self.n_mem))

In [12]:
def my_objective(y, y_pred, s_pred, alpha=.6):
    n = sum(y.data.shape)
    
    # mask = Variable(((y_pred*y).data < 0).float())
    # fee = (mask*(y_pred - y)**2).sum()
        
    invcoef = Variable(torch.cumsum(torch.ones(*y.data.shape), dim=0))
    
    pos = F.sigmoid(y)
    neg = F.sigmoid(-y)
    ZERO = Variable(torch.Tensor([1e-16]))
    fee = (
        pos * torch.log(torch.max(s_pred[:, 0], ZERO))
        + neg * torch.log(torch.max(s_pred[:, 1], ZERO))
    )
    fee = (-fee/invcoef).sum()

    mse = torch.sum((y_pred - y)**2/invcoef)
    
    L = (1-alpha)*fee + alpha*mse
    return L

In [13]:
OBJECTIVE = my_objective

In [14]:
def loss(model, X, y,
         objective,
         optimizer,
        ):
    mem = model.init_mem()
    mean_L = 0
    X = torch.Tensor(X.as_matrix())
    Y = torch.Tensor(y.as_matrix())
    
    L = 0
    PERIOD = 32
    for i in range(X.shape[0]):
        x = Variable(X[i, :])
        y = Variable(Y[i, :], requires_grad=1)
        y_pred, s_pred, mem = model(x, mem)
        
        L += objective(y, y_pred, s_pred)
        
        if optimizer is None:
            continue
        if i == (X.shape[0] - 1) or i % PERIOD == 0:
            optimizer.zero_grad()
            L.backward(retain_graph=1)
            optimizer.step()
            mean_L += L.data
            L = 0
    mean_L = mean_L/X.shape[0]
    return mean_L[0]

In [15]:
def train(model, X, y, optimizer, objective=OBJECTIVE, epochs=2):
    for epoch in range(1, epochs+1):
        print(
            'Epoch #%s; Loss: %s'
            % (
                epoch,
                loss(model, X, y,
                     objective,
                     optimizer)))

In [16]:
def predictions(model, X):
    mem = model.init_mem()
    pred = torch.zeros(X.shape[0], model.days_out)
    sign = torch.zeros(X.shape[0], model.days_out, 2)
    index = X.index
    X = torch.Tensor(X.as_matrix())
    for i in range(X.shape[0]):
        x = Variable(X[i, :])
        y, s, mem = model(x, mem)
        pred[i, :] = y.data
        sign[i, :, :] = s.data
    pred = pd.DataFrame(pred.numpy())
    #sign = pd.DataFrame(sign.numpy())
    pred.columns = globals()['y'].columns
    #sign.columns = ['Next_%s' % (1+k) for k in range(model.n_out)]
    pred.index = index
    #sign.index = index
    return pred, sign

In [36]:
def plot_stocks(opn, cls,
                p, w=4, alpha=1.,
                line_color='gray',
                color_inc='#F2583E', color_dec='blue',
                legend_prefix='Stocks'):
    inc, dec = cls > opn, cls < opn

    
def evaluate_model(model,
                   X, y, base_price=0,
                   include_tclose=True,
                   include_pclose=True,
                   include_tlohi=False,
                   include_plohi=True,
                   title='Model evaluation'):
    W=1
    pred, sign = predictions(model, X)
    opens = base_price + X.iloc[:, 0].cumsum()
    truth = opens + y.iloc[:, 0]
    pred_close = opens + pred.Close1
    pred_lo, pred_hi = opens + pred.Low1, opens + pred.High1
    tinc, tdec = truth > opens, truth < opens
    pinc, pdec = pred_close > opens, pred_close < opens
    
    p = bk.figure(
        plot_width=800, plot_height=600,
        title=title,
        active_scroll='wheel_zoom')
    if include_tclose:
            p.vbar(
                X.index[tinc], W,
                opens[tinc], truth[tinc],
                line_width=1,  line_color='black',
                fill_color='red', fill_alpha=.75,
                legend='Ground truth increase'
            )
            p.vbar(
                X.index[tdec], W,
                opens[tdec], truth[tdec],
                line_width=1, line_color='black',
                fill_color='blue', fill_alpha=.75,
                legend='Ground truth decrease'
            )
    
    if include_tlohi:
        p.vbar(
            opens.index, 1.1*W,
            opens + y.Low1, opens + y.High1,
            fill_color='gray', fill_alpha=.1,
            legend='Ground truth Lo-Hi'
        )
    if include_plohi:
        p.vbar(
            opens.index, W/2,
            pred_lo, pred_hi,
            fill_color='yellow', fill_alpha=.2,
            legend='Predicted Lo-Hi'
        )
    if include_pclose:
        p.segment(x0=X.index, y0=opens,
                  x1=X.index, y1=pred_close,
                  line_color='lightgray', line_width=2,
                  legend='Prediction')
        p.circle(X.index[pinc], pred_close[pinc],
                 fill_color='red', line_color='lightgray',
                 line_width=2, radius=.25*W,
                 legend='Predicted increase')
        p.circle(X.index[pdec], pred_close[pdec],
                 fill_color='blue', line_color='lightgray',
                 line_width=2, radius=.25*W,
                 legend='Predicted decrease')
    bk.show(p)

In [18]:
rnn = RNN(n_in=X.shape[1], n_mem=48, days_out=y.shape[1])
rnn.init_weights(.1)
optimizer = torch.optim.RMSprop(rnn.parameters(), lr=.001)

In [19]:
n_train = X.shape[0]*2//3
# Skipping first WINDOW_IN rows as the contain NA's
Xtr, ytr = X.iloc[WINDOW_IN:n_train, :], y.iloc[WINDOW_IN:n_train, :]
Xte, yte = X.iloc[n_train:, :], y.iloc[n_train:, :]

In [20]:
train(rnn, Xtr, ytr, optimizer, epochs=200)

Epoch #1; Loss: 52.90349197387695
Epoch #2; Loss: 51.131099700927734
Epoch #3; Loss: 50.44309616088867
Epoch #4; Loss: 49.67802047729492
Epoch #5; Loss: 48.80824279785156
Epoch #6; Loss: 47.867008209228516
Epoch #7; Loss: 46.87910461425781
Epoch #8; Loss: 45.91950607299805
Epoch #9; Loss: 44.96513748168945
Epoch #10; Loss: 44.065216064453125
Epoch #11; Loss: 43.189353942871094
Epoch #12; Loss: 42.3673095703125
Epoch #13; Loss: 41.60542678833008
Epoch #14; Loss: 40.84962844848633
Epoch #15; Loss: 40.16086196899414
Epoch #16; Loss: 39.45401382446289
Epoch #17; Loss: 38.81395721435547
Epoch #18; Loss: 38.15528106689453
Epoch #19; Loss: 37.54817199707031
Epoch #20; Loss: 36.827816009521484
Epoch #21; Loss: 36.26149368286133
Epoch #22; Loss: 35.60886001586914
Epoch #23; Loss: 35.300045013427734
Epoch #24; Loss: 34.54927444458008
Epoch #25; Loss: 34.31551742553711
Epoch #26; Loss: 33.33742904663086
Epoch #27; Loss: 33.07380294799805
Epoch #28; Loss: 32.33454132080078
Epoch #29; Loss: 32.0569

In [37]:
evaluate_model(rnn, Xtr, ytr,
               include_plohi=False,
               title='Evaluation on the train set')

In [39]:
evaluate_model(rnn, Xte, yte,
               include_plohi=True, include_tlohi=True,
               title='Evaluation on the test set')

In [23]:
# predictions(rnn, Xtr)

In [40]:
train(rnn, Xtr, ytr, optimizer, epochs=32)

Epoch #1; Loss: 12.271028518676758
Epoch #2; Loss: 12.096423149108887
Epoch #3; Loss: 12.021601676940918
Epoch #4; Loss: 11.9465970993042
Epoch #5; Loss: 11.753436088562012
Epoch #6; Loss: 11.807735443115234
Epoch #7; Loss: 11.729680061340332
Epoch #8; Loss: 11.566496849060059
Epoch #9; Loss: 11.651970863342285
Epoch #10; Loss: 12.102075576782227
Epoch #11; Loss: 12.162708282470703
Epoch #12; Loss: 11.741005897521973
Epoch #13; Loss: 11.373468399047852
Epoch #14; Loss: 11.35594367980957
Epoch #15; Loss: 11.54157829284668
Epoch #16; Loss: 11.590805053710938
Epoch #17; Loss: 11.666605949401855
Epoch #18; Loss: 11.70773696899414
Epoch #19; Loss: 12.109033584594727
Epoch #20; Loss: 11.79065227508545
Epoch #21; Loss: 11.427579879760742
Epoch #22; Loss: 11.317062377929688
Epoch #23; Loss: 11.251137733459473
Epoch #24; Loss: 11.136885643005371
Epoch #25; Loss: 11.257411003112793
Epoch #26; Loss: 11.434881210327148
Epoch #27; Loss: 11.681938171386719
Epoch #28; Loss: 11.698261260986328
Epoch #

In [41]:
evaluate_model(rnn, Xtr, ytr,
               include_tlohi=True,
               title='Evaluation on the train set')

In [42]:
train(rnn, Xtr, ytr, optimizer, epochs=20)

Epoch #1; Loss: 11.386198043823242
Epoch #2; Loss: 11.0219144821167
Epoch #3; Loss: 11.064748764038086
Epoch #4; Loss: 11.50508975982666
Epoch #5; Loss: 11.535187721252441
Epoch #6; Loss: 11.143187522888184
Epoch #7; Loss: 10.95913314819336
Epoch #8; Loss: 10.775016784667969
Epoch #9; Loss: 10.883176803588867
Epoch #10; Loss: 11.234362602233887
Epoch #11; Loss: 11.49226188659668
Epoch #12; Loss: 11.268341064453125
Epoch #13; Loss: 10.9742431640625
Epoch #14; Loss: 10.708651542663574
Epoch #15; Loss: 10.792441368103027
Epoch #16; Loss: 11.067096710205078
Epoch #17; Loss: 11.175727844238281
Epoch #18; Loss: 11.051907539367676
Epoch #19; Loss: 11.05583381652832
Epoch #20; Loss: 11.065550804138184


In [43]:
evaluate_model(rnn, Xte, yte,
               include_tlohi=True,
               title='Evaluation on the test set')

In [47]:
%%javascript
IPython.notebook.kernel.execute('nb_name = ' + '"' + IPython.notebook.notebook_name + '"')

<IPython.core.display.Javascript object>

In [49]:
torch.save(rnn.state_dict(), nb_name + '.weights')