# Time series prediction with an LSTM-style recurrent network

Yet another try

In [1]:
import pandas as pd
from pandas_datareader import data, wb
from datetime import datetime
import numpy as np
import graphviz

import sys
import os

import torch
import torch.nn as nn
import torch.nn.functional as F
from torch.autograd import Variable

import bokeh.models
import bokeh.plotting as bk
bk.output_notebook()

In [2]:
# Download the GOOG daily price history once and cache it on disk.
if not os.path.exists('GOOG.csv'):
    data.DataReader(
        'GOOG', 'yahoo', datetime(2010, 1, 1), datetime(2017, 11, 1)
    ).to_csv('GOOG.csv')

# Always load from the cached CSV so that `df` has the same layout on the
# very first run as on every later run: a default integer index with 'Date'
# as an ordinary column.  build_corpus() drops the 'Date' column, so using the
# freshly downloaded frame directly (presumably indexed by date -- confirm
# against pandas-datareader) would behave differently from the cached path.
df = pd.read_csv('GOOG.csv')

In [3]:
# Default number of past rows (days) build_corpus() turns into lag features.
WINDOW_IN = 14
# Default number of target columns build_corpus() produces per row
# (the current row's close plus the following WINDOW_OUT - 1 closes).
WINDOW_OUT = 7

In [4]:
# Preview the first rows of the raw price table.
df.head()

Unnamed: 0,Date,Open,High,Low,Close,Adj Close,Volume
0,2010-01-04,311.44931,312.721039,310.103088,311.349976,311.349976,3937800
1,2010-01-05,311.563568,311.891449,308.76181,309.978882,309.978882,6048500
2,2010-01-06,310.907837,310.907837,301.220856,302.164703,302.164703,8009000
3,2010-01-07,302.731018,303.029083,294.410156,295.130463,295.130463,12912000
4,2010-01-08,294.08725,299.675903,292.651581,299.06488,299.06488,9509900


In [5]:
def build_corpus(df, wnd_in=WINDOW_IN, wnd_out=WINDOW_OUT):
    """Turn a price table into lagged features ``X`` and targets ``y``.

    Args:
        df: Frame with ``Date``, ``Open``, ``High``, ``Low`` and ``Close``
            columns.  All shifts are positional, so rows are assumed to be
            in chronological order (TODO confirm against the data source).
        wnd_in: Number of lags generated per price field.
        wnd_out: Number of target columns.

    Returns:
        A tuple ``(X, y)`` of frames sharing ``df``'s index.

        ``X`` has columns ``Close_k_ago``, ``Low_k_ago`` and ``High_k_ago``
        for ``k = 1..wnd_in``.  Each holds the field's value ``k`` rows
        earlier minus the close one row earlier, except ``Close_1_ago``,
        which is overwritten with the current close minus the previous
        close.  Missing values are replaced by 0.

        ``y`` has columns ``Next_0 .. Next_{wnd_out - 1}``; ``Next_k`` is the
        close ``k`` rows ahead minus the current row's open.  Missing values
        are replaced by 0.
    """
    prices = df.drop('Date', axis=1)

    # Lagged copies of each price field, grouped by field in the order
    # Close, Low, High.
    lags = range(1, wnd_in + 1)
    lagged, names = [], []
    for field in ('Close', 'Low', 'High'):
        for k in lags:
            lagged.append(prices[field].shift(k))
            names.append('%s_%s_ago' % (field, k))
    X = pd.concat(lagged, axis=1)
    X.columns = names

    # Express every lag relative to the previous row's close.
    X = X.sub(X['Close_1_ago'], axis=0)
    # The reference column would now be all zeros; reuse it for the latest
    # close-to-close change instead.
    # NOTE(review): this uses the *current* row's close, which is also what
    # the Next_0 target is computed from -- looks like look-ahead leakage;
    # confirm this is intended.
    X['Close_1_ago'] = prices['Close'] - prices['Close'].shift(1)
    X = X.fillna(0)

    # Targets: future closes measured from the current row's open.
    # NOTE(review): the trailing rows have no future closes and end up with
    # 0 targets after fillna, which is indistinguishable from "no change".
    horizons = range(wnd_out)
    targets = [prices['Close'].shift(-k) - prices['Open'] for k in horizons]
    y = pd.concat(targets, axis=1).fillna(0)
    y.columns = ['Next_%s' % k for k in horizons]
    return X, y

In [6]:
# Build the feature and target frames with the default window sizes.
X, y = build_corpus(df)

In [7]:
# Preview the first feature rows.
X.head()

Unnamed: 0,Close_1_ago,Close_2_ago,Close_3_ago,Close_4_ago,Close_5_ago,Close_6_ago,Close_7_ago,Close_8_ago,Close_9_ago,Close_10_ago,...,High_5_ago,High_6_ago,High_7_ago,High_8_ago,High_9_ago,High_10_ago,High_11_ago,High_12_ago,High_13_ago,High_14_ago
0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
1,-1.371094,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
2,-7.814179,1.371094,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
3,-7.03424,7.814179,9.185273,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
4,3.934417,7.03424,14.848419,16.219513,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0


In [8]:
# Preview the first target rows.
y.head()

Unnamed: 0,Next_0,Next_1,Next_2,Next_3,Next_4,Next_5,Next_6
0,-0.099334,-1.470428,-9.284607,-16.318847,-12.38443,-12.836487,-18.117157
1,-1.584686,-9.398865,-16.433105,-12.498688,-12.950745,-18.231415,-19.915466
2,-8.743134,-15.777374,-11.842957,-12.295014,-17.575684,-19.259735,-17.888641
3,-7.600555,-3.666138,-4.118195,-9.398865,-11.082916,-9.711822,-14.605011
4,4.97763,4.525573,-0.755097,-2.439148,-1.068054,-5.961243,-2.175843


In [9]:
class RNN4(nn.Module):
    """Hand-written gated recurrent cell that processes one time step per call.

    The cell keeps a memory vector of size ``n_mem``.  At each step the
    concatenation of the input and the memory drives three gates (forget,
    candidate, write) that update the memory; the output is a linear readout
    of the updated memory multiplied element-wise by a ``tanh`` projection of
    the input.
    """

    def __init__(self, n_in, n_mem, n_out):
        """Create the five linear maps used by the cell.

        Args:
            n_in: Size of the input vector.
            n_mem: Size of the memory vector.
            n_out: Size of the output vector.
        """
        super(RNN4, self).__init__()
        self.n_in = n_in
        self.n_mem = n_mem
        self.n_out = n_out

        n_joint = n_mem + n_in  # size of cat(x, mem)
        self.xm2f = nn.Linear(n_joint, n_mem)       # forget gate
        self.xm2invent = nn.Linear(n_joint, n_mem)  # candidate memory
        self.xm2save = nn.Linear(n_joint, n_mem)    # write gate
        self.x2pre = nn.Linear(n_in, n_mem)         # input projection
        self.pre2o = nn.Linear(n_mem, n_out)        # readout

    def init_weights(self, var=.1):
        """Re-draw every weight matrix uniformly from ``[-var, var]``.

        Biases are left as created by ``nn.Linear``.
        """
        layers = (
            self.xm2f,
            self.xm2invent,
            self.xm2save,
            self.x2pre,
            self.pre2o,
        )
        for layer in layers:
            layer.weight.data.uniform_(-var, var)

    def forward(self, x, mem):
        """Advance the cell by one step.

        Args:
            x: Input vector; concatenated with ``mem`` along the first
                dimension, so 1-D tensors are expected.
            mem: Memory vector from the previous step (or ``init_mem()``).

        Returns:
            Tuple ``(o, mem)`` of the step output and the updated memory.
        """
        joint = torch.cat((x, mem))
        keep = torch.sigmoid(self.xm2f(joint))
        candidate = torch.tanh(self.xm2invent(joint))
        write = torch.sigmoid(self.xm2save(joint))
        # Fade the old memory, then add the gated candidate.
        mem = mem * keep + candidate * write
        projected = torch.tanh(self.x2pre(x))
        return (self.pre2o(projected * mem), mem)

    def init_mem(self):
        """Return an all-zero memory vector of size ``n_mem``."""
        return Variable(torch.zeros(self.n_mem))

In [10]:
def my_objective(y, y_pred, alpha=.4):
    """Squared-error loss with an extra penalty for wrong-sign predictions.

    Args:
        y: Target tensor.
        y_pred: Prediction tensor, same shape as ``y``.
        alpha: Blend factor.  ``alpha=1`` gives the plain sum of squared
            errors; ``alpha=0`` counts only the elements whose predicted
            sign differs from the target's.

    Returns:
        Scalar tensor ``(1 - alpha) * fee + alpha * sse``, where ``sse`` is
        the *sum* (not the mean) of squared errors and ``fee`` is the same
        sum restricted to elements with ``y_pred * y < 0``.
    """
    sq_err = (y_pred - y) ** 2
    # The sign test is a constant mask: detach so no gradient flows through
    # the comparison.
    wrong_sign = ((y_pred * y).detach() < 0).float()
    fee = (wrong_sign * sq_err).sum()
    sse = sq_err.sum()
    return (1 - alpha) * fee + alpha * sse

In [11]:
# Per-step criterion that train() uses by default.
OBJECTIVE = my_objective

In [12]:
def loss(model, X, y,
         objective,
         lr=None,
         max_norm=1000.,
        ):
    """Run ``model`` over the rows of ``X`` in order and return the mean loss.

    The model is called once per row, carrying its memory from one row to
    the next.  When ``lr`` is given, one plain SGD update is applied after
    every row (online training); otherwise the model is only evaluated.

    Args:
        model: Object providing ``init_mem()``, ``zero_grad()``,
            ``parameters()`` and ``model(x, mem) -> (y_pred, mem)``.
        X: pandas DataFrame of features, one row per time step.
        y: pandas DataFrame of targets, row-aligned with ``X``.
        objective: Callable ``objective(target, prediction)`` returning a
            scalar tensor.
        lr: Learning rate; ``None`` disables training.
        max_norm: Gradient-norm clipping threshold used before each update.

    Returns:
        Mean of the per-row objective values as a Python float, or ``nan``
        when ``X`` has no rows.
    """
    # `.values` replaces DataFrame.as_matrix(), which newer pandas no
    # longer provides.
    features = torch.tensor(X.values, dtype=torch.float32)
    targets = torch.tensor(y.values, dtype=torch.float32)
    n_steps = features.shape[0]
    if n_steps == 0:
        return float('nan')

    training = lr is not None
    total = 0.
    # Skip autograd bookkeeping entirely when only evaluating.
    with torch.set_grad_enabled(training):
        mem = model.init_mem()
        for i in range(n_steps):
            model.zero_grad()
            y_pred, mem = model(features[i], mem)

            step_loss = objective(targets[i], y_pred)
            # .item() gives a Python number; indexing a 0-dim tensor with
            # [0] fails on current PyTorch.
            total += step_loss.item()
            if training:
                # NOTE(review): `mem` is never detached, so every backward()
                # walks the graph of all earlier steps (hence retain_graph)
                # even though the weights have since been changed in place.
                # Cost grows with the square of the sequence length; consider
                # truncating with mem.detach().  Left as is to keep the
                # training behaviour unchanged.
                step_loss.backward(retain_graph=True)
                torch.nn.utils.clip_grad_norm_(model.parameters(), max_norm)
                for p in model.parameters():
                    if p.grad is None:
                        # Parameter did not take part in this step.
                        continue
                    # Update through .data so autograd does not flag the
                    # in-place change of tensors saved by earlier steps.
                    p.data.sub_(lr * p.grad.data)
    return total / n_steps

In [13]:
def train(model, X, y, objective=OBJECTIVE, epochs=6, base_lr=.04):
    """Train ``model`` for ``epochs`` passes over ``(X, y)``, printing each loss.

    Args:
        model: Model forwarded to ``loss``.
        X: Feature frame forwarded to ``loss``.
        y: Target frame forwarded to ``loss``.
        objective: Per-step criterion forwarded to ``loss``.
        epochs: Number of passes; epochs are numbered from 1.
        base_lr: Initial learning rate.
    """
    for epoch in range(1, epochs + 1):
        # Step decay: the divisor grows by one every four epochs
        # (1 for epochs 1-3, 2 for 4-7, 3 for 8-11, ...).
        lr = base_lr / (1 + epoch // 4)
        epoch_loss = loss(model, X, y, objective, lr=lr)
        print('Epoch #%s; Loss: %s' % (epoch, epoch_loss))

In [14]:
def predictions(model, X):
    """Run ``model`` over the rows of ``X`` in order and collect its outputs.

    Args:
        model: Object providing ``init_mem()``, an ``n_out`` attribute and
            ``model(x, mem) -> (output, mem)``.
        X: pandas DataFrame of features, one row per time step.

    Returns:
        DataFrame indexed like ``X`` with columns ``Next_1 .. Next_{n_out}``.
        NOTE(review): build_corpus() labels its targets ``Next_0 ..``, so
        the two frames line up by position, not by column name.
    """
    columns = ['Next_%s' % (1 + k) for k in range(model.n_out)]
    # `.values` replaces DataFrame.as_matrix(), which newer pandas no
    # longer provides.
    inputs = torch.tensor(X.values, dtype=torch.float32)
    pred = torch.zeros(inputs.shape[0], model.n_out)

    # Inference only: without no_grad() the carried memory would keep the
    # autograd graph of the whole sequence alive.
    with torch.no_grad():
        mem = model.init_mem()
        for i, x_t in enumerate(inputs):
            out, mem = model(x_t, mem)
            pred[i, :] = out

    return pd.DataFrame(pred.numpy(), index=X.index, columns=columns)

In [15]:
def plot_stocks(opn, cls, p, w=4, clr='gray', legend='Stocks'):
    """Draw open/close bars on figure ``p``, split into rising and falling rows.

    Rows where ``cls > opn`` and rows where ``cls < opn`` are drawn with two
    separate ``p.vbar`` calls so each group gets its own fill colour and
    legend entry.  Rows where both values are equal are not drawn.

    Args:
        opn: Series of opening values.
        cls: Series of closing values, aligned with ``opn``.
        p: Figure-like object exposing ``vbar``.
        w: Bar width, passed through to ``vbar``.
        clr: Outline colour of the bars.
        legend: Legend prefix; ``' increase'`` / ``' decrease'`` is appended.
    """
    rising = cls > opn
    falling = cls < opn
    groups = (
        (rising, '#F2583E', ' increase'),
        (falling, '#D5E1DD', ' decrease'),
    )
    for selected, fill, suffix in groups:
        # Positional arguments: index, width and the two bar ends
        # (presumably vbar's x, width, top, bottom -- confirm for the
        # installed bokeh version).
        p.vbar(
            opn.index[selected],
            w,
            opn[selected],
            cls[selected],
            line_width=1,
            line_color=clr,
            fill_color=fill,
            legend=legend + suffix,
        )
    
def evaluate_model(model, X, y, base_price=0, title='Model evaluation'):
    """Plot the first target column against the model's first output column.

    Args:
        model: Model passed to ``predictions``.
        X: Feature frame; the running sum of its first column is used as the
            per-row baseline.
        y: Target frame; only its first column is plotted.
        base_price: Constant added to the baseline.
        title: Figure title.
    """
    predicted = predictions(model, X)
    fig = bk.figure(
        plot_width=800, plot_height=600,
        title=title,
        active_scroll='wheel_zoom')

    # NOTE(review): with build_corpus() output, column 0 of X is the daily
    # close-to-close change, so this baseline tracks closes rather than
    # opens despite its name -- confirm what was intended.
    opens = base_price + X.iloc[:, 0].cumsum()
    truth = opens + y.iloc[:, 0]
    forecast = opens + predicted.iloc[:, 0]

    plot_stocks(opens, truth, fig, w=1, clr='green', legend='Ground truth')
    # Predictions: a stem from the baseline to the forecast, with a marker
    # at the forecast value.
    fig.segment(x0=X.index, y0=opens, x1=X.index, y1=forecast,
                line_color='gray', legend='Predictions')
    fig.circle(X.index, forecast, fill_color='gray', line_color='black',
               legend='Predictions')
    bk.show(fig)

In [16]:
# One input per feature column, 64 memory units, one output per target column.
rnn4 = RNN4(n_in=X.shape[1], n_mem=64, n_out=y.shape[1])
# Re-draw all weight matrices uniformly from [-0.15, 0.15].
rnn4.init_weights(.15)

In [17]:
# Positional split: the first two thirds of the rows are for training,
# the remainder for testing.
n_train = X.shape[0]*2//3
# Skip the first WINDOW_IN rows: their lag features are incomplete
# (build_corpus fills the missing lags with 0 instead of leaving NA's).
Xtr, ytr = X.iloc[WINDOW_IN:n_train, :], y.iloc[WINDOW_IN:n_train, :]
# NOTE(review): the last WINDOW_OUT - 1 test rows carry targets that
# build_corpus zero-filled because the future closes do not exist.
Xte, yte = X.iloc[n_train:, :], y.iloc[n_train:, :]

In [18]:
# 25 passes over the training rows; train() divides base_lr by
# (1 + epoch // 4), so the step size shrinks every four epochs.
train(rnn4, Xtr, ytr, epochs=25, base_lr=.0001)

Epoch #1; Loss: 539.780029296875
Epoch #2; Loss: 509.1077880859375
Epoch #3; Loss: 510.4363098144531
Epoch #4; Loss: 497.5993347167969
Epoch #5; Loss: 467.9117431640625
Epoch #6; Loss: 457.0121154785156
Epoch #7; Loss: 437.8673095703125
Epoch #8; Loss: 421.3491516113281
Epoch #9; Loss: 419.0072937011719
Epoch #10; Loss: 384.0285339355469
Epoch #11; Loss: 369.7085876464844
Epoch #12; Loss: 369.2383117675781
Epoch #13; Loss: 341.6643981933594
Epoch #14; Loss: 341.3677673339844
Epoch #15; Loss: 332.9144287109375
Epoch #16; Loss: 327.3707580566406
Epoch #17; Loss: 298.39923095703125
Epoch #18; Loss: 287.6095886230469
Epoch #19; Loss: 283.7243347167969
Epoch #20; Loss: 272.34490966796875
Epoch #21; Loss: 266.6215515136719
Epoch #22; Loss: 254.4540252685547
Epoch #23; Loss: 248.6024169921875
Epoch #24; Loss: 239.6188201904297
Epoch #25; Loss: 233.03506469726562


In [19]:
# Plot targets vs. model outputs on the rows the model was trained on.
evaluate_model(rnn4, Xtr, ytr, title='Evaluation on the train set')

In [20]:
# Plot targets vs. model outputs on the held-out rows.
evaluate_model(rnn4, Xte, yte, title='Evaluation on the test set')

In [21]:
# Inspect the raw model outputs for the training rows.
predictions(rnn4, Xtr)

Unnamed: 0,Next_1,Next_2,Next_3,Next_4,Next_5,Next_6,Next_7
14,-0.712279,-1.582871,-2.775063,-3.462714,-3.767828,-4.279693,-4.058171
15,-0.426591,-0.883365,-1.764541,-2.062554,-2.052087,-2.653935,-2.319473
16,-0.616239,-1.153832,-1.741931,-1.842403,-1.667792,-2.130270,-1.670042
17,-0.586475,-0.706601,-0.915050,-0.720156,-0.354420,-0.691308,-0.250979
18,-0.481053,-0.235515,-0.031232,0.478542,1.062095,0.770853,1.240334
19,-0.407219,0.101527,0.718832,1.598471,2.490810,2.251233,2.823175
20,-0.357019,0.426784,1.493304,2.782431,4.028698,3.863516,4.539891
21,-0.275949,0.701530,2.147033,3.846473,5.459529,5.406880,6.189274
22,-2.721254,-1.962470,-2.363371,-1.964520,-2.391059,-1.907979,-1.957613
23,-0.411156,1.194391,2.845355,4.832972,6.903548,7.639067,8.596565
