# Time Series predictions using LSTM

Yet another try

In [1]:
import pandas as pd
from pandas_datareader import data, wb
from datetime import datetime
import numpy as np
import graphviz

import sys
import os

import torch
import torch.nn as nn
import torch.nn.functional as F
from torch.autograd import Variable

import bokeh.models
import bokeh.plotting as bk
bk.output_notebook()

ModuleNotFoundError: No module named 'pandas_datareader'

In [None]:
# Download the GOOG price history once and cache it on disk.
# `df` is always loaded back from the CSV so that it has the same layout
# (Date as an ordinary column, default integer index) on the first run as on
# later cached runs; the frame returned by DataReader is indexed by date and
# would otherwise differ from what `pd.read_csv` yields.
if not os.path.exists('GOOG.csv'):
    data.DataReader(
        'GOOG', 'yahoo', datetime(2010, 1, 1), datetime(2017, 11, 1)
    ).to_csv('GOOG.csv')
df = pd.read_csv('GOOG.csv')

In [None]:
# Number of lagged rows used as input features (default `wnd_in` of
# build_corpus); the same number of leading rows is skipped when the
# train split is taken.
WINDOW_IN = 14
# Number of target columns built per row (default `wnd_out` of build_corpus).
WINDOW_OUT = 7

In [None]:
# Peek at the first rows of the loaded price table.
df.head()

In [None]:
def build_corpus(df, wnd_in=WINDOW_IN, wnd_out=WINDOW_OUT):
    """Turn a price table into model inputs ``X`` and targets ``y``.

    Every column of ``df`` is first divided by its own maximum.  All values
    below are expressed relative to the current row's (scaled) Open.

    Parameters
    ----------
    df : pandas.DataFrame
        Must contain ``Open``, ``Low``, ``High`` and ``Close`` columns.  A
        ``Date`` column, if present, is dropped.
    wnd_in : int
        Number of lagged rows used as features.
    wnd_out : int
        Number of target columns (forecast horizon, in rows).

    Returns
    -------
    (X, y) : tuple of pandas.DataFrame
        ``X`` has columns ``Open_0_ago`` .. ``Open_<wnd_in>_ago``,
        ``Low_1_ago`` .. and ``High_1_ago`` ..; ``Open_0_ago`` holds the
        row-to-row difference of Open, every other column holds the lagged
        value minus the current Open.
        ``y`` has columns ``Close_0`` .. ``Close_<wnd_out - 1>``: the Close
        ``k`` rows *ahead* minus the current Open.
        Cells without enough history (start of ``X``) or without enough
        future (end of ``y``) are filled with 0.
    """
    # errors='ignore' keeps this working when Date is the index, not a column.
    df = df.drop('Date', axis=1, errors='ignore')
    df = df.div(df.max())

    names, lagged = [], []
    for k in range(wnd_in + 1):
        names.append('Open_%s_ago' % k)
        lagged.append(df['Open'].shift(k))
    for column in ('Low', 'High'):
        for k in range(1, wnd_in + 1):
            names.append('%s_%s_ago' % (column, k))
            lagged.append(df[column].shift(k))
    X = pd.concat(lagged, axis=1, keys=names)

    # Express every feature relative to the current Open.  That zeroes
    # Open_0_ago, which is then reused for the day-over-day Open change.
    X = X.sub(X['Open_0_ago'], axis='rows')
    X['Open_0_ago'] = df['Open'].diff()
    X = X.fillna(0)

    # shift(-k) looks k rows ahead; a positive shift would pair each row with
    # *past* closes, which is not a forecast target.
    y = pd.concat(
        [df['Close'].shift(-k) for k in range(wnd_out)],
        axis=1,
        keys=['Close_%s' % k for k in range(wnd_out)],
    )
    y = y.sub(df['Open'], axis='rows').fillna(0)
    return X, y

In [None]:
# Build the feature and target tables with the default window sizes.
X, y = build_corpus(df)

In [None]:
# Inspect the last rows of the feature table.
X.tail()

In [None]:
# Inspect the first rows of the target table.
y.head()

In [None]:
class RNN4(nn.Module):
    """Gated recurrent cell with a multiplicative linear read-out.

    One call to :meth:`forward` processes a single time step.  The memory
    vector is first attenuated by a sigmoid gate, then a tanh candidate,
    scaled by a second sigmoid gate, is added to it.  The output is a linear
    map of the element-wise product of a projection of the input and the
    updated memory.

    Parameters
    ----------
    n_in : int
        Size of the input vector.
    n_mem : int
        Size of the memory vector.
    n_out : int
        Size of the output vector.
    """

    def __init__(self, n_in, n_mem, n_out):
        super().__init__()
        self.n_in, self.n_mem, self.n_out = n_in, n_mem, n_out
        gate_in = n_in + n_mem  # gates see the input concatenated with memory
        self.xm2f = nn.Linear(in_features=gate_in, out_features=n_mem)
        self.xm2invent = nn.Linear(in_features=gate_in, out_features=n_mem)
        self.xm2save = nn.Linear(in_features=gate_in, out_features=n_mem)
        self.x2pre = nn.Linear(in_features=n_in, out_features=n_mem)
        self.pre2o = nn.Linear(in_features=n_mem, out_features=n_out)

    def init_weights(self, var=.1):
        """Redraw every layer's weight matrix uniformly from ``[-var, var]``.

        Biases are left at their existing values.
        """
        layers = (self.xm2f, self.xm2invent, self.xm2save,
                  self.x2pre, self.pre2o)
        for layer in layers:
            layer.weight.data.uniform_(-var, var)

    def forward(self, x, mem):
        """Advance the cell by one step.

        Parameters
        ----------
        x : torch.Tensor
            Input with ``n_in`` entries in its last dimension.
        mem : torch.Tensor
            Memory with ``n_mem`` entries in its last dimension; leading
            dimensions (if any) must match those of ``x``.

        Returns
        -------
        (o, mem) : tuple of torch.Tensor
            The output (``n_out`` entries) and the updated memory.
        """
        # dim=-1 equals the default dim for 1-D tensors and also allows
        # batched (..., n) input.
        xmem = torch.cat((x, mem), dim=-1)
        kept = mem * torch.sigmoid(self.xm2f(xmem))
        candidate = torch.tanh(self.xm2invent(xmem))
        write_gate = torch.sigmoid(self.xm2save(xmem))
        mem = kept + candidate * write_gate
        o = self.pre2o(self.x2pre(x) * mem)
        return (o, mem)

    def init_mem(self):
        """Return an all-zero 1-D memory vector of length ``n_mem``.

        The tensor is created with the dtype and device of the layer weights.
        """
        return self.xm2f.weight.new_zeros(self.n_mem)

In [None]:
def my_objective(y, y_pred, alpha=.1):
    """Squared-error loss that penalises wrong-sign predictions more heavily.

    Parameters
    ----------
    y : torch.Tensor
        Target values.
    y_pred : torch.Tensor
        Predicted values, same shape as ``y``.
    alpha : float
        Weight of the plain sum of squared errors.

    Returns
    -------
    torch.Tensor
        ``fee + alpha * sse`` where ``sse`` is the sum of squared errors over
        all elements and ``fee`` is the same sum restricted to elements whose
        prediction and target have opposite signs (product strictly below 0).
    """
    sq_err = (y_pred - y) ** 2
    # The mask is a constant w.r.t. autograd: it only selects elements.
    wrong_sign = ((y_pred * y).detach() < 0).float()
    fee = (wrong_sign * sq_err).sum()
    sse = sq_err.sum()
    return fee + alpha * sse

In [None]:
# Objective used by train() when none is passed explicitly.
OBJECTIVE = my_objective

In [None]:
def loss(model, X, y,
         objective,
         lr=None,
         max_norm=1000.,
        ):
    """Run ``model`` over the rows of ``X`` in order and return the mean loss.

    When ``lr`` is given, a gradient step is taken after every row
    (per-sample SGD with gradient-norm clipping); otherwise the model is only
    evaluated and no autograd graph is recorded.

    Parameters
    ----------
    model : torch.nn.Module
        Must provide ``init_mem()`` and be callable as ``model(x, mem)``
        returning ``(prediction, new_mem)``.
    X, y : pandas.DataFrame
        Inputs and targets, one row per time step.
    objective : callable
        ``objective(target, prediction)`` returning a scalar tensor.
    lr : float or None
        Step size; ``None`` disables parameter updates.
    max_norm : float
        Gradients are clipped to this total norm before each update.

    Returns
    -------
    float
        Mean of the per-row objective values.
    """
    training = lr is not None
    # `.values` replaces `.as_matrix()`, which newer pandas no longer has.
    inputs = torch.tensor(X.values, dtype=torch.float32)
    targets = torch.tensor(y.values, dtype=torch.float32)
    n_rows = inputs.shape[0]

    mem = model.init_mem()
    total = 0.0
    with torch.set_grad_enabled(training):
        for i in range(n_rows):
            model.zero_grad()
            y_pred, mem = model(inputs[i, :], mem)

            step_loss = objective(targets[i, :], y_pred)
            total += step_loss.item()
            if training:
                # NOTE(review): `mem` is carried over without detaching, so
                # every backward pass walks back through all earlier steps of
                # this pass -- hence retain_graph=True.  Detach `mem` here if
                # truncated back-propagation is acceptable.
                step_loss.backward(retain_graph=True)
                torch.nn.utils.clip_grad_norm_(model.parameters(), max_norm)
                for p in model.parameters():
                    if p.grad is not None:
                        p.data.sub_(lr * p.grad.data)
    return total / n_rows

In [None]:
def train(model, X, y, objective=OBJECTIVE, epochs=6, base_lr=.04):
    """Train ``model`` for ``epochs`` passes over ``(X, y)``, printing the loss.

    The step size handed to ``loss`` for epoch ``e`` (counted from 1) is
    ``base_lr / (1 + e // 4)``, i.e. it shrinks every fourth epoch.
    Nothing is returned.
    """
    for epoch in range(1, epochs + 1):
        step_size = base_lr / (1 + epoch // 4)
        epoch_loss = loss(model, X, y, objective, lr=step_size)
        print('Epoch #%s; Loss: %s' % (epoch, epoch_loss))

In [None]:
def predictions(model, X):
    """Run ``model`` over the rows of ``X`` in order and collect its outputs.

    Parameters
    ----------
    model : torch.nn.Module
        Must expose ``n_out`` and ``init_mem()`` and be callable as
        ``model(x, mem)`` returning ``(output, new_mem)``.
    X : pandas.DataFrame
        Inputs, one row per time step.

    Returns
    -------
    pandas.DataFrame
        One row per row of ``X`` (same index) and ``model.n_out`` columns
        named ``Close_1_next`` .. ``Close_<n_out>_next``.
    """
    # `.values` replaces `.as_matrix()`, which newer pandas no longer has.
    inputs = torch.tensor(X.values, dtype=torch.float32)
    n_rows = inputs.shape[0]
    pred = torch.zeros(n_rows, model.n_out)

    mem = model.init_mem()
    # Inference only: without no_grad the memory would drag an ever-growing
    # autograd graph along through the whole sequence.
    with torch.no_grad():
        for i in range(n_rows):
            out, mem = model(inputs[i, :], mem)
            pred[i, :] = out

    return pd.DataFrame(
        pred.numpy(),
        index=X.index,
        columns=['Close_%s_next' % (1+k) for k in range(model.n_out)],
    )

In [None]:
def plot_stocks(opn, cls, p, w=4, clr='gray', legend='Stocks'):
    """Draw open/close bars on figure ``p``, split by direction of the move.

    Rows where ``cls > opn`` and rows where ``cls < opn`` are drawn with two
    separate ``p.vbar`` calls (different fill colours and legend suffixes);
    rows where both are equal are not drawn.

    Parameters
    ----------
    opn, cls : pandas.Series
        Opening and closing values sharing one index (used as x position).
    p : figure-like
        Object providing ``vbar``.
    w : number
        Bar width.
    clr : str
        Outline colour of the bars.
    legend : str
        Legend prefix; ``' increase'`` / ``' decrease'`` is appended.
    """
    groups = (
        (cls > opn, '#F2583E', ' increase'),
        (cls < opn, '#D5E1DD', ' decrease'),
    )
    for rows, fill, suffix in groups:
        p.vbar(
            opn.index[rows], w, opn[rows], cls[rows],
            line_width=1,
            line_color=clr,
            fill_color=fill,
            legend=legend + suffix,
        )
    
def evaluate_model(model, X, y, base_price=0, title='Model evaluation'):
    """Plot ground-truth bars against the model's first predicted column.

    The opening series is rebuilt as ``base_price`` plus the running sum of
    the first column of ``X`` (presumably the row-to-row Open change).  The
    first column of ``y`` and the first column of the model output are each
    added to that series to obtain the true and predicted closes.  The true
    values are drawn with ``plot_stocks``; the predictions are drawn as a
    segment from the open to the predicted close with a circle at its tip.
    The figure is shown with ``bk.show`` and nothing is returned.
    """
    forecast = predictions(model, X)
    fig = bk.figure(
        title=title,
        plot_width=800,
        plot_height=600,
        active_scroll='wheel_zoom',
    )

    days = X.index
    open_level = base_price + X.iloc[:, 0].cumsum()
    true_close = open_level + y.iloc[:, 0]
    pred_close = open_level + forecast.iloc[:, 0]

    plot_stocks(open_level, true_close, fig,
                w=1, clr='green', legend='Ground truth')
    fig.segment(
        x0=days, y0=open_level, x1=days, y1=pred_close,
        line_color='gray', legend='Predictions',
    )
    fig.circle(
        days, pred_close,
        fill_color='gray', line_color='black', legend='Predictions',
    )
    bk.show(fig)

In [None]:
# One input per feature column, 64 memory units, one output per target column.
rnn4 = RNN4(n_in=X.shape[1], n_mem=64, n_out=y.shape[1])
# Redraw the layer weights uniformly from [-0.1, 0.1].
rnn4.init_weights(.1)

In [None]:
# Chronological split: the first two thirds of the rows train, the rest test.
n_train = X.shape[0]*2//3
# Skip the first WINDOW_IN rows: they lack a full history, so their lagged
# features were zero-filled by build_corpus rather than holding real values.
Xtr, ytr = X.iloc[WINDOW_IN:n_train, :], y.iloc[WINDOW_IN:n_train, :]
Xte, yte = X.iloc[n_train:, :], y.iloc[n_train:, :]

In [None]:
# Fit on the training split with the default objective (prints one line per epoch).
train(rnn4, Xtr, ytr, epochs=100, base_lr=.0005)

In [None]:
# Plot ground truth vs. predictions on the rows the model was trained on.
evaluate_model(rnn4, Xtr, ytr, title='Evaluation on the train set')

In [None]:
# Plot ground truth vs. predictions on the held-out rows.
evaluate_model(rnn4, Xte, yte, title='Evaluation on the test set')

In [None]:
# Show the raw per-row model outputs for the training split.
predictions(rnn4, Xtr)