In [1]:
import pandas as pd
from pandas_datareader import data, wb
from datetime import datetime
import numpy as np

In [3]:
df = data.DataReader('GOOG', 'yahoo', datetime(2010, 1, 1), datetime(2017, 11, 1))

In [5]:
WND_DAYS = 7
MAX_NORM = 100

In [6]:
df.head(10)

Unnamed: 0_level_0,Open,High,Low,Close,Adj Close,Volume
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
2010-01-04,311.44931,312.721039,310.103088,311.349976,311.349976,3937800
2010-01-05,311.563568,311.891449,308.76181,309.978882,309.978882,6048500
2010-01-06,310.907837,310.907837,301.220856,302.164703,302.164703,8009000
2010-01-07,302.731018,303.029083,294.410156,295.130463,295.130463,12912000
2010-01-08,294.08725,299.675903,292.651581,299.06488,299.06488,9509900
2010-01-11,300.276978,300.276978,295.100647,298.612823,298.612823,14519600
2010-01-12,296.893982,297.147339,292.100159,293.332153,293.332153,9769600
2010-01-13,286.382355,292.28894,285.095734,291.648102,291.648102,13077600
2010-01-14,290.063416,295.180145,289.521942,293.019196,293.019196,8535300
2010-01-15,294.75293,294.862213,287.152344,288.126007,288.126007,10939600


In [7]:
import torch
import torch.nn as nn
import torch.nn.functional as F
from torch.autograd import Variable

In [8]:
mu = df.mean()
sigma = df.std()

df_norm = df.sub(mu).div(sigma)
df_norm.describe()

Unnamed: 0,Open,High,Low,Close,Adj Close,Volume
count,1973.0,1973.0,1973.0,1973.0,1973.0,1973.0
mean,1.72954e-15,6.871791e-16,-5.728368e-16,-4.929323e-16,-4.929323e-16,-6.172908e-17
std,1.0,1.0,1.0,1.0,1.0,1.0
min,-1.343641,-1.344517,-1.343012,-1.347777,-1.347777,-1.30479
25%,-0.9508502,-0.951804,-0.9506923,-0.9501035,-0.9501035,-0.7333138
50%,0.01903229,0.02055329,0.01101686,0.01616501,0.01616501,-0.1770922
75%,0.8675876,0.8832963,0.8685769,0.8755521,0.8755521,0.3875852
max,2.381592,2.494103,2.415938,2.419723,2.419723,8.992143


In [9]:
n_train = int(2/3*df.shape[0])
Xtr, Xte = df_norm.iloc[:n_train, :], df_norm.iloc[n_train:,:]

In [10]:
N_FEATS = Xtr.shape[1]
N_FEATS

6

In [11]:
df = df.diff()
df.iloc[0, :] = 0
df

Unnamed: 0_level_0,Open,High,Low,Close,Adj Close,Volume
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
2010-01-04,0.000000,0.000000,0.000000,0.000000,0.000000,0.0
2010-01-05,0.114258,-0.829590,-1.341278,-1.371094,-1.371094,2110700.0
2010-01-06,-0.655731,-0.983612,-7.540954,-7.814179,-7.814179,1960500.0
2010-01-07,-8.176819,-7.878754,-6.810700,-7.034240,-7.034240,4903000.0
2010-01-08,-8.643768,-3.353180,-1.758575,3.934417,3.934417,-3402100.0
2010-01-11,6.189728,0.601075,2.449066,-0.452057,-0.452057,5009700.0
2010-01-12,-3.382996,-3.129639,-3.000488,-5.280670,-5.280670,-4750000.0
2010-01-13,-10.511627,-4.858399,-7.004425,-1.684051,-1.684051,3308000.0
2010-01-14,3.681061,2.891205,4.426208,1.371094,1.371094,-4542300.0
2010-01-15,4.689514,-0.317932,-2.369598,-4.893189,-4.893189,2404300.0


In [17]:
class RNN1(nn.Module):
    
    
    def __init__(self, n_in, n_mem, n_out):
        super(RNN1, self).__init__()
        self.n_in, self.n_mem, self.n_out = n_in, n_mem, n_out
        self.forget = nn.Linear(in_features=(n_in + n_mem), out_features=n_mem)
        self.learn_new = nn.Linear(in_features=(n_in + n_mem), out_features=n_mem)
        self.save_new = nn.Linear(in_features=(n_in + n_mem), out_features=n_mem)
        self.output = nn.Linear(in_features=(n_in + n_mem), out_features=n_out)
        
    def init_weights(self, var=.1):
        self.forget.weight.data.uniform_(-var, var)
        self.learn_new.weight.data.uniform_(-var, var)
        self.save_new.weight.data.uniform_(-var, var)
        self.output.weight.data.uniform_(-var, var)
    
    def forward(self, x, mem):
        # print(x.data.shape, mem.data.shape)
        xmem = torch.cat((x, mem))
        forget = F.sigmoid(self.forget(xmem))
        mem = mem*forget
        new_mem = F.tanh(self.learn_new(xmem))
        mem_mask = F.sigmoid(self.save_new(xmem))
        mem = mem + new_mem*mem_mask
        xmem = torch.cat((x, mem))
        out = self.output(xmem)
        return (out, mem)
    
    def init_mem(self):
        return Variable(torch.zeros(self.n_mem))

In [46]:
rnn1 = RNN1(n_in=WND_DAYS*N_FEATS, n_mem=48, n_out=WND_DAYS)

In [51]:
def train(model, X, criterion, wnd=WND_DAYS, lr=.01):
    mem = model.init_mem()
    total_L = 0
    for i in range(wnd, X.shape[0]):
        model.zero_grad()
        x = X.iloc[i-wnd:i,:].as_matrix().reshape(-1)
        x = torch.Tensor(x)
        x = Variable(x)
        y = torch.Tensor(X.iloc[i:i+wnd,:].loc[:,'Close'].as_matrix().reshape(-1))
        y = Variable(y)
        y_pred, mem = model(x, mem)
        y_pred = y_pred[:y.data.shape[0]]
        L = criterion(y, y_pred)
        L.backward(retain_graph=1)
        total_L += L.data
        torch.nn.utils.clip_grad_norm(model.parameters(), MAX_NORM)
        for p in model.parameters():
            p.data.add_(-lr, p.grad.data)
        total_L

In [52]:
def objective(y_pred, y):
    return -torch.sum(y_pred*y)
criterion = objective

In [53]:
train(rnn1, Xtr, criterion)

In [54]:
df.iloc[:4,:].as_matrix().reshape(-1)

array([  0.00000000e+00,   0.00000000e+00,   0.00000000e+00,
         0.00000000e+00,   0.00000000e+00,   0.00000000e+00,
         1.14258000e-01,  -8.29590000e-01,  -1.34127800e+00,
        -1.37109400e+00,  -1.37109400e+00,   2.11070000e+06,
        -6.55731000e-01,  -9.83612000e-01,  -7.54095400e+00,
        -7.81417900e+00,  -7.81417900e+00,   1.96050000e+06,
        -8.17681900e+00,  -7.87875400e+00,  -6.81070000e+00,
        -7.03424000e+00,  -7.03424000e+00,   4.90300000e+06])

In [55]:
for i in range(100):
    print('Epoch #%s; Loss: %s' % (i, train(rnn1, Xtr, criterion)))

Epoch #0; Loss: None
Epoch #1; Loss: None
Epoch #2; Loss: None
Epoch #3; Loss: None
Epoch #4; Loss: None
Epoch #5; Loss: None
Epoch #6; Loss: None
Epoch #7; Loss: None
Epoch #8; Loss: None
Epoch #9; Loss: None
Epoch #10; Loss: None
Epoch #11; Loss: None
Epoch #12; Loss: None
Epoch #13; Loss: None
Epoch #14; Loss: None
Epoch #15; Loss: None
Epoch #16; Loss: None
Epoch #17; Loss: None
Epoch #18; Loss: None
Epoch #19; Loss: None
Epoch #20; Loss: None
Epoch #21; Loss: None
Epoch #22; Loss: None
Epoch #23; Loss: None
Epoch #24; Loss: None
Epoch #25; Loss: None
Epoch #26; Loss: None
Epoch #27; Loss: None
Epoch #28; Loss: None
Epoch #29; Loss: None
Epoch #30; Loss: None
Epoch #31; Loss: None
Epoch #32; Loss: None
Epoch #33; Loss: None
Epoch #34; Loss: None
Epoch #35; Loss: None
Epoch #36; Loss: None
Epoch #37; Loss: None
Epoch #38; Loss: None
Epoch #39; Loss: None
Epoch #40; Loss: None
Epoch #41; Loss: None
Epoch #42; Loss: None
Epoch #43; Loss: None
Epoch #44; Loss: None
Epoch #45; Loss: Non

In [57]:
rnn1

RNN1 (
  (forget): Linear (90 -> 48)
  (learn_new): Linear (90 -> 48)
  (save_new): Linear (90 -> 48)
  (output): Linear (90 -> 7)
)

In [59]:
list(rnn1.parameters())

[Parameter containing:
 -2.2487e-01 -6.2654e-02 -6.0793e-02  ...   1.7059e-01  1.1094e-01  6.4523e-02
 -3.5272e+00 -3.4744e+00 -3.4596e+00  ...   5.3826e+00  5.2797e+00  5.1572e+00
 -2.5382e-01 -2.5437e-01 -2.6409e-01  ...   8.3283e-02  1.4353e-01  1.2533e-01
                 ...                   ⋱                   ...                
 -3.8858e-01 -4.7928e-01 -4.8079e-01  ...   2.9358e-01  2.8330e-01  9.7691e-02
 -8.6692e-02 -1.4782e-01 -1.6851e-01  ...   1.2947e-01  1.8325e-03  3.9815e-02
 -2.2615e+00 -2.3010e+00 -2.2295e+00  ...   2.9161e+00  2.9589e+00  2.9119e+00
 [torch.FloatTensor of size 48x90], Parameter containing:
  0.2568
  3.7631
  0.2293
  0.5298
  0.2180
  6.0017
  0.3395
  0.2533
  0.2535
  0.4326
  1.7073
  0.2304
  0.1737
  2.9252
  0.1645
  1.3335
  0.2527
  1.4192
  0.1942
  0.1640
  0.2174
  0.6106
  5.8307
  0.3745
  0.5693
  0.2421
  0.7127
  0.1375
  4.6382
  0.2956
  0.2462
  0.1632
  3.5803
  0.3070
  0.2281
  0.1579
  0.4263
  6.5542
  0.2684
  4.7433
  0.14

In [61]:
torch.save(rnn1.state_dict(), 'rnn1.weights')

In [63]:
df.to_csv('diff.csv')
df_norm.to_csv('diff_norm.csv')