In [19]:
import torch
import torch.nn as nn
from torch.autograd import Variable
import numpy as np
import torch.utils.data as data_utils
import pandas as pd
import os
import sys
import gc
import pickle as pkl
from sklearn.preprocessing import StandardScaler
from sklearn.model_selection import train_test_split
from sklearn.metrics import mean_squared_error
sys.path.append('../')
from wiki.utils import clock

In [2]:
torch.manual_seed(1)
torch.cuda.set_device(0)

In [3]:
batch_size = 256

In [4]:
base_dir = '../data/'

In [5]:
train = pd.read_csv(base_dir+'train_1.csv').fillna(0)

In [6]:
X = train.drop('Page', axis=1).values

In [7]:
shape = X.shape ; shape

(145063, 550)

In [9]:
sc = StandardScaler()
X = sc.fit_transform(X.T).T
print(X.shape)
assert(np.isclose(np.mean(X[0]),0))
# input shape: samples, timesteps, features
X = X.reshape(X.shape[0], X.shape[1], 1)
print(X.shape)
np.max(X)

In [10]:
X.shape

(145063, 550)

In [12]:
X_train, X_test = train_test_split(X, test_size=0.1, random_state=12)

The unsqueeze below is adding another dimension to the tensors (size 1, at the end). Also I believe the shuffle only shuffles the first dimension (thus shuffling the sequences whole). This could also probably be turned off with no ill effects

In [13]:
valloader = data_utils.DataLoader(
    data_utils.TensorDataset(
        torch.from_numpy(X_test).float().unsqueeze(-1)
    ),
    batch_size=batch_size, shuffle=False
)

In [45]:
testloader = data_utils.DataLoader(
    data_utils.TensorDataset(
        torch.from_numpy(X[:,:-60]).float().unsqueeze(-1)
    ),
    batch_size=batch_size, shuffle=False
)

In [42]:
class RNN(nn.Module):
    def __init__(self):
        super().__init__()
        
        self.hidden_units = 128
        self.n_layers = 2
        
        self.rnn = nn.GRU(
            input_size=1,
            hidden_size=self.hidden_units,
            num_layers=self.n_layers, #number of RNN layers
            batch_first=True, #batch dimension is first
            #nonlinearity='relu',
            dropout=0.2
        )

        #I can change the below to two softplus outputs for
        #mean and variance in the paper version (see notes below)
        self.out = nn.Linear(self.hidden_units, 1)
        
    def forward(self, x, h_state):
        # dimensions:
        # x (batch, time_step, input_size)
        # h_state (n_layers, batch, hidden_size)
        # r_out (batch, time_step, hidden_size)
        r_out, h_state = self.rnn(x, h_state)
        return self.out(r_out), h_state
    
    def init_hidden(self, batch_size):
        hidden = Variable(torch.zeros(self.n_layers, batch_size, self.hidden_units)).cuda()
        return hidden

In [30]:
def validate(model, valloader, pred_date_len):
    loss_func = nn.MSELoss()
    loss = 0
    for data_all in valloader:
        sequences = data_all[:,:-pred_date_len,:]
        targets = Variable(data_all[:,-pred_date_len:,:], volatile=True).cuda()
        output = predict_batch(model, sequences, pred_date_len)
        loss += loss_func(output, targets)
    return loss.data[0]/length
    print('Val loss, %f' % float(loss.data[0])/pred_date_len)

In [16]:
def predict_batch(model, batch, pred_date_len):
    output = []
    h_state = model.init_hidden(batch.size()[0])
    x=Variable(batch, volatile=True).cuda()
    encoder_out, h_state = model(x, h_state)

    input_variable = encoder_out[:,-1:,:]
    output.append(input_variable)
    for i in range(pred_date_len-1):
        encoder_out, h_state = model(input_variable, h_state)
        input_variable = encoder_out
        output.append(encoder_out)
    
    return torch.cat(output, dim=1)

In [38]:
def predict(model, dataloader, pred_date_len):
    all_output = []
    for data_all in dataloader:
        output = predict_batch(model, data_all, pred_date_len)
        all_output.append(output)
    return torch.cat(all_output, dim=0)

The below is borrowed from some bloke on the discussion

In [17]:
model = torch.load('rnn.pkl')
for p in model.parameters():
    pass
    #p.requires_grad = False

In [31]:
loss = validate(model, valloader, 60)



In [32]:
loss

6.087971845166369e-08

In [47]:
with clock():
    predictions = predict(model, testloader, 60)



In [51]:
shape = predictions.data.shape

In [59]:
predictions = np.reshape(sc.inverse_transform(np.reshape(predictions.cpu().data.numpy(),(-1,1))), shape)

In [61]:
predictions[400,:,:]

array([[-1291.57189941],
       [-1287.5291748 ],
       [-1282.36621094],
       [-1277.69030762],
       [-1273.59887695],
       [-1270.38415527],
       [-1267.90014648],
       [-1265.95178223],
       [-1264.6854248 ],
       [-1263.37036133],
       [-1262.49365234],
       [-1261.90917969],
       [-1261.27600098],
       [-1260.83752441],
       [-1260.6427002 ],
       [-1260.35046387],
       [-1260.00952148],
       [-1259.91210938],
       [-1259.71728516],
       [-1259.52246094],
       [-1259.52246094],
       [-1259.52246094],
       [-1259.47375488],
       [-1259.37634277],
       [-1259.32763672],
       [-1259.32763672],
       [-1259.23022461],
       [-1259.1328125 ],
       [-1259.1328125 ],
       [-1259.1328125 ],
       [-1259.1328125 ],
       [-1259.1328125 ],
       [-1259.08410645],
       [-1258.98669434],
       [-1258.98669434],
       [-1258.98669434],
       [-1258.98669434],
       [-1259.03540039],
       [-1258.93798828],
       [-1258.93798828],


In [65]:
X[0,-60:]

array([  2.67601257e-07,   3.71668413e-07,   2.08134311e-07,
         2.97334730e-07,   1.18933892e-07,   9.96071346e-07,
         1.93267575e-07,   6.09536197e-07,   1.48667365e-07,
         3.12201467e-07,   1.93267575e-07,   1.18933892e-07,
         2.23001048e-07,   2.08134311e-07,   1.78400838e-07,
         8.92004190e-08,   1.63534102e-07,   1.48667365e-07,
         6.24402933e-07,   3.12201467e-07,   3.56801676e-07,
         2.08134311e-07,   1.63534102e-07,   3.03281425e-06,
         2.08134311e-07,   6.69003143e-07,   4.90602305e-07,
         4.16268622e-07,   2.67601257e-07,   2.08134311e-07,
         6.98736616e-07,   2.23001048e-07,   2.08134311e-07,
         2.67601257e-07,   2.97334730e-07,   2.08134311e-07,
         2.37867784e-07,   2.08134311e-07,   2.97334730e-07,
         8.92004190e-07,   3.27068203e-07,   2.23001048e-07,
         2.52734521e-07,   2.82467994e-07,   2.67601257e-07,
         3.12201467e-07,   3.12201467e-07,   6.98736616e-07,
         9.66337873e-07,