In [1]:
%load_ext autoreload
%autoreload 2
import sys
import gc

import numpy as np
import pandas as pd
import torch
import torch.nn as nn
import torch.utils.data as data_utils
from torch.autograd import Variable

from tqdm import tqdm

sys.path.append('../')
from wiki.utils import clock
from wiki import rnn, rnn_predict, newphet, val, submissions, rnn_meta_cudaless as rnn_meta
%matplotlib inline

In [2]:
# Directory the training CSV is read from (relative path).
base_dir = '../data/'
# Number of trailing time steps sliced off as the prediction target window.
pred_len = 60
# Number of series per mini-batch passed to the DataLoaders.
batch_size = 16

In [3]:
# Read only the first 1000 rows of the training file and replace NaNs with 0.
train_path = base_dir + 'train_1.csv'
train_df = pd.read_csv(train_path, nrows=1000).fillna(0)

In [4]:
# Drop the 'Page' label column and keep the remaining columns as a NumPy array.
values = train_df.drop('Page', axis='columns').values
values.shape

(1000, 550)

Features to add:
1. AGE (number in sequence)
2. DOW
3. WEEK OF YEAR
4. EMBEDDING 
  - with dimensions roughly taken from the Amazon paper<sup>[1](https://arxiv.org/pdf/1704.04110.pdf)</sup>
    - 20 output dimensions, since they did a grid search and found over a large range of input sizes that 20 was best
    - all series as input dimensions. In the paper they picked only 5, but they don't seem to say what these were.
  - This would increase the number of parameters by a factor of 20, so may well be computationally intractable. To lower:
    - Could lower input dimensions by grouping pages together (lowering number of parameters by 3\*)
    - Could lower output dimensions
    - If all else fails, could use language for the input dimensions

In [5]:
# Column labels after the first one are the observation dates
# (assumes 'Page' is the first column — TODO confirm against the CSV header).
# Kept under their own name so that `dates` only ever refers to the
# DatetimeIndex built from s_date/e_date; re-running this cell out of order
# can then no longer replace that index with raw string labels.
date_labels = train_df.columns[1:]
s_date, e_date = date_labels[0], date_labels[-1]

In [6]:
# Daily DatetimeIndex spanning the first to the last date column (inclusive).
dates = pd.date_range(start=s_date, end=e_date, freq='D')

In [7]:
# Per-day calendar features, one entry per element of `dates`.
ages = np.arange(len(dates))    # position of each day in the sequence
dows = dates.dayofweek.values   # day of week, Monday=0 ... Sunday=6
# `DatetimeIndex.weekofyear` was deprecated in pandas 1.1 and removed in 2.0;
# `isocalendar().week` is its replacement. It yields a nullable UInt32 Series,
# so convert to a plain int64 array to keep the dtype the old attribute gave.
woys = dates.isocalendar().week.to_numpy(dtype=np.int64)

In [8]:
# One integer id per series, shaped (n_series, 1). NumPy broadcasting only
# prepends missing axes on the left, so the trailing axis has to be added
# by hand for the ids to line up with the rows of `values`.
series_idxs = np.arange(values.shape[0])[:, np.newaxis]

In [9]:
# Replace `values` with the first result of `rnn.scale_values` and keep the
# second result as `scaler` (presumably what is needed to undo the scaling —
# confirm in wiki.rnn).
# NOTE(review): because `values` is rebound in place, re-running this cell
# feeds already-scaled data back into `scale_values`; restart and run from
# the top rather than re-executing it.
values, scaler = rnn.scale_values(values)

In [10]:
# Remove every length-1 axis from the scaled array.
# NOTE(review): presumably `scale_values` adds a trailing singleton axis; a bare
# `squeeze()` would also drop the series axis if only one row were loaded — confirm.
values = values.squeeze()

In [11]:
def br(x, shape=None):
    """Broadcast `x` to `shape`, defaulting to the current shape of `values`.

    Parameters
    ----------
    x : array_like
        Array to expand; must be broadcast-compatible with the target shape.
    shape : tuple of int, optional
        Target shape. When omitted, `values.shape` is looked up at call time,
        which is what the original one-argument lambda did.

    Returns
    -------
    numpy.ndarray
        Read-only broadcast view of `x` with the target shape.

    Raises
    ------
    ValueError
        If `x` cannot be broadcast to the target shape.
    """
    target_shape = values.shape if shape is None else shape
    return np.broadcast_to(x, target_shape)

In [12]:
# Stack the per-timestep channels on a new last axis, in this order:
#   0: scaled value, 1: age, 2: day of week, 3: week of year, 4: series index.
meta_channels = [br(channel) for channel in (ages, dows, woys, series_idxs)]
features = np.stack([values, *meta_channels], axis=-1)

In [13]:
# Sanity check: (series, time steps, stacked feature channels).
features.shape

(1000, 550, 5)

Ok, the `DataLoaders` aren't going to work anymore, since they wrap everything in a `Variable` that requires gradients. For our embedding indices the gradient is computed with respect to the embeddings, not the indices, so it breaks. That means we've got to split it up - so we might as well just do it ourselves.

In [14]:
def _window_loader(inputs, targets):
    """Wrap an (inputs, targets) pair of feature arrays in an unshuffled DataLoader.

    Both arrays are converted to float tensors; the batch size comes from the
    notebook-level `batch_size`.
    """
    dataset = data_utils.TensorDataset(
        torch.from_numpy(inputs).float(),
        torch.from_numpy(targets).float(),
    )
    return data_utils.DataLoader(dataset, batch_size=batch_size, shuffle=False)


# Training pair: everything before the last 2*pred_len steps is the input,
# the following pred_len steps are the target.
trainloader = _window_loader(
    features[:, :-2 * pred_len, :],
    features[:, -2 * pred_len:-pred_len, :],
)
# Validation pair: everything before the last pred_len steps is the input,
# the final pred_len steps are the target.
valloader = _window_loader(
    features[:, :-pred_len, :],
    features[:, -pred_len:, :],
)

In [26]:
# Build the model from `wiki.rnn_meta_cudaless` (imported as `rnn_meta`) with its default arguments.
# NOTE(review): the saved output shows `24` being printed when this runs; what
# that number means is not visible from this notebook.
model = rnn_meta.RNN()

24


In [20]:
# Adam over the current model's parameters.
# NOTE(review): the optimizer holds references to the parameters of the `model`
# object that exists when this cell runs, so it must be re-run every time the
# model cell is re-executed. The saved execution counts (model at 26, this cell
# at 20) suggest that did not happen in the recorded session.
optimizer = torch.optim.Adam(
    params=model.parameters(),
    lr=1e-3,
)

In [None]:
# Train for a single epoch on the training loader, passing the validation
# loader alongside. `clock()` comes from `wiki.utils` and presumably reports
# the elapsed wall time of the block — confirm.
with clock():
    model.fit(trainloader, valloader, optimizer=optimizer, num_epochs=1)


EPOCH 1
Running average loss: 0.495220