In [1]:
%load_ext autoreload
%autoreload 2
import sys
import gc

import numpy as np
import pandas as pd
import torch
import torch.nn as nn
import torch.utils.data as data_utils
from torch.autograd import Variable

from tqdm import tqdm

sys.path.append('../')
from wiki.utils import clock
from wiki import rnn, rnn_predict, newphet, val, submissions, rnn_meta as rnn_meta
%matplotlib inline

In [2]:
# Mini-batch size handed to both DataLoaders below.
batch_size = 1024
# Number of trailing time steps sliced off as the target window.
pred_len = 62
# Directory that holds the input CSV, the saved weights and the saved predictions.
base_dir = '../data/'

In [3]:
# Read the training table, then replace every missing entry with 0.
train_df = pd.read_csv(base_dir + 'train_2.csv')
train_df = train_df.fillna(0)

In [4]:
# Keep everything except the 'Page' column as a plain 2-D array.
values = train_df.drop('Page', axis=1).values
# Displayed as the cell output.
values.shape

(145063, 550)

In [5]:
# Every column label except the first one; the first and last of these
# delimit the range that is rebuilt with pd.date_range in the next cell.
dates = train_df.columns[1:].values
s_date, e_date = dates[0], dates[-1]

In [6]:
# Replace the raw column labels with a DatetimeIndex covering s_date..e_date
# (default frequency), so the .dayofweek / week accessors are available.
dates = pd.date_range(start=s_date, end=e_date)

In [7]:
# Calendar features, one entry per element of `dates`.
ages = np.arange(len(dates))      # 0-based position of each date in the range
dows = dates.dayofweek.values     # day of week, Monday=0 ... Sunday=6
# DatetimeIndex.weekofyear was deprecated in pandas 1.1 and removed in 2.0.
# isocalendar().week yields the same ISO week numbers; it is a nullable
# UInt32 column, so convert explicitly to a plain int64 ndarray to match
# what .weekofyear.values used to return. Older pandas keeps the old path.
if hasattr(dates, 'isocalendar'):
    woys = dates.isocalendar().week.to_numpy(dtype=np.int64)
else:
    woys = dates.weekofyear.values

In [8]:
# Row index of every series as a column vector of shape (n_series, 1).
# NumPy broadcasting only prepends missing axes on the left, so the trailing
# axis has to be added explicitly for the later broadcast against `values`.
series_idxs = np.arange(values.shape[0])[:, np.newaxis]

In [9]:
# Scale the series with the project helper; it returns the scaled values and a
# scaler object whose inverse_transform is used further down on the predictions.
# NOTE: `values` is overwritten in place, so re-running this cell without
# re-running the cells above scales already-scaled data.
values, scaler = rnn.scale_values(values)

In [10]:
# Remove any length-1 axes from the scaled values.
values = np.squeeze(values)

In [11]:
def br(x):
    """Return a read-only view of `x` broadcast to the current shape of `values`."""
    return np.broadcast_to(x, values.shape)

In [12]:
# Stack the scaled values with the broadcast per-date and per-series features
# along a new trailing axis: [value, age, day-of-week, week-of-year, series index].
features = np.stack(
    [values] + [br(extra) for extra in (ages, dows, woys, series_idxs)],
    axis=-1,
)

In [13]:
# Sanity check: the trailing axis holds the 5 stacked features.
features.shape

(145063, 550, 5)

OK, the `DataLoader`s aren't going to work anymore, since they wrap everything in a `Variable` that requires gradients. For the embedding lookup, gradients are computed with respect to the embeddings, not the indices, so it breaks. That means we have to split it up, so we might as well just do it ourselves.

In [14]:
def _window_loader(inputs, targets):
    """Wrap an (inputs, targets) pair of feature arrays in an unshuffled DataLoader.

    Both arrays are converted to float tensors; batches are `batch_size` rows
    and are served in the original row order.
    """
    dataset = data_utils.TensorDataset(
        torch.from_numpy(inputs).float(),
        torch.from_numpy(targets).float(),
    )
    return data_utils.DataLoader(dataset, batch_size=batch_size, shuffle=False)


# Training pair: everything before the last 2*pred_len steps -> the next pred_len steps.
trainloader = _window_loader(
    features[:, :-2*pred_len, :],
    features[:, -2*pred_len:-pred_len, :],
)
# Validation pair: everything before the last pred_len steps -> the final pred_len steps.
valloader = _window_loader(
    features[:, :-pred_len, :],
    features[:, -pred_len:, :],
)

In [None]:
# Build the project RNN and move it to the GPU.
model = rnn_meta.RNN()
model = model.cuda()

In [None]:
# First training stage: Adam at lr=1e-3 for 25 epochs; the weights path is
# handed to fit() as save_best_path.
save_best_path = base_dir + 'rnn_stage2_v3_lr1_weights.mdl'
optimizer = torch.optim.Adam(model.parameters(), lr=1e-3)
with clock():
    model.fit(
        trainloader,
        valloader,
        optimizer=optimizer,
        num_epochs=25,
        save_best_path=save_best_path,
    )


EPOCH 1
Running average loss: 0.451747
VALIDATION LOSS: 0.506225
Elapsed time 128.3827178478241 seconds

EPOCH 2
Running average loss: 0.442855
VALIDATION LOSS: 0.504274
Elapsed time 128.8742551803589 seconds

EPOCH 3
Running average loss: 0.439102
VALIDATION LOSS: 0.495854
Elapsed time 128.8277611732483 seconds

EPOCH 4
Running average loss: 0.429084
VALIDATION LOSS: 0.482337
Elapsed time 128.86404418945312 seconds

EPOCH 5
Running average loss: 0.387165
VALIDATION LOSS: 0.467202
Elapsed time 128.9026734828949 seconds

EPOCH 8
Running average loss: 0.381860
VALIDATION LOSS: 0.467539
Elapsed time 128.84917330741882 seconds

EPOCH 9
Running average loss: 0.377577
VALIDATION LOSS: 0.442615
Elapsed time 128.80337691307068 seconds

EPOCH 10
Running average loss: 0.376156
VALIDATION LOSS: 0.457810
Elapsed time 128.85196375846863 seconds

EPOCH 11
Running average loss: 0.372141
VALIDATION LOSS: 0.448912
Elapsed time 128.75375318527222 seconds

EPOCH 12
Running average loss: 0.369520
VALIDAT

In [28]:
# Display the module summary (layers and loss function) as the cell output.
model

RNN (
  (embedding): Embedding(145063, 20)
  (rnn): GRU(24, 128, num_layers=2, batch_first=True, dropout=0.2)
  (out): Linear (128 -> 1)
  (loss_func): L1Loss (
  )
)

In [None]:
# Second training stage on the same model: a fresh Adam optimizer at the lower
# lr=1e-4 for 20 epochs, with a separate weights path passed as save_best_path.
save_best_path = base_dir + 'rnn_stage2_v3_lr2_weights.mdl'
optimizer = torch.optim.Adam(model.parameters(), lr=1e-4)
with clock():
    model.fit(
        trainloader,
        valloader,
        optimizer=optimizer,
        num_epochs=20,
        save_best_path=save_best_path,
    )


EPOCH 1
Running average loss: 0.354721
VALIDATION LOSS: 0.429406
Elapsed time 128.99523210525513 seconds

EPOCH 2
Running average loss: 0.351667
VALIDATION LOSS: 0.428612
Elapsed time 128.9609522819519 seconds

EPOCH 3
Running average loss: 0.350720
VALIDATION LOSS: 0.428199
Elapsed time 128.92836594581604 seconds

EPOCH 4
Running average loss: 0.350254
VALIDATION LOSS: 0.428064
Elapsed time 129.02649664878845 seconds

EPOCH 5
Running average loss: 0.349885
VALIDATION LOSS: 0.427551
Elapsed time 128.936537027359 seconds

EPOCH 6
Running average loss: 0.349603
VALIDATION LOSS: 0.427624
Elapsed time 128.90549182891846 seconds

EPOCH 7
Running average loss: 0.349295
VALIDATION LOSS: 0.427687
Elapsed time 129.03417921066284 seconds

EPOCH 8
Running average loss: 0.349090
VALIDATION LOSS: 0.427459
Elapsed time 128.83287477493286 seconds

EPOCH 9
Running average loss: 0.348818
VALIDATION LOSS: 0.427029
Elapsed time 128.97450137138367 seconds

EPOCH 10
Running average loss: 0.348550
VALIDATI

In [None]:
# Start from a freshly constructed model on the GPU and restore the weights
# stored at the stage-2 path.
model = rnn_meta.RNN().cuda()
save_best_path = base_dir + 'rnn_stage2_v3_lr2_weights.mdl'
model.load_state_dict(torch.load(save_best_path))

In [None]:
# Evaluate the restored model on the validation loader; presumably this reports
# the same validation loss that fit() prints per epoch (confirm in rnn_meta).
model.validate(valloader)

In [None]:
# Run the model over the validation loader; predict() returns three items,
# unpacked here as the model outputs, the targets and the input sequences.
outputs, targets, sequences = model.predict(valloader)

In [None]:
# Merge outputs/targets/sequences with the project helper; only the second of
# the two returned items is kept, the first is discarded.
_, predictions = rnn_predict.combine_prediction_data(outputs, targets, sequences)

In [None]:
# Re-read the training table WITHOUT fillna(0), so missing entries stay NaN in
# the ground truth used for scoring below.
# NOTE: this overwrites the earlier `train_df` (which had NaNs filled with 0);
# re-running earlier cells after this one would see the unfilled frame.
base_dir = '../data/'
train_df = pd.read_csv(base_dir+'train_2.csv')
X = train_df.drop('Page', axis=1).values

In [None]:
# Undo the scaling on the predictions. The array is transposed before and after
# the call; presumably the scaler expects series along the second axis (TODO confirm).
# NOTE: `predictions` is overwritten, so running this cell twice applies the
# inverse transform twice.
predictions = scaler.inverse_transform(predictions.T).T
# Ground truth is the raw (unfilled) table loaded in the previous cell.
true = X

In [None]:
# Per-series SMAPE (axis=1) over the last 60 columns, once on the raw
# predictions and once on predictions rounded and clipped at 0.
# NOTE(review): the window is hard-coded to 60 while pred_len is 62; confirm
# that skipping the first two predicted steps is intentional.
smapes = val.smape(true[:,-60:], predictions[:,-60:], axis=1)
smapes_clipped = val.smape(true[:,-60:], predictions[:,-60:].round().clip(0), axis=1)

In [None]:
# Mean SMAPE across series for both variants, ignoring NaN scores.
np.nanmean(smapes), np.nanmean(smapes_clipped)

In [None]:
# Persist the model outputs to disk.
# NOTE(review): this saves `outputs` (as returned by model.predict), not the
# inverse-transformed `predictions`, despite the file name; confirm which
# array downstream consumers expect.
np.save(base_dir+'rnn_v3_predictions.npy', outputs)