# Full-connected network

In [7]:
%matplotlib inline
import lab.setup
import functools
import pandas as pd
import numpy as np
import numba

from pandas.tseries.offsets import *

import torch
from torch.autograd import Variable
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
import torch.utils.data as data

g_region_temporal = 5
g_region_spatial  = 1
g_start_date = '2016-03-03'
g_end_date   = '2016-03-03'
g_start_time = '{} 00:00:00'.format(g_start_date)
g_end_time   = '{} 23:59:59'.format(g_end_date)

DATA_PATH = 'dataset'

## modeling

In [8]:
# load from prepared dataset generated by linear.ipynb

In [9]:
ds_train_full = pd.read_csv('dataset/ds_filled_s1.csv', dtype={'link_ID':'uint64'}, low_memory=False)
ds_train_full.head(1)

class PandasDataset(data.Dataset):
    def __init__(self, df, feature_columns):
        self.df = df[feature_columns].astype('float32')
        self.dataset = self.df.values
        self.temporal_order = g_region_temporal
        self.feature_size = self.temporal_order * len(feature_columns)
        self.len = self.df.shape[0] - (self.temporal_order - 1) - 1
        self.label_index = self.df.columns.tolist().index('travel_time')
        
    def __len__(self):
        return self.len

    def __getitem__(self, idx):
        feature_vec = self.dataset[idx:idx+self.temporal_order].reshape(self.feature_size)
        label_vec   = self.dataset[idx+self.temporal_order][self.label_index:self.label_index+1]
        
        return {'feature': feature_vec, 'label': label_vec}

def collate(batch):
    "Puts each data field into a tensor with outer dimension batch size"
    feature_batch = torch.stack([torch.from_numpy(f['feature']) for f in batch], 0)
    label_batch = torch.stack([torch.from_numpy(f['label']) for f in batch], 0)
    return {
        'feature': feature_batch, 
        'label': label_batch
    }

In [10]:
# Build linear model
feature_columns = ['travel_time', 'uplink_mean_tt', 'downlink_mean_tt']

B     = 256
D_in  = g_region_temporal * len(feature_columns)
D_hidden = D_in
D_out = 1
TRAIN_SET_RATIO = 0.80

class FCNN(nn.Module):
    def __init__(self):
        super(FCNN, self).__init__()
        self.fc1 = nn.Linear(D_in, D_hidden)
        self.fc2 = nn.Linear(D_hidden, D_hidden)
        self.fc3 = nn.Linear(D_hidden, D_out)

    def forward(self, x):
        x = F.relu(F.dropout(self.fc1(x), 0.0))
        x = F.relu(F.dropout(self.fc2(x), 0.2))
        x = self.fc3(x)
        return x

model = FCNN()
model.cuda()

def rmse(y_hat, y):
    """Compute root mean squared error"""
    return torch.sqrt(torch.mean((y - y_hat).pow(2)))

def mape(y_hat, y):
    """Compute root mean squared error"""
    return torch.mean(((y - y_hat) / y).abs())

loss_fn = torch.nn.L1Loss()
loss_fn = mape

learning_rate = 1e-2
optimizer = optim.Adam(model.parameters(), lr=learning_rate)

# 我们需要按link_ID切分dataset，因为不同link的数据不能看作一个时间序列
datasets_train = []
datasets_valid = []
link_no = ds_train_full.link_ID.unique().shape[0]
counter = 0
for link_ID, link_ds in ds_train_full.groupby('link_ID'):
    counter += 1
    if counter < link_no * TRAIN_SET_RATIO:
        datasets_train.append(PandasDataset(link_ds, feature_columns))
    else:
        datasets_valid.append(PandasDataset(link_ds, feature_columns))

print('train set size:', len(datasets_train) * len(datasets_train[0]))
print('valid set size:', len(datasets_valid) * len(datasets_valid[0]))
dataset_train = data.ConcatDataset(datasets_train)
dataset_valid = data.ConcatDataset(datasets_valid)
loader_train = data.DataLoader(dataset_train, batch_size=B, shuffle=False, num_workers=4, collate_fn=collate)
loader_valid = data.DataLoader(dataset_valid, batch_size=B, shuffle=False, num_workers=4, collate_fn=collate)

def validate():
    eval_running_loss = 0.0
    counter = 0
    for i_batch, sample_batch in enumerate(loader_valid):
        x_batch = Variable(sample_batch['feature']).cuda()
        y_batch = Variable(sample_batch['label']).cuda()
        y_batch_pred = model(x_batch)
        loss = loss_fn(y_batch_pred, y_batch)

        eval_running_loss += loss.data[0]
        counter += 1
        
    return eval_running_loss / counter

num_epochs = 100
epoch_loss_records = []
lr_scheduler = optim.lr_scheduler.StepLR(optimizer, step_size=10, gamma=0.2)
for epoch in range(num_epochs):
    lr_scheduler.step()
    running_loss = 0.0
    counter = 0
    for i_batch, sample_batch in enumerate(loader_train):
        x_batch = Variable(sample_batch['feature']).cuda()
        y_batch = Variable(sample_batch['label']).cuda()
                
        # forward
        y_batch_pred = model(x_batch)
        loss = loss_fn(y_batch_pred, y_batch)

        # backward
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

        running_loss += loss.data[0]
        counter += 1

    epoch_mean_loss = running_loss / counter
    eval_mean_loss = validate()
    print('=== epoch[{}/{}], loss: {:.6f}, valid_loss: {:.6f} ==='
                  .format(epoch + 1, num_epochs, epoch_mean_loss, eval_mean_loss))
    epoch_loss_records.append(epoch_mean_loss)

train set size: 75075
valid set size: 19305
=== epoch[1/100], loss: 0.385971, valid_loss: 0.330751 ===
=== epoch[2/100], loss: 0.294439, valid_loss: 0.250065 ===
=== epoch[3/100], loss: 0.302112, valid_loss: 0.234529 ===
=== epoch[4/100], loss: 0.343960, valid_loss: 0.254090 ===
=== epoch[5/100], loss: 0.292957, valid_loss: 0.238681 ===
=== epoch[6/100], loss: 0.255936, valid_loss: 0.235917 ===
=== epoch[7/100], loss: 0.228289, valid_loss: 0.309622 ===
=== epoch[8/100], loss: 0.231741, valid_loss: 0.251485 ===
=== epoch[9/100], loss: 0.242353, valid_loss: 0.202187 ===
=== epoch[10/100], loss: 0.240744, valid_loss: 0.277689 ===
=== epoch[11/100], loss: 0.227105, valid_loss: 0.198969 ===
=== epoch[12/100], loss: 0.200147, valid_loss: 0.190875 ===
=== epoch[13/100], loss: 0.196064, valid_loss: 0.187158 ===
=== epoch[14/100], loss: 0.195344, valid_loss: 0.183831 ===
=== epoch[15/100], loss: 0.196942, valid_loss: 0.187719 ===
=== epoch[16/100], loss: 0.203650, valid_loss: 0.193430 ===
=== e