In [0]:
import argparse
import yaml
import time
import datetime
import cv2
import numpy as np
import pandas as pd
import random

from skimage import io, transform
import matplotlib.pyplot as plt
from scipy.ndimage import zoom
from scipy import ndimage, misc

import torch
import torch.utils.data
from torch import nn, optim
from torch.nn import functional as F
from torchvision import datasets, transforms, utils
from torchvision.utils import save_image
from torch.utils.data import Dataset, DataLoader, TensorDataset
from torch.utils.data.sampler import SubsetRandomSampler
import torchvision
import torch
from torch.autograd import Variable
import torch.optim as optim

from google.colab import drive
import os

import warnings
from collections import defaultdict

from scipy import stats

In [0]:
drive.mount('/content/drive', force_remount = True)
os.chdir('/content/drive/My Drive/Mosquito-Tec/DA-RNN')

Go to this URL in a browser: https://accounts.google.com/o/oauth2/auth?client_id=947318989803-6bn6qk8qdgf4n4g3pfee6491hc0brc4i.apps.googleusercontent.com&redirect_uri=urn%3aietf%3awg%3aoauth%3a2.0%3aoob&response_type=code&scope=email%20https%3a%2f%2fwww.googleapis.com%2fauth%2fdocs.test%20https%3a%2f%2fwww.googleapis.com%2fauth%2fdrive%20https%3a%2f%2fwww.googleapis.com%2fauth%2fdrive.photos.readonly%20https%3a%2f%2fwww.googleapis.com%2fauth%2fpeopleapi.readonly

Enter your authorization code:
··········
Mounted at /content/drive


In [0]:
# Open Data

train_csv_file = '/content/drive/My Drive/Colab/mosquito/Final_Mosquito_train6_W.csv'
train_frame = pd.read_csv(train_csv_file)

train_frame["TRAPSET"] = pd.to_datetime(train_frame["TRAPSET"])
train_frame["TRAPCOLLECT"] = pd.to_datetime(train_frame["TRAPCOLLECT"])

train_frame["TRAPDAYS"] = (train_frame["TRAPCOLLECT"] - train_frame["TRAPSET"]).dt.days

train_frame["TRAPSET"] = train_frame["TRAPSET"].dt.week
train_frame["TRAPCOLLECT"] = train_frame["TRAPCOLLECT"].dt.week

# Test data

test_csv_file = '/content/drive/My Drive/Colab/mosquito/Final_Mosquito_test6_W.csv'
test_frame = pd.read_csv(test_csv_file)

test_frame["TRAPSET"] = pd.to_datetime(test_frame["TRAPSET"])
test_frame["TRAPCOLLECT"] = pd.to_datetime(test_frame["TRAPCOLLECT"])

test_frame["TRAPDAYS"] = (test_frame["TRAPCOLLECT"] - test_frame["TRAPSET"]).dt.days

test_frame["TRAPSET"] = test_frame["TRAPSET"].dt.week
test_frame["TRAPCOLLECT"] = test_frame["TRAPCOLLECT"].dt.week

In [0]:
non_temporal_columns = ["OBJECTID", "X", "Y", "TRAPTYPE", "ATTRACTANTUSED",
                        "TRAPID", "LATITUDE", "LONGITUDE", "ADDRESS", "TOWN",
                        "STATE", "COUNTY", "TRAPSITE", "TRAPSET", "SETTIMEOFDAY",
                        "YEAR", "TRAPCOLLECT", "COLLECTTIMEOFDAY", "GENUS",
                        "SPECIES", "LIFESTAGE", "EGGSCOLLECTED", "LARVAECOLLECTED",
                        "PUPAECOLLECTED", "REPORTDATE", "TRAPDAYS"]

non_num_columns = ["LATITUDE", "LONGITUDE", "TRAPDAYS"]

categorical_columnsT = ["TRAPTYPE", "ATTRACTANTSUSED", "TRAPID", "ADDRESS", "TOWN",
                        "STATE", "COUNTY", "TRAPSITE", "TRAPSET", "SETTIMEOFDAY",
                        "TRAPCOLLECT", "COLLECTTIMEOFDAY", "GENUS",
                        "SPECIES", "LIFESTAGE", "EGGSCOLLECTED", "LARVAECOLLECTED",
                        "PUPAECOLLECTED", "REPORTDATE"]

categorical_columns = ["TRAPTYPE", "ATTRACTANTSUSED", "SETTIMEOFDAY"]

temporal_columns = ["sunriseTime", "sunsetTime", "moonPhase", "precipIntensity",
                    "precipIntensityMax", "precipProbability", "temperatureHigh",
                    "temperatureHighTime", "temperatureLow", "temperatureLowTime",
                    "apparentTemperatureHigh", "apparentTemperatureHighTime",
                    "apparentTemperatureLow", "apparentTemperatureLowTime",
                    "dewPoint", "humidity", "pressure", "windSpeed", "windGust",
                    "windGustTime", "windBearing", "cloudCover", "uvIndex", 
                    "uvIndexTime", "visibility", "temperatureMin", "temperatureMinTime",
                    "temperatureMax", "temperatureMaxTime", "apparentTemperatureMin",
                    "apparentTemperatureMinTime", "apparentTemperatureMax", "apparentTemperatureMaxTime",
                    "icon", "time", "precipIntensityMaxTime", "precipType", "summary"] 

numerical_columnT = ["sunriseTime", "sunsetTime", "moonPhase", "precipIntensity",
                    "precipIntensityMax", "precipProbability", "temperatureHigh",
                    "temperatureHighTime", "temperatureLow", "temperatureLowTime",
                    "apparentTemperatureHigh", "apparentTemperatureHighTime",
                    "apparentTemperatureLow", "apparentTemperatureLowTime",
                    "dewPoint", "humidity", "pressure", "windSpeed", "windGust",
                    "windGustTime", "windBearing", "cloudCover", "uvIndex", 
                    "uvIndexTime", "visibility", "temperatureMin", "temperatureMinTime",
                    "temperatureMax", "temperatureMaxTime", "apparentTemperatureMin",
                    "apparentTemperatureMinTime", "apparentTemperatureMax", "apparentTemperatureMaxTime",
                    "time", "precipIntensityMaxTime"] 

numerical_columns = ["precipIntensity", "temperatureHigh", "temperatureLow",
                     "humidity", "uvIndex", "temperatureMax", "pressure"]

In [0]:
SEQ_LENGTH = 14
NUM_ENTRIES = train_frame.shape[0]

column_means = {}
column_maxs = {}
column_mins = {}

for col in numerical_columns:
    column_means[col] = 0
    column_maxs[col] = 0
    column_mins[col] = np.inf

    for i in range(1, SEQ_LENGTH + 1):
        train_frame[col + str(i)].replace(to_replace=r'No*', value=np.nan, regex=True, inplace=True)
        test_frame[col + str(i)].replace(to_replace=r'No*', value=np.nan, regex=True, inplace=True)

        if(train_frame[col + str(i)].dtype == np.dtype(object)):
            train_frame[col + str(i)] = train_frame[col + str(i)].astype("float64", copy="False")

        if(test_frame[col + str(i)].dtype == np.dtype(object)):
            test_frame[col + str(i)] = test_frame[col + str(i)].astype("float64", copy="False")

        cur_col = train_frame[col + str(i)]

        col_mean = cur_col.mean()

        train_frame[col + str(i)].replace(to_replace=np.nan, value=col_mean, inplace=True)
        test_frame[col + str(i)].replace(to_replace=np.nan, value=col_mean, inplace=True)

        column_means[col] += col_mean * NUM_ENTRIES
        if cur_col.max() > column_maxs[col]:
            column_maxs[col] = cur_col.max()

        if cur_col.min() < column_mins[col]:
            column_mins[col] = cur_col.min()

    column_means[col] /= SEQ_LENGTH * NUM_ENTRIES

    for i in range(1, SEQ_LENGTH + 1):
        test_frame[col + str(i)] = (test_frame[col + str(i)] - column_means[col]) / (column_maxs[col] - column_mins[col])
        train_frame[col + str(i)] = (train_frame[col + str(i)] - column_means[col]) / (column_maxs[col] - column_mins[col])


In [0]:
# Standardize Numerical Columns (not Temporal)

num_means = {}
num_maxs = {}
num_mins = {}

for col in non_num_columns:
    cur_col = train_frame[col]
    num_means[col] = cur_col.mean()
    num_maxs[col] = cur_col.max()
    num_mins[col] = cur_col.min()

    train_frame[col] = (cur_col - num_means[col]) / (num_maxs[col] - num_mins[col])
    test_frame[col] = (test_frame[col] - num_means[col]) / (num_maxs[col] - num_mins[col])

In [0]:
# Convert categories
ats_train = []
ats_test = []

for category in categorical_columns:
    train_frame[category] = train_frame[category].astype('category')
    test_frame[category] = test_frame[category].astype('category')
    ats_train.append(train_frame[category].cat.codes.values)
    ats_test.append(test_frame[category].cat.codes.values)

train_cat = np.stack(ats_train, 1)
test_cat = np.stack(ats_test, 1)

categorical_column_sizes = [len(train_frame[column].cat.categories) for column in categorical_columns]
embedding_sizes = [(col_size, min(50, (col_size + 1) // 2)) for col_size in categorical_column_sizes]

print(embedding_sizes)

[(3, 2), (4, 2), (2, 1)]


In [0]:
# Organize info by batches/sequences
# Shape [784, 14, 35] -> Train

#train_features = np.zeros(shape=(train_frame.shape[0], SEQ_LENGTH, len(numerical_columns)+len(non_num_columns)))
#test_features = np.zeros(shape=(test_frame.shape[0], SEQ_LENGTH, len(numerical_columns)+len(non_num_columns)))

train_features = np.zeros(shape=(train_frame.shape[0], SEQ_LENGTH, len(numerical_columns)))
test_features = np.zeros(shape=(test_frame.shape[0], SEQ_LENGTH, len(numerical_columns)))

for i in range(1, SEQ_LENGTH + 1):
    for row, col in enumerate(numerical_columns):
        train_features[:, i-1, row] = train_frame[col + str(i)]
        test_features[:, i-1, row] = test_frame[col + str(i)]

    '''    
    for row, col in enumerate(non_num_columns):
        train_features[:, i-1, row+len(numerical_columns)] = train_frame[col]
        test_features[:, i-1, row+len(numerical_columns)] = test_frame[col]
    '''

In [0]:
# Organize info that is not temporal
train_y = train_frame["TOTAL"].to_numpy()
test_y = test_frame["TOTAL"].to_numpy()

train_X = np.zeros(shape=(train_frame.shape[0], len(non_num_columns)))
test_X = np.zeros(shape=(test_frame.shape[0], len(non_num_columns)))

for row, col in enumerate(non_num_columns):
    train_X[:, row] = train_frame[col]
    test_X[:, row] = test_frame[col]

### Danger! Cuau Validation: DNR

In [0]:
# Create DataLoaders
# number of subprocesses to use for data loading
num_workers = 0
# how many samples per batch to load
batch_size = 16
# percentage of training set to use as validation
valid_size = 0.2


# obtain training indices that will be used for validation
num_train = len(train_X)
indices = list(range(num_train))
np.random.shuffle(indices)
split = int(np.floor(valid_size * num_train))
train_idx, valid_idx = indices[split:], indices[:split]

print(len(train_idx), len(valid_idx))

# define samplers for obtaining training and validation batches
train_sampler = SubsetRandomSampler(train_idx)
valid_sampler = SubsetRandomSampler(valid_idx)

train_data = TensorDataset(torch.from_numpy(train_features), 
                           torch.from_numpy(train_X), 
                           torch.from_numpy(train_cat),
                           torch.from_numpy(train_y))

test_data = TensorDataset(torch.from_numpy(test_features), 
                          torch.from_numpy(test_X), 
                          torch.from_numpy(test_cat),
                          torch.from_numpy(test_y))


# prepare data loaders (combine dataset and sampler)
train_loader = torch.utils.data.DataLoader(train_data, batch_size = batch_size,
    sampler = train_sampler, num_workers = num_workers, drop_last=True)
valid_loader = torch.utils.data.DataLoader(train_data, batch_size = batch_size, 
    sampler = valid_sampler, num_workers = num_workers, drop_last=True)
test_loader = torch.utils.data.DataLoader(test_data, batch_size = batch_size, 
    num_workers = num_workers, shuffle = False, drop_last=True)

596 149


In [0]:
# Create DataLoaders
# number of subprocesses to use for data loading
num_workers = 0
# how many samples per batch to load
batch_size = 4
# percentage of training set to use as validation
valid_size = 0.15


# obtain training indices that will be used for validation
num_train = len(train_X)
indices = list(range(num_train))
np.random.shuffle(indices)
split = int(np.floor(valid_size * num_train))
train_idx, valid_idx = indices[split:], indices[:split]

print(len(train_idx), len(valid_idx))

# define samplers for obtaining training and validation batches
train_sampler = SubsetRandomSampler(train_idx)
valid_sampler = SubsetRandomSampler(valid_idx)

train_data = TensorDataset(torch.from_numpy(train_features[train_idx]), 
                           torch.from_numpy(train_X[train_idx]), 
                           torch.from_numpy(train_cat[train_idx]),
                           torch.from_numpy(train_y[train_idx]))

valid_data = TensorDataset(torch.from_numpy(train_features[valid_idx]), 
                           torch.from_numpy(train_X[valid_idx]), 
                           torch.from_numpy(train_cat[valid_idx]),
                           torch.from_numpy(train_y[valid_idx]))

test_data = TensorDataset(torch.from_numpy(test_features), 
                          torch.from_numpy(test_X), 
                          torch.from_numpy(test_cat),
                          torch.from_numpy(test_y))

print(len(train_data), len(valid_data))

# prepare data loaders (combine dataset and sampler)
train_loader = torch.utils.data.DataLoader(train_data, batch_size = batch_size,
    num_workers = num_workers, shuffle = True, drop_last=True)
valid_loader = torch.utils.data.DataLoader(valid_data, batch_size = batch_size, 
    num_workers = num_workers, shuffle = True, drop_last=True)
test_loader = torch.utils.data.DataLoader(test_data, batch_size = batch_size, 
    num_workers = num_workers, shuffle = False, drop_last=True)

634 111
634 111


In [0]:
dataiter = iter(train_loader)
sample_features, sample_X, sample_cat, sample_y = dataiter.next()

In [0]:
print(sample_features.shape)
print(sample_X.shape)
print(sample_cat.shape)
print(sample_y.shape)

torch.Size([16, 14, 7])
torch.Size([16, 3])
torch.Size([16, 2])
torch.Size([16])


In [0]:
# First checking if GPU is available
train_on_gpu = torch.cuda.is_available()

if(train_on_gpu):
    print('Training on GPU.')
else:
    print('No GPU available, training on CPU.')

#train_on_gpu = False

Training on GPU.


In [0]:
class MosquitoLSTM(nn.Module):
    def __init__(self, num_numerical_columns, hidden_dim, n_layers):
        super(MosquitoLSTM, self).__init__()

        self.n_layers = n_layers
        self.hidden_dim = hidden_dim

        '''
        embed_size = 0
        for embed in embedding_sizes:
            embed_size += embed[1]

        self.all_embeddings = nn.ModuleList([nn.Embedding(ni, nf) for ni, nf in embedding_sizes])
        '''

        # For LSTM
        self.lstm = nn.LSTM(num_numerical_columns, hidden_dim, n_layers,
                            dropout=0.3, batch_first=True)
        
        # dropout layer
        self.dropout = nn.Dropout(0.3)

        self.dense = nn.Sequential(
            
            nn.Linear(hidden_dim, 1)
        )
        self.ReLU = nn.ReLU()

    def forward(self, X_temp, hidden):
        lstm_out, hidden = self.lstm(X_temp, hidden)
    
        # stack up lstm outputs
        lstm_out = lstm_out.contiguous().view(-1, self.hidden_dim)
        print(lstm_out.shape)
        # dropout and fully-connected layer
        out = self.dense(lstm_out)
        # sigmoid function
        out = self.ReLU(out)
        
        print(out.shape)
        # reshape to be batch_size first
        out = out.view(batch_size, -1)
        print(out.shape)
        out = out[:, -1]
        print(out.shape)



        return out, hidden

    def init_hidden(self, batch_size):
        ''' Initializes hidden state '''
        # Create two new tensors with sizes n_layers x batch_size x hidden_dim,
        # initialized to zero, for hidden state and cell state of LSTM
        weight = next(self.parameters()).data
        
        if (train_on_gpu):
            hidden = (weight.new(self.n_layers, batch_size, self.hidden_dim).zero_().cuda(),
                  weight.new(self.n_layers, batch_size, self.hidden_dim).zero_().cuda())
        else:
            hidden = (weight.new(self.n_layers, batch_size, self.hidden_dim).zero_(),
                      weight.new(self.n_layers, batch_size, self.hidden_dim).zero_())
        
        return hidden


In [0]:
class MosquitoRedux(nn.Module):
    def __init__(self, 
                 lstm_columns, lstm_hidden_dim, lstm_n_layers,
                 num_columns, num_hidden_dim,
                 embedding_sizes, cat_hidden_dim):
        super(MosquitoRedux, self).__init__()

        self.lstm_n_layers = lstm_n_layers
        self.lstm_hidden_dim = lstm_hidden_dim

        self.num_hidden_dim = num_hidden_dim
        self.cat_hidden_dim = cat_hidden_dim

        embed_size = 0
        for embed in embedding_sizes:
            embed_size += embed[1]

        self.all_embeddings = nn.ModuleList([nn.Embedding(ni, nf) for ni, nf in embedding_sizes])

        # For LSTM
        self.lstm = nn.LSTM(lstm_columns, lstm_hidden_dim, lstm_n_layers,
                            dropout=0.3, batch_first=True)
        
        # LSTM dense layer
        self.lstm_dense = nn.Sequential(
            nn.BatchNorm1d(lstm_hidden_dim),
            nn.Dropout(0.3),
            nn.Linear(lstm_hidden_dim, 1),
            nn.LeakyReLU()
        )
        
        self.num_dense = nn.Sequential(
            nn.Dropout(0.3),
            nn.Linear(num_columns, num_hidden_dim),
            nn.LeakyReLU(),
            nn.BatchNorm1d(num_hidden_dim),
            nn.Linear(num_hidden_dim, 1),
            nn.LeakyReLU()
        )

        self.cat_dense = nn.Sequential(
            nn.Dropout(0.3),
            nn.Linear(embed_size, cat_hidden_dim),
            nn.LeakyReLU(),
            nn.BatchNorm1d(cat_hidden_dim),
            nn.Linear(cat_hidden_dim, 1),
            nn.LeakyReLU()
        )

    def forward(self, X_temp, hidden, X_num, X_cat):
        # Forward lstm
        lstm_out, hidden = self.lstm(X_temp, hidden)
        # stack up lstm outputs
        lstm_out = lstm_out.contiguous().view(-1, self.lstm_hidden_dim)
        # dropout and fully-connected layer
        lstm_out = self.lstm_dense(lstm_out)
      
        # reshape to be batch_size first
        lstm_out = lstm_out.view(batch_size, -1)
        lstm_out = lstm_out[:, -1]
        lstm_out = lstm_out.view(batch_size, 1)

        # Forward num
        num_out = self.num_dense(X_num)

        # Forward cat
        embeddings = []
        for i, e in enumerate(self.all_embeddings):
            embed = e(X_cat[:, i])
            embeddings.append(embed)

        cat_out = torch.cat(embeddings, 1)
        cat_out = self.cat_dense(cat_out)

        return lstm_out, hidden, num_out, cat_out

    def init_hidden(self, batch_size):
        ''' Initializes hidden state '''
        # Create two new tensors with sizes n_layers x batch_size x hidden_dim,
        # initialized to zero, for hidden state and cell state of LSTM
        weight = next(self.parameters()).data
        
        if (train_on_gpu):
            hidden = (weight.new(self.lstm_n_layers, batch_size, self.lstm_hidden_dim).zero_().cuda(),
                  weight.new(self.lstm_n_layers, batch_size, self.lstm_hidden_dim).zero_().cuda())
        else:
            hidden = (weight.new(self.lstm_n_layers, batch_size, self.lstm_hidden_dim).zero_(),
                      weight.new(self.lstm_n_layers, batch_size, self.lstm_hidden_dim).zero_())
        
        return hidden

In [0]:
# Instantiate the model w/ hyperparams
a_h = 5

lstm_hidden_dim = np.int(len(train_idx) / (a_h * len(numerical_columns) + 1))
print(lstm_hidden_dim)
lstm_n_layers = 2

num_hidden_dim = 8
cat_hidden_dim = 8

#net = MosquitoLSTM(len(numerical_columns), hidden_dim, n_layers).double()
net = MosquitoRedux(
    len(numerical_columns), lstm_hidden_dim, lstm_n_layers,
    len(non_num_columns), num_hidden_dim,
    embedding_sizes, cat_hidden_dim
).double()

weight_lstm = 2.0
weight_num = 3.0
weight_cat = 2.0

16


In [0]:
lr = 0.0001

criterion = nn.MSELoss()
#optimizer = torch.optim.Adam(net.parameters(), lr=lr)
optimizer = torch.optim.RMSprop(net.parameters(), lr=lr)

In [0]:
# training params

epochs = 200 # 3-4 is approx where I noticed the validation loss stop decreasing

counter = 0
print_every = 10
clip = 10 # gradient clipping

# move model to GPU, if available
if(train_on_gpu):
    net.cuda()

net.train()
# train for some number of epochs
valid_loss_min = np.Inf

for epoch in range(epochs):
    # initialize hidden state
    h = net.init_hidden(batch_size)

    train_loss = 0.0
    valid_loss = 0.0

    net.train()

    # batch loop
    for X_temp, X_num, X_cat, labels in train_loader:
        counter += 1

        if(train_on_gpu):
            X_temp, X_num, X_cat, labels = X_temp.cuda(), X_num.cuda(), X_cat.type(torch.LongTensor).cuda(), labels.cuda()

        # Creating new variables for the hidden state, otherwise
        # we'd backprop through the entire training history
        h = tuple([each.data for each in h])
        # zero accumulated gradients
        net.zero_grad()

        # get the output from the model
        lstm_output, h, num_output, cat_output = net(X_temp, h, X_num, X_cat)

        output = (weight_lstm*lstm_output + weight_num*num_output + weight_cat*cat_output) 

        # calculate the loss and perform backprop
        loss = criterion(output.squeeze(), labels.double())
        loss.backward()
        train_loss += loss.item()
        # `clip_grad_norm` helps prevent the exploding gradient problem in RNNs / LSTMs.
        nn.utils.clip_grad_norm_(net.parameters(), clip)
        optimizer.step()

    net.eval()

    val_h = net.init_hidden(batch_size)

    for X_temp, X_num, X_cat, labels in valid_loader:
        
        
        if(train_on_gpu):
            X_temp, X_num, X_cat, labels = X_temp.cuda(), X_num.cuda(), X_cat.type(torch.LongTensor).cuda(), labels.cuda()

        # Creating new variables for the hidden state, otherwise
        # we'd backprop through the entire training history
        val_h = tuple([each.data for each in val_h])

        # get the output from the model
        lstm_output, h, num_output, cat_output = net(X_temp, h, X_num, X_cat)

        # calculate the loss and perform backprop
        output = (weight_lstm*lstm_output + weight_num*num_output + weight_cat*cat_output) 
        loss = criterion(output.squeeze(), labels.double())
        valid_loss += loss.item()

    # save model if validation loss has decreased
    if valid_loss <= valid_loss_min:
        print('Validation loss decreased ({:.6f} --> {:.6f}).  Saving model ...'.format(
        valid_loss_min,
        valid_loss))
        torch.save(net.state_dict(), 'lstm_test.pt')
        valid_loss_min = valid_loss
        
    print('Epoch: {} \tTraining Loss: {:.6f} \tValidation Loss: {:.6f}'.format(
            epoch, train_loss, valid_loss))

Validation loss decreased (inf --> 2467.577166).  Saving model ...
Epoch: 0 	Training Loss: 4155.658451 	Validation Loss: 2467.577166
Validation loss decreased (2467.577166 --> 2360.766804).  Saving model ...
Epoch: 1 	Training Loss: 3956.668756 	Validation Loss: 2360.766804
Validation loss decreased (2360.766804 --> 1940.561754).  Saving model ...
Epoch: 2 	Training Loss: 4033.685055 	Validation Loss: 1940.561754
Epoch: 3 	Training Loss: 3949.039796 	Validation Loss: 2305.362908
Epoch: 4 	Training Loss: 4005.923488 	Validation Loss: 2237.808165
Epoch: 5 	Training Loss: 4078.748406 	Validation Loss: 2214.898759
Epoch: 6 	Training Loss: 3885.448730 	Validation Loss: 2200.243593
Epoch: 7 	Training Loss: 3960.944540 	Validation Loss: 2193.760746
Epoch: 8 	Training Loss: 3900.574044 	Validation Loss: 2220.251198
Epoch: 9 	Training Loss: 3767.570995 	Validation Loss: 2201.358506
Epoch: 10 	Training Loss: 3828.652089 	Validation Loss: 2209.642114
Epoch: 11 	Training Loss: 3826.675027 	Valida

In [0]:
net.load_state_dict(torch.load('lstm_test.pt'))

<All keys matched successfully>

In [0]:
from sklearn.metrics import mean_absolute_error

# track test loss
test_loss = 0.0
mean_abs = 0.0

net.eval()
# iterate over test data
#bin_op.binarization()
test_h = net.init_hidden(batch_size)

for batch_idx, (X_temp, X_num, X_cat, labels) in enumerate(test_loader):
    if(train_on_gpu):
        X_temp, X_num, X_cat, labels = X_temp.cuda(), X_num.cuda(), X_cat.type(torch.LongTensor).cuda(), labels.cuda()
    
    test_h = tuple([each.data for each in test_h])

    # get the output from the model
    lstm_output, h, num_output, cat_output = net(X_temp, h, X_num, X_cat)

    output = (weight_lstm*lstm_output + weight_num*num_output + weight_cat*cat_output) 
    # / (weight_lstm + weight_num + weight_cat)

    # calculate the loss and perform backprop
    loss = criterion(output.squeeze(), labels.double())

    out_np = output.detach().squeeze().cpu().numpy().astype(int).T
    target_np = labels.detach().cpu().numpy().astype(int).T

    print([(out, tar) for out, tar in zip(out_np, target_np)])

    mean_abs += mean_absolute_error(out_np, target_np) * batch_size
        
    test_loss += loss.item() * batch_size

# calculate average losses
test_loss = np.sqrt(test_loss / len(test_loader.dataset))
mean_abs /= len(test_loader.dataset)
print('Test Loss: {:.6f}\n'.format(test_loss))
print('Mean Abs Loss: {:.6f}\n'.format(mean_abs))

[(4, 3), (4, 3), (4, 2), (4, 2), (4, 2), (4, 3), (4, 3), (4, 3), (4, 2), (4, 6), (4, 4), (4, 4), (4, 5), (4, 3), (4, 2), (4, 16)]
[(4, 4), (4, 10), (4, 5), (4, 3), (4, 8), (5, 2), (4, 9), (4, 66), (5, 3), (4, 9), (4, 11), (4, 8), (4, 25), (4, 4), (5, 3), (5, 2)]
[(4, 3), (9, 3), (9, 6), (9, 3), (9, 8), (9, 8), (9, 6), (9, 9), (9, 12), (9, 18), (9, 24), (9, 19), (7, 3), (7, 15), (9, 12), (7, 12)]
[(9, 17), (9, 9), (7, 3), (7, 12), (9, 33), (9, 11), (7, 6), (7, 2), (9, 7), (9, 5), (7, 2), (9, 11), (9, 23), (9, 4), (9, 7), (7, 5)]
[(7, 6), (7, 10), (7, 12), (7, 5), (7, 10), (8, 2), (6, 3), (7, 32), (7, 13), (8, 27), (7, 9), (7, 12), (8, 16), (7, 6), (8, 3), (7, 7)]
[(8, 2), (8, 6), (6, 10), (7, 6), (7, 2), (8, 3), (8, 5), (7, 2), (7, 5), (8, 17), (7, 18), (7, 3), (8, 8), (6, 2), (7, 15), (8, 5)]
Test Loss: 8.595820

Mean Abs Loss: 4.628571



## DA-RNN

In [0]:
nhidden_encoder = 128
nhidden_decoder = 128

In [0]:
def init_hidden(x, hidden_size: int):
    if train_on_gpu:
            return Variable(X.data.new(1, X.size(0), self.encoder_num_hidden).zero_()).cuda()
        else:
            return Variable(X.data.new(1, X.size(0), self.encoder_num_hidden).zero_())

class Encoder(nn.Module):
    def __init__(self, T,
                 input_size,
                 encoder_num_hidden,
                 parallel=False):
        
        super(Encoder, self).__init__()
        self.encoder_num_hidden = encoder_num_hidden
        self.input_size = input_size
        self.parallel = parallel
        self.T = T

        # Fig 1. Temporal Attention Mechanism: Encoder is LSTM
        self.encoder_lstm = nn.LSTM(
            input_size=self.input_size,
            hidden_size=self.encoder_num_hidden,
            num_layers = 1,
            batch_first = True
        )

        # Construct Input Attention Mechanism via deterministic attention model
        # Eq. 8: W_e[h_{t-1}; s_{t-1}] + U_e * x^k
        self.encoder_attn = nn.Linear(
            in_features=2 * self.encoder_num_hidden + self.T - 1,
            out_features=1
        )

    def forward(self, X):
        X_tilde = Variable(X.data.new(
            X.size(0), self.T - 1, self.input_size).zero_())
        X_encoded = Variable(X.data.new(
            X.size(0), self.T - 1, self.encoder_num_hidden).zero_())

        # h_n, s_n: initial states with dimention hidden_size
        h_n = self._init_states(X)
        s_n = self._init_states(X)



        for t in range(self.T - 1):
            # batch_size * input_size * (2 * hidden_size + T - 1)
            x = torch.cat((h_n.repeat(self.input_size, 1, 1).permute(1, 0, 2),
                           s_n.repeat(self.input_size, 1, 1).permute(1, 0, 2),
                           X.permute(0, 2, 1)), dim=2)

            x = self.encoder_attn(
                x.view(-1, self.encoder_num_hidden * 2 + self.T - 1))

            # get weights by softmax
            alpha = F.softmax(x.view(-1, self.input_size))
            x_tilde = torch.mul(alpha, X[:, t, :])

            self.encoder_lstm.flatten_parameters()

            # encoder LSTM
            _, final_state = self.encoder_lstm(x_tilde.unsqueeze(0), (h_n, s_n))
            h_n = final_state[0]
            s_n = final_state[1]

            X_tilde[:, t, :] = x_tilde
            X_encoded[:, t, :] = h_n

        return X_tilde, X_encoded

    def _init_states(self, X):
        if train_on_gpu:
            return Variable(X.data.new(1, X.size(0), self.encoder_num_hidden).zero_()).cuda()
        else:
            return Variable(X.data.new(1, X.size(0), self.encoder_num_hidden).zero_())

In [0]:
class Decoder(nn.Module):
    """decoder in DA_RNN."""

    def __init__(self, T, decoder_num_hidden, encoder_num_hidden):
        """Initialize a decoder in DA_RNN."""
        super(Decoder, self).__init__()
        self.decoder_num_hidden = decoder_num_hidden
        self.encoder_num_hidden = encoder_num_hidden
        self.T = T

        self.attn_layer = nn.Sequential(
            nn.Linear(2 * decoder_num_hidden + encoder_num_hidden, encoder_num_hidden),
            nn.Tanh(),
            nn.Linear(encoder_num_hidden, 1)
        )
        self.lstm_layer = nn.LSTM(
            input_size=1,
            hidden_size=decoder_num_hidden
        )
        self.fc = nn.Linear(encoder_num_hidden + 1, 1)
        self.fc_final = nn.Linear(decoder_num_hidden + encoder_num_hidden, 1)

        self.fc.weight.data.normal_()

    def forward(self, X_encoded, y_prev):
        """forward."""
        d_n = self._init_states(X_encoded)
        c_n = self._init_states(X_encoded)

        for t in range(self.T - 1):

            x = torch.cat((d_n.repeat(self.T - 1, 1, 1).permute(1, 0, 2),
                           c_n.repeat(self.T - 1, 1, 1).permute(1, 0, 2),
                           X_encoded), dim=2)

            beta = F.softmax(self.attn_layer(
                x.view(-1, 2 * self.decoder_num_hidden + self.encoder_num_hidden)).view(-1, self.T - 1))

            # Eqn. 14: compute context vector
            # batch_size * encoder_hidden_size
            context = torch.bmm(beta.unsqueeze(1), X_encoded)[:, 0, :]
            if t < self.T - 1:
                # Eqn. 15
                # batch_size * 1
                y_tilde = self.fc(
                    torch.cat((context, y_prev[:, t].unsqueeze(1)), dim=1))

                # Eqn. 16: LSTM
                self.lstm_layer.flatten_parameters()
                _, final_states = self.lstm_layer(
                    y_tilde.unsqueeze(0), (d_n, c_n))

                d_n = final_states[0]  # 1 * batch_size * decoder_num_hidden
                c_n = final_states[1]  # 1 * batch_size * decoder_num_hidden

        # Eqn. 22: final output
        y_pred = self.fc_final(torch.cat((d_n[0], context), dim=1))

        return y_pred

    def _init_states(self, X):
        if train_on_gpu:
            return Variable(X.data.new(1, X.size(0), self.encoder_num_hidden).zero_()).cuda()
        else:
            return Variable(X.data.new(1, X.size(0), self.encoder_num_hidden).zero_())

In [0]:
class DA_rnn(nn.Module):
    """da_rnn."""

    def __init__(self, temp_columns, T,
                 encoder_num_hidden,
                 decoder_num_hidden,
                 batch_size,
                 learning_rate,
                 epochs,
                 parallel=False):
        """da_rnn initialization."""
        super(DA_rnn, self).__init__()
        self.encoder_num_hidden = encoder_num_hidden
        self.decoder_num_hidden = decoder_num_hidden
        self.parallel = parallel
        self.T = T

        self.Encoder = Encoder(input_size=temp_columns,
                               encoder_num_hidden=encoder_num_hidden,
                               T=T)
        self.Decoder = Decoder(encoder_num_hidden=encoder_num_hidden,
                               decoder_num_hidden=decoder_num_hidden,
                               T=T)


        if self.parallel:
            self.encoder = nn.DataParallel(self.encoder)
            self.decoder = nn.DataParallel(self.decoder)

    def forward(self, X, y_prev, y_gt):
        input_weighted, input_encoded = self.Encoder(
            Variable(torch.from_numpy(X).type(torch.FloatTensor).to(self.device)))
        y_pred = self.Decoder(input_encoded, Variable(
            torch.from_numpy(y_prev).type(torch.FloatTensor).to(self.device)))


    def train_forward(self, X, y_prev):
        input_weighted, input_encoded = self.Encoder(X)
        y_pred = self.Decoder(input_encoded, y_prev)

        return input_weighted, input_encoded, y_pred