In [1]:
%matplotlib inline
from __future__ import unicode_literals, print_function, division
from io import open
import unicodedata
import string
import re
import random
import numpy as np
import matplotlib.pyplot as plt
import torch
import torch.nn as nn
from torch import optim
import torch.nn.functional as F

# Run on the GPU when CUDA is available; otherwise fall back to the CPU.
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

In [2]:
def prepData(filepath):
    """Load the 'x' and 'y' arrays from an .npz archive and keep one channel.

    Both arrays are indexed as 4-D; only index 0 of the last axis is kept,
    and the result is reshaped to (shape[0], shape[1], -1).

    Args:
        filepath: path to an .npz file containing arrays named 'x' and 'y'.

    Returns:
        Tuple (xExamples, yExamples) of 3-D numpy arrays.
    """
    # NpzFile decompresses an array on every ``data[key]`` access and keeps
    # the file open until closed, so read each array exactly once inside a
    # context manager.
    with np.load(filepath) as data:
        x_all = data['x']
        y_all = data['y']

    xs = x_all[:, :, :, 0]
    ys = y_all[:, :, :, 0]
    xExamples = xs.reshape((x_all.shape[0], x_all.shape[1], -1))
    yExamples = ys.reshape((y_all.shape[0], y_all.shape[1], -1))
    return xExamples, yExamples

In [3]:
# Training inputs and targets (channel 0 only, as selected by prepData).
trafficX, trafficY = prepData("../DCRNN/data/train.npz")

In [4]:
# Validation inputs and targets, prepared the same way as the training split.
trafficValX, trafficValY = prepData("../DCRNN/data/val.npz")

In [5]:
# Size of axis 1 of the training inputs; used as the default ``max_length``
# of train() and evaluate().
MAX_LENGTH = trafficX.shape[1]

In [6]:
class EncoderRNN(nn.Module):
    """GRU encoder that is fed one time step per forward call.

    Args:
        input_size: number of features in each time step.
        hidden_size: width of the GRU hidden state.
        num_layers: number of stacked GRU layers.
    """

    def __init__(self, input_size, hidden_size, num_layers=1):
        super(EncoderRNN, self).__init__()
        self.num_layers = num_layers
        self.hidden_size = hidden_size
        self.gru = nn.GRU(input_size, hidden_size, num_layers=num_layers)

    def forward(self, input, hidden):
        """Run a single time step through the GRU.

        ``input`` is flattened to shape (1, 1, -1) and cast to float before
        being passed to the GRU together with ``hidden``.

        Returns:
            Tuple (output, hidden) exactly as produced by ``nn.GRU``.
        """
        step = input.view(1, 1, -1).float()
        return self.gru(step, hidden)

    def initHidden(self):
        """Return an all-zero hidden state of shape (num_layers, 1, hidden_size).

        The tensor is created on the module-level ``device``.
        """
        shape = (self.num_layers, 1, self.hidden_size)
        return torch.zeros(*shape, device=device)


In [7]:
class DecoderRNN(nn.Module):
    """GRU decoder that emits one real-valued vector per forward call.

    The GRU input width equals ``output_size`` so that the previous output
    (or the previous target, under teacher forcing) can be fed back in.

    Args:
        hidden_size: width of the GRU hidden state.
        output_size: number of values produced per time step.
        num_layers: number of stacked GRU layers.
    """

    def __init__(self, hidden_size, output_size, num_layers=1):
        super(DecoderRNN, self).__init__()
        input_size = output_size
        self.hidden_size = hidden_size
        self.num_layers = num_layers
        self.gru = nn.GRU(input_size, hidden_size, num_layers=num_layers)
        self.out = nn.Linear(hidden_size, output_size)
        # Not applied in forward(): the decoder returns the raw linear output.
        # Kept so the attribute remains available to existing code.
        self.softmax = nn.LogSoftmax(dim=1)

    def forward(self, input, hidden):
        """Run a single decoding step.

        ``input`` is flattened to shape (1, 1, -1) and cast to float, passed
        through the GRU, then projected with the linear output layer.

        Returns:
            Tuple (prediction, hidden) where ``prediction`` has shape
            (1, 1, output_size).
        """
        input = input.view(1, 1, -1).float()
        output, hidden = self.gru(input, hidden)
        outRes = self.out(output)
        return outRes, hidden

    def initHidden(self):
        """Return an all-zero hidden state of shape (num_layers, 1, hidden_size).

        The first dimension follows ``num_layers`` (it was previously fixed at
        1, which does not match a multi-layer GRU). The tensor is created on
        the same device as the module's parameters.
        """
        param_device = next(self.parameters()).device
        return torch.zeros(self.num_layers, 1, self.hidden_size,
                           device=param_device)

In [8]:
# Probability of feeding the ground-truth target (rather than the decoder's
# own prediction) as the next decoder input. With a value of 1,
# ``random.random() < teacher_forcing_ratio`` is always true.
teacher_forcing_ratio = 1
NULLCHAR = 0


def _stepLoss(criterion, prediction, target):
    """Loss for one decoding step.

    ``criterion`` is a ``(loss_fn, name)`` pair. For the name "MSE" the square
    root of the loss is returned; for "Mean Absolute Error" the loss is
    returned unchanged. Any other name is rejected.
    """
    loss_fn, loss_name = criterion[0], criterion[1]
    if loss_name not in ("MSE", "Mean Absolute Error"):
        raise AssertionError("Cannot match loss")
    value = loss_fn(prediction.squeeze(), target.squeeze().float())
    return torch.sqrt(value) if loss_name == "MSE" else value


def train(input_tensor, target_tensor, encoder, decoder,
          encoder_optimizer, decoder_optimizer, criterion,
          max_length = MAX_LENGTH):
    """Run one optimisation step on a single (input, target) sequence pair.

    Args:
        input_tensor: input sequence; iterated along dimension 0.
        target_tensor: target sequence; iterated along dimension 0.
        encoder, decoder: the two recurrent modules.
        encoder_optimizer, decoder_optimizer: their optimizers.
        criterion: ``(loss_fn, name)`` pair, name being "MSE" or
            "Mean Absolute Error".
        max_length: accepted for backward compatibility; no longer used
            because the per-step encoder outputs are not consumed anywhere.

    Returns:
        The summed per-step loss divided by the target length, as a float.

    Raises:
        ValueError: if ``target_tensor`` has no time steps.
        AssertionError: if the criterion name is not recognised.
    """
    input_length = input_tensor.size(0)
    target_length = target_tensor.size(0)
    if target_length == 0:
        raise ValueError("target_tensor must contain at least one time step")

    encoder_optimizer.zero_grad()
    decoder_optimizer.zero_grad()

    # Encode the whole input; only the final hidden state is needed.
    encoder_hidden = encoder.initHidden()
    for ei in range(input_length):
        _, encoder_hidden = encoder(input_tensor[ei], encoder_hidden)

    # The decoder starts from the encoder's last hidden state and an all-zero
    # first input with the same width as one target step.
    decoder_hidden = encoder_hidden
    decoder_input = torch.zeros(1, target_tensor.size(-1), device=device)

    use_teacher_forcing = random.random() < teacher_forcing_ratio

    loss = 0
    for di in range(target_length):
        decoder_output, decoder_hidden = decoder(decoder_input, decoder_hidden)
        loss += _stepLoss(criterion, decoder_output, target_tensor[di])
        if use_teacher_forcing:
            # Feed the ground truth as the next input.
            decoder_input = target_tensor[di]
        else:
            # Feed back the model's own prediction, detached from the graph.
            # There is no end-of-sequence value for real-valued outputs, so
            # decoding always runs for the full target length.
            decoder_input = decoder_output.squeeze().detach()

    loss.backward()

    encoder_optimizer.step()
    decoder_optimizer.step()

    return loss.item() / target_length

In [9]:
import time
import math


def asMinutes(s):
    """Format a duration given in seconds as the string '<minutes>m <seconds>s'."""
    minutes = math.floor(s / 60)
    seconds = s - minutes * 60
    return '%dm %ds' % (minutes, seconds)


def timeSince(since, percent):
    """Report elapsed and estimated remaining time.

    Args:
        since: start timestamp, as returned by ``time.time()``.
        percent: fraction of the work completed so far.

    Returns:
        A string '<elapsed> (- <remaining>)', both parts formatted by asMinutes.
    """
    elapsed = time.time() - since
    estimated_total = elapsed / (percent)
    remaining = estimated_total - elapsed
    return '%s (- %s)' % (asMinutes(elapsed), asMinutes(remaining))

In [10]:
def trainIters(encoder, decoder, xMatrix, yMatrix, n_iters, model_description, xVal, yVal, print_every=1000, plot_every=100, learning_rate=0.01):
    """Train the encoder/decoder with SGD on randomly drawn examples.

    Each iteration draws one random row of ``xMatrix``/``yMatrix`` and calls
    train() on it. The running average training loss is printed every
    ``print_every`` iterations. Every ``plot_every`` iterations the average
    training loss and a validation loss (from validate()) are recorded, and
    both curves are passed to showPlot() at the end.

    Args:
        encoder, decoder: modules to train.
        xMatrix, yMatrix: training inputs/targets, indexed along axis 0.
        n_iters: number of single-example training steps.
        model_description: text used in the plot titles.
        xVal, yVal: validation inputs/targets passed to validate().
        print_every: logging interval in iterations.
        plot_every: interval at which losses are recorded for plotting.
        learning_rate: SGD learning rate for both optimizers.
    """
    start = time.time()
    plot_losses = []
    validationLosses = []
    print_loss_total = 0  # Reset every print_every
    plot_loss_total = 0  # Reset every plot_every
    encoder_optimizer = optim.SGD(encoder.parameters(), lr=learning_rate)
    decoder_optimizer = optim.SGD(decoder.parameters(), lr=learning_rate)
    # nn.L1Loss() averages over elements by default, which is what the
    # deprecated ``size_average=True`` argument requested.
    criterion = (nn.L1Loss(), "Mean Absolute Error")
    valLoss = (nn.L1Loss(), "Mean Absolute Error")

    for step in range(1, n_iters + 1):
        choice = np.random.randint(0, xMatrix.shape[0])
        # torch.tensor accepts dtype/device; the legacy torch.FloatTensor
        # constructor rejects a non-CPU ``device`` argument.
        input_tensor = torch.tensor(xMatrix[choice], dtype=torch.float, device=device)
        target_tensor = torch.tensor(yMatrix[choice], dtype=torch.float, device=device)
        loss = train(input_tensor, target_tensor, encoder,
                     decoder, encoder_optimizer, decoder_optimizer, criterion)
        print_loss_total += loss
        plot_loss_total += loss

        if step % print_every == 0:
            print_loss_avg = print_loss_total / print_every
            print_loss_total = 0
            print('%s (%d %d%%) %.4f' % (timeSince(start, step / n_iters),
                                         step, step / n_iters * 100, print_loss_avg))

        if step % plot_every == 0:
            plot_loss_avg = plot_loss_total / plot_every
            plot_losses.append(plot_loss_avg)
            plot_loss_total = 0
            # Store a plain float so plotting does not depend on the device
            # the loss tensor lives on.
            validationLosses.append(float(validate(encoder, decoder, xVal, yVal, valLoss[0])))

    # Label each curve with the name of the loss that produced it.
    showPlot(validationLosses, plot_every, model_description, valLoss[1], training=False)
    showPlot(plot_losses, plot_every, model_description, criterion[1], training=True)

In [11]:
import matplotlib.pyplot as plt
# Use the non-interactive Agg backend; showPlot writes figures to disk with plt.savefig.
plt.switch_backend('agg')
import matplotlib.ticker as ticker
import numpy as np
import os

def _nextPlotPath(prefix, directory="./figs"):
    """Return the first '<directory>/<prefix>_loss_plot_<k>.png' (k = 0, 1, ...) that does not exist."""
    index = 0
    while True:
        candidate = "{}/{}_loss_plot_{}.png".format(directory, prefix, index)
        if not os.path.isfile(candidate):
            return candidate
        index += 1


def showPlot(points, plot_every, model_description, lossDescription, training=True):
    """Plot a loss curve and save it as a PNG under ./figs.

    Args:
        points: recorded loss values, one per ``plot_every`` iterations.
        plot_every: iteration spacing between consecutive points (x-axis scale).
        model_description: text appended to the plot title.
        lossDescription: name of the loss; used as y-label and in the title.
        training: selects the "training" or "Validation" title and file prefix.

    The file is saved as ./figs/<prefix>_loss_plot_<k>.png using the smallest
    k that is not already taken, so earlier plots are never overwritten.
    """
    phase = "training" if training else "Validation"

    plt.rcParams.update({'font.size': 8})
    # A single figure/axes pair; a separate plt.figure() call would leave an
    # extra empty figure open on every invocation.
    fig, ax = plt.subplots()
    ax.set_xlabel("iteration")
    ax.set_ylabel(lossDescription)
    ax.plot(np.arange(len(points)) * plot_every, points)
    ax.grid()
    ax.set_title("{} {} - {}".format(phase, lossDescription, model_description))

    # Make sure the output directory exists before saving.
    if not os.path.isdir("./figs"):
        os.makedirs("./figs")
    fig.savefig(_nextPlotPath(phase))
    # Release the figure so repeated calls do not accumulate open figures.
    plt.close(fig)

In [12]:
def evaluate(encoder, decoder, inputSequence, max_length=MAX_LENGTH):
    """Predict a sequence of ``max_length`` steps from an input sequence.

    The input is encoded step by step; the decoder then starts from the
    encoder's final hidden state and an all-zero first input, and each
    prediction is fed back as the next decoder input. No gradients are
    tracked.

    Args:
        encoder, decoder: the trained modules.
        inputSequence: tensor iterated along dimension 0.
        max_length: number of decoding steps to generate.

    Returns:
        A list of ``max_length`` tensors, one squeezed decoder output per step.
    """
    with torch.no_grad():
        input_tensor = inputSequence
        input_length = input_tensor.size()[0]

        # Only the final hidden state is needed; per-step encoder outputs are
        # not consumed by this decoder.
        encoder_hidden = encoder.initHidden()
        for ei in range(input_length):
            _, encoder_hidden = encoder(input_tensor[ei], encoder_hidden)

        # The first decoder input has the width of one input step.
        # NOTE(review): this presumes the decoder's input width equals the
        # encoder's feature width - confirm if the two ever differ.
        decoder_input = torch.zeros(1, input_tensor.size()[-1], device=device)
        decoder_hidden = encoder_hidden

        nextSequence = []
        for di in range(max_length):
            decoder_output, decoder_hidden = decoder(decoder_input, decoder_hidden)
            prediction = decoder_output.squeeze().detach()
            nextSequence.append(prediction)
            # Feed the prediction back in as the next input.
            decoder_input = prediction
        return nextSequence


In [13]:
def evaluateRandomly(encoder, decoder, n=10, pairs=None):
    """Print the input, target and prediction for ``n`` randomly chosen pairs.

    Args:
        encoder, decoder: the trained modules.
        n: number of pairs to show.
        pairs: sequence of (input_tensor, target_tensor) pairs to draw from.
            When omitted, the module-level ``tensorPairs`` is used.
    """
    if pairs is None:
        # NOTE(review): ``tensorPairs`` is not defined in the visible part of
        # this notebook - pass ``pairs`` explicitly or confirm it is defined.
        pairs = tensorPairs
    for _ in range(n):
        pair = random.choice(pairs)
        print('>', pair[0])
        print('=', pair[1])
        nextSequence = evaluate(encoder, decoder, pair[0])
        # evaluate() returns tensors, which str.join cannot concatenate;
        # render each predicted step as text first.
        outputSequence = ' '.join(str(step.tolist()) for step in nextSequence)
        print('<', outputSequence)
        print('')

In [14]:
def validate(encoder, decoder, xMatrix, yMatrix, lossFn, n_vals=100):
    """Estimate the average per-step loss on randomly chosen examples.

    Args:
        encoder, decoder: the modules to evaluate.
        xMatrix, yMatrix: inputs/targets, indexed along axis 0.
        lossFn: callable ``lossFn(prediction, target)`` returning a tensor.
        n_vals: number of examples to evaluate.

    Returns:
        The mean over examples of the mean per-step loss.

    Examples are drawn without replacement whenever ``n_vals`` does not
    exceed the number of rows, so ``n_vals == xMatrix.shape[0]`` visits every
    example exactly once; larger values fall back to sampling with
    replacement.
    """
    n_examples = xMatrix.shape[0]
    choices = np.random.choice(n_examples, size=n_vals,
                               replace=n_vals > n_examples)
    loss = 0.0
    for choice in choices:
        # torch.tensor accepts dtype/device; the legacy torch.FloatTensor
        # constructor rejects a non-CPU ``device`` argument.
        input_tensor = torch.tensor(xMatrix[choice], dtype=torch.float, device=device)
        target_tensor = torch.tensor(yMatrix[choice], dtype=torch.float, device=device)
        nextSequence = evaluate(encoder, decoder, input_tensor)
        seq_loss = 0.0
        n_steps = 0
        for ps, ts in zip(nextSequence, target_tensor):
            seq_loss += lossFn(ps, ts)
            n_steps += 1
        # Average over the steps actually compared (zip stops at the shorter
        # of prediction and target).
        loss += seq_loss / n_steps
    return loss / n_vals

In [15]:
# Model and training configuration.
hidden_size = 256
input_size = trafficX.shape[2]
output_size = trafficY.shape[2]
num_layers = 2
modelDescription = "RNN with GRU, {} unit hidden state, {} layer GRU".format(hidden_size, num_layers) 
N_iters = 2000 #N_iters = trafficX.shape[0]
encoder1 = EncoderRNN(input_size, hidden_size, num_layers).to(device)
decoder1 = DecoderRNN(hidden_size, output_size, num_layers).to(device)
# plot_every is used as a modulus and as an averaging count inside trainIters,
# so it must be a positive integer: with true division N_iters/100 is a float
# and becomes fractional whenever N_iters is not a multiple of 100.
trainIters(encoder1, decoder1, trafficX, trafficY, N_iters, modelDescription, trafficValX, trafficValY, print_every=100, plot_every=max(1, N_iters // 100))



0m 6s (- 2m 2s) (100 5%) 51.1468
0m 13s (- 2m 1s) (200 10%) 39.8079
0m 20s (- 1m 57s) (300 15%) 28.6940
0m 27s (- 1m 51s) (400 20%) 19.5892
0m 34s (- 1m 44s) (500 25%) 11.8336
0m 41s (- 1m 37s) (600 30%) 8.3118
0m 49s (- 1m 31s) (700 35%) 7.6935
0m 56s (- 1m 25s) (800 40%) 7.6266
1m 4s (- 1m 18s) (900 45%) 7.3595
1m 11s (- 1m 11s) (1000 50%) 6.9032
1m 18s (- 1m 4s) (1100 55%) 7.0101
1m 26s (- 0m 57s) (1200 60%) 7.4244
1m 33s (- 0m 50s) (1300 65%) 6.7797
1m 40s (- 0m 43s) (1400 70%) 7.8439
1m 47s (- 0m 35s) (1500 75%) 7.2518
1m 55s (- 0m 28s) (1600 80%) 7.4844
2m 2s (- 0m 21s) (1700 85%) 7.3034
2m 9s (- 0m 14s) (1800 90%) 6.9842
2m 17s (- 0m 7s) (1900 95%) 6.9402
2m 24s (- 0m 0s) (2000 100%) 7.7803


In [16]:
# Open the raw training archive to inspect its contents.
trafficData = np.load("../DCRNN/data/train.npz")

In [17]:
# Names of the arrays stored in the archive.
trafficData.files

['x', 'y', 'x_offsets', 'y_offsets']

In [18]:
# Inspect the 'x_offsets' array stored alongside the inputs.
trafficData["x_offsets"]

array([[-11],
       [-10],
       [ -9],
       [ -8],
       [ -7],
       [ -6],
       [ -5],
       [ -4],
       [ -3],
       [ -2],
       [ -1],
       [  0]])

In [19]:
# Shape of the raw 'x' array, before prepData keeps only index 0 of the last axis.
trafficData['x'].shape

(23974, 12, 207, 2)

In [20]:
# Sanity check of L1Loss with summation: six element pairs, each differing by
# 1, sum to 6. ``reduction='sum'`` replaces the deprecated ``size_average=False``.
l = nn.L1Loss(reduction='sum')
t1 = torch.tensor([[1., 2., 3.], [4., 5., 6.]])
t2 = torch.tensor([[2., 3., 4.], [5., 6., 7.]])
l(t1, t2)



tensor(6.)

In [21]:
# Estimate the validation L1 loss of the trained model, drawing as many
# samples as there are validation examples.
validate(encoder1, decoder1, trafficValX, trafficValY, nn.L1Loss(), n_vals=trafficValX.shape[0])

tensor(8.7823)