In [55]:
%matplotlib inline
from __future__ import unicode_literals, print_function, division
from io import open
import unicodedata
import string
import re
import random
import numpy as np
import matplotlib.pyplot as plt
import torch
import torch.nn as nn
from torch import optim
import torch.nn.functional as F
# Presumably a gradient-clipping threshold.
# NOTE(review): no cell visible in this notebook reads `clip` — confirm it is still needed.
clip = 50
# Run on the GPU when CUDA is available, otherwise fall back to the CPU.
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

In [272]:
class Data():
    """Sequence examples loaded from an ``.npz`` archive.

    The archive must provide arrays ``x`` and ``y`` with at least four axes.
    Only index 0 of the fourth axis is kept and any remaining trailing axes
    are flattened, so ``xExamples`` and ``yExamples`` are both shaped
    ``(num_examples, seq_length, num_features)``.
    """

    def __init__(self, filepath):
        """Read ``x`` and ``y`` from ``filepath`` and store them as 3-D arrays."""
        # The archive object holds an open file handle; the context manager
        # releases it as soon as the two arrays have been read into memory.
        with np.load(filepath) as data:
            xs = data['x'][:, :, :, 0]
            ys = data['y'][:, :, :, 0]
        self.xExamples = xs.reshape((xs.shape[0], xs.shape[1], -1))  # num_examples x seq_length x num_features
        self.yExamples = ys.reshape((ys.shape[0], ys.shape[1], -1))  # num_examples x seq_length x num_features

    def getSequenceLength(self):
        """Return the number of time steps per example (axis 1 of ``xExamples``)."""
        return self.xExamples.shape[1]

    def getNumFeatures(self):
        """Return the number of features per time step (axis 2 of ``xExamples``)."""
        return self.xExamples.shape[2]

    def random_batch(self, batch_size):
        """Sample ``batch_size`` (x, y) pairs uniformly at random, with replacement.

        Returns:
            ``(inputTensor, input_lengths, targetTensor, target_lengths)`` where
            the two float tensors are ``seq_length x batch_size x num_features``
            and the two int tensors hold the sequence length of every sampled
            example (all examples share one length, so the entries are equal).
        """
        num_examples = self.xExamples.shape[0]
        # One draw per example keeps x and y paired on the same index.
        indices = np.array(
            [np.random.randint(0, num_examples) for _ in range(batch_size)],
            dtype=np.intp,
        )

        # Fancy indexing copies the selected rows, so the tensors below never
        # alias the stored dataset.
        input_seqs = self.xExamples[indices]
        target_seqs = self.yExamples[indices]

        input_lengths = torch.full((batch_size,), input_seqs.shape[1], dtype=torch.int32)
        target_lengths = torch.full((batch_size,), target_seqs.shape[1], dtype=torch.int32)

        # (batch, seq, feat) -> (seq, batch, feat), the layout nn.GRU expects.
        inputTensor = torch.as_tensor(input_seqs, dtype=torch.float32).transpose(0, 1)
        targetTensor = torch.as_tensor(target_seqs, dtype=torch.float32).transpose(0, 1)
        return inputTensor, input_lengths, targetTensor, target_lengths

In [35]:
# Load the training and validation splits from .npz archives (paths are relative to the notebook).
TrafficDataTrainObj = Data("../DCRNN/data/train.npz")
TrafficDataValObj = Data("../DCRNN/data/val.npz")

In [36]:
# Number of time steps per example (axis 1 of the num_examples x seq_length x num_features array).
MAX_LENGTH = TrafficDataTrainObj.xExamples.shape[1]

In [37]:
# Sanity check: draw a batch of two examples and display the input tensor's shape
# (seq_length x batch_size x num_features).
inputTTest, input_lengthsTest, targetTTest, target_lengthsTest = TrafficDataTrainObj.random_batch(2)
inputTTest.size()
# Alternative check of the raw array shape:
#TrafficDataTrainObj.xExamples.shape

torch.Size([12, 2, 207])

In [264]:
class EncoderRNN(nn.Module):
    """GRU encoder that processes a whole batch of sequences in one call.

    Args:
        sequence_length: default length assumed for every sequence when
            ``forward`` is called with ``input_lengths=None``.
        num_features: size of each input time step.
        hidden_size: size of the GRU hidden state.
        n_layers: number of stacked GRU layers.
        dropout: dropout probability between stacked GRU layers.
    """

    def __init__(self, sequence_length, num_features, hidden_size, n_layers=1, dropout=0.1):
        super(EncoderRNN, self).__init__()
        self.sequence_length = sequence_length
        self.num_features = num_features
        self.hidden_size = hidden_size
        self.n_layers = n_layers
        self.dropout = dropout
        # nn.GRU applies dropout only between stacked layers; with a single
        # layer a non-zero value has no effect and just triggers a warning.
        self.gru = nn.GRU(num_features, hidden_size, n_layers,
                          dropout=(self.dropout if n_layers > 1 else 0))

    def forward(self, input_seqs, input_lengths, hidden=None):
        """Encode ``input_seqs`` (seq_len x batch x num_features).

        Args:
            input_seqs: padded input batch, time-major.
            input_lengths: per-example sequence lengths (tensor or list), in
                any order. ``None`` means every example has
                ``self.sequence_length`` steps.
            hidden: optional initial hidden state (n_layers x batch x hidden_size).

        Returns:
            ``(outputs, hidden)`` with ``outputs`` shaped
            seq_len x batch x hidden_size (zero past each example's length)
            and ``hidden`` shaped n_layers x batch x hidden_size.
        """
        batch_size = input_seqs.size(1)
        if input_lengths is None:
            lengths = torch.full((batch_size,), int(self.sequence_length), dtype=torch.int64)
        else:
            # pack_padded_sequence needs the lengths on the CPU.
            lengths = torch.as_tensor(input_lengths, dtype=torch.int64, device="cpu")

        # enforce_sorted=False lets callers pass batches in any length order.
        packed = torch.nn.utils.rnn.pack_padded_sequence(input_seqs, lengths, enforce_sorted=False)
        outputs, hidden = self.gru(packed, hidden)
        # total_length keeps the time axis equal to the input's, even when the
        # longest sequence in this batch is shorter than the padding.
        outputs, _ = torch.nn.utils.rnn.pad_packed_sequence(outputs, total_length=input_seqs.size(0))
        return outputs, hidden

In [265]:
class Attn(nn.Module):
    """Attention weights over encoder outputs for one decoder step.

    Supported scoring methods (``h`` = decoder state, ``e`` = encoder output):

    * ``'dot'``:     ``h . e``
    * ``'general'``: ``h . W e``
    * ``'concat'``:  ``v . tanh(W [h; e])``
    """

    _METHODS = ('dot', 'general', 'concat')

    def __init__(self, method, hidden_size):
        super(Attn, self).__init__()
        if method not in self._METHODS:
            raise ValueError("Unknown attention method: {!r}".format(method))

        self.method = method
        self.hidden_size = hidden_size

        if self.method == 'general':
            self.attn = nn.Linear(self.hidden_size, self.hidden_size)

        elif self.method == 'concat':
            self.attn = nn.Linear(self.hidden_size * 2, self.hidden_size)
            # Explicitly initialised: an uninitialised tensor may hold
            # arbitrary values (including NaN) that would poison training.
            self.v = nn.Parameter(torch.empty(1, self.hidden_size))
            bound = self.hidden_size ** -0.5
            nn.init.uniform_(self.v, -bound, bound)

    def forward(self, hidden, encoder_outputs):
        """Return softmax-normalised attention weights.

        Args:
            hidden: current decoder output, 1 x batch x hidden_size.
            encoder_outputs: seq_len x batch x hidden_size.

        Returns:
            Tensor of shape batch x 1 x seq_len whose last axis sums to 1.
        """
        energies = self._energies(hidden, encoder_outputs)  # S x B
        # Normalise over the sequence axis, then insert the singleton axis
        # that bmm() needs: B x S -> B x 1 x S.
        return F.softmax(energies.t(), dim=1).unsqueeze(1)

    def score(self, hidden, encoder_output):
        """Return the scalar energy between one decoder state and one encoder output.

        Both arguments may carry extra singleton axes; each is treated as a
        single vector of ``hidden_size`` elements.
        """
        energy = self._energies(hidden.reshape(1, 1, -1), encoder_output.reshape(1, 1, -1))
        return energy.reshape(())

    def _energies(self, hidden, encoder_outputs):
        """Unnormalised scores (S x B) for hidden (1 x B x H) against encoder_outputs (S x B x H)."""
        if self.method == 'dot':
            return torch.sum(hidden * encoder_outputs, dim=2)
        if self.method == 'general':
            return torch.sum(hidden * self.attn(encoder_outputs), dim=2)
        # 'concat': pair the decoder state with every encoder step.
        expanded = hidden.expand(encoder_outputs.size(0), -1, -1)
        combined = torch.tanh(self.attn(torch.cat((expanded, encoder_outputs), 2)))
        return torch.sum(self.v * combined, dim=2)

In [266]:
class AttnDecoderRNN(nn.Module):
    """GRU decoder that attends over the encoder outputs at every step.

    Args:
        hidden_size: size of the GRU hidden state (must match the encoder).
        output_size: size of each predicted time step; also the size of the
            decoder input, since each prediction step is fed a previous step.
        n_layers: number of stacked GRU layers.
        dropout: dropout probability between stacked GRU layers.
        method: scoring method handed to ``Attn``. With ``None`` no attention
            layer is built and ``forward`` raises ``RuntimeError``.
    """

    def __init__(self, hidden_size, output_size, n_layers=1, dropout=0.1, method=None):
        super(AttnDecoderRNN, self).__init__()

        # Keep for reference
        self.hidden_size = hidden_size
        self.output_size = output_size
        self.n_layers = n_layers
        self.dropout = dropout

        # Define layers. nn.GRU applies dropout only between stacked layers,
        # so a single layer gets 0 to avoid the no-op warning.
        self.gru = nn.GRU(self.output_size, hidden_size, n_layers,
                          dropout=(dropout if n_layers > 1 else 0))
        self.concat = nn.Linear(hidden_size * 2, hidden_size)
        self.out = nn.Linear(hidden_size, output_size)
        if method is not None:
            self.attn = Attn(method, hidden_size)
        else:
            # Explicit marker so forward() can fail with a clear message.
            self.attn = None

    def forward(self, input_seq, last_hidden, encoder_outputs):
        """Run a single decoding step (S = 1).

        Args:
            input_seq: batch x output_size, the previous time step.
            last_hidden: n_layers x batch x hidden_size.
            encoder_outputs: seq_len x batch x hidden_size.

        Returns:
            ``(output, hidden, attn_weights)`` shaped batch x output_size,
            n_layers x batch x hidden_size and batch x 1 x seq_len.

        Raises:
            RuntimeError: if the decoder was constructed with ``method=None``.
        """
        if self.attn is None:
            raise RuntimeError(
                "AttnDecoderRNN has no attention layer; construct it with "
                "method='general' (or another method supported by Attn)."
            )

        expandedInput = input_seq.unsqueeze(0)  # S=1 x B x output_size

        # Get current hidden state from decoder input and last hidden state
        rnn_output, hidden = self.gru(expandedInput, last_hidden)

        # Attention over all encoder outputs, then the weighted sum of them.
        attn_weights = self.attn(rnn_output, encoder_outputs)          # B x 1 x S
        context = attn_weights.bmm(encoder_outputs.transpose(0, 1))    # B x 1 x NHidden

        rnn_output = rnn_output.squeeze(0)  # S=1 x B x NHidden -> B x NHidden
        context = context.squeeze(1)        # B x S=1 x NHidden -> B x NHidden
        concat_input = torch.cat((rnn_output, context), 1)
        concat_output = torch.tanh(self.concat(concat_input))

        output = self.out(concat_output)

        # Return final output, hidden state, and the attention weights
        return output, hidden, attn_weights

In [267]:
class DecoderRNN(nn.Module):
    """Plain GRU decoder (no attention) that predicts one time step per call.

    Args:
        hidden_size: size of the GRU hidden state (must match the encoder).
        output_size: size of each predicted time step; also the decoder's
            input size, since each step is fed a previous step.
        n_layers: number of stacked GRU layers.
        dropout: dropout probability between stacked GRU layers.
    """

    def __init__(self, hidden_size, output_size, n_layers=1, dropout=0.1):
        super(DecoderRNN, self).__init__()

        # Keep for reference
        self.hidden_size = hidden_size
        self.output_size = output_size
        self.n_layers = n_layers
        self.dropout = dropout

        # Define layers. nn.GRU applies dropout only between stacked layers,
        # so a single layer gets 0 to avoid the no-op warning.
        self.gru = nn.GRU(self.output_size, hidden_size, n_layers,
                          dropout=(dropout if n_layers > 1 else 0))
        # Not used by forward(); retained so the module keeps the same
        # parameter names as before (e.g. for saved state dicts).
        self.concat = nn.Linear(hidden_size * 2, hidden_size)
        self.out = nn.Linear(hidden_size, output_size)

    def forward(self, input_seq, last_hidden):
        """Run a single decoding step (S = 1).

        Args:
            input_seq: batch x output_size, the previous time step.
            last_hidden: n_layers x batch x hidden_size.

        Returns:
            ``(output, hidden)`` where ``output`` is 1 x batch x output_size
            (the S=1 axis is kept) and ``hidden`` is
            n_layers x batch x hidden_size.
        """
        expandedInput = input_seq.unsqueeze(0)  # S=1 x B x output_size

        # Get current hidden state from decoder input and last hidden state
        rnn_output, hidden = self.gru(expandedInput, last_hidden)

        output = self.out(rnn_output)

        # Return final output, hidden state
        return output, hidden

In [270]:
def testModels():
    """Smoke-test EncoderRNN + DecoderRNN on a tiny random batch.

    Draws three examples from ``TrafficDataTrainObj``, runs the encoder once
    and the decoder step by step with teacher forcing, and prints the tensor
    shapes plus the L1 loss of the (untrained) predictions.
    """
    small_batch_size = 3
    small_hidden_size = 8
    small_n_layers = 2

    input_batches, input_lengths, target_batches, target_lengths = TrafficDataTrainObj.random_batch(small_batch_size)
    print('input_batches', input_batches.size()) # (max_len x batch_size x NFeatues)
    print('target_batches', target_batches.size()) # (max_len x batch_size x NFeatures)
    num_target_features = target_batches.size(2)

    # EncoderRNN(sequence_length, num_features, hidden_size, n_layers=1, dropout=0.1)
    encoderTest = EncoderRNN(int(input_lengths[0]), input_batches.size(2), small_hidden_size, n_layers=small_n_layers)

    # DecoderRNN(hidden_size, output_size, n_layers=1, dropout=0.1)
    decoderTest = DecoderRNN(small_hidden_size, num_target_features, n_layers=small_n_layers)

    # Encode
    encoder_outputs, encoder_hidden = encoderTest(input_batches, input_lengths, hidden=None)
    print('encoder_outputs', encoder_outputs.size()) # max_len x batch_size x hidden_size
    print('encoder_hidden', encoder_hidden.size()) # n_layers x batch_size x hidden_size

    # Decode: start from an all-zero step and the encoder's final hidden state.
    max_target_length = int(max(target_lengths))
    decoder_input = torch.zeros(small_batch_size, num_target_features)
    decoder_hidden = encoder_hidden
    all_decoder_outputs = torch.zeros(max_target_length, small_batch_size, num_target_features)

    # Run through decoder one time step at a time
    for t in range(max_target_length):
        decoder_output, decoder_hidden = decoderTest(decoder_input, decoder_hidden)
        # DecoderRNN keeps the S=1 axis; drop it to fit the B x NFeatures slot.
        all_decoder_outputs[t] = decoder_output.squeeze(0)
        decoder_input = target_batches[t] # Next input is current target

    loss = nn.L1Loss()
    lossVal = loss(all_decoder_outputs.detach(), target_batches)
    print("loss", lossVal)

In [271]:
# Run the encoder/decoder smoke test; it prints tensor shapes and an L1 loss.
testModels()

input_batches torch.Size([12, 3, 207])
target_batches torch.Size([12, 3, 207])
encoder_outputs torch.Size([12, 3, 8])
encoder_hidden torch.Size([2, 3, 8])
loss tensor(57.5563)


In [256]:
def testModelAttn():
    """Smoke-test EncoderRNN + AttnDecoderRNN on a tiny random batch.

    Draws three examples from ``TrafficDataTrainObj``, runs the encoder once
    and the attention decoder step by step with teacher forcing, and prints
    the tensor shapes plus the L1 loss of the (untrained) predictions.
    """
    small_batch_size = 3
    small_hidden_size = 8
    small_n_layers = 2

    input_batches, input_lengths, target_batches, target_lengths = TrafficDataTrainObj.random_batch(small_batch_size)
    print('input_batches', input_batches.size()) # (max_len x batch_size x NFeatues)
    print('target_batches', target_batches.size()) # (max_len x batch_size x NFeatures)
    num_target_features = target_batches.size(2)

    # EncoderRNN(sequence_length, num_features, hidden_size, n_layers=1, dropout=0.1)
    # sequence_length is a single integer, not the per-example lengths tensor.
    encoderTest = EncoderRNN(int(input_lengths[0]), input_batches.size(2), small_hidden_size, n_layers=small_n_layers)

    # AttnDecoderRNN(hidden_size, output_size, n_layers=1, dropout=0.1, method=None)
    decoderTest = AttnDecoderRNN(small_hidden_size, num_target_features, n_layers=small_n_layers, method="general")

    # Encode
    encoder_outputs, encoder_hidden = encoderTest(input_batches, input_lengths, None)
    print('encoder_outputs', encoder_outputs.size()) # max_len x batch_size x hidden_size
    print('encoder_hidden', encoder_hidden.size()) # n_layers x batch_size x hidden_size

    # Decode: start from an all-zero step and the encoder's final hidden state.
    max_target_length = int(max(target_lengths))
    decoder_input = torch.zeros(small_batch_size, num_target_features)
    decoder_hidden = encoder_hidden
    all_decoder_outputs = torch.zeros(max_target_length, small_batch_size, num_target_features)

    # Run through decoder one time step at a time
    for t in range(max_target_length):
        decoder_output, decoder_hidden, attnVec = decoderTest(decoder_input, decoder_hidden, encoder_outputs)
        all_decoder_outputs[t] = decoder_output
        decoder_input = target_batches[t] # Next input is current target

    loss = nn.L1Loss()
    lossVal = loss(all_decoder_outputs.detach(), target_batches)
    print("loss", lossVal)

In [257]:
# Run the attention-decoder smoke test; it prints tensor shapes and an L1 loss.
testModelAttn()

input_batches torch.Size([12, 3, 207])
target_batches torch.Size([12, 3, 207])
encoder_outputs torch.Size([12, 3, 8])
encoder_hidden torch.Size([2, 3, 8])
loss tensor(58.7094)




In [None]:
def trainOneBatch(input_batch, input_lengths, target_batch, target_lengths, encoder, decoder, encoder_optimizer, decoder_optimizer, criterion, max_length=MAX_LENGTH):
    """Run one teacher-forced optimisation step on a single batch.

    Args:
        input_batch: seq_len x batch x num_features input tensor.
        input_lengths: per-example input lengths, forwarded to the encoder.
        target_batch: seq_len x batch x num_features target tensor.
        target_lengths: per-example target lengths; the decoder runs for
            ``max(target_lengths)`` steps.
        encoder: module called as ``encoder(input_batch, input_lengths, None)``
            returning ``(outputs, hidden)``.
        decoder: either an attention decoder (has a non-None ``attn``
            attribute, called with the encoder outputs, returns three values)
            or a plain decoder (called without them, returns two values).
        encoder_optimizer, decoder_optimizer: optimizers stepped once each.
        criterion: ``(loss_module, name)``; ``name`` is ``"MSE"`` (the square
            root of the loss is optimised) or ``"Mean Absolute Error"``.
        max_length: unused; kept for interface compatibility.

    Returns:
        The batch loss as a Python float, divided by the number of decoded steps.

    Raises:
        AssertionError: if ``criterion[1]`` is not a recognised name.
    """
    # Zero gradients of both optimizers
    encoder_optimizer.zero_grad()
    decoder_optimizer.zero_grad()

    batch_size = target_batch.size(1)
    num_features = target_batch.size(2)
    num_steps = int(max(target_lengths))

    # Run the whole input batch through the encoder
    encoder_outputs, encoder_hidden = encoder(input_batch, input_lengths, None)

    # First decoder input is an all-zero step; the encoder's final hidden
    # state seeds the decoder.
    decoder_input = torch.zeros(batch_size, num_features, device=target_batch.device)
    decoder_hidden = encoder_hidden
    uses_attention = getattr(decoder, "attn", None) is not None

    # Decode one step at a time with teacher forcing
    step_outputs = []
    for t in range(num_steps):
        if uses_attention:
            decoder_output, decoder_hidden, _ = decoder(decoder_input, decoder_hidden, encoder_outputs)
        else:
            decoder_output, decoder_hidden = decoder(decoder_input, decoder_hidden)
        # Plain decoders keep a leading S=1 axis; normalise to B x NFeatures.
        step_outputs.append(decoder_output.reshape(batch_size, num_features))
        decoder_input = target_batch[t] # Next input is current target
    all_decoder_outputs = torch.stack(step_outputs)
    targets = target_batch[:num_steps]

    # Calculate loss. The outputs must stay attached to the graph here,
    # otherwise backward() has nothing to differentiate.
    loss_fn, loss_name = criterion[0], criterion[1]
    if loss_name == "MSE":
        loss = torch.sqrt(loss_fn(all_decoder_outputs, targets))
    elif loss_name == "Mean Absolute Error":
        loss = loss_fn(all_decoder_outputs, targets)
    else:
        raise AssertionError("Cannot match loss")

    loss.backward()

    encoder_optimizer.step()
    decoder_optimizer.step()

    return loss.item() / num_steps

In [None]:
def train(dataObj, n_layers= 2, hidden_state_size=64,n_epochs=100, batch_size = 64, initialLR = 1e-2, lrDecayRatio=0.10, lrDecayBegginingEpoch=20, lrDecayEvery=10):
    """Train an EncoderRNN / AttnDecoderRNN pair on ``dataObj`` with SGD.

    Args:
        dataObj: object exposing ``xExamples``
            (num_examples x seq_length x num_features) and
            ``random_batch(batch_size)``.
        n_layers: number of stacked GRU layers in encoder and decoder.
        hidden_state_size: GRU hidden size.
        n_epochs: number of epochs; one epoch draws
            ``ceil(num_examples / batch_size)`` random batches.
        batch_size: examples per batch.
        initialLR: starting learning rate.
        lrDecayRatio: fraction by which the rate is reduced at each decay.
        lrDecayBegginingEpoch: first epoch at which the rate is decayed.
        lrDecayEvery: number of epochs between decays after that.

    Returns:
        ``(encoder, decoder, epoch_losses)`` where ``epoch_losses`` holds the
        mean per-batch training loss of every epoch.
    """
    # Read the dimensions straight from the array layout
    # (num_examples x seq_length x num_features).
    num_examples, sequence_length, num_features = dataObj.xExamples.shape

    # EncoderRNN(sequence_length, num_features, hidden_size, n_layers=1, dropout=0.1)
    encoderTest = EncoderRNN(sequence_length, num_features,
                             hidden_state_size, n_layers=n_layers)

    decoderTest = AttnDecoderRNN(hidden_state_size, num_features,
                                 n_layers=n_layers, method="general")

    lr = initialLR
    encoder_optimizer = optim.SGD(encoderTest.parameters(), lr=lr)
    decoder_optimizer = optim.SGD(decoderTest.parameters(), lr=lr)
    criterion = (nn.L1Loss(), "Mean Absolute Error")

    # Ceiling division without needing the math module.
    batches_per_epoch = max(1, -(-num_examples // batch_size))
    epoch_losses = []

    for epoch in range(n_epochs):
        # Check whether we need to reduce learning rate
        if (epoch >= lrDecayBegginingEpoch) and ((epoch - lrDecayBegginingEpoch) % lrDecayEvery == 0):
            lr = lr * (1 - lrDecayRatio)
            for optimizer in (encoder_optimizer, decoder_optimizer):
                for group in optimizer.param_groups:
                    group['lr'] = lr

        running_loss = 0.0
        for _ in range(batches_per_epoch):
            input_batch, input_lengths, target_batch, target_lengths = dataObj.random_batch(batch_size)
            running_loss += trainOneBatch(input_batch, input_lengths, target_batch, target_lengths,
                                          encoderTest, decoderTest,
                                          encoder_optimizer, decoder_optimizer, criterion)

        epoch_loss = running_loss / batches_per_epoch
        epoch_losses.append(epoch_loss)
        print('epoch %d/%d  lr %.6f  loss %.4f' % (epoch + 1, n_epochs, lr, epoch_loss))

    return encoderTest, decoderTest, epoch_losses

In [None]:
import time
import math


def asMinutes(s):
    """Format a duration of ``s`` seconds as ``'<minutes>m <seconds>s'``."""
    whole_minutes = math.floor(s / 60)
    leftover_seconds = s - whole_minutes * 60
    return '%dm %ds' % (whole_minutes, leftover_seconds)


def timeSince(since, percent):
    """Return ``'<elapsed> (- <remaining>)'`` for a job started at ``since``.

    ``percent`` is the completed fraction of the job; the remaining time is
    extrapolated linearly from the time elapsed so far.
    """
    elapsed = time.time() - since
    projected_total = elapsed / (percent)
    remaining = projected_total - elapsed
    return '%s (- %s)' % (asMinutes(elapsed), asMinutes(remaining))

In [None]:
def trainIters(encoder, decoder, xMatrix, yMatrix, n_iters, model_description, xVal, yVal, print_every=1000, plot_every=100, learning_rate=0.01):
    """Train on ``n_iters`` randomly chosen single examples and plot the losses.

    Args:
        encoder, decoder: modules to train (see ``trainOneBatch`` for the
            call conventions they must follow).
        xMatrix, yMatrix: arrays indexed as ``[example] -> seq_len x num_features``.
        n_iters: number of single-example training steps.
        model_description: text appended to the plot titles.
        xVal, yVal: validation arrays, same layout as ``xMatrix``/``yMatrix``.
        print_every: print the running mean training loss every this many steps.
        plot_every: record training and validation loss every this many steps.
        learning_rate: SGD learning rate for both optimizers.

    Returns:
        ``(plot_losses, validationLosses)``, the recorded loss curves.
    """
    start = time.time()
    # Callers may pass a float (e.g. N_iters / 100); the modulo checks and
    # plot spacing below want whole step counts.
    print_every = max(1, int(print_every))
    plot_every = max(1, int(plot_every))

    plot_losses = []
    validationLosses = []
    print_loss_total = 0  # Reset every print_every
    plot_loss_total = 0  # Reset every plot_every
    encoder_optimizer = optim.SGD(encoder.parameters(), lr=learning_rate)
    decoder_optimizer = optim.SGD(decoder.parameters(), lr=learning_rate)
    #criterion = (nn.MSELoss(),"MSE")
    criterion = (nn.L1Loss(reduction='mean'), "Mean Absolute Error")
    #valLoss = (nn.MSELoss(), "MSE")
    valLoss = (nn.L1Loss(reduction='mean'), "Mean Absolute Error")

    for iteration in range(1, n_iters + 1):
        choice = np.random.randint(0, xMatrix.shape[0])
        # seq_len x num_features -> seq_len x 1 x num_features (batch of one).
        input_batch = torch.as_tensor(xMatrix[choice], dtype=torch.float32, device=device).unsqueeze(1)
        target_batch = torch.as_tensor(yMatrix[choice], dtype=torch.float32, device=device).unsqueeze(1)
        input_lengths = torch.tensor([input_batch.size(0)])
        target_lengths = torch.tensor([target_batch.size(0)])

        loss = trainOneBatch(input_batch, input_lengths, target_batch, target_lengths,
                             encoder, decoder, encoder_optimizer, decoder_optimizer, criterion)
        print_loss_total += loss
        plot_loss_total += loss

        if iteration % print_every == 0:
            print_loss_avg = print_loss_total / print_every
            print_loss_total = 0
            print('%s (%d %d%%) %.4f' % (timeSince(start, iteration / n_iters),
                                         iteration, iteration / n_iters * 100, print_loss_avg))

        if iteration % plot_every == 0:
            plot_loss_avg = plot_loss_total / plot_every
            plot_losses.append(plot_loss_avg)
            plot_loss_total = 0
            # Get validation loss (stored as a plain float so it plots cleanly)
            validationLosses.append(float(validate(encoder, decoder, xVal, yVal, valLoss[0])))

    # Each curve is labelled with the name of the loss that produced it.
    showPlot(validationLosses, plot_every, model_description, valLoss[1], training=False)
    showPlot(plot_losses, plot_every, model_description, criterion[1], training=True)
    return plot_losses, validationLosses

In [None]:
import matplotlib.pyplot as plt
# Use the non-interactive Agg backend so figures are rendered off-screen and written with savefig.
# NOTE(review): this replaces the backend selected by `%matplotlib inline` in the first cell — confirm that is intended.
plt.switch_backend('agg')
import matplotlib.ticker as ticker
import numpy as np
import os

def showPlot(points, plot_every, model_description, lossDescription, training=True):
    """Plot a loss curve and save it as a new PNG under ``./figs``.

    Args:
        points: loss values, one per recorded interval.
        plot_every: number of iterations between consecutive points (x spacing).
        model_description: text appended to the plot title.
        lossDescription: name of the loss; used as y label and in the title.
        training: selects the ``training`` or ``Validation`` title/file prefix.

    The file is named ``<prefix>_loss_plot_<k>.png`` with the smallest ``k``
    (starting at 0) that does not already exist, so earlier plots are never
    overwritten.
    """
    phase = "training" if training else "Validation"

    plt.rcParams.update({'font.size': 8})
    # A single figure: subplots() already creates one, so no plt.figure().
    fig, ax = plt.subplots()

    ax.set_xlabel("iteration")
    ax.set_ylabel(lossDescription)
    ax.plot(np.arange(len(points)) * plot_every, points)
    ax.grid()
    ax.set_title("{} {} - {}".format(phase, lossDescription, model_description))

    out_dir = "./figs"
    if not os.path.isdir(out_dir):
        os.makedirs(out_dir)

    # Explicit counter instead of editing the last digit of the name, which
    # stops producing consecutive numbers once the index needs two digits.
    index = 0
    filestring = "{}/{}_loss_plot_{}.png".format(out_dir, phase, index)
    while os.path.isfile(filestring):
        index += 1
        filestring = "{}/{}_loss_plot_{}.png".format(out_dir, phase, index)

    fig.savefig(filestring)
    # Release the figure; otherwise every call keeps one alive in pyplot.
    plt.close(fig)

In [None]:
def evaluate(encoder, decoder, inputSequence, max_length=MAX_LENGTH):
    """Predict ``max_length`` future steps for one input sequence.

    Args:
        encoder: module called as ``encoder(batch, lengths, None)`` returning
            ``(outputs, hidden)``.
        decoder: either an attention decoder (has a non-None ``attn``
            attribute, called with the encoder outputs, returns three values)
            or a plain decoder (called without them, returns two values).
        inputSequence: seq_len x num_features tensor (or array-like).
        max_length: number of steps to generate.

    Returns:
        A list of ``max_length`` 1-D tensors, one prediction per step. Each
        prediction is fed back as the next decoder input (no teacher forcing).
    """
    # Dropout must be off while predicting; restore the previous modes after.
    encoder_was_training = encoder.training
    decoder_was_training = decoder.training
    encoder.eval()
    decoder.eval()
    try:
        with torch.no_grad():
            # seq_len x num_features -> seq_len x 1 x num_features (batch of one).
            input_batch = torch.as_tensor(inputSequence, dtype=torch.float32).unsqueeze(1)
            input_lengths = torch.tensor([input_batch.size(0)])  # should be 12 for traffic data 1 hr.

            encoder_outputs, encoder_hidden = encoder(input_batch, input_lengths, None)

            # First decoder input is an all-zero step of the same width as the input.
            decoder_input = torch.zeros(1, input_batch.size(2), device=input_batch.device)
            decoder_hidden = encoder_hidden
            uses_attention = getattr(decoder, "attn", None) is not None

            nextSequence = []
            for _ in range(max_length):
                if uses_attention:
                    decoder_output, decoder_hidden, _ = decoder(decoder_input, decoder_hidden, encoder_outputs)
                else:
                    decoder_output, decoder_hidden = decoder(decoder_input, decoder_hidden)

                # Normalise to 1 x num_features whether or not the decoder
                # kept a leading S=1 axis.
                prediction = decoder_output.reshape(1, -1)
                nextSequence.append(prediction.squeeze(0))
                decoder_input = prediction
            return nextSequence
    finally:
        encoder.train(encoder_was_training)
        decoder.train(decoder_was_training)


In [None]:
def evaluateRandomly(encoder, decoder, n=10):
    """Print input, target and predicted sequence for ``n`` random pairs.

    Pairs are drawn from the module-level ``tensorPairs``.
    NOTE(review): ``tensorPairs`` is not defined in any cell visible here;
    it must exist before this function is called.
    """
    for _ in range(n):
        pair = random.choice(tensorPairs)
        print('>', pair[0])
        print('=', pair[1])
        nextSequence = evaluate(encoder, decoder, pair[0])
        # str.join only accepts strings, so render each predicted step first.
        outputSequence = ' '.join(
            str(step.tolist() if hasattr(step, 'tolist') else step)
            for step in nextSequence
        )
        print('<', outputSequence)
        print('')

In [None]:
def validate(encoder, decoder, xMatrix, yMatrix, lossFn, n_vals=100):
    """Estimate the validation loss on ``n_vals`` randomly chosen examples.

    Args:
        encoder, decoder: modules passed through to ``evaluate``.
        xMatrix, yMatrix: arrays indexed as ``[example] -> seq_len x num_features``.
        lossFn: callable ``lossFn(prediction, target)`` applied per time step.
        n_vals: number of examples to sample (with replacement).

    Returns:
        The mean over sampled examples of the mean per-step loss.

    Raises:
        ValueError: if ``n_vals`` is not positive.
    """
    if n_vals <= 0:
        raise ValueError("n_vals must be positive, got {}".format(n_vals))

    loss = 0.0
    for _ in range(n_vals):
        choice = np.random.randint(0, xMatrix.shape[0])
        # torch.as_tensor places the data on `device` directly; the legacy
        # FloatTensor constructor rejects non-CPU devices.
        input_tensor = torch.as_tensor(xMatrix[choice], dtype=torch.float32, device=device)
        target_tensor = torch.as_tensor(yMatrix[choice], dtype=torch.float32, device=device)
        nextSequence = evaluate(encoder, decoder, input_tensor)

        sequence_loss = 0.0
        for predicted_step, target_step in zip(nextSequence, target_tensor):
            sequence_loss += lossFn(predicted_step, target_step)
        loss += sequence_loss / len(nextSequence)
    return loss / n_vals

In [None]:
# Train a GRU encoder/decoder on the traffic arrays and plot the loss curves.
# NOTE(review): trafficX / trafficY / trafficValX / trafficValY are not defined
# in any cell visible here — they are indexed below as
# num_examples x seq_length x num_features arrays; confirm where they come from.
hidden_size = 256
sequence_length = trafficX.shape[1]
input_size = trafficX.shape[2]
output_size = trafficY.shape[2]
num_layers = 2
modelDescription = "RNN with GRU, {} unit hidden state, {} layer GRU".format(hidden_size, num_layers) 
N_iters = 2000 #N_iters = trafficX.shape[0]
# EncoderRNN(sequence_length, num_features, hidden_size, n_layers=1, dropout=0.1)
encoder1 = EncoderRNN(sequence_length, input_size, hidden_size, n_layers=num_layers).to(device)
# DecoderRNN(hidden_size, output_size, n_layers=1, dropout=0.1)
decoder1 = DecoderRNN(hidden_size, output_size, n_layers=num_layers).to(device)
# Integer division keeps plot_every a whole number of iterations.
trainIters(encoder1, decoder1, trafficX, trafficY, N_iters, modelDescription, trafficValX, trafficValY, print_every=100, plot_every=N_iters // 100)

In [None]:
# Open the raw training archive for manual inspection in the cells below.
trafficData = np.load("../DCRNN/data/train.npz")

In [None]:
# List the names of the arrays stored in the archive.
trafficData.files

In [None]:
# Display the archive's `x_offsets` array.
trafficData["x_offsets"]

In [None]:
# Display the shape of the raw `x` array, before the Data class slices and reshapes it.
trafficData['x'].shape

In [None]:
# Scratch check of summed L1 loss: six element pairs that each differ by 1, so the result is 6.
# reduction='sum' is the current spelling of the deprecated size_average=False.
l = nn.L1Loss(reduction='sum')
t1 = torch.FloatTensor([[1,2,3], [4,5,6]])
t2 = torch.FloatTensor([[2,3,4],[5,6,7]]) 
l(t1,t2)

In [None]:
# L1 validation loss of the trained pair, sampling as many random examples as the validation set holds.
validate(encoder1, decoder1, trafficValX, trafficValY, nn.L1Loss(), n_vals=trafficValX.shape[0])

In [None]:
# Scratch tensor of shape 3 x 4 x 2 used in the cells below to check what transpose(0, 1) does.
t1 = torch.FloatTensor([[[1, -1],[2, -2],[3, -3],[4,-4]], [[5, -5],[6, -6],[7,-7],[8, -8]], [[9,-9],[10,-10], [11,-11],[12,-12]]])
t1.size()

In [None]:
# Display the original tensor.
t1

In [None]:
# Swap the first two axes: 3 x 4 x 2 -> 4 x 3 x 2.
t2 = t1.transpose(0,1)
t2

In [None]:
# First slice along axis 0 of the original tensor ...
t1[0,:,:]

In [None]:
# ... which is the same data as index 0 along axis 1 of the transposed tensor.
t2[:,0,:]