In [93]:
from __future__ import unicode_literals, print_function, division
from io import open
import unicodedata
import string
import re
import random
import numpy as np
import matplotlib.pyplot as plt
import torch
import torch.nn as nn
from torch import optim
import torch.nn.functional as F
%matplotlib inline
clip = 50
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

In [164]:
class Data():
    def __init__(self, filepath):
        data = np.load(filepath)
        xs = data['x'][:,:,:,0]
        ys = data['y'][:,:,:,0]
        self.xExamples = xs.reshape((data['x'].shape[0], data['x'].shape[1], -1)) # num_examples x seq_length x num_features
        self.yExamples = ys.reshape((data['y'].shape[0], data['y'].shape[1], -1)) # num_examples x seq_length x num_features
        self._device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
    def getSequenceLength(self):
        #return torch.IntTensor(self.xExamples.shape[1], device=self._device)
        return self.xExamples.shape[1]
    
    def getNumFeatures(self):
        #return torch.IntTensor(self.xExamples.shape[2], device=self._device)
        return self.xExamples.shape[2]
    
    def getNumSamples(self):
        return self.xExamples.shape[0]
    
    def getXShape(self):
        return self.xExamples.shape

    def random_batch(self, batch_size):
        input_seqs = []
        target_seqs = []
        
        #Choose random pairs
        for i in range(batch_size):
            pairIDX = np.random.randint(0, self.getNumSamples())
            input_seqs.append(self.xExamples[pairIDX, :, :])
            target_seqs.append(self.yExamples[pairIDX, :, :])
            
        input_lengths = torch.IntTensor([len(s) for s in input_seqs], device=self._device).cuda()
        target_lengths = torch.IntTensor([len(s) for s in target_seqs], device=self._device).cuda()
        
        #convert to tensors, transpose into (max_len, x batch_size)
        
        #input_var = Variable(torch.LongTensor(input_padded)).transpose(0, 1)
        #target_var = Variable(torch.LongTensor(target_padded)).transpose(0, 1)
        inputTensor = torch.FloatTensor(input_seqs, device=self._device).cuda().transpose(0, 1)
        targetTensor = torch.FloatTensor(target_seqs, device=self._device).cuda().transpose(0, 1)
        return inputTensor, input_lengths, targetTensor, target_lengths

In [165]:
TrafficDataTrainObj = Data("./data/train.npz")
TrafficDataValObj = Data("./data/val.npz")

In [166]:
TrafficDataTrainObj.getXShape()

(23974, 12, 207)

In [167]:
torch.cuda.is_available()

True

In [169]:
inputTTest, input_lengthsTest, targetTTest, target_lengthsTest = TrafficDataTrainObj.random_batch(2)
inputTTest.size()
print(TrafficDataTrainObj.getNumSamples())
print(TrafficDataTrainObj.getNumFeatures())
print(TrafficDataTrainObj.getSequenceLength())

23974
207
12


In [208]:
class EncoderRNN(nn.Module):
    def __init__(self, sequence_length, num_features, hidden_size, n_layers=1, dropout=0.1):
        super(EncoderRNN, self).__init__()
        self.sequence_length = sequence_length
        self.num_features = num_features
        self.hidden_size = hidden_size
        self.n_layers = n_layers
        self.dropout = dropout
        self._device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
        self.gru = nn.GRU(num_features, hidden_size, n_layers, dropout=self.dropout).cuda()

        
    def forward(self, input_seqs, input_lengths, hidden=None):
        # Note: we run this all at once (over multiple batches of multiple sequences)
        sequence_lengths = torch.IntTensor([self.sequence_length for i in range(input_seqs.size(1))], device=self._device).cuda()
        packed = torch.nn.utils.rnn.pack_padded_sequence(input_seqs, sequence_lengths).cuda()
        outputs, hidden = self.gru(packed, hidden)
        outputs, _ = torch.nn.utils.rnn.pad_packed_sequence(outputs)
        return outputs, hidden

In [209]:
class Attn(nn.Module):
    def __init__(self, method, hidden_size):
        super(Attn, self).__init__()
        
        self.method = method
        self.hidden_size = hidden_size
        self._device=torch.device("cuda" if torch.cuda.is_available() else "cpu")
        if self.method == 'general':
            self.attn = nn.Linear(self.hidden_size, self.hidden_size).cuda()

        elif self.method == 'concat':
            self.attn = nn.Linear(self.hidden_size * 2, self.hidden_size).cuda()
            self.v = nn.Parameter(torch.FloatTensor(1, self.hidden_size)).cuda() # source code has no device argument

    def forward(self, hidden, encoder_outputs):
        #hidden (rnn_output) size  torch.Size([1, 3, 8])
        #encoder_outputs size  torch.Size([12, 3, 8])
        max_len = encoder_outputs.size(0)
        this_batch_size = encoder_outputs.size(1)
        # Create variable to store attention energies
        attn_energies = torch.autograd.Variable(torch.zeros(this_batch_size, max_len, device=self._device)).cuda() # B x S
        # For each batch of encoder outputs
        for b in range(this_batch_size):
            # Calculate energy for each encoder output
            for i in range(max_len):
                # changing order of :,b in hidden[:,b]
                attn_energies[b, i] = self.score(hidden[:,b], encoder_outputs[i, b].unsqueeze(0)).cuda()

        # Normalize energies to weights in range 0 to 1, resize to 1 x B x S
        return F.softmax(attn_energies).unsqueeze(1)
    
    def score(self, hidden, encoder_output):
        if self.method == "general":
            energy = self.attn(encoder_output).cuda()
            energy = energy.squeeze()
            hidden = hidden.squeeze().cuda()
            energy = hidden.dot(energy)

            return energy
        else:
            assert False, "sorry I didn't implement that method yet"
            """
            if self.method == 'dot':
                energy = hidden.dot(encoder_output)
                return energy

            elif self.method == 'general':
                energy = self.attn(encoder_output)
                energy = energy.squeeze()
                hidden = hidden.squeeze()
                energy = hidden.dot(energy)

                return energy

            elif self.method == 'concat':
                energy = self.attn(torch.cat((hidden, encoder_output), 1))
                energy = self.v.dot(energy)
                return energy
            """                          

In [210]:
class AttnDecoderRNN(nn.Module):
    def __init__(self, hidden_size, output_size, n_layers=1, dropout=0.1, method=None):
        super(AttnDecoderRNN, self).__init__()

        # Keep for reference
        self.hidden_size = hidden_size
        self.output_size = output_size
        self.n_layers = n_layers
        self.dropout = dropout
        self._device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
        # Define layers
        self.gru = nn.GRU(self.output_size, hidden_size, n_layers, dropout=dropout).cuda()
        self.concat = nn.Linear(hidden_size * 2, hidden_size).cuda()
        self.out = nn.Linear(hidden_size, output_size).cuda()
        if method != None:
            self.attn = Attn(method, hidden_size).cuda()

    def forward(self, input_seq, last_hidden, encoder_outputs):
        # Note: we run this one step at a time S = 1
        batch_size = input_seq.size(0)
        expandedInput = input_seq.unsqueeze(0)# S=1 x B X output size
        
        # Get current hidden state from decoder input and last hidden state
        rnn_output, hidden = self.gru(expandedInput.cuda(), last_hidden.cuda())
        
        #Calculate attention from current RNN state and all encoder outputs
        #apply  to encoder outputs to get weighted attention
        attn_weights = self.attn(rnn_output, encoder_outputs).cuda()
        context = attn_weights.bmm(encoder_outputs.transpose(0,1)).cuda() #B x S x NHidden
        
        rnn_output = rnn_output.squeeze(0) # S=1 x B x NHidden -> B x NHidden
        context = context.squeeze(1) # B x S=1 x NHidden -> B x NHidden
        concat_input = torch.cat((rnn_output, context), 1)
        concat_output = F.tanh(self.concat(concat_input))
        
        output = self.out(concat_output)
        
        # Return final output, hidden state
        return output, hidden, attn_weights

In [211]:
class DecoderRNN(nn.Module):
    def __init__(self, hidden_size, output_size, n_layers=1, dropout=0.1):
        super(DecoderRNN, self).__init__()

        # Keep for reference
        self.hidden_size = hidden_size
        self.output_size = output_size
        self.n_layers = n_layers
        self.dropout = dropout
        self._device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
        # Define layers
        self.gru = nn.GRU(self.output_size, hidden_size, n_layers, dropout=dropout).cuda()
        self.concat = nn.Linear(hidden_size * 2, hidden_size).cuda()
        self.out = nn.Linear(hidden_size, output_size).cuda()

    def forward(self, input_seq, last_hidden):
        # Note: we run this one step at a time S = 1
        batch_size = input_seq.size(0)
        expandedInput = input_seq.unsqueeze(0)# S=1 x B X output size
        
        # Get current hidden state from decoder input and last hidden state
        rnn_output, hidden = self.gru(expandedInput.cuda(), last_hidden.cuda())
        
        output = self.out(rnn_output).cuda()
        
        # Return final output, hidden state
        return output, hidden

In [212]:
def testModels():
    small_batch_size = 3
    input_batches, input_lengths, target_batches, target_lengths = TrafficDataTrainObj.random_batch(small_batch_size)
    print('input_batches', input_batches.size()) # (max_len x batch_size x NFeatues)
    print('target_batches', target_batches.size()) # (max_len x batch_size x NFeatures)
    small_hidden_size = 8
    small_n_layers = 2
    
    #sequence_lengths, num_features, hidden_size, n_layers=1, dropout=0.1
    encoderTest = EncoderRNN(input_lengths[0], input_batches.size(2), small_hidden_size, n_layers=small_n_layers)
    
    #attn_model, hidden_size, output_size, n_layers=1, dropout=0.1
    #decoderTest = LuongAttnDecoderRNN('general', small_hidden_size, target_batches.size(2), n_layers=small_n_layers)
    
    #hidden_size, output_size, n_layers=1, dropout=0.1
    decoderTest = DecoderRNN(small_hidden_size, target_batches.size(2), n_layers=small_n_layers)
    
    #Encode
    encoder_outputs, encoder_hidden = encoderTest(input_batches, input_lengths, hidden=None)
    print('encoder_outputs', encoder_outputs.size()) # max_len x batch_size x hidden_size
    print('encoder_hidden', encoder_hidden.size()) # n_layers x batch_size x hidden_size
    
    # Decoder
    max_target_length = max(target_lengths)
    decoder_input = torch.autograd.Variable(torch.FloatTensor(np.zeros((small_batch_size, target_batches.size(2))), device="cuda"))

    # use last encoder hidden as initial decoder hidden
    decoder_hidden = encoder_hidden
    all_decoder_outputs = torch.autograd.Variable(torch.zeros(max_target_length, small_batch_size, target_batches.size(2), device="cuda")).cuda()

    # Run through decoder one time step at a time
    for t in range(max_target_length):
        #print("t=",t)
        decoder_output, decoder_hidden = decoderTest(decoder_input, decoder_hidden)
        all_decoder_outputs[t] = decoder_output
        decoder_input = target_batches[t] # Next input is current target
        
    loss = nn.L1Loss()
    print(type(all_decoder_outputs.detach()))
    print(type(target_batches))
    lossVal = loss(all_decoder_outputs.detach().cuda(), target_batches.cuda()) / max_target_length.data[0].float().cuda() / small_batch_size
    print("loss", lossVal)

In [213]:
testModels()

input_batches torch.Size([12, 3, 207])
target_batches torch.Size([12, 3, 207])
encoder_outputs torch.Size([12, 3, 8])
encoder_hidden torch.Size([2, 3, 8])
<class 'torch.Tensor'>
<class 'torch.Tensor'>
loss tensor(1.6430, device='cuda:0')




In [214]:
def testModelAttn():
    small_batch_size = 3
    input_batches, input_lengths, target_batches, target_lengths = TrafficDataTrainObj.random_batch(small_batch_size)
    print('input_batches', input_batches.size()) # (max_len x batch_size x NFeatues)
    print('target_batches', target_batches.size()) # (max_len x batch_size x NFeatures)
    small_hidden_size = 8
    small_n_layers = 2
    
    #sequence_lengths, num_features, hidden_size, n_layers=1, dropout=0.1
    encoderTest = EncoderRNN(input_lengths[0], input_batches.size(2), small_hidden_size, n_layers=small_n_layers)
    
    #self, hidden_size, output_size, n_layers=1, dropout=0.1
    decoderTest = AttnDecoderRNN(small_hidden_size, target_batches.size(2), n_layers=small_n_layers, method="general")
    
    #Encode
    encoder_outputs, encoder_hidden = encoderTest(input_batches, input_lengths[0], None)
    print('encoder_outputs', encoder_outputs.size()) # max_len x batch_size x hidden_size
    print('encoder_hidden', encoder_hidden.size()) # n_layers x batch_size x hidden_size
    
    # Decoder
    max_target_length = max(target_lengths)
    decoder_input = torch.autograd.Variable(torch.FloatTensor(np.zeros((small_batch_size, target_batches.size(2)))))

    # use last encoder hidden as initial decoder hidden
    decoder_hidden = encoder_hidden
    all_decoder_outputs = torch.autograd.Variable(torch.zeros(max_target_length, small_batch_size, target_batches.size(2)))

    # Run through decoder one time step at a time
    for t in range(max_target_length):
        #print("t=",t)
        decoder_output, decoder_hidden, attnVec = decoderTest(decoder_input, decoder_hidden, encoder_outputs)
        all_decoder_outputs[t] = decoder_output
        decoder_input = target_batches[t] # Next input is current target
        
    loss = nn.L1Loss()
    lossVal = loss(all_decoder_outputs.detach().cuda(), target_batches.cuda()) / small_batch_size / max_target_length.float()
    print("loss", lossVal)

In [215]:
testModelAttn()

input_batches torch.Size([12, 3, 207])
target_batches torch.Size([12, 3, 207])
encoder_outputs torch.Size([12, 3, 8])
encoder_hidden torch.Size([2, 3, 8])
loss tensor(1.7399, device='cuda:0')




In [226]:
def trainOneBatch(input_batch, input_lengths, target_batch, target_lengths, encoder, decoder, encoder_optimizer, decoder_optimizer, criterion):
    # Zero gradients of both optimizers
    encoder_optimizer.zero_grad()
    decoder_optimizer.zero_grad()
    loss = 0 # Added onto for each word
    batch_size = input_batch.size(1)
    
    # Run words through encoder
    encoder_outputs, encoder_hidden = encoder(input_batch, input_lengths, None)
    
    # Prepare decoder input and output variables
    decoder_input = torch.autograd.Variable(torch.FloatTensor(np.zeros((batch_size, target_batch.size(2))), device="cuda")).cuda()
    
    # use last encoder hidden as initial decoder hidden
    decoder_hidden = encoder_hidden.cuda()
    all_decoder_outputs = torch.autograd.Variable(torch.zeros(max(target_lengths), batch_size, target_batch.size(2), device="cuda")).cuda()

    # Decode
    for t in range(max(target_lengths
                      )):
        decoder_output, decoder_hidden, decoder_attn = decoder(
            decoder_input, decoder_hidden, encoder_outputs
        )
        all_decoder_outputs[t] = decoder_output.cuda()
        decoder_input = target_batch[t].detach().cuda() # Next input is current target
    
    #Calculate Loss
    if criterion[1] == "MSE":
        loss += torch.sqrt(criterion[0])(all_decoder_outputs.cuda(), target_batch.cuda())
    elif criterion[1] == "Mean Absolute Error":
        loss += criterion[0](all_decoder_outputs.cuda(), target_batch.cuda())
    else:
        assert False, "Cannot match loss"
    
    loss.backward()
    
    encoder_optimizer.step()
    decoder_optimizer.step()
    
    return loss.item() / float(max(target_lengths)) / float(batch_size)

In [227]:
def evaluate(input_batch, input_lengths, target_batch, target_lengths, encoder, decoder):
    with torch.no_grad():
        loss = 0 # Added onto for each word
        batch_size = input_batch.size(1)
        num_features = input_batch.size(2)

        # Run batch through encoder
        encoder_outputs, encoder_hidden = encoder(input_batch, input_lengths, None)

        # Prepare decoder input and output variables
        decoder_input = torch.autograd.Variable(torch.FloatTensor(np.zeros((batch_size, num_features)), device="cuda")).cuda()

        # use last encoder hidden as initial decoder hidden
        decoder_hidden = encoder_hidden.cuda()
        all_decoder_outputs = torch.autograd.Variable(torch.zeros(max(target_lengths), batch_size, num_features, device="cuda")).cuda()

        # Decode One sequence at a time
        for t in range(max(target_lengths)):
            decoder_output, decoder_hidden, decoder_attn = decoder(
                decoder_input, decoder_hidden, encoder_outputs
            )
            all_decoder_outputs[t] = decoder_output.cuda()
            decoder_input = target_batch[t].cuda() # Next input is current target
        return all_decoder_outputs

In [233]:
def validate(validationDataObj, batch_size, encoder, decoder, criterion):
    valLoss = 0.0
    num_batches = int(np.ceil(validationDataObj.getNumSamples() / batch_size))
    for iteration in range(num_batches):
        inputTensor, input_lengths, targetTensor, target_lengths = validationDataObj.random_batch(batch_size)
        output = evaluate(inputTensor, input_lengths, targetTensor, target_lengths, encoder, decoder)
        valLoss += criterion[0](output.cuda(), targetTensor.cuda())
    return valLoss / num_batches / max(target_lengths).float() / float(batch_size)

In [234]:
import time
import math


def asMinutes(s):
    m = math.floor(s / 60)
    s -= m * 60
    return '%dm %ds' % (m, s)


def timeSince(since, percent):
    now = time.time()
    s = now - since
    es = s / (percent)
    rs = es - s
    return '%s (- %s)' % (asMinutes(s), asMinutes(rs))

In [235]:
import matplotlib.pyplot as plt
plt.switch_backend('agg')
import matplotlib.ticker as ticker
import numpy as np
import os

def plotTrainValCurve(trainLosses, valLosses, model_description, lossDescription):
    plt.rcParams.update({'font.size': 8})
    plt.figure()
    fig, ax = plt.subplots()
    plt.xlabel("Epoch")
    plt.ylabel(lossDescription)
    plt.plot(np.arange(len(trainLosses)), trainLosses, color="red", label="train loss")
    plt.plot(np.arange(len(valLosses)), valLosses, color="blue", label="validation loss")
    plt.grid()
    plt.legend()
    plt.title("Losses for {}".format(model_description))
    
    filestring = "./figs/train_val_loss_plot_0.png"
    while(os.path.isfile(filestring)):
        filestring = filestring[:-5] + str(int(filestring[-5]) + 1) + ".png"
    plt.savefig(filestring)

In [236]:
def train(dataObj, validationDataObj, n_layers= 2, hidden_state_size=64,
          n_epochs=100, batch_size = 64, initialLR = 1e-2,
          lrDecayRatio=0.10, lrDecayBegginingEpoch=20,
          lrDecayEvery=10, print_every=1000, plot_every=100,
          batchesPerEpoch=None):

    if not batchesPerEpoch:
        batchesPerEpoch=int(np.ceil(dataObj.getNumSamples() / batch_size))
    start = time.time()
    trainEpochLosses = []
    valEpochLosses = []
    # Define Criterion
    #criterion = (nn.MSELoss(),"MSE")
    criterion = (nn.L1Loss(size_average=True), "Mean Absolute Error")
    
    sequence_length = dataObj.getSequenceLength()
    num_features = dataObj.getNumFeatures()
    
    encoder = EncoderRNN(sequence_length, num_features,
                             hidden_state_size, n_layers=n_layers)
    
    decoder = AttnDecoderRNN(hidden_state_size, num_features,
                                 n_layers=n_layers, method="general")
    modelDescription = "Encoder Decoder RNN w/ Attn - hidden state size: {}, layers: {}".format(hidden_state_size, n_layers)
    lr = initialLR
    for epoch in range(n_epochs):
        print("epoch {}".format(epoch + 1))
        epochTrainLoss = 0.0
        # Check whether we need to reduce learning rate
        if (epoch >= 20) and (epoch % 10 == 0):
            lr = lr * (1 - lrDecayRatio)
        encoder_optimizer = optim.SGD(encoder.parameters(), lr=lr)
        decoder_optimizer = optim.SGD(decoder.parameters(), lr=lr)
        for iteration in range(batchesPerEpoch):
            print("iteration {}".format(iteration))
            #if (iteration % 10) == 0:
                #print("iteration {} of {}".format(iteration, batchesPerEpoch))
            inputTensor, input_lengths, targetTensor, target_lengths = dataObj.random_batch(batch_size)
            batchLoss = trainOneBatch(inputTensor, input_lengths, targetTensor, target_lengths, encoder, decoder, encoder_optimizer, decoder_optimizer, criterion)
            print("batch loss: {}".format(batchLoss))
            epochTrainLoss += batchLoss
        print("validating")
        # Average validation loss per batch over all validation batches
        epochValidationLoss = validate(validationDataObj, batch_size, encoder, decoder, criterion)
        # Print stats for current epoch
        print('%s (%d %d%%) train loss: %.4f validation loss: %.4f' % (timeSince(start, epoch + 1 / n_epochs),
                                         epoch, epoch + 1 / n_epochs * 100, epochTrainLoss / float(batchesPerEpoch), epochValidationLoss))
        valEpochLosses.append(epochValidationLoss)
        trainEpochLosses.append(epochTrainLoss / float(batchesPerEpoch))
    plotTrainValCurve(trainEpochLosses, valEpochLosses, modelDescription, criterion[1])
    return trainEpochLosses, valEpochLosses

In [237]:
import cProfile
cProfile.run("train(TrafficDataTrainObj, TrafficDataValObj, n_epochs=2, batchesPerEpoch=20)")
#trainLosses, valLosses = train(TrafficDataTrainObj, TrafficDataValObj, n_epochs=2, batchesPerEpoch=20)

epoch 1
iteration 0




batch loss: 0.07083038985729218
iteration 1
batch loss: 0.0674345592657725
iteration 2
batch loss: 0.07188974817593892
iteration 3
batch loss: 0.07211985190709432
iteration 4
batch loss: 0.0704691509405772
iteration 5
batch loss: 0.07506118714809418
iteration 6
batch loss: 0.07406472166379292
iteration 7
batch loss: 0.06742992997169495
iteration 8
batch loss: 0.07345942159493764
iteration 9
batch loss: 0.07411357760429382
iteration 10
batch loss: 0.06839462121327718
iteration 11
batch loss: 0.0668075184027354
iteration 12
batch loss: 0.07518320779005687
iteration 13
batch loss: 0.07265621920426686
iteration 14
batch loss: 0.06984050075213115
iteration 15
batch loss: 0.07020787398020427
iteration 16
batch loss: 0.0688706487417221
iteration 17
batch loss: 0.07062668601671855
iteration 18
batch loss: 0.0648260513941447
iteration 19
batch loss: 0.07155829171339671
validating
1m 55s (- 1m 55s) (0 50%) train loss: 0.0708 validation loss: 0.0712
epoch 2
iteration 0
batch loss: 0.0680212477842

        3    0.000    0.000    0.000    0.000 figure.py:450(_get_axes)
       68    0.000    0.000    0.000    0.000 figure.py:460(_get_dpi)
        3    0.000    0.000    0.000    0.000 figure.py:463(_set_dpi)
        1    0.000    0.000    0.000    0.000 figure.py:480(get_tight_layout)
        2    0.000    0.000    0.000    0.000 figure.py:484(set_tight_layout)
        4    0.000    0.000    0.000    0.000 figure.py:502(get_constrained_layout)
        2    0.000    0.000    0.000    0.000 figure.py:510(set_constrained_layout)
        2    0.000    0.000    0.000    0.000 figure.py:541(set_constrained_layout_pads)
      204    0.000    0.000    0.000    0.000 figure.py:55(_stale_figure_callback)
        2    0.000    0.000    0.000    0.000 figure.py:76(__init__)
        3    0.000    0.000    0.000    0.000 figure.py:778(set_canvas)
        4    0.000    0.000    0.000    0.000 figure.py:80(as_list)
        4    0.000    0.000    0.000    0.000 figure.py:84(<listcomp>)
        4    

       25    0.000    0.000    0.000    0.000 transforms.py:357(ymax)
        6    0.000    0.000    0.000    0.000 transforms.py:364(min)
        6    0.000    0.000    0.000    0.000 transforms.py:371(max)
       23    0.000    0.000    0.000    0.000 transforms.py:378(intervalx)
       23    0.000    0.000    0.000    0.000 transforms.py:386(intervaly)
       12    0.000    0.000    0.000    0.000 transforms.py:395(width)
       12    0.000    0.000    0.000    0.000 transforms.py:404(height)
       53    0.000    0.000    0.000    0.000 transforms.py:422(bounds)
       20    0.000    0.000    0.000    0.000 transforms.py:550(anchored)
       12    0.000    0.000    0.000    0.000 transforms.py:649(count_contains)
        6    0.000    0.000    0.000    0.000 transforms.py:665(count_overlaps)
        6    0.000    0.000    0.000    0.000 transforms.py:674(<listcomp>)
       20    0.000    0.000    0.000    0.000 transforms.py:689(padded)
       24    0.000    0.000    0.000    0.000

FileNotFoundError: [Errno 2] No such file or directory: './figs/train_val_loss_plot_0.png'