In [5]:
# Enable IPython's autoreload extension so that edits to the project modules
# (vocabulary.py, data_loader.py, utils.py, model.py) are picked up automatically.
# Mode 2 reloads imported modules before each cell is executed.
%load_ext autoreload
%autoreload 2

In [6]:
import torch
import torch.nn as nn
from torch.autograd import Variable
from torchvision import transforms
import sys
from pycocotools.coco import COCO
import math
import torch.utils.data as data
import numpy as np
import os
import requests
import time

from utils import train, validate, save_epoch, early_stopping
from data_loader import get_loader
from model import EncoderCNN, DecoderRNN

# Training configuration shared by the cells below.

# Arguments forwarded to get_loader for both the train and val loaders
batch_size = 32
vocab_from_file = True
vocab_threshold = 5

# Sizes handed to the model constructors: embed_size goes to both EncoderCNN and
# DecoderRNN, hidden_size to DecoderRNN only
embed_size, hidden_size = 256, 512

# Exclusive upper bound of the epoch range used by the training loop
num_epochs = 4

In [7]:
# Per-channel mean / std passed to Normalize in both pipelines
channel_mean = (0.485, 0.456, 0.406)
channel_std = (0.229, 0.224, 0.225)

# Training pre-processing: resize, then a random 224 crop and a random horizontal
# flip, followed by tensor conversion and normalisation
train_steps = [
    transforms.Resize(256),
    transforms.RandomCrop(224),
    transforms.RandomHorizontalFlip(),
    transforms.ToTensor(),
    transforms.Normalize(channel_mean, channel_std),
]
transform_train = transforms.Compose(train_steps)

# Validation pre-processing: same resize and normalisation, but a centre crop
# and no flip
val_steps = [
    transforms.Resize(256),
    transforms.CenterCrop(224),
    transforms.ToTensor(),
    transforms.Normalize(channel_mean, channel_std),
]
transform_val = transforms.Compose(val_steps)

In [8]:
# Keyword arguments that are identical for the training and validation loaders
loader_kwargs = dict(batch_size=batch_size,
                     vocab_threshold=vocab_threshold,
                     vocab_from_file=vocab_from_file)

# One loader per split, each with its own transform
train_loader = get_loader(transform=transform_train, mode='train', **loader_kwargs)
val_loader = get_loader(transform=transform_val, mode='val', **loader_kwargs)

# Vocabulary size is taken from the training dataset's vocab
vocab_size = len(train_loader.dataset.vocab)

# Build the encoder / decoder pair
encoder = EncoderCNN(embed_size)
decoder = DecoderRNN(embed_size, hidden_size, vocab_size)

# Put both networks on the GPU when CUDA is available; otherwise leave them where they are
if torch.cuda.is_available():
    for network in (encoder, decoder):
        network.cuda()

Vocabulary successfully loaded from vocab.pkl file!
loading annotations into memory...
Done (t=0.88s)
creating index...
index created!
Obtaining caption lengths...


100%|████████████████████████████████████████████████████████████████████████| 414113/414113 [00:41<00:00, 9871.35it/s]


Vocabulary successfully loaded from vocab.pkl file!
loading annotations into memory...
Done (t=0.46s)
creating index...
index created!
Obtaining caption lengths...


100%|███████████████████████████████████████████████████████████████████████| 202654/202654 [00:19<00:00, 10362.94it/s]


In [5]:
# Cross-entropy loss, moved to the GPU when CUDA is available
criterion = nn.CrossEntropyLoss()
if torch.cuda.is_available():
    criterion = criterion.cuda()

# Parameters handed to the optimizer: the whole decoder, plus only the `embed`
# and `bn` sub-modules of the encoder (no other encoder parameters are included)
params = []
for trainable_part in (decoder, encoder.embed, encoder.bn):
    params.extend(trainable_part.parameters())

# Adam over the selected parameters
optimizer = torch.optim.Adam(params, lr=1e-3)

In [6]:
def _steps_per_epoch(loader):
    """Return ceil(number of caption lengths in the loader's dataset / its batch size)."""
    n_captions = len(loader.dataset.caption_lengths)
    per_batch = loader.batch_sampler.batch_size
    return math.ceil(n_captions / per_batch)


# Total number of training and validation steps per epoch
total_train_step = _steps_per_epoch(train_loader)
total_val_step = _steps_per_epoch(val_loader)
print("Number of training steps:", total_train_step)
print("Number of validation steps:", total_val_step)

Number of training steps: 12942
Number of validation steps: 6333


In [7]:
# Per-epoch history of train / validation losses and validation Bleu-4 scores
train_losses = []
val_losses = []
val_bleus = []
# Best validation Bleu-4 so far; -inf so the first measured score always counts as an improvement
best_val_bleu = float("-inf")

# First epoch index this loop runs (previously an unnamed literal inside range()).
# NOTE(review): range(start_epoch, num_epochs) is empty whenever start_epoch >= num_epochs
# (e.g. start_epoch = 4 with num_epochs = 4); in that case the loop body never executes.
# Raise num_epochs (or lower start_epoch) to actually train from this cell.
start_epoch = 4

start_time = time.time()
for epoch in range(start_epoch, num_epochs):
    # One full pass over the training data, then one over the validation data
    train_loss = train(train_loader, encoder, decoder, criterion, optimizer,
                       vocab_size, epoch, total_train_step)
    train_losses.append(train_loss)
    val_loss, val_bleu = validate(val_loader, encoder, decoder, criterion,
                                  train_loader.dataset.vocab, epoch, total_val_step)
    val_losses.append(val_loss)
    val_bleus.append(val_bleu)
    if val_bleu > best_val_bleu:
        print("Validation Bleu-4 improved from {:0.4f} to {:0.4f}, saving model to best-model.pkl".
              format(best_val_bleu, val_bleu))
        best_val_bleu = val_bleu
        filename = os.path.join("./models", "best-model.pkl")
        save_epoch(filename, encoder, decoder, optimizer, train_losses, val_losses,
                   val_bleu, val_bleus, epoch)
    else:
        print("Validation Bleu-4 did not improve, saving model to model-{}.pkl".format(epoch))
    # Save the entire model anyway, regardless of being the best model so far or not
    filename = os.path.join("./models", "model-{}.pkl".format(epoch))
    save_epoch(filename, encoder, decoder, optimizer, train_losses, val_losses,
               val_bleu, val_bleus, epoch)
    print("Epoch [%d/%d] took %ds" % (epoch, num_epochs, time.time() - start_time))
    if epoch > 5:
        # Stop if the validation Bleu doesn't improve for 3 epochs
        if early_stopping(val_bleus, 3):
            break
    # Restart the timer so the next report covers only the next epoch
    start_time = time.time()

In [22]:
# Resume training from the last mid-epoch training checkpoint.
# When CUDA is unavailable, remap storages to the CPU so a checkpoint containing
# CUDA tensors still loads; with CUDA present, None keeps torch.load's default placement.
map_location = None if torch.cuda.is_available() else (lambda storage, loc: storage)
checkpoint = torch.load(os.path.join('./models', 'train-model-412900.pkl'),
                        map_location=map_location)

# Restore the model weights and the optimizer state
encoder.load_state_dict(checkpoint['encoder'])
decoder.load_state_dict(checkpoint['decoder'])
optimizer.load_state_dict(checkpoint['optimizer'])

# Load start_loss from checkpoint if in the middle of training process; otherwise, comment it out
start_loss = checkpoint['total_loss']

# Load epoch. Add 1 if we start a new epoch
epoch = checkpoint['epoch']
# Load start_step from checkpoint if in the middle of training process; otherwise, comment it out.
# The checkpoint stores the last completed step, so resume from the one after it.
start_step = checkpoint['train_step'] + 1

# Train 1 epoch at a time due to very long training time
train_loss = train(train_loader, encoder, decoder, criterion, optimizer,
                   vocab_size, epoch, total_train_step, start_step, start_loss)

Epoch 4, Train step [12942/12942], 175s, Loss: 2.0197, Perplexity: 7.53603

In [23]:
# Load checkpoints. When CUDA is unavailable, remap storages to the CPU so checkpoints
# containing CUDA tensors still load; with CUDA present, None keeps torch.load's default.
map_location = None if torch.cuda.is_available() else (lambda storage, loc: storage)
train_checkpoint = torch.load(os.path.join('./models', 'train-model-412900.pkl'),
                              map_location=map_location)
epoch_checkpoint = torch.load(os.path.join('./models', 'model-3.pkl'),
                              map_location=map_location)
best_checkpoint = torch.load(os.path.join('./models', 'best-model.pkl'),
                             map_location=map_location)

# Only the epoch number is taken from the mid-epoch training checkpoint.
# The encoder / decoder / optimizer are deliberately NOT restored from it: this is the
# same checkpoint training was resumed from, and train() has since been run on these
# objects. Assuming train() updates them in place (TODO confirm in utils.train),
# reloading here would revert the steps trained after that checkpoint, and the
# end-of-epoch file saved below would not contain the final weights.
epoch = train_checkpoint['epoch']

# Histories up to the previous epoch
train_losses = epoch_checkpoint['train_losses']
val_losses = epoch_checkpoint['val_losses']
val_bleus = epoch_checkpoint['val_bleus']

# Best validation Bleu-4 recorded with the best model
best_val_bleu = best_checkpoint['val_bleu']

# train_loss comes from the train() call in the preceding cell
train_losses.append(train_loss)
print(train_losses, val_losses, val_bleus, best_val_bleu)
print("Training completed for epoch {}, saving model to train-model-{}.pkl".format(epoch, epoch))
filename = os.path.join("./models", "train-model-{}.pkl".format(epoch))
# Validation has not run for this epoch yet, so best_val_bleu is passed in the val_bleu slot
save_epoch(filename, encoder, decoder, optimizer, train_losses, val_losses,
           best_val_bleu, val_bleus, epoch)

[2.356230806823566, 2.3558660150955935, 2.023856584595816] [2.2138556149565702, 2.2143045429669153] [0.11517005905200531, 0.11513478912242286] 0.11946923720486312
Training completed for epoch 4, saving model to train-model-4.pkl


In [28]:
# Resume validation from the last mid-epoch validation checkpoint.
# When CUDA is unavailable, remap storages to the CPU so a checkpoint containing
# CUDA tensors still loads; with CUDA present, None keeps torch.load's default placement.
map_location = None if torch.cuda.is_available() else (lambda storage, loc: storage)
checkpoint = torch.load(os.path.join('./models', 'val-model-45800.pkl'),
                        map_location=map_location)

# Restore the model weights (no optimizer state is needed for validation)
encoder.load_state_dict(checkpoint['encoder'])
decoder.load_state_dict(checkpoint['decoder'])

# Load these from checkpoint if in the middle of validation process; otherwise, comment them out
start_loss = checkpoint['total_loss']
start_bleu = checkpoint['total_bleu_4']

# Load epoch
epoch = checkpoint['epoch']
# Load start_step from checkpoint if in the middle of validation process; otherwise, comment it out.
# The checkpoint stores the last completed step, so resume from the one after it.
start_step = checkpoint['val_step'] + 1

# Validate 1 epoch at a time due to very long validation time
val_loss, val_bleu = validate(val_loader, encoder, decoder, criterion,
                              train_loader.dataset.vocab, epoch, total_val_step,
                              start_step, start_loss, start_bleu)

Epoch 4, Val step [5900/6333], 369s, Loss: 2.2063, Perplexity: 9.0817, Bleu-4: 0.13432
Epoch 4, Val step [6000/6333], 378s, Loss: 2.9563, Perplexity: 19.2258, Bleu-4: 0.1051
Epoch 4, Val step [6100/6333], 368s, Loss: 1.8908, Perplexity: 6.6245, Bleu-4: 0.13504
Epoch 4, Val step [6200/6333], 369s, Loss: 2.0759, Perplexity: 7.9716, Bleu-4: 0.13631
Epoch 4, Val step [6300/6333], 369s, Loss: 2.4022, Perplexity: 11.0471, Bleu-4: 0.0686
Epoch 4, Val step [6333/6333], 126s, Loss: 2.4394, Perplexity: 11.4666, Bleu-4: 0.0940

In [29]:
# Load checkpoints. When CUDA is unavailable, remap storages to the CPU so checkpoints
# containing CUDA tensors still load; with CUDA present, None keeps torch.load's default.
map_location = None if torch.cuda.is_available() else (lambda storage, loc: storage)
checkpoint = torch.load(os.path.join('./models', 'train-model-4.pkl'),
                        map_location=map_location)
best_checkpoint = torch.load(os.path.join('./models', 'best-model.pkl'),
                             map_location=map_location)

# Restore the model weights and optimizer state saved at the end of the training epoch
encoder.load_state_dict(checkpoint['encoder'])
decoder.load_state_dict(checkpoint['decoder'])
optimizer.load_state_dict(checkpoint['optimizer'])

# Load train and validation losses and validation Bleu-4 scores
train_losses = checkpoint['train_losses']
val_losses = checkpoint['val_losses']
val_bleus = checkpoint['val_bleus']
best_val_bleu = best_checkpoint['val_bleu']

# Load epoch
epoch = checkpoint['epoch']

# val_loss / val_bleu come from the validate() call in the preceding cell
val_losses.append(val_loss)
val_bleus.append(val_bleu)
print(train_losses, val_losses, val_bleus, best_val_bleu)

if val_bleu > best_val_bleu:
    print("Validation Bleu-4 improved from {:0.4f} to {:0.4f}, saving model to best-model.pkl".
          format(best_val_bleu, val_bleu))
    best_val_bleu = val_bleu
    print(best_val_bleu)
    filename = os.path.join("./models", "best-model.pkl")
    save_epoch(filename, encoder, decoder, optimizer, train_losses, val_losses,
               val_bleu, val_bleus, epoch)
else:
    print("Validation Bleu-4 did not improve, saving model to model-{}.pkl".format(epoch))
# Save the entire model anyway, regardless of being the best model so far or not
filename = os.path.join("./models", "model-{}.pkl".format(epoch))
save_epoch(filename, encoder, decoder, optimizer, train_losses, val_losses,
           val_bleu, val_bleus, epoch)
if epoch > 5:
    # Report when the validation Bleu hasn't improved for 3 epochs
    if early_stopping(val_bleus, 3):
        print("Val Bleu-4 doesn't improve anymore. Early stopping")


[2.356230806823566, 2.3558660150955935, 2.023856584595816] [2.2138556149565702, 2.2143045429669153, 2.171720981146136] [0.11517005905200531, 0.11513478912242286, 0.12158010500298949] 0.11946923720486312
Validation Bleu-4 improved from 0.1195 to 0.1216, saving model to best-model.pkl
0.12158010500298949
