In [1]:
from google.colab import drive

# Mount Google Drive inside the Colab VM; force_remount refreshes any stale mount.
DRIVE_MOUNT_POINT = '/content/drive'
drive.mount(DRIVE_MOUNT_POINT, force_remount=True)

Mounted at /content/drive


In [2]:
# Work from the project folder on the mounted Drive so that the relative paths
# used below (config/, src/, todo.py) resolve.
%cd /content/drive/MyDrive/VisualQuestionAnswering

/content/drive/MyDrive/VisualQuestionAnswering


In [3]:
# List the contents of the current (project) directory as a quick sanity check.
%ls

[0m[01;34mconfig[0m/  [01;34mdataHm4[0m/  dataHm4.zip  main.ipynb  [01;34m__pycache__[0m/  requirements.txt  [01;34mscratch[0m/  [01;34msrc[0m/  todo.py


In [4]:
# from zipfile import ZipFile

# with ZipFile('dataHm4.zip', 'r') as z:

#     z.extractall(path='.')

In [5]:
# ! pip install -r requirements.txt

In [6]:
# !unzip -u dataHm4.zip -d .

In [7]:
import os
import argparse
import yaml
import torch
import torch.nn as nn
import torch.optim as optim
from torch.optim import lr_scheduler
from torch.utils.data import DataLoader

from src.dataset import VQADataset, VQABatchSampler
from src.train import train_model, test_model
from todo import VQAModel
from src.scheduler import CustomReduceLROnPlateau

In [8]:
def load_datasets(config, phases):
    """Build one ``DataLoader`` per phase from the ``data`` section of ``config``.

    When ``config['data']['preprocess']`` is truthy the preprocessing step is
    run first. Each phase's ``<phase>.pkl`` file is then wrapped in a
    ``VQADataset``, batched by a ``VQABatchSampler`` and exposed through a
    ``DataLoader``.

    NOTE(review): ``preprocess`` is not among the imports visible in this
    notebook -- confirm it is in scope before enabling ``data.preprocess``.

    Args:
        config: full experiment configuration; only ``config['data']`` is read.
        phases: phase names (e.g. ``'train'``, ``'val'``); each needs its own
            sub-section under ``config['data']``.

    Returns:
        Tuple ``(dataloaders, ques_vocab, ans_vocab)``: ``dataloaders`` maps a
        phase name to its ``DataLoader``; both vocabularies are read from the
        ``VQADataset`` class attributes.
    """
    data_cfg = config['data']

    if data_cfg.get('preprocess'):
        print('Preprocessing datasets')
        preprocess(
            data_dir=data_cfg['dir'],
            train_ques_file=data_cfg['train']['ques'],
            train_ans_file=data_cfg['train']['ans'],
            val_ques_file=data_cfg['val']['ques'],
            val_ans_file=data_cfg['val']['ans'])

    print('Loading preprocessed datasets')
    pickle_names = {phase: '{}.pkl'.format(phase) for phase in phases}

    # Raw images are used unless the config says image embeddings were
    # preprocessed; that choice also selects which directory key is read.
    use_raw_images = not data_cfg['images'].get('preprocess')
    dir_key = 'img_dir' if use_raw_images else 'emb_dir'
    image_dirs = {phase: data_cfg[phase][dir_key] for phase in phases}

    # Every dataset is created before any sampler or loader is built.
    datasets = {}
    for phase in phases:
        datasets[phase] = VQADataset(
            data_dir=data_cfg['dir'],
            qafile=pickle_names[phase],
            img_dir=image_dirs[phase],
            phase=phase,
            img_scale=data_cfg['images']['scale'],
            img_crop=data_cfg['images']['crop'],
            raw_images=use_raw_images)

    batch_samplers = {}
    for phase in phases:
        batch_samplers[phase] = VQABatchSampler(
            datasets[phase], data_cfg[phase]['batch_size'])

    dataloaders = {}
    for phase in phases:
        dataloaders[phase] = DataLoader(
            datasets[phase],
            batch_sampler=batch_samplers[phase],
            num_workers=data_cfg['loader']['workers'])

    dataset_sizes = {phase: len(datasets[phase]) for phase in phases}
    print(dataset_sizes)

    question_vocab = VQADataset.ques_vocab
    answer_vocab = VQADataset.ans_vocab
    print("ques vocab size: {}".format(len(question_vocab)))
    print("ans vocab size: {}".format(len(answer_vocab)))
    return dataloaders, question_vocab, answer_vocab

In [9]:
# Read the experiment configuration. The context manager closes the file handle
# instead of leaving it open for the lifetime of the kernel.
with open("config/config_vqa_sgd.yml") as config_file:
    config = yaml.safe_load(config_file)

# Only use the GPU when it is both requested and actually available.
config['use_gpu'] = config['use_gpu'] and torch.cuda.is_available()
torch.manual_seed(config['seed'])
torch.cuda.manual_seed(config['seed'])

if config['mode'] == 'test':
    phases = ['train', 'test']
else:
    phases = ['train', 'val']
dataloaders, ques_vocab, ans_vocab = load_datasets(config, phases)

# add model parameters to config
config['model']['params']['vocab_size'] = len(ques_vocab)
config['model']['params']['output_size'] = len(ans_vocab) - 1   # -1 as don't want model to predict '<unk>'
config['model']['params']['extract_img_features'] = 'preprocess' in config['data']['images'] and config['data']['images']['preprocess']
# NOTE(review): the features dir always points at the *train* embeddings;
# the original author left open whether test/val should be used instead.
config['model']['params']['features_dir'] = os.path.join(
    config['data']['dir'], config['data']['train']['emb_dir'])
model = VQAModel(mode=config['mode'], **config['model']['params'])
print(model)
# NOTE(review): criterion is created here but is not passed to
# train_model/test_model anywhere in this notebook.
criterion = nn.CrossEntropyLoss()

# Only parameters that require gradients are handed to the optimizer.
if config['optim']['class'] == 'sgd':
    optimizer = optim.SGD(filter(lambda p: p.requires_grad, model.parameters()),
                            **config['optim']['params'])
elif config['optim']['class'] == 'rmsprop':
    optimizer = optim.RMSprop(filter(lambda p: p.requires_grad, model.parameters()),
                                **config['optim']['params'])
else:
    optimizer = optim.Adam(filter(lambda p: p.requires_grad, model.parameters()),
                            **config['optim']['params'])

best_acc = 0

startEpoch = 0
if 'reload' in config['model']:
    pathForTrainedModel = os.path.join(config['save_dir'],
                                        config['model']['reload'])
    if os.path.exists(pathForTrainedModel):
        print(
            "=> loading checkpoint/model found at '{0}'".format(pathForTrainedModel))
        # Deserialize onto the CPU so a checkpoint written on a GPU runtime can
        # still be restored on a CPU-only one; the model is moved to the GPU
        # below when use_gpu is set.
        checkpoint = torch.load(pathForTrainedModel, map_location='cpu')
        startEpoch = checkpoint['epoch']
        model.load_state_dict(checkpoint['state_dict'])
        # Optimizer state is deliberately not restored:
        # optimizer.load_state_dict(checkpoint['optimizer'])
    else:
        # Make a missing checkpoint visible instead of silently training from scratch.
        print(
            "=> no checkpoint found at '{0}', starting from scratch".format(pathForTrainedModel))
if config['use_gpu']:
    model = model.cuda()

print('config mode ', config['mode'])
save_dir = os.path.join(os.getcwd(), config['save_dir'])


Loading preprocessed datasets




{'train': 53220, 'val': 21303}
ques vocab size: 10516
ans vocab size: 1001
VQAModel(
  (image_encoder): ImageEmbedding(
    (extractor): VGG(
      (features): Sequential(
        (0): Conv2d(3, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
        (1): ReLU(inplace=True)
        (2): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
        (3): ReLU(inplace=True)
        (4): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
        (5): Conv2d(64, 128, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
        (6): ReLU(inplace=True)
        (7): Conv2d(128, 128, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
        (8): ReLU(inplace=True)
        (9): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
        (10): Conv2d(128, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
        (11): ReLU(inplace=True)
        (12): Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
        (

In [12]:
# Show the configured number of training epochs as the cell's output.
configured_epochs = config['optim']['n_epochs']
'Epochs : %s' % configured_epochs

'Epochs : 5'

In [13]:
# Dispatch on the configured mode: train the model, write test predictions,
# or report an unknown mode.
run_mode = config['mode']

if run_mode == 'train':
    optim_cfg = config['optim']
    # The scheduler name is compared case-insensitively; anything other than
    # the custom plateau scheduler falls back to StepLR.
    requested_scheduler = optim_cfg.get('scheduler', '').lower()
    if requested_scheduler == 'CustomReduceLROnPlateau'.lower():
        print('CustomReduceLROnPlateau')
        plateau_cfg = optim_cfg['scheduler_params']
        exp_lr_scheduler = CustomReduceLROnPlateau(
            optimizer,
            plateau_cfg['maxPatienceToStopTraining'],
            plateau_cfg['base_class_params'])
    else:
        # Decay LR by a factor of gamma every step_size epochs
        print('lr_scheduler.StepLR')
        exp_lr_scheduler = lr_scheduler.StepLR(optimizer, step_size=7, gamma=0.1)

    print("begin training")
    model = train_model(
        model, dataloaders, optimizer, exp_lr_scheduler, save_dir,
        num_epochs=optim_cfg['n_epochs'],
        use_gpu=config['use_gpu'],
        best_accuracy=best_acc,
        start_epoch=startEpoch)
elif run_mode == 'test':
    # Predictions are written to "<mode>.json" inside save_dir.
    outputfile = os.path.join(save_dir, run_mode + ".json")
    test_model(model, dataloaders['test'], VQADataset.ans_vocab,
               outputfile, use_gpu=config['use_gpu'])
else:
    print("Invalid config mode %s !!" % run_mode)

CustomReduceLROnPlateau
begin training
Training Model with use_gpu=True...
Epoch 0/4
----------




(1383/53220) - running loss: 0.2445401924723572, running_corrects: 237, example_count: 1383, acc: 17.136659436008678
(2724/53220) - running loss: 0.23871917539056003, running_corrects: 513, example_count: 2724, acc: 18.83259911894273
(4139/53220) - running loss: 0.21601595121698408, running_corrects: 818, example_count: 4139, acc: 19.763227832809857
(5557/53220) - running loss: 0.19934954427870483, running_corrects: 1104, example_count: 5557, acc: 19.86683462299802
(6978/53220) - running loss: 0.18718141483078374, running_corrects: 1416, example_count: 6978, acc: 20.292347377472055
(8386/53220) - running loss: 0.17653144835970894, running_corrects: 1766, example_count: 8386, acc: 21.05890770331505
(9800/53220) - running loss: 0.16802616691102787, running_corrects: 2128, example_count: 9800, acc: 21.714285714285715
(11224/53220) - running loss: 0.16092900681181288, running_corrects: 2513, example_count: 11224, acc: 22.38952245188881
(12642/53220) - running loss: 0.1558572477393655, runn



(1420/53220) - running loss: 0.09764204167983902, running_corrects: 437, example_count: 1420, acc: 30.77464788732394
(2823/53220) - running loss: 0.09991021714563535, running_corrects: 853, example_count: 2823, acc: 30.216082182075805
(4205/53220) - running loss: 0.10032382266558308, running_corrects: 1276, example_count: 4205, acc: 30.344827586206897
(5571/53220) - running loss: 0.10353816700375416, running_corrects: 1627, example_count: 5571, acc: 29.204810626458443
(6979/53220) - running loss: 0.10401736721471176, running_corrects: 2013, example_count: 6979, acc: 28.843673878779196
(8404/53220) - running loss: 0.10249819552994864, running_corrects: 2458, example_count: 8404, acc: 29.247977153736315
(9821/53220) - running loss: 0.1019638189047396, running_corrects: 2886, example_count: 9821, acc: 29.38600957132675
(11240/53220) - running loss: 0.10133130146303211, running_corrects: 3317, example_count: 11240, acc: 29.51067615658363
(12646/53220) - running loss: 0.10022617596617402, r