In [0]:
import torch
# Prefer the first CUDA device when one is usable; otherwise run on the CPU.
if torch.cuda.is_available():
    device = "cuda:0"
else:
    device = "cpu"
print(device)

cuda:0


In [0]:
import torch
from torch.autograd import Variable

from dataset import Dictionary, HMQAFeatureDataset
from model import SoftCount
from config import *
from datetime import datetime, timedelta

import h5py
import numpy as np
import _pickle as pkl
import json
import torch.nn.functional as F

In [0]:
# Vocabulary used to build the datasets (passed to HMQAFeatureDataset below).
dictionary = Dictionary.load_from_file('data/dictionary.pkl')

# Read only the first 5000 entries of each feature array into memory.
# Indexing with hf[...] (instead of hf.get(...)) raises a clear KeyError when
# a dataset is missing, and np.asarray avoids a second copy of the slice that
# h5py has already materialised as an ndarray.
train_h5_loc = './data/train36.hdf5'
with h5py.File(train_h5_loc, 'r') as hf:
    train_image_features = np.asarray(hf['image_features'][0:5000])
    train_spatials_features = np.asarray(hf['spatial_features'][0:5000])

from dataset import HMQAFeatureDataset

# Open the metadata files in a `with` block so the handles are closed as soon
# as the dataset has been built.
# NOTE(review): pickle can execute arbitrary code on load -- only use trusted files.
with open("./data/train36_imgid2idx.pkl", "rb") as imgid2idx_file, \
        open("./data/how_many_qa/qid2count.json", "rb") as qid2count_file, \
        open("./data/how_many_qa/qid2count2score.json", "rb") as qid2count2score_file:
    hmqa_train_dset = HMQAFeatureDataset(
        img_id2hqma_idx=pkl.load(imgid2idx_file),
        image_features=train_image_features,
        spatial_features=train_spatials_features,
        qid2count=json.load(qid2count_file),
        qid2count2score=json.load(qid2count2score_file),
        name="train",
        dictionary=dictionary,
    )
# The class name is removed from the notebook namespace again, as before.
del HMQAFeatureDataset

loading dictionary from data/dictionary.pkl


In [0]:
# Read only the first 5000 entries of each validation feature array.
# hf[...] raises a clear KeyError when a dataset is missing, and np.asarray
# avoids copying the slice h5py has already materialised as an ndarray.
val_h5_loc = './data/val36.hdf5'
with h5py.File(val_h5_loc, 'r') as hf:
    val_image_features = np.asarray(hf['image_features'][0:5000])
    val_spatials_features = np.asarray(hf['spatial_features'][0:5000])

from dataset import HMQAFeatureDataset

# Both splits are built from the same validation features and metadata files;
# only the `name` argument differs. Each dataset receives freshly loaded
# metadata objects (nothing is shared between the two), and every file handle
# is closed once its dataset has been constructed.
# NOTE(review): pickle can execute arbitrary code on load -- only use trusted files.
with open("./data/val36_imgid2idx.pkl", "rb") as imgid2idx_file, \
        open("./data/how_many_qa/qid2count.json", "rb") as qid2count_file, \
        open("./data/how_many_qa/qid2count2score.json", "rb") as qid2count2score_file:
    hmqa_dev_dset = HMQAFeatureDataset(
        img_id2hqma_idx=pkl.load(imgid2idx_file),
        image_features=val_image_features,
        spatial_features=val_spatials_features,
        qid2count=json.load(qid2count_file),
        qid2count2score=json.load(qid2count2score_file),
        name="dev",
        dictionary=dictionary,
    )

with open("./data/val36_imgid2idx.pkl", "rb") as imgid2idx_file, \
        open("./data/how_many_qa/qid2count.json", "rb") as qid2count_file, \
        open("./data/how_many_qa/qid2count2score.json", "rb") as qid2count2score_file:
    hmqa_test_dset = HMQAFeatureDataset(
        img_id2hqma_idx=pkl.load(imgid2idx_file),
        image_features=val_image_features,
        spatial_features=val_spatials_features,
        qid2count=json.load(qid2count_file),
        qid2count2score=json.load(qid2count2score_file),
        name="test",
        dictionary=dictionary,
    )
# The class name is removed from the notebook namespace again, as before.
del HMQAFeatureDataset

In [0]:
from torch.utils.data import DataLoader

# Training batches are reshuffled every epoch. The dev/test loaders keep a
# fixed order: shuffling buys nothing for evaluation and only makes the
# order in which examples are scored non-deterministic.
hmqa_train_loader = DataLoader(hmqa_train_dset, batch_size=64, shuffle=True, num_workers=0)
hmqa_dev_loader = DataLoader(hmqa_dev_dset, batch_size=64, shuffle=False, num_workers=0)
hmqa_test_loader = DataLoader(hmqa_test_dset, batch_size=64, shuffle=False, num_workers=0)

In [0]:
def evaluate(model, hmqa_loader):
    """Score `model` on every batch produced by `hmqa_loader`.

    The model is switched to evaluation mode (so dropout is disabled) and
    autograd is turned off for the duration of the call; the model's previous
    train/eval mode is restored before returning, even if an error occurs.

    Args:
        model: a ``torch.nn.Module`` called as ``model(v_emb, q)`` that
            returns one real-valued count prediction per example.
        hmqa_loader: iterable yielding ``(v_emb, b, q, c, c2s)`` batches.
            ``b`` is ignored. ``c`` holds the target counts and ``c2s`` is
            indexed per example by the predicted count to obtain that
            example's score (presumably a count -> score table with at
            least 21 entries -- TODO confirm against the dataset class).

    Returns:
        ``(acc, rmse)`` as 0-dim tensors: the mean looked-up score of the
        rounded predictions, and the root-mean-square error between the
        rounded predictions and the target counts.
    """
    all_acc = []
    all_se = []

    was_training = model.training
    model.eval()
    try:
        with torch.no_grad():
            for v_emb, _b, q, c, c2s in hmqa_loader:
                # Only the model inputs need to live on the GPU; targets and
                # score tables are consumed on the CPU below.
                if USE_CUDA:
                    v_emb = v_emb.cuda()
                    q = q.cuda()

                pred = model(v_emb, q)

                # Adding 0.5 and truncating rounds non-negative predictions
                # to the nearest integer; the clamp then restricts the result
                # to the count range [0, 20] (negative values become 0).
                nearest_pred = (pred + 0.5).long().clamp(0, 20).cpu()
                c = c.float()

                for one_c, one_c2s, one_pred in zip(c, c2s, nearest_pred):
                    all_se.append((one_c - one_pred.float()) ** 2)
                    all_acc.append(one_c2s[one_pred])
    finally:
        # Leave the model in whatever mode the caller had it in.
        model.train(was_training)

    acc = torch.stack(all_acc).mean()
    rmse = torch.stack(all_se).mean() ** 0.5

    return acc, rmse

In [0]:
from model import SoftCount

# Instantiate the counting model with the hyper-parameters used for this run.
model = SoftCount(
    ques_dim=1024,
    score_dim=512,
    dropout=0.2,
)
# Drop the class name from the notebook namespace once the instance exists.
del SoftCount

# USE_CUDA comes from the config module; move the model to the GPU when set.
if USE_CUDA:
    model.cuda()

# Bare expression so the notebook displays the module's repr.
model

initialising with glove embeddings
 self.word_dim is  torch.Size([28333, 300])
done.


SoftCount(
  (ques_parser): QuestionParser(
    (embd): Embedding(28334, 300, padding_idx=28333)
    (rnn): GRU(300, 1024)
    (drop): Dropout(p=0.2)
  )
  (f): ScoringFunction(
    (v_drop): Dropout(p=0.2)
    (q_drop): Dropout(p=0.2)
    (v_proj): FCNet(
      (main): Sequential(
        (0): Linear(in_features=2048, out_features=512, bias=True)
        (1): LeakyReLU(negative_slope=0.01)
      )
    )
    (q_proj): FCNet(
      (main): Sequential(
        (0): Linear(in_features=1024, out_features=512, bias=True)
        (1): LeakyReLU(negative_slope=0.01)
      )
    )
    (s_drop): Dropout(p=0.2)
  )
  (W): Linear(in_features=512, out_features=1, bias=True)
)

In [0]:
# Baseline: score the untrained model on the test split.
# The freshly built model is in training mode, so switch to eval mode (to turn
# dropout off) and disable autograd while scoring, then restore training mode
# for the training loop that follows.
model.eval()
with torch.no_grad():
    test_acc, test_rmse = evaluate(model, hmqa_test_loader)
model.train()
test_acc, test_rmse



(tensor(0.0030), tensor(15.4859))

In [0]:
# Adam optimiser over all model parameters, plus per-epoch metric history
# (one entry per epoch in each list; read by the best-epoch report below).
opt = torch.optim.Adam(model.parameters(), lr=3e-4)
test_accs = []
test_rmses = []
dev_accs = []
dev_rmses = []

for epoch in range(10):
    for i, (v_emb, b, q, c, _) in enumerate(hmqa_train_loader):
        # Targets are flattened to 1-D floats for the regression loss.
        c = c.float().view(-1)

        if USE_CUDA:
            v_emb = v_emb.cuda()
            q = q.cuda()
            c = c.cuda()

        pred = model(v_emb, q)
        loss = F.smooth_l1_loss(pred, c)

        # Log the loss every 100 mini-batches.
        if i % 100 == 0:
            print("epoch = {}, i = {}, loss = {}".format(
                epoch, i, loss.item()))

        opt.zero_grad()
        loss.backward()
        # clip_grad_norm_ is the in-place replacement for the deprecated
        # clip_grad_norm; it rescales gradients to a total norm of at most 0.25.
        torch.nn.utils.clip_grad_norm_(model.parameters(), 0.25)
        opt.step()

    print("evaluating model on dev and test...")

    # Evaluate with dropout disabled and without building autograd graphs,
    # then switch back to training mode for the next epoch.
    model.eval()
    with torch.no_grad():
        dev_acc, dev_rmse = evaluate(model, hmqa_dev_loader)
        print("dev_acc: {}, dev_rmse: {}".format(dev_acc, dev_rmse))
        test_acc, test_rmse = evaluate(model, hmqa_test_loader)
        print("test_acc: {}, test_rmse: {}".format(test_acc, test_rmse))
    model.train()

    test_accs.append(test_acc)
    test_rmses.append(test_rmse)
    dev_accs.append(dev_acc)
    dev_rmses.append(dev_rmse)



epoch = 0, i = 0, loss = 13.870506286621094
epoch = 0, i = 100, loss = 1.9934184551239014
epoch = 0, i = 200, loss = 1.0252726078033447
epoch = 0, i = 300, loss = 1.3409125804901123
epoch = 0, i = 400, loss = 1.2460479736328125
epoch = 0, i = 500, loss = 1.2892258167266846
epoch = 0, i = 600, loss = 1.153021216392517
epoch = 0, i = 700, loss = 1.4135723114013672
evaluating model on dev and test...
dev_acc: 0.3360845446586609, dev_rmse: 3.4283335208892822
test_acc: 0.3458799421787262, test_rmse: 3.2456741333007812
epoch = 1, i = 0, loss = 1.5846731662750244
epoch = 1, i = 100, loss = 1.4423011541366577
epoch = 1, i = 200, loss = 1.5388858318328857
epoch = 1, i = 300, loss = 0.9985235929489136
epoch = 1, i = 400, loss = 1.1593396663665771
epoch = 1, i = 500, loss = 1.516355037689209
epoch = 1, i = 600, loss = 1.8141047954559326
epoch = 1, i = 700, loss = 1.0379215478897095
evaluating model on dev and test...
dev_acc: 0.29249754548072815, dev_rmse: 3.695399522781372
test_acc: 0.3073999881

In [0]:
# Model selection: rank epochs by dev accuracy ONLY. Sorting the raw tuples
# would break ties on dev accuracy using the test metrics, letting test
# results influence which epoch is reported. sorted() is stable, so on a tie
# the earliest epoch wins. float() accepts both 0-dim tensors and plain floats.
top_dev_accs = sorted(
    zip(dev_accs, test_accs, test_rmses),
    key=lambda epoch_metrics: float(epoch_metrics[0]),
    reverse=True,
)
best_dev_acc, corr_test_acc, corr_test_rmse = top_dev_accs[0]
print("The best dev accuracy is {}. The corresponding test accuracy and test RMSE are {} and {} respectively".format(
    best_dev_acc, corr_test_acc, corr_test_rmse
))

The best dev accuracy is 0.3360845446586609. The corresponding test accuracy and test RMSE are 0.3458799421787262 and 3.2456741333007812 respectively
