In [None]:
import os
import json
import random
import numpy as np
from six.moves import range
from six import iteritems
import h5py
from IPython.display import Image, display


import skimage.io
from skimage.transform import resize
from sklearn.preprocessing import normalize
from nltk.tokenize import word_tokenize

import torch
import torch.nn as nn
from torch.autograd import Variable
import torchvision

import options
from utils import utilities as utils
from dataloader import VisDialDataset
from torch.utils.data import DataLoader
from eval_utils.rank_answerer import rankABot
from eval_utils.rank_questioner import rankQBot
from utils import utilities as utils
from utils.visualize import VisdomVisualize

In [None]:
# Tunable demo settings.
params = {

    # A-Bot checkpoint
    'startFrom': "./checkpoints/color/abot_ep_15.vd",

    # Q-Bot checkpoint; it should be given if interactive dialog is required.
    # The key is kept with a None default so that later lookups of
    # params['qstartFrom'] do not raise KeyError when no Q-Bot is used.
    # 'qstartFrom': "./checkpoints/qbot_sl.vd",
    'qstartFrom': None,

    'beamSize': 5,
    'imgFeatureSize':16384,
    'inputImg': 'data/visdial/data_1.0_img.h5',

}

# Fixed settings; merged into `params` below and overriding any duplicate key.
# NOTE(review): these are machine-specific absolute paths -- consider deriving
# them from a configurable data directory.
static_params = {
    'numRounds':10,
    'useGPU': False,
    'imgNorm': 0,

    'inputJson': "/scr/anarc/motm/data/visdial/data/visdial_params_v2.json",
    'inputQues': '/scr/anarc/motm/data/visdial/data/visdial_data_v2.h5',
    'cocoDir': '/scr/anarc/motm/data/visdial/data/visdial_images',
    'cocoInfo': '/scr/anarc/motm/data/visdial/data/visdial_images/coco_info.json',
}

params.update(static_params)

In [None]:
# Build the dataset from the merged `params`, restricted to the 'test' split.
splits = ['test']
dataset = VisDialDataset(params, splits)

In [None]:


# Seed Python's and torch's RNGs (and the CUDA ones when running on GPU)
manualSeed = 1597
random.seed(manualSeed)
torch.manual_seed(manualSeed)
if params['useGPU']:
    torch.cuda.manual_seed_all(manualSeed)

# Read the vocabulary / dataset metadata
print('Loading json file: ' + params['inputJson'])
with open(params['inputJson'], 'r') as fileId:
    info = json.load(fileId)

# <START> and <END> are appended right after the existing words;
# index 0 is reserved for padding, hence the "+ 3" in the vocabulary size.
wordCount = len(info['word2ind'])
startToken = wordCount + 1
endToken = wordCount + 2
info['word2ind']['<START>'] = startToken
info['word2ind']['<END>'] = endToken
vocabSize = wordCount + 3
print('Vocab size with <START>, <END>: %d' % vocabSize)

# Reverse lookup: integer index -> word
info['ind2word'] = dict(
    (int(ind), word) for word, ind in info['word2ind'].items())
    
def loadModel(params, agent='abot'):
    """Build an A-Bot or Q-Bot and load its weights from a checkpoint.

    The checkpoint path is read from params['startFrom'] for 'abot' and from
    params['qstartFrom'] for 'qbot'. Any option stored in the checkpoint's
    'params' dict that is missing from `params` is copied into `params`
    (the dict is mutated in place); options already present are never
    overwritten by the checkpoint.

    Args:
        params: dict of settings; must contain 'useGPU' and the checkpoint key.
        agent: 'abot' or 'qbot'.

    Returns:
        The constructed model with the checkpoint's state dict loaded.

    Raises:
        ValueError: if `agent` is neither 'abot' nor 'qbot'.
        AssertionError: if no checkpoint path is configured for `agent`.
    """
    if agent not in ('abot', 'qbot'):
        raise ValueError(
            "Unknown agent '{}': expected 'abot' or 'qbot'".format(agent))

    # should be everything used in encoderParam, decoderParam below
    encoderOptions = [
        'encoder', 'vocabSize', 'embedSize', 'rnnHiddenSize', 'numLayers',
        'useHistory', 'useIm', 'imgEmbedSize', 'imgFeatureSize', 'numRounds',
        'dropout'
    ]
    decoderOptions = [
        'decoder', 'vocabSize', 'embedSize', 'rnnHiddenSize', 'numLayers',
        'dropout'
    ]
    modelOptions = encoderOptions + decoderOptions

    gpuFlag = params['useGPU']
    startArg = 'startFrom' if agent == 'abot' else 'qstartFrom'
    # .get() so that an absent key yields the intended message, not KeyError
    assert params.get(startArg), "Need checkpoint for {}".format(agent)

    print('Loading model (weights and config) from {}'.format(
        params[startArg]))

    if gpuFlag:
        mdict = torch.load(params[startArg])
    else:
        # Keep every storage where it is deserialized (CPU) instead of
        # restoring it onto the device it was saved from.
        mdict = torch.load(params[startArg],
            map_location=lambda storage, location: storage)

    # Model options is a union of standard model options defined
    # above and parameters loaded from checkpoint
    modelOptions = list(set(modelOptions).union(set(mdict['params'])))
    for opt in modelOptions:
        # Parameters already set by the caller are not overwritten
        if opt not in params:
            params[opt] = mdict['params'][opt]

    # Initialize model class
    encoderParam = {k: params[k] for k in encoderOptions}
    decoderParam = {k: params[k] for k in decoderOptions}

    # <START> / <END> occupy the last two vocabulary indices
    encoderParam['startToken'] = encoderParam['vocabSize'] - 2
    encoderParam['endToken'] = encoderParam['vocabSize'] - 1
    decoderParam['startToken'] = decoderParam['vocabSize'] - 2
    decoderParam['endToken'] = decoderParam['vocabSize'] - 1

    if agent == 'abot':
        encoderParam['type'] = params['encoder']
        decoderParam['type'] = params['decoder']
        encoderParam['isAnswerer'] = True
        from visdial.models.answerer import Answerer
        model = Answerer(encoderParam, decoderParam)
        print("e param = ", encoderParam)
        print("e = ", model.encoder)

    else:
        encoderParam['type'] = params['qencoder']
        decoderParam['type'] = params['qdecoder']
        encoderParam['isAnswerer'] = False
        encoderParam['useIm'] = False
        from visdial.models.questioner import Questioner
        model = Questioner(
            encoderParam,
            decoderParam,
            imgFeatureSize=encoderParam['imgFeatureSize'])

    if gpuFlag:
        model.cuda()

    model.load_state_dict(mdict['model'])

    print("Loaded agent {}".format(agent))
    return model

aBot = None
qBot = None

# load aBot (only when a checkpoint is configured)
if params.get('startFrom'):
    aBot = loadModel(params, 'abot')
    assert aBot.encoder.vocabSize == vocabSize, "Vocab size mismatch!"
    aBot.eval()

# load qBot; the checkpoint is optional, so use .get() rather than indexing --
# a missing 'qstartFrom' key must simply leave qBot as None.
if params.get('qstartFrom'):
    qBot = loadModel(params, 'qbot')
    assert qBot.encoder.vocabSize == vocabSize, "Vocab size mismatch!"
    qBot.eval()

# load pre-trained VGG 19
print("Loading image feature extraction model")
feat_extract_model = torchvision.models.vgg19(pretrained=True)

# Drop the last three layers of the classifier head so the network outputs
# intermediate features instead of class scores.
feat_extract_model.classifier = nn.Sequential(*list(feat_extract_model.classifier.children())[:-3])
# print(feat_extract_model)
feat_extract_model.eval()

if params['useGPU']:
    feat_extract_model.cuda()

print("Done!")

In [None]:
# Load demo image, question and dialog history
# NOTE(review): within the visible notebook `img_mat`, `hist_info` and
# `ques_info` are only referenced from commented-out code, and `img_path` is
# reassigned in a later cell -- confirm whether this cell is still needed.

img_path = "demo/img.jpg"
img_mat = skimage.io.imread(img_path)

with open("demo/hist.json") as hfile:
    hist_info = json.load(hfile)

with open("demo/ques.json") as qfile:
    ques_info = json.load(qfile)

In [None]:
# Index of the dataset example to inspect, and the dialog turn to answer
# (-1 selects the last turn when used as a list index further down).
example_index = 4
question_turn_index = -1

example = dataset[example_index]
# Show which fields the dataset returns for a single example
example.keys()

In [None]:
example = dataset.collate_fn([example])

In [None]:
example['ques'].shape

In [None]:
example

In [None]:
# Number of dialog rounds, as reported by the dataset
numRounds = dataset.numRounds

# One bucket of scores per round, plus a flat list of (scores, indices) pairs;
# both are filled by the evaluation loop.
logProbsAll = [list() for _ in range(numRounds)]
sortedScoreAll = list()

# Scoring function used when evaluating options
scoringFunction = utils.maskedNll

In [None]:
def to_str_opt(opt):
    """Print a sequence of vocabulary indices and the words it decodes to.

    Args:
        opt: iterable of token indices; when it exposes a `.data` attribute
            (e.g. a Variable) that attribute is iterated instead.

    Indices missing from info['ind2word'] are printed as 'UNK'.
    """
    print(opt)
    indices = getattr(opt, 'data', opt)
    # int() so that tensor elements match the integer keys of ind2word
    opt_str = " ".join(
        info['ind2word'].get(int(ind), 'UNK') for ind in indices)
    print(opt_str)

In [None]:
# Read the options straight from the collated example: at this point in a
# top-to-bottom run the name `options` still refers to the imported `options`
# module, which cannot be indexed.
# Indices: batch element 0, round 6, option 0.
to_str_opt(Variable(example['opt'], volatile=True)[0][6][0])

In [None]:
def rankOptions(options, scores):
    '''Sort each example's option scores in ascending order.

    Returns a (sortedScore, sortedInds) tuple as produced by sorting
    `scores` along dimension 1.
    '''
    # The option count is looked up but not otherwise used; the call is kept
    # so that `options` without a second dimension still fails here.
    options.size(1)

    ascending = torch.sort(scores, 1)
    return ascending[0], ascending[1]
    
    
    
   

In [None]:
# Wrap every field of the collated example in a volatile Variable.
# NOTE(review): `options` rebinds the name of the imported `options` module,
# and `caption` / `questions` / `answers` are reassigned again by later cells.
image = Variable(example['img_feat'], volatile=True)
caption = Variable(example['cap'], volatile=True)
captionLens = Variable(example['cap_len'], volatile=True)
questions = Variable(example['ques'], volatile=True)
quesLens = Variable(example['ques_len'], volatile=True)
answers = Variable(example['ans'], volatile=True)
ansLens = Variable(example['ans_len'], volatile=True)
options = Variable(example['opt'], volatile=True)
optionLens = Variable(example['opt_len'], volatile=True)
#correctOptionInds = Variable(example['ans_id'], volatile=True)

# Start a fresh dialog and feed the image and caption as round -1.
aBot.reset()
aBot.observe(-1, image=image, caption=caption, captionLens=captionLens)
# NOTE(review): the loop variable `round` shadows the builtin of the same name.
for round in range(numRounds):
    print("Round = ", round)
    # Rounds whose question length is 1 are skipped -- presumably padding
    # rounds; confirm against the dataloader.
    if quesLens[0][round].data[0] == 1:
        print("skipping round")
        continue
    # Debug dump for round 7 only
    if round==7:
        print("q = ", questions[:,round])
        print("ql = ", quesLens[:,round])
        print("a = ", answers[:,round])
        print("al = ", ansLens[:,round])
    aBot.observe(
        round,
        ques=questions[:, round],
        quesLens=quesLens[:, round],
        ans=answers[:, round],
        ansLens=ansLens[:, round])
    print("opt = ", options[:,round])
    # Score this round's options with the A-Bot
    logProbs = aBot.evalOptions(options[:, round],
                                optionLens[:, round], scoringFunction)
    #print("lp = ", logProbs)
    logProbsCurrent = aBot.forward()
    #print("lpc = ", logProbsCurrent)
    # NOTE(review): appends to lists created in another cell, so re-running
    # this cell without re-running that one accumulates duplicates.
    logProbsAll[round].append(
        scoringFunction(logProbsCurrent,
                        answers[:, round].contiguous()))
    #print("lpa = ", logProbsAll)
    sortedScore, sortedInds = rankOptions(options[:,round], logProbs)
    sortedScoreAll.append((sortedScore,sortedInds))
    #batchRanks = rankOptions(options[:, round],
    #                         correctOptionInds[:, round], logProbs)
    #ranks.append(batchRanks)

In [None]:
# Left-pad the image id with zeros to 12 characters and append ".jpg"
def image_id_to_suffix(image_id):
    """Return the zero-padded file-name suffix for `image_id`."""
    padded_id = str(image_id).zfill(12)
    return padded_id + '.jpg'

def get_image_path(image_id, images_path):
    """Return the first existing file among `prefix + suffix` candidates.

    Each entry of `images_path` is a string prefix to which the suffix from
    image_id_to_suffix(image_id) is appended directly (plain concatenation).

    Raises:
        ValueError: if none of the candidate files exists.
    """
    suffix = image_id_to_suffix(image_id)
    candidates = (prefix + suffix for prefix in images_path)
    found = next((name for name in candidates if os.path.exists(name)), None)
    if found is None:
        raise ValueError("Image id \"{}\" could not be found in given paths \"{}\""
                         .format(image_id, images_path))
    return found


def visualize_example(dialogue_entry, questions, answers, images_path, verbose=False):
    """Display a dialog's image and print its caption, questions and answers.

    Args:
        dialogue_entry: dict with 'image_id', 'caption' and 'dialog'; each
            dialog turn holds a 'question' id and either an 'answer' id or a
            list of 'answer_options' ids.
        questions: indexable collection mapping question id -> text.
        answers: indexable collection mapping answer id -> text.
        images_path: sequence of path prefixes; any number is accepted and
            the first one for which `prefix + <padded id>.jpg` exists is used.
        verbose: also print the raw entry and the resolved file name.

    If no candidate file exists, a message is printed and the dialog text is
    still shown.
    """
    suffix = image_id_to_suffix(dialogue_entry['image_id'])

    # Pick the first candidate that exists on disk
    image_filename = None
    for path in images_path:
        candidate = path + suffix
        if os.path.exists(candidate):
            image_filename = candidate
            break

    if image_filename is None:
        print("Image could not be found.")
    else:
        display(Image(filename=image_filename))

    if verbose:
        print("\nDialogue entry: \n{}".format(dialogue_entry))
        print("Image from filename {}\n".format(image_filename))

    print("Caption = \"{}\"\n".format(dialogue_entry['caption']))
    for turn in dialogue_entry['dialog']:
        question_id = turn['question']
        print("Question = \"{}\"".format(questions[question_id]))
        if 'answer' in turn:
            answer_id = turn['answer']
            print("\t\t\t\tAnswer = \"{}\"\n".format(answers[answer_id]))
        else:
            # Turns without a ground-truth answer list their candidate options
            answer_options_ids = turn['answer_options']
            print("\t\t\t\tAnswer options = \n{}".format([answers[a_id] for a_id in answer_options_ids]))
            
# Location of the test dialogs and images, relative to `data_basedir`
inputs_test = {
    "dialog_path": "visdial_1.0_test.json",
    "image_locations": ["visdial_images/VisualDialog_test2018"],
    "image_prefix": ["VisualDialog_test2018_"],
}

data_basedir = "../data/visdial/data"

dialog_path = os.path.join(data_basedir, inputs_test["dialog_path"])

# One "<base>/<location>/<file-name prefix>" string per image location
image_paths = [
    os.path.join(data_basedir, location, prefix)
    for location, prefix in zip(inputs_test["image_locations"],
                                inputs_test["image_prefix"])
]

In [None]:
def visualize_predictions(dialogue_entry, questions, answers, images_path, sortedScoreAll, verbose=False):
    """Display a dialog and, for the selected turn, the ranked answer options.

    Reads the globals `question_turn_index` (which turn to annotate; may be
    negative) and `options_str` (answer ids of that turn's options).
    `sortedScoreAll` is indexed by turn position and must hold
    (sortedScores, sortedInds) pairs. `verbose` is accepted but unused.
    """
    image_filename = get_image_path(dialogue_entry['image_id'], images_path)
    display(Image(filename=image_filename))

    print("Caption = \"{}\"\n".format(dialogue_entry['caption']))
    dialog = dialogue_entry['dialog']
    for turn_idx, turn in enumerate(dialog):
        print("\nQuestion = \"{}\"".format(questions[turn['question']]))
        if 'answer' not in turn:
            option_texts = [answers[a_id] for a_id in turn['answer_options']]
            print("\t\t\t\tAnswer options = \n{}".format(option_texts))
        else:
            print("\t\t\t\tGround Truth Answer = \"{}\"\n".format(answers[turn['answer']]))

        # Only the selected turn (given directly or as a negative offset)
        # gets the ranked predictions.
        if turn_idx not in (question_turn_index, len(dialog) + question_turn_index):
            continue
        sortedScores, sortedInds = sortedScoreAll[turn_idx]
        ranked_ids = [options_str[opt_pos] for opt_pos in sortedInds.data[0]]
        ranked_texts = [answers[ans_id] for ans_id in ranked_ids]
        print("\t\t\t\tSelected answers = {}".format(list(zip(ranked_texts, list(sortedScores[0].data)))))

In [None]:
#dialog_path = os.path.join("../data/visdial/data", "visdial_1.0_test.json")
inputs_test = {
                            "dialog_path":"visdial_1.0_test.json",
                            "image_locations":["visdial_images/VisualDialog_test2018"],
                            "image_prefix": ["VisualDialog_test2018_"]
               }

data_basedir = "../data/visdial/data"

dialog_path = os.path.join(data_basedir, inputs_test["dialog_path"])
image_paths = [os.path.join(data_basedir, location, prefix)
                for location, prefix in list(zip(inputs_test["image_locations"],inputs_test["image_prefix"]))]

# Use a context manager so the file handle is closed after parsing
with open(dialog_path, 'r') as dialog_file:
    raw_data = json.load(dialog_file)

example_raw_data = raw_data['data']['dialogs'][example_index]
# Answer ids offered as options for the selected turn; read as a global by
# visualize_predictions.
options_str = example_raw_data['dialog'][question_turn_index]['answer_options']
num_turns = len(example_raw_data['dialog'])

visualize_predictions(example_raw_data,
                      raw_data['data']['questions'],
                      raw_data['data']['answers'],
                      image_paths,
                      sortedScoreAll)
    

In [None]:
# Parse the dialog file with a context manager so the handle is closed
with open(dialog_path, 'r') as dialog_file:
    loaded_data = json.load(dialog_file)

# The HDF5 file is left open on purpose: image features are read from it in a later cell.
image_feats_h5 = h5py.File(params['inputImg'], 'r')

In [None]:
example_index = 4
question_turn_index = -1

In [None]:

dialog_example = loaded_data['data']['dialogs'][example_index]
visualize_example(dialog_example, loaded_data['data']['questions'], loaded_data['data']['answers'], 
                  image_paths, verbose=False)


In [None]:
# Every turn before `question_turn_index` is history; the turn at that index
# is the one to answer.
history = dialog_example['dialog'][:question_turn_index]
question_turn = dialog_example['dialog'][question_turn_index]

img_path = get_image_path(dialog_example['image_id'], image_paths)

# Stored features for this example, with a leading dimension of size 1 added
img_feats = torch.FloatTensor(image_feats_h5["images_test"][example_index])
img_feats = img_feats.unsqueeze(0)

print("q turn = ", question_turn)
print("history = ", history)
print("img path = ", img_path)
print("img_feats shape = ", img_feats.shape)

In [None]:
options_indices = question_turn['answer_options']

In [None]:
loaded_data['data']['questions'][32059]

In [None]:
# Display loaded image
from IPython.display import display
from IPython.display import Image
display(Image(img_path))

In [None]:
def ind_map(words):
    """Map words to vocabulary indices as an int64 array.

    Words missing from info['word2ind'] get the index of 'UNK'.
    """
    return np.array(
        [info['word2ind'].get(word, info['word2ind']['UNK']) for word in words],
        dtype='int64')


def tokenize(string):
    """Tokenize `string` and wrap the tokens in '<START>' / '<END>' markers."""
    return ['<START>'] + word_tokenize(string) + ['<END>']

# Process image
def transform(img):
    """Scale to [0, 1], resize to 224x224, subtract per-channel means and
    return the result with the channel axis moved to the front."""
    scaled = img.astype("float") / 255
    scaled = resize(scaled, (224, 224), mode='constant')
    # Constant subtracted from channels 0, 1 and 2 respectively
    for channel, mean in enumerate((0.485, 0.456, 0.406)):
        scaled[:, :, channel] -= mean
    return scaled.transpose([2, 0, 1])

raw_img = transform(skimage.io.imread(img_path))

# Caption: tokens, then vocabulary indices
caption_tokens = tokenize(dialog_example['caption'])
caption = ind_map(caption_tokens)

# History: one token list and one index array per past answer / question.
# Turns store ids, so the text is looked up in loaded_data first.
h_answer_tokens = [
    tokenize(loaded_data['data']['answers'][item['answer']])
    for item in history
]
h_answers = [ind_map(tokens) for tokens in h_answer_tokens]

h_question_tokens = [
    tokenize(loaded_data['data']['questions'][item['question']])
    for item in history
]
h_questions = [ind_map(tokens) for tokens in h_question_tokens]

# Question to be answered
question_tokens = tokenize(loaded_data['data']['questions'][question_turn['question']])
question = ind_map(question_tokens)

# Candidate answer options for that question
options_tokens = [
    tokenize(loaded_data['data']['answers'][opt]) for opt in options_indices
]
options = [ind_map(tokens) for tokens in options_tokens]


In [None]:
def var_map(tensor):
    """Wrap `tensor` in a volatile Variable with a new leading dimension,
    moving it to the GPU first when params['useGPU'] is set."""
    placed = tensor.cuda() if params['useGPU'] else tensor
    return Variable(placed.unsqueeze(0), volatile=True)

#img_tensor = var_map(torch.from_numpy(raw_img).float())
#img_feats = feat_extract_model(img_tensor)
#_norm = torch.norm(img_feats, p=2, dim=1)
#img_feats = img_feats.div(_norm.expand_as(img_feats))ffe

# Caption and current question: index tensor plus its length
caption_tensor = var_map(torch.from_numpy(caption))
caption_lens = var_map(torch.LongTensor([len(caption)]))

question_tensor = var_map(torch.from_numpy(question))
question_lens = var_map(torch.LongTensor([len(question)]))

# History: one tensor / length per past round (lengths come from the token lists)
hist_ans_tensors = [var_map(torch.from_numpy(indices)) for indices in h_answers]
hist_ans_lens = [var_map(torch.LongTensor([len(tokens)])) for tokens in h_answer_tokens]
hist_ques_tensors = [var_map(torch.from_numpy(indices)) for indices in h_questions]
hist_ques_lens = [var_map(torch.LongTensor([len(tokens)])) for tokens in h_question_tokens]

# Candidate options: one tensor / length per option
options_tensors = [var_map(torch.from_numpy(indices)) for indices in options]
options_lens = [var_map(torch.LongTensor([len(tokens)])) for tokens in options_tokens]

In [None]:
hist_ques_tensors[0]
hist_ques_lens[0]

In [None]:
options_tokens[0]
options_lens[0].data[0][0]


In [None]:
# Peek at the shapes of (up to) the first three options
for opt_tensor in options_tensors[:3]:
    print(opt_tensor.shape)

# Pad all options into one (numOptions, width) matrix; 0 is the padding index.
# Each option already starts with <START> and ends with <END> (both added by
# `tokenize`), so rows are copied verbatim instead of wrapping them in a
# second pair of markers.
# Width is at least 20+2+1 (20=max ans len) and grows when an option is
# longer, so no row can overflow.
longest_option = max([int(opt_tensor.size(1)) for opt_tensor in options_tensors] + [0])
options_matrix = torch.LongTensor(
    len(options_tensors), max(20 + 2 + 1, longest_option)).fill_(0)

for ansId, opt_tensor in enumerate(options_tensors):
    length = int(opt_tensor.size(1))
    if length == 0:
        print('Warning: Skipping empty option answer list at (%d)'\
                %ansId)
        continue

    # Fill on the CPU side, independent of where the option tensor lives
    options_matrix[ansId, :length] = opt_tensor.data[0].cpu()

options_tensors_cat = Variable(options_matrix)

options_tensors_cat

In [None]:
# Helper functions for converting tensors to words
# to_str_pred(w, l): drops non-positive entries of `w`, maps the rest through
# info['ind2word'], keeps the first l.data.cpu()[0] words, joins them with
# spaces and strips the first 8 characters (presumably the leading "<START> ").
to_str_pred = lambda w, l: str(" ".join([info['ind2word'][x] for x in list( filter(
        lambda x:x>0,w.data.cpu().numpy()))][:l.data.cpu()[0]]))[8:]
# to_str_gt(w): same decoding without a length cut-off; strips the first 8 and
# the last 6 characters (presumably "<START> " and " <END>").
to_str_gt = lambda w: str(" ".join([info['ind2word'][x] for x in filter(
        lambda x:x>0,w.data.cpu().numpy())]))[8:-6]

In [None]:
options_tensors_cat

In [None]:
to_str_pred(question_tensor[0], question_lens[0])

In [None]:
to_str_gt(question_tensor[0])

In [None]:
# `hist_ques_tensors` is a Python list (one tensor per history round) and has
# no `.shape`; report its length next to the question's shape instead.
question_tensor[0].shape, len(hist_ques_tensors)

In [None]:
to_str_pred(hist_ques_tensors[1][0], hist_ques_lens[1][0])

In [None]:
to_str_gt(hist_ques_tensors[1][0])

In [None]:
question_tensor[0].shape

In [None]:
hist_ques_tensors[1][0].shape

In [None]:
# Reset whichever bots are loaded and feed them the caption (plus the image
# features for the A-Bot) as round -1.
if aBot:
    aBot.eval(), aBot.reset()
    aBot.observe(
        -1, image=img_feats, caption=caption_tensor, captionLens=caption_lens)

if qBot:
    qBot.eval(), qBot.reset()
    qBot.observe(-1, caption=caption_tensor, captionLens=caption_lens)

# NOTE(review): these two names are already imported in the first cell.
from IPython.display import display
from IPython.display import Image
display(Image(img_path))

print("Caption: ", to_str_gt(caption_tensor[0]))
    
#numRounds = len(hist_info['dialog'])
# NOTE(review): rebinds the global `numRounds` set from the dataset earlier.
numRounds = len(history)
beamSize = params['beamSize']
# NOTE(review): the loop variable `round` shadows the builtin of the same name.
for round in range(numRounds):
    if qBot is None:
        # A-Bot only: replay the recorded question and answer of this round
        aBot.observe(
            round,
            ques=hist_ques_tensors[round],
            quesLens=hist_ques_lens[round])
        aBot.observe(
            round,
            ans=hist_ans_tensors[round],
            ansLens=hist_ans_lens[round])
        _ = aBot.forward()
        # The decoded answer is not printed in this loop and is overwritten
        # on the next round.
        answers, ansLens = aBot.forwardDecode(
            inference='greedy', beamSize=beamSize)

    elif aBot is not None and qBot is not None:
        # Both bots: the Q-Bot generates the question, the A-Bot answers it,
        # and each observes both utterances.
        questions, quesLens = qBot.forwardDecode(
            beamSize=beamSize, inference='greedy')
        qBot.observe(round, ques=questions, quesLens=quesLens)
        aBot.observe(round, ques=questions, quesLens=quesLens)
        answers, ansLens = aBot.forwardDecode(
            beamSize=beamSize, inference='greedy')
        aBot.observe(round, ans=answers, ansLens=ansLens)
        qBot.observe(round, ans=answers, ansLens=ansLens)
        
    # NOTE(review): the recorded history is printed in both modes, i.e. also
    # when the questions/answers were generated by the bots.
    print("Q%d: "%(round+1), to_str_gt(hist_ques_tensors[round][0]))
    print("A%d: "%(round+1), to_str_gt(hist_ans_tensors[round][0]))
        
# After processing history
if qBot is None:
    # Ask the selected question and greedily decode the A-Bot's answer
    aBot.observe(
        numRounds,
        ques=question_tensor,
        quesLens=question_lens)
    answers, ansLens = aBot.forwardDecode(
        inference='greedy', beamSize=beamSize)
    
    # Printing
    print("Q%d: "%(numRounds+1), to_str_gt(question_tensor[0]))
    print("A%d: "%(numRounds+1), to_str_pred(answers[0], ansLens[0]))
    
elif aBot is not None and qBot is not None:
    questions, quesLens = qBot.forwardDecode(
        beamSize=beamSize, inference='greedy')
    qBot.observe(numRounds, ques=questions, quesLens=quesLens)
    aBot.observe(numRounds, ques=questions, quesLens=quesLens)
    answers, ansLens = aBot.forwardDecode(
        beamSize=beamSize, inference='greedy')
    aBot.observe(numRounds, ans=answers, ansLens=ansLens)
    qBot.observe(numRounds, ans=answers, ansLens=ansLens)

    # Printing
    print("Q%d: "%(numRounds+1), to_str_pred(questions[0], quesLens[0]))
    print("A%d: "%(numRounds+1), to_str_pred(answers[0], ansLens[0]))

In [None]:
# Replay the dialog history, then rank the candidate answer options for the
# selected question and decode the A-Bot's own answer.
if aBot:
    aBot.eval(), aBot.reset()
    aBot.observe(
        -1, image=img_feats, caption=caption_tensor, captionLens=caption_lens)

if qBot:
    qBot.eval(), qBot.reset()
    qBot.observe(-1, caption=caption_tensor, captionLens=caption_lens)

display(Image(img_path))

print("Caption: ", to_str_gt(caption_tensor[0]))

numRounds = len(history)
beamSize = params['beamSize']
ranks = []
for roundIdx in range(numRounds):
    if qBot is None:
        # History turns carry a ground-truth answer but no option list, so
        # they are only replayed (same calls as the dialog cell above); the
        # per-round tensors come from the preprocessed history, not from the
        # `questions` / `answers` names that earlier cells overwrite.
        aBot.observe(
            roundIdx,
            ques=hist_ques_tensors[roundIdx],
            quesLens=hist_ques_lens[roundIdx])
        aBot.observe(
            roundIdx,
            ans=hist_ans_tensors[roundIdx],
            ansLens=hist_ans_lens[roundIdx])
        _ = aBot.forward()

    elif aBot is not None and qBot is not None:
        questions, quesLens = qBot.forwardDecode(
            beamSize=beamSize, inference='greedy')
        qBot.observe(roundIdx, ques=questions, quesLens=quesLens)
        aBot.observe(roundIdx, ques=questions, quesLens=quesLens)
        answers, ansLens = aBot.forwardDecode(
            beamSize=beamSize, inference='greedy')
        aBot.observe(roundIdx, ans=answers, ansLens=ansLens)
        qBot.observe(roundIdx, ans=answers, ansLens=ansLens)

    print("Q%d: "%(roundIdx+1), to_str_gt(hist_ques_tensors[roundIdx][0]))
    print("A%d: "%(roundIdx+1), to_str_gt(hist_ans_tensors[roundIdx][0]))

# After processing history
if qBot is None:
    aBot.observe(
        numRounds,
        ques=question_tensor,
        quesLens=question_lens)

    if options:
        # Pad the candidate options (index arrays, already wrapped in
        # <START>/<END>) into a (1, numOptions, maxLen) batch; 0 is padding.
        # NOTE(review): assumes evalOptions accepts options shaped
        # (batch, numOptions, maxLen) as in the dataset loop -- confirm.
        maxOptionLen = max([len(opt) for opt in options] + [1])
        optionBatch = torch.LongTensor(1, len(options), maxOptionLen).fill_(0)
        for optId, opt in enumerate(options):
            if len(opt) > 0:
                optionBatch[0, optId, :len(opt)] = torch.from_numpy(opt)
        optionBatchLens = torch.LongTensor([[len(opt) for opt in options]])
        if params['useGPU']:
            optionBatch = optionBatch.cuda()
            optionBatchLens = optionBatchLens.cuda()
        optionBatch = Variable(optionBatch, volatile=True)
        optionBatchLens = Variable(optionBatchLens, volatile=True)

        logProbs = aBot.evalOptions(optionBatch, optionBatchLens,
                                    scoringFunction)
        print("log probs = ", logProbs)
        # rankOptions takes (options, scores) and returns scores sorted in
        # ascending order together with the matching option positions.
        batchRanks = rankOptions(optionBatch, logProbs)
        ranks.append(batchRanks)

        sortedScore, sortedInds = batchRanks
        # First five entries of the ascending ordering
        # (presumably lower maskedNll = better; confirm).
        for optPos in list(sortedInds.data[0])[:5]:
            print("\t", " ".join(options_tokens[int(optPos)][1:-1]))

    answers, ansLens = aBot.forwardDecode(
        inference='greedy', beamSize=beamSize)

    # Printing
    print("Q%d: "%(numRounds+1), to_str_gt(question_tensor[0]))
    print("A%d: "%(numRounds+1), to_str_pred(answers[0], ansLens[0]))

elif aBot is not None and qBot is not None:
    questions, quesLens = qBot.forwardDecode(
        beamSize=beamSize, inference='greedy')
    qBot.observe(numRounds, ques=questions, quesLens=quesLens)
    aBot.observe(numRounds, ques=questions, quesLens=quesLens)
    answers, ansLens = aBot.forwardDecode(
        beamSize=beamSize, inference='greedy')
    aBot.observe(numRounds, ans=answers, ansLens=ansLens)
    qBot.observe(numRounds, ans=answers, ansLens=ansLens)

    # Printing
    print("Q%d: "%(numRounds+1), to_str_pred(questions[0], quesLens[0]))
    print("A%d: "%(numRounds+1), to_str_pred(answers[0], ansLens[0]))