In [1]:
# open types.txt

with open('abstract_v002_question_types.txt') as f:
    lines = f.readlines()
with open('mscoco_question_types.txt', ) as f2:
    lines2 = f2.readlines()

## filter out  duplicates
all_types = list(set(lines + lines2))
print(len(all_types))

# filter out suffixes (only keep shortest)
# e.g. 'what is' and 'what' are both in the list, but we only want 'what'
all_types.sort(key=len, reverse=False)

# remove \n
all_types = [t.strip() for t in all_types]

print(all_types[0])
types = []
for t in all_types:
    if not any([t2 in t for t2 in types]):
        types.append(t)
for t in types:
    print("'" + t + "',")
print(len(types))

103
is
'is',
'do',
'can',
'why',
'are',
'has',
'how',
'was',
'who',
'what',
'could',
'which',
'did the',
'will the',
'none of the above',
15


In [1]:
from transformers import T5ForConditionalGeneration, T5Tokenizer
import torch

def generate_masked_template(question, demonstrations, model, tokenizer):
    # Construct input with explicit instruction to generate a template
    input_text = f"{question}<extra_id_0>.{' '.join(demonstrations)}"
    # Tokenize the input
    input_ids = tokenizer.encode(input_text, return_tensors="pt").to(device)
    
    # Generate output by filling in the <extra_id_0> token
    output_ids = model.generate(input_ids, max_length=100, num_beams=5, no_repeat_ngram_size=2, top_k=50, top_p=0.95)
    
    # Decode the generated output
    generated_template = tokenizer.decode(output_ids[0], skip_special_tokens=True).strip()

    # only until first [mask]
    generated_template = generated_template.split("[mask]")[0] + "[mask]."
    
    return generated_template


# Load pre-trained T5 model and tokenizer
model_name = "google/t5-v1_1-small"
device = "cuda" if torch.cuda.is_available() else "cpu"
print("Using device:", device)
model = T5ForConditionalGeneration.from_pretrained(model_name).to(device)
tokenizer = T5Tokenizer.from_pretrained(model_name, legacy=False)

# Examples of demonstrations for the "What color is" question type
demonstrations_color = [
    "What color is the floor of this area? The color of the floor of this area is [mask].",
    "What color is the pillow the cat is on? The color of the pillow the cat is on is [mask].",
    "What color is the child’s shorts? The color of the child’s shorts is [mask].",
    "What color is the lettering on the business sign? The color of the lettering on the business sign is [mask].",
    "What color is the sky in the picture? The color of the sky in the picture is [mask].",
    "What color is the car parked on the street? The color of the car parked on the street is [mask].",
    "What color is the dress the person is wearing? The color of the dress the person is wearing is [mask].",
    "What color is the flower in the garden? The color of the flower in the garden is [mask].",
    "What color is the book on the shelf? The color of the book on the shelf is [mask].",
    "What color is the umbrella in the picture? The color of the umbrella in the picture is [mask]."
]

# Example usage
question_example = "What color is the fence behind the man?"
generated_template = generate_masked_template(question_example, demonstrations_color, model, tokenizer)

# Print the result
print("Question:", question_example)
print("Generated Template:", generated_template)



Using device: cuda
Question: What color is the fence behind the man?
Generated Template: The color of the fence behind the man is [mask].


In [2]:
# open demonstration from json file and load them into dict
import json

with open('demonstartion_t5.json') as f:
    demonstrations = json.load(f)

print(len(demonstrations))

14


In [5]:
import tqdm

# create json file with all questions of dataset
def create_all(demonstrations, model, tokenizer, name="train"):

    key_errors = []
    #'sentences'+ name +'.json'
    with open('sentences_'+ name +'.json', 'a') as f:
        #with open('Annotations/MultipleChoice_mscoco_'+ name + '2014_questions.json', 'r') as question_file:
        with open('Annotations/MultipleChoice_abstract_v002_val2015_questions.json', 'r') as question_file:
            #with open('Annotations/mscoco_'+ name + '2014_annotations.json', 'r') as answer_file:
            with open('Annotations/abstract_v002_val2015_annotations.json', 'r') as answer_file:
                question_data = json.load(question_file)
                answer_data = json.load(answer_file)

                annntoations = (answer_data['annotations'])
                questions = (question_data['questions'])
                
                found_last = False
                for idx, question in enumerate(tqdm.tqdm(questions, desc="Creating Templates")):
                    
                    
                    image_id = question['image_id']
                    question_text = question['question']
                    question_id = question['question_id']
                    answers_text = question['multiple_choices']
                    """
                    if question_id == 800532:
                        print("found last; continue")
                        found_last = True
                        continue
                    
                    if not found_last:
                        continue
                    """
                    # find quesion id and image id in annotations
                    for annotation in annntoations:
                        if annotation['question_id'] == question_id and annotation['image_id'] == image_id:
                            correct_answer = annotation['multiple_choice_answer']
                            # get the index of the answer in the list of answers
                            for i, possible_answer in enumerate(answers_text):
                                if possible_answer == correct_answer:
                                    index = i
                                    break
                    
                    # find demonstrations for question type
                    # first word of question
                    question_type = question_text.split(" ")[0].lower()

                    try:
                        demonstrations_for_question = demonstrations[question_type]
                    except KeyError:
                        # if question type not in demonstrations, skip question
                        key_errors.append(idx)
                        continue
                    
                    # generate template
                    generated_template = generate_masked_template(question_text, demonstrations_for_question, model, tokenizer)

                    # replace [mask] in template with answers
                    answers = [generated_template.replace("[mask]", f"{answer}") for answer in answers_text]
                    
                    # safe everything in json file
                    json.dump({"image_id": image_id, "question_id": question_id, "question": question_text,"answers":answers, "correct_answer": index}, f)
                    
                    f.write(",\n")
    print("key errors: ", set(key_errors))
    print(f"errors: {len(key_errors)}")
    print(f"errors: {len(set(key_errors))}")
    return key_errors
                
                


In [10]:
import torch
print(torch.cuda.is_available())
print(model.device)

True
cpu


In [6]:
errors = create_all(demonstrations, model, tokenizer, name="abstract")

Creating Templates: 100%|██████████| 30000/30000 [4:08:49<00:00,  2.01it/s]  

key errors:  {24578, 3, 24584, 16393, 8204, 24589, 16398, 15, 16399, 16404, 21, 8214, 23, 24597, 24601, 27, 24604, 8222, 35, 8227, 37, 16419, 24613, 45, 24621, 49, 8242, 16434, 8246, 8247, 16441, 8254, 24638, 8257, 16449, 69, 24645, 72, 24649, 8267, 16461, 16462, 8271, 24659, 86, 95, 24671, 24673, 103, 8297, 107, 8301, 16493, 114, 24696, 121, 24697, 24698, 8320, 8322, 8325, 24709, 24713, 143, 8336, 145, 24720, 24721, 24729, 155, 8351, 16552, 8361, 8364, 173, 24748, 24750, 177, 16562, 179, 24755, 183, 8375, 24763, 16575, 8387, 16579, 24771, 24773, 205, 24781, 16593, 24787, 24788, 218, 8413, 24797, 8415, 224, 8416, 8417, 8418, 8420, 24800, 8422, 24808, 16617, 234, 16618, 16624, 16626, 16627, 8436, 24821, 248, 16632, 16636, 8449, 24837, 8455, 16648, 267, 8460, 24847, 8465, 24849, 24853, 16662, 16663, 24862, 16677, 8491, 16685, 303, 8495, 16690, 307, 24885, 311, 8504, 8505, 24889, 316, 319, 8513, 24898, 16707, 24899, 325, 326, 16710, 16711, 24901, 24907, 334, 335, 24913, 8532, 8535, 344, 1




In [18]:
key_errors=  {'an', 'a', 'sunny', 'pet', 'natural', 'two', 'stop', 'the', 'about', 'no', 'by', 'north,', 'desert', 'granite', "men's", 'that', 'had', 'july', 'evidence', 'considering', 'lady', 'bud', 'heads', 'her', 'too', 'throwing', 'farmland', 'speaker', 'drywall', 'excluding', 'through', 'as', 'looking', 'they', 'someone', 'subtract', 'weight', 'between', 'instead', 'from', 'balancing', 'would', 'to', 'upside', 'bags', 'their', 'dog', 'insignia', 'people', 'ignoring', 'describe', 'into', 'whether', "isn't", 'yes', 'both', 'wines', 'even', 'he', 'bike', 'outside', 'whom', 'food', 'based', 'cabinets', 'baseball', 'checkerboard', 'have', "how's", 'bird', "there's", 'airplane', 'colors', 'count', '3', "aren't", 'animals', 'onto', 'name', '4', 'did', 'wool', 'brown', 'sun', 'were', 'lifting', 'painting', 'height', 'cloudy', 'forks', 'type', 'most', 'given', 'four', 'beside', 'color', 'there', 'everyone', 'not', 'behind', 'indoor', '1.', "shouldn't", 'out', 'before', 'anyone', 'urban', 'dinner', 'hunted', 'only', 'relationship', 'on', 'prom', 'gas', 'company', 'bedroom', 'it', 'fries', 'game', 'octagon', 'small', 'junction', 'walk', 'all', 'formal', 'roughly', 'lamps', 'in', 'upon', 'what,', 'winter', 'traditionally,', 'no,', 'indoors', "don't", 'besides', 'approximately', 'pizza', 'shadows', 'uphill', "doesn't", 'after', 'eating', "what's", 'at', 'translate', 'his', 'see', 'does', 'standing', 'yes,', 'when', 'tennis', 'windows', 'total', 'daytime', "wouldn't", 'east', 'airplanes', 'kitchen', 'laptop', 'approaching', 'since', 'will', "where's", 'male', 'red', 'milk', 'during', 'where', 'brand', 'adults', 'man', 'spell', 'sailboat', 'island', 'equipment', 'prepare', 'approximately,', 'another', 'inside', 'houses', 'sexes', 'this', 'three', 'pillows', "it's", 'liquid,', 'might', 'with', 'using', 'for', 'placed', 'fill', 'seat,', 'gene', 'if', 'veterans', 'under', 'lid', 'according', 'cheese', 'up', 'something', 'hazy', 'black', 'any', 'true', 'viewing', 'tell', 'breakfast', 'scale', 'should', 'adult', 'whose', 'legally,', 'unhappy', 'must', 'bacon', 'rope', "he's", 'around', 'aside', 'figure', 'each', 'night', 'these', 'am', 'six', 'please', 'animal', 'long', 'toward', 'movie', 'think', 'android', "who's", 'i', 'number', 'of', 'eyes,', 'intelligent', 'other', 'guy', "can't", 'overcast', 'wristwatch,', 'row', "england's", 'curly', 'while', 'same', 'including', 'evening', 'those', 'one', 'right', 'preservatives', 'assuming', 'two.', 'female', 'delicious', 'judging', 'jars', 'many'}

print(len(set(key_errors)))

265


In [27]:
import fileinput
import json

def delete_lines_from_json(file_path, line_numbers_to_delete):
    current_line_number = 0

    # Iterate through the lines of the file in-place
    for line in fileinput.input(file_path, inplace=True):
        current_line_number += 1

        # Check if the current line number should be deleted
        if current_line_number not in line_numbers_to_delete:
            print(line, end='')  # Print the line back to the file



In [32]:
delete_lines_from_json('Full_annotations_sentences.json', errors)

In [1]:
from VQA_Datasetv2 import VQA_Dataset_Sentences
from torch.utils.data import DataLoader
import torch

device = "cuda" if torch.cuda.is_available() else "cpu"
dataset = VQA_Dataset_Sentences()
dataset.load(device, "train", 100)

type:  <class 'int'>
type:  <class 'int'>
type:  <class 'int'>
type:  <class 'int'>
type:  <class 'int'>
type:  <class 'int'>
type:  <class 'int'>
type:  <class 'int'>
type:  <class 'int'>
type:  <class 'int'>
type:  <class 'int'>
type:  <class 'int'>
type:  <class 'int'>
type:  <class 'int'>
type:  <class 'int'>
type:  <class 'int'>
type:  <class 'int'>
type:  <class 'int'>
type:  <class 'int'>
type:  <class 'int'>
type:  <class 'int'>
type:  <class 'int'>
type:  <class 'int'>
type:  <class 'int'>
type:  <class 'int'>
type:  <class 'int'>
type:  <class 'int'>
type:  <class 'int'>
type:  <class 'int'>
type:  <class 'int'>
type:  <class 'int'>
type:  <class 'int'>
type:  <class 'int'>
type:  <class 'int'>
type:  <class 'int'>
type:  <class 'int'>
type:  <class 'int'>
type:  <class 'int'>
type:  <class 'int'>
type:  <class 'int'>
type:  <class 'int'>
type:  <class 'int'>
type:  <class 'int'>
type:  <class 'int'>
type:  <class 'int'>
type:  <class 'int'>
type:  <class 'int'>
type:  <class

: 