For each hyperparameter (HP) setting, we have one qid-to-output file for each of gp 0, 1, 2, and 3.

We first merge all four files into a single dictionary, then score it against the validation annotations.

In [1]:
import pickle
import json

In [2]:
def _soft_vqa_score(prediction, answers):
    """Score one prediction against its annotated answers.

    The score is ``min(n / 3, 1)`` where ``n`` is the number of annotated
    answers whose ``'answer'`` field equals ``prediction`` exactly.
    """
    num_matches = sum(1 for answer in answers if prediction == answer['answer'])
    return min(num_matches / 3.0, 1)


def evaluate_results(
    hp_folder,
    num_groups=4,
    eval_root="okvqa_eval",
    val_annotations_path="dataset/okvqa_annotations/mscoco_val2014_annotations.json",
):
    """Merge the per-group prediction files of one run and print its accuracy.

    Reads ``{eval_root}/{hp_folder}/qid_to_output_gp_{i}.pkl`` for every
    ``i`` in ``range(num_groups)``, merges the loaded dicts (later groups
    overwrite earlier ones on duplicate question ids), and scores every
    annotation in ``val_annotations_path`` whose ``question_id`` has a
    prediction.

    Predictions are only lower-cased before the exact-match comparison;
    no other normalisation (e.g. removing ``</s>``) is applied.

    Args:
        hp_folder: Name of the sub-folder of ``eval_root`` holding the run.
        num_groups: Number of ``qid_to_output_gp_*.pkl`` shards to merge.
        eval_root: Directory containing the per-run folders.
        val_annotations_path: JSON file with an ``"annotations"`` list whose
            items carry ``"question_id"`` and ``"answers"``.

    Returns:
        None. The merged prediction count, the summed score, the number of
        scored questions and the accuracy are printed. When no question is
        scored the accuracy is printed as ``nan`` instead of raising
        ``ZeroDivisionError``.

    Raises:
        AssertionError: If some predicted question id has no annotation
            (or an id is annotated more than once).
    """
    full_questions_to_outputs = {}
    for gp_id in range(num_groups):
        file_path = f"{eval_root}/{hp_folder}/qid_to_output_gp_{gp_id}.pkl"
        # NOTE(security): pickle.load can execute arbitrary code; only point
        # this at prediction files produced by a trusted run.
        with open(file_path, "rb") as f:
            full_questions_to_outputs.update(pickle.load(f))
    print(f"len(full_questions_to_outputs.keys()): {len(full_questions_to_outputs)}")

    with open(val_annotations_path) as json_file:
        val_annotations = json.load(json_file)

    total_scores = 0
    total_tests = 0
    for annotation in val_annotations["annotations"]:
        question_id = annotation["question_id"]
        if question_id not in full_questions_to_outputs:
            # No prediction was produced for this question; leave it out.
            continue
        llava_output = full_questions_to_outputs[question_id].lower()
        total_scores += _soft_vqa_score(llava_output, annotation['answers'])
        total_tests += 1
    # Every prediction must have been matched to exactly one annotation.
    assert total_tests == len(full_questions_to_outputs), (
        f"scored {total_tests} questions but loaded "
        f"{len(full_questions_to_outputs)} predictions"
    )

    # Accuracy is undefined when nothing was scored; report nan, don't crash.
    accuracy = total_scores / total_tests if total_tests else float("nan")

    print(f"total_scores: {total_scores}")
    print(f"total_tests: {total_tests}")
    print(f"acc: {accuracy}")

### Tuning learning rates

In [9]:
# Score the predictions stored under okvqa_eval/hp_epoch_1_lr_2e-5
# (folder name suggests 1 epoch, learning rate 2e-5 -- confirm against the training config).
evaluate_results("hp_epoch_1_lr_2e-5")

len(full_questions_to_outputs.keys()): 5046
total_scores: 2938.333333333319
total_tests: 5046
acc: 0.5823094200026395


In [10]:
# Score the predictions stored under okvqa_eval/hp_epoch_1_lr_5e-5
# (folder name suggests 1 epoch, learning rate 5e-5 -- confirm against the training config).
evaluate_results("hp_epoch_1_lr_5e-5")

len(full_questions_to_outputs.keys()): 5046
total_scores: 2781.999999999987
total_tests: 5046
acc: 0.5513277843836676


In [11]:
# Score the predictions stored under okvqa_eval/hp_epoch_1_lr_1e-4
# (folder name suggests 1 epoch, learning rate 1e-4 -- confirm against the training config).
evaluate_results("hp_epoch_1_lr_1e-4")

len(full_questions_to_outputs.keys()): 5046
total_scores: 2289.000000000007
total_tests: 5046
acc: 0.45362663495838423


In [12]:
# Score the predictions stored under okvqa_eval/hp_epoch_1_lr_5e-4
# (folder name suggests 1 epoch, learning rate 5e-4 -- confirm against the training config).
evaluate_results("hp_epoch_1_lr_5e-4")

len(full_questions_to_outputs.keys()): 5046
total_scores: 2.6666666666666665
total_tests: 5046
acc: 0.0005284713964856651


### Tuning the number of epochs

In [5]:
# Score the predictions stored under okvqa_eval/hp_epoch_2_lr_1e-4
# (folder name suggests 2 epochs, learning rate 1e-4 -- confirm against the training config).
evaluate_results("hp_epoch_2_lr_1e-4")

len(full_questions_to_outputs.keys()): 5046
total_scores: 2391.6666666666683
total_tests: 5046
acc: 0.4739727837230813


In [7]:
# Score the predictions stored under okvqa_eval/hp_epoch_2_lr_2e-5
# (folder name suggests 2 epochs, learning rate 2e-5 -- confirm against the training config).
evaluate_results("hp_epoch_2_lr_2e-5")

len(full_questions_to_outputs.keys()): 5046
total_scores: 2996.6666666666483
total_tests: 5046
acc: 0.5938697318007626


### Tuning number of entries

In [6]:
# 10 entries: this is the same folder scored in the learning-rate section,
# re-evaluated here as the reference point for the entry-count comparison.
# NOTE(review): what an "entry" is is not defined in this notebook -- confirm.
evaluate_results("hp_epoch_1_lr_2e-5")

len(full_questions_to_outputs.keys()): 5046
total_scores: 2938.333333333319
total_tests: 5046
acc: 0.5823094200026395


In [7]:
# Score the predictions stored under okvqa_eval/hp_5_entries
# (folder name suggests 5 entries; other hyperparameters are not shown here -- confirm).
evaluate_results("hp_5_entries")

len(full_questions_to_outputs.keys()): 5046
total_scores: 2937.3333333333185
total_tests: 5046
acc: 0.5821112432289572


In [39]:
# Score the predictions stored under okvqa_eval/hp_0_entry
# (folder name suggests 0 entries; other hyperparameters are not shown here -- confirm).
evaluate_results("hp_0_entry")

len(full_questions_to_outputs.keys()): 5046
total_scores: 2930.999999999985
total_tests: 5046
acc: 0.5808561236623038


### Other grouping of categories

In [13]:
# Number of training examples in each question category

In [17]:
# Load the training annotations (the path points at the OK-VQA train2014 file)
# and print a quick preview of their structure.
train_annotations_path = "dataset/okvqa_annotations/mscoco_train2014_annotations.json"
with open(train_annotations_path) as annotations_file:
    train_annotations = json.loads(annotations_file.read())

# Top-level keys of the annotation file.
print(f"Keys: {train_annotations.keys()}")
# Human-readable name of the question type coded as 'one'.
print(f"['question_types']['one']: {train_annotations['question_types']['one']}")
# Number of annotated training questions.
print(f"Num annotations: {len(train_annotations['annotations'])}")
# First record, to show the per-annotation schema.
print(f"annotation[0]: {train_annotations['annotations'][0]}")

Keys: dict_keys(['license', 'data_subtype', 'question_types', 'annotations', 'info', 'data_type'])
['question_types']['one']: Vehicles and Transportation
Num annotations: 9009
annotation[0]: {'image_id': 51606, 'answer_type': 'other', 'question_type': 'four', 'question_id': 516065, 'answers': [{'answer_id': 1, 'raw_answer': 'pony tail', 'answer_confidence': 'yes', 'answer': 'pony tail'}, {'answer_id': 2, 'raw_answer': 'pony tail', 'answer_confidence': 'yes', 'answer': 'pony tail'}, {'answer_id': 3, 'raw_answer': 'pony tail', 'answer_confidence': 'yes', 'answer': 'pony tail'}, {'answer_id': 4, 'raw_answer': 'pony tail', 'answer_confidence': 'yes', 'answer': 'pony tail'}, {'answer_id': 5, 'raw_answer': 'pony tail', 'answer_confidence': 'yes', 'answer': 'pony tail'}, {'answer_id': 6, 'raw_answer': 'pony tail', 'answer_confidence': 'yes', 'answer': 'pony tail'}, {'answer_id': 7, 'raw_answer': 'braid', 'answer_confidence': 'yes', 'answer': 'braid'}, {'answer_id': 8, 'raw_answer': 'braid', '

In [24]:
# Map each question-type code to the number of training annotations of that type.
# Every declared type starts at zero so that types without questions are still listed.
qtype_to_qnum = dict.fromkeys(train_annotations['question_types'], 0)

for train_annotation in train_annotations['annotations']:
    qtype_to_qnum[train_annotation["question_type"]] += 1

# Report "<code> - <readable name>: <count>" for every type.
for qtype, num in qtype_to_qnum.items():
    print(f"{qtype} - {train_annotations['question_types'][qtype]}: {num}")

# Sanity check: the per-type counts must add up to the number of annotations.
totalNum = sum(qtype_to_qnum.values())
assert totalNum == len(train_annotations['annotations'])
print(f"Total: {totalNum}")

eight - Plants and Animals: 1476
nine - Science and Technology: 153
four - Sports and Recreation: 1184
six - Geography, History, Language and Culture: 310
two - Brands, Companies and Products: 299
other - Other: 1056
one - Vehicles and Transportation: 1423
five - Cooking and Food: 1368
ten - Weather and Climate: 229
seven - People and Everyday life: 794
three - Objects, Material and Clothing: 717
Total: 9009
