In [4]:
import pickle
import json

### Evaluate approach 1

In [22]:
def evaluate_results(hp_folder, num_groups=4, expected_num_questions=5046):
    """Score saved model outputs against the validation annotations and print the accuracy.

    Loads ``okvqa_eval/<hp_folder>/qid_to_output_gp_<i>.pkl`` for every ``i`` in
    ``range(num_groups)``, merges them into one ``{question_id: output}`` mapping and
    compares each lower-cased output with the annotated answers found in
    ``dataset/okvqa_annotations/mscoco_val2014_annotations.json``.

    A question scores ``min(matches / 3.0, 1)`` where ``matches`` is the number of
    annotated answers that are exactly equal to the lower-cased output. Annotations
    whose ``question_id`` has no saved output are skipped.

    Args:
        hp_folder: Sub-folder of ``okvqa_eval/`` that holds the per-group pickles.
        num_groups: How many per-group pickle files to merge. Defaults to 4, the
            number of group files read for approach 1.
        expected_num_questions: Number of outputs that must have been loaded.
            Defaults to 5046, the value that used to be hard-coded in the check.
            Pass ``None`` to skip this particular check.

    Raises:
        AssertionError: If some loaded output was not matched to an annotation, or
            the number of loaded outputs differs from ``expected_num_questions``.
        ValueError: If nothing was scored, so no accuracy can be computed.
    """
    # NOTE(review): pickle.load can execute arbitrary code; only point this at
    # result files you produced yourself.
    full_questions_to_outputs = {}
    for gp_id in range(num_groups):
        file_path = f"okvqa_eval/{hp_folder}/qid_to_output_gp_{gp_id}.pkl"
        with open(file_path, "rb") as f:
            full_questions_to_outputs.update(pickle.load(f))
    print(f"len(full_questions_to_outputs.keys()): {len(full_questions_to_outputs.keys())}")

    # okvqa_annotations
    val_annotations_path = "dataset/okvqa_annotations/mscoco_val2014_annotations.json"
    with open(val_annotations_path) as json_file:
        val_annotations = json.load(json_file)

    total_scores = 0
    total_tests = 0
    for annotation in val_annotations["annotations"]:
        question_id = annotation["question_id"]
        if question_id not in full_questions_to_outputs:
            continue
        # The only post-processing is lower-casing the prediction; annotated
        # answers are compared exactly as stored. No `</s>` stripping happens here.
        prediction = full_questions_to_outputs[question_id].lower()
        matches = sum(1 for answer in annotation["answers"] if answer["answer"] == prediction)
        # NOTE(review): looks like the VQA-style soft accuracy (full credit once
        # three annotated answers agree) - confirm against the benchmark's scorer.
        total_scores += min(matches / 3.0, 1)
        total_tests += 1

    num_outputs = len(full_questions_to_outputs)
    assert total_tests == num_outputs, (
        f"scored {total_tests} questions but loaded {num_outputs} outputs"
    )
    if expected_num_questions is not None:
        assert num_outputs == expected_num_questions, (
            f"expected {expected_num_questions} outputs, loaded {num_outputs}"
        )
    if total_tests == 0:
        raise ValueError("no questions were scored; cannot compute an accuracy")

    print(f"total_scores: {total_scores}")
    print(f"total_tests: {total_tests}")
    print(f"acc: {total_scores / total_tests}")

In [23]:
# Approach 1: score the outputs stored under okvqa_eval/classifier_grouped/.
evaluate_results("classifier_grouped")

len(full_questions_to_outputs.keys()): 5046
total_scores: 2938.333333333319
total_tests: 5046
acc: 0.5823094200026395


In [24]:
# Approach 1: score the outputs stored under okvqa_eval/ground_truth_grouped/.
evaluate_results("ground_truth_grouped")

len(full_questions_to_outputs.keys()): 5046
total_scores: 2943.9999999999854
total_tests: 5046
acc: 0.5834324217201715


### Evaluate approach 2

In [8]:
def evaluate_results(hp_folder, num_groups=3, expected_num_questions=5046):
    """Score saved model outputs against the validation annotations and print the accuracy.

    Loads ``okvqa_eval/<hp_folder>/qid_to_output_gp_<i>.pkl`` for every ``i`` in
    ``range(num_groups)``, merges them into one ``{question_id: output}`` mapping and
    compares each lower-cased output with the annotated answers found in
    ``dataset/okvqa_annotations/mscoco_val2014_annotations.json``.

    A question scores ``min(matches / 3.0, 1)`` where ``matches`` is the number of
    annotated answers that are exactly equal to the lower-cased output. Annotations
    whose ``question_id`` has no saved output are skipped.

    Args:
        hp_folder: Sub-folder of ``okvqa_eval/`` that holds the per-group pickles.
        num_groups: How many per-group pickle files to merge. Defaults to 3, the
            number of group files read for approach 2.
        expected_num_questions: Number of outputs that must have been loaded.
            Defaults to 5046, the value that used to be hard-coded in the check.
            Pass ``None`` to skip this particular check.

    Raises:
        AssertionError: If some loaded output was not matched to an annotation, or
            the number of loaded outputs differs from ``expected_num_questions``.
        ValueError: If nothing was scored, so no accuracy can be computed.
    """
    # NOTE(review): pickle.load can execute arbitrary code; only point this at
    # result files you produced yourself.
    full_questions_to_outputs = {}
    for gp_id in range(num_groups):
        file_path = f"okvqa_eval/{hp_folder}/qid_to_output_gp_{gp_id}.pkl"
        with open(file_path, "rb") as f:
            full_questions_to_outputs.update(pickle.load(f))
    print(f"len(full_questions_to_outputs.keys()): {len(full_questions_to_outputs.keys())}")

    # okvqa_annotations
    val_annotations_path = "dataset/okvqa_annotations/mscoco_val2014_annotations.json"
    with open(val_annotations_path) as json_file:
        val_annotations = json.load(json_file)

    total_scores = 0
    total_tests = 0
    for annotation in val_annotations["annotations"]:
        question_id = annotation["question_id"]
        if question_id not in full_questions_to_outputs:
            continue
        # The only post-processing is lower-casing the prediction; annotated
        # answers are compared exactly as stored. No `</s>` stripping happens here.
        prediction = full_questions_to_outputs[question_id].lower()
        matches = sum(1 for answer in annotation["answers"] if answer["answer"] == prediction)
        # NOTE(review): looks like the VQA-style soft accuracy (full credit once
        # three annotated answers agree) - confirm against the benchmark's scorer.
        total_scores += min(matches / 3.0, 1)
        total_tests += 1

    num_outputs = len(full_questions_to_outputs)
    assert total_tests == num_outputs, (
        f"scored {total_tests} questions but loaded {num_outputs} outputs"
    )
    if expected_num_questions is not None:
        assert num_outputs == expected_num_questions, (
            f"expected {expected_num_questions} outputs, loaded {num_outputs}"
        )
    if total_tests == 0:
        raise ValueError("no questions were scored; cannot compute an accuracy")

    print(f"total_scores: {total_scores}")
    print(f"total_tests: {total_tests}")
    print(f"acc: {total_scores / total_tests}")

In [13]:
def combine_outputs(gp_id, cls, num_files=None):
    """Merge the chunked output pickles of one approach-2 group into a single pickle.

    Reads ``okvqa_eval/<folder>/gp<gp_id>_outputs/saved_outputs_<i>.pkl`` for every
    ``i`` in ``range(num_files)``, merges the loaded mappings in that order (when a
    key repeats, the later chunk wins) and writes the result to
    ``okvqa_eval/<folder>/qid_to_output_gp_<gp_id>.pkl``.

    Args:
        gp_id: Group index. 0, 1 and 2 have a known chunk count.
        cls: Truthy selects the ``approach_2_cls`` folder, falsy selects
            ``approach_2_ground_truth``.
        num_files: Number of chunk files to merge. When ``None`` (default) the
            known count for the selected folder and ``gp_id`` is used.

    Raises:
        ValueError: If ``num_files`` is not given and ``gp_id`` has no known chunk
            count (previously this surfaced as an unbound ``num_files`` variable).
    """
    # Number of saved_outputs_<i>.pkl chunks per group, keyed by result folder.
    chunk_counts = {
        "approach_2_cls": {0: 22, 1: 16, 2: 14},
        "approach_2_ground_truth": {0: 18, 1: 20, 2: 13},
    }
    eval_folder = "approach_2_cls" if cls else "approach_2_ground_truth"

    if num_files is None:
        known_counts = chunk_counts[eval_folder]
        if gp_id not in known_counts:
            raise ValueError(
                f"unknown gp_id {gp_id!r} for {eval_folder}; "
                f"expected one of {sorted(known_counts)} or an explicit num_files"
            )
        num_files = known_counts[gp_id]

    # NOTE(review): pickle.load can execute arbitrary code; only merge chunk
    # files you produced yourself.
    questions_to_outputs = {}
    for i in range(num_files):
        chunk_path = f"okvqa_eval/{eval_folder}/gp{gp_id}_outputs/saved_outputs_{i}.pkl"
        with open(chunk_path, "rb") as f:
            questions_to_outputs.update(pickle.load(f))

    output_path = f"okvqa_eval/{eval_folder}/qid_to_output_gp_{gp_id}.pkl"
    with open(output_path, 'wb') as f:
        pickle.dump(questions_to_outputs, f)

In [3]:
# Approach 2 cls routed
# combine_outputs(0, True)
# combine_outputs(1, True)
# combine_outputs(2, True)

In [9]:
# Approach 2, cls routed: score the merged outputs in okvqa_eval/approach_2_cls/.
evaluate_results("approach_2_cls")

len(full_questions_to_outputs.keys()): 5046
total_scores: 2975.666666666651
total_tests: 5046
acc: 0.5897080195534385


In [14]:
# Approach 2 ground truth routed
# combine_outputs(0, False)
# combine_outputs(1, False)
# combine_outputs(2, False)

In [15]:
# Approach 2, ground truth routed: score the merged outputs in okvqa_eval/approach_2_ground_truth/.
evaluate_results("approach_2_ground_truth")

len(full_questions_to_outputs.keys()): 5046
total_scores: 2973.333333333318
total_tests: 5046
acc: 0.5892456070815136


### Evaluate approach 3

In [16]:
def evaluate_results(hp_folder, num_groups=2, expected_num_questions=5046):
    """Score saved model outputs against the validation annotations and print the accuracy.

    Loads ``okvqa_eval/<hp_folder>/qid_to_output_gp_<i>.pkl`` for every ``i`` in
    ``range(num_groups)``, merges them into one ``{question_id: output}`` mapping and
    compares each lower-cased output with the annotated answers found in
    ``dataset/okvqa_annotations/mscoco_val2014_annotations.json``.

    A question scores ``min(matches / 3.0, 1)`` where ``matches`` is the number of
    annotated answers that are exactly equal to the lower-cased output. Annotations
    whose ``question_id`` has no saved output are skipped.

    Args:
        hp_folder: Sub-folder of ``okvqa_eval/`` that holds the per-group pickles.
        num_groups: How many per-group pickle files to merge. Defaults to 2, the
            number of group files read for approach 3.
        expected_num_questions: Number of outputs that must have been loaded.
            Defaults to 5046, the value that used to be hard-coded in the check.
            Pass ``None`` to skip this particular check.

    Raises:
        AssertionError: If some loaded output was not matched to an annotation, or
            the number of loaded outputs differs from ``expected_num_questions``.
        ValueError: If nothing was scored, so no accuracy can be computed.
    """
    # NOTE(review): pickle.load can execute arbitrary code; only point this at
    # result files you produced yourself.
    full_questions_to_outputs = {}
    for gp_id in range(num_groups):
        file_path = f"okvqa_eval/{hp_folder}/qid_to_output_gp_{gp_id}.pkl"
        with open(file_path, "rb") as f:
            full_questions_to_outputs.update(pickle.load(f))
    print(f"len(full_questions_to_outputs.keys()): {len(full_questions_to_outputs.keys())}")

    # okvqa_annotations
    val_annotations_path = "dataset/okvqa_annotations/mscoco_val2014_annotations.json"
    with open(val_annotations_path) as json_file:
        val_annotations = json.load(json_file)

    total_scores = 0
    total_tests = 0
    for annotation in val_annotations["annotations"]:
        question_id = annotation["question_id"]
        if question_id not in full_questions_to_outputs:
            continue
        # The only post-processing is lower-casing the prediction; annotated
        # answers are compared exactly as stored. No `</s>` stripping happens here.
        prediction = full_questions_to_outputs[question_id].lower()
        matches = sum(1 for answer in annotation["answers"] if answer["answer"] == prediction)
        # NOTE(review): looks like the VQA-style soft accuracy (full credit once
        # three annotated answers agree) - confirm against the benchmark's scorer.
        total_scores += min(matches / 3.0, 1)
        total_tests += 1

    num_outputs = len(full_questions_to_outputs)
    assert total_tests == num_outputs, (
        f"scored {total_tests} questions but loaded {num_outputs} outputs"
    )
    if expected_num_questions is not None:
        assert num_outputs == expected_num_questions, (
            f"expected {expected_num_questions} outputs, loaded {num_outputs}"
        )
    if total_tests == 0:
        raise ValueError("no questions were scored; cannot compute an accuracy")

    print(f"total_scores: {total_scores}")
    print(f"total_tests: {total_tests}")
    print(f"acc: {total_scores / total_tests}")

In [17]:
def combine_outputs(gp_id, cls, num_files=None):
    """Merge the chunked output pickles of one approach-3 group into a single pickle.

    Reads ``okvqa_eval/<folder>/gp<gp_id>_outputs/saved_outputs_<i>.pkl`` for every
    ``i`` in ``range(num_files)``, merges the loaded mappings in that order (when a
    key repeats, the later chunk wins) and writes the result to
    ``okvqa_eval/<folder>/qid_to_output_gp_<gp_id>.pkl``.

    Args:
        gp_id: Group index. 0 and 1 have a known chunk count.
        cls: Truthy selects the ``approach_3_cls`` folder, falsy selects
            ``approach_3_ground_truth``.
        num_files: Number of chunk files to merge. When ``None`` (default) the
            known count for the selected folder and ``gp_id`` is used.

    Raises:
        ValueError: If ``num_files`` is not given and ``gp_id`` has no known chunk
            count (previously this surfaced as an unbound ``num_files`` variable).
    """
    # Number of saved_outputs_<i>.pkl chunks per group, keyed by result folder.
    chunk_counts = {
        "approach_3_cls": {0: 19, 1: 32},
        "approach_3_ground_truth": {0: 24, 1: 28},
    }
    eval_folder = "approach_3_cls" if cls else "approach_3_ground_truth"

    if num_files is None:
        known_counts = chunk_counts[eval_folder]
        if gp_id not in known_counts:
            raise ValueError(
                f"unknown gp_id {gp_id!r} for {eval_folder}; "
                f"expected one of {sorted(known_counts)} or an explicit num_files"
            )
        num_files = known_counts[gp_id]

    # NOTE(review): pickle.load can execute arbitrary code; only merge chunk
    # files you produced yourself.
    questions_to_outputs = {}
    for i in range(num_files):
        chunk_path = f"okvqa_eval/{eval_folder}/gp{gp_id}_outputs/saved_outputs_{i}.pkl"
        with open(chunk_path, "rb") as f:
            questions_to_outputs.update(pickle.load(f))

    output_path = f"okvqa_eval/{eval_folder}/qid_to_output_gp_{gp_id}.pkl"
    with open(output_path, 'wb') as f:
        pickle.dump(questions_to_outputs, f)

In [18]:
# Approach 3 cls routed
# combine_outputs(0, True)
# combine_outputs(1, True)

In [19]:
# Approach 3, cls routed: score the merged outputs in okvqa_eval/approach_3_cls/.
evaluate_results("approach_3_cls")

len(full_questions_to_outputs.keys()): 5046
total_scores: 2965.6666666666506
total_tests: 5046
acc: 0.5877262518166172


In [20]:
# Approach 3 ground truth routed
# cls=False selects the "approach_3_ground_truth" folder, so these calls write
# okvqa_eval/approach_3_ground_truth/qid_to_output_gp_0.pkl and ..._gp_1.pkl.
combine_outputs(0, False)
combine_outputs(1, False)

In [21]:
# Approach 3, ground truth routed: score the merged outputs in okvqa_eval/approach_3_ground_truth/.
evaluate_results("approach_3_ground_truth")

len(full_questions_to_outputs.keys()): 5046
total_scores: 2974.6666666666506
total_tests: 5046
acc: 0.5895098427797564
