In [None]:
"""
Load the models
"""
# Chat-model wrappers, one per provider. GPTChat and GeminiChat are not
# referenced in the cells shown here; presumably they are kept so that a
# different backend can be swapped in below.
from llm.api.openai_models import GPTChat
from llm.api.gemini_models import GeminiChat
from llm.api.azure_openai_models import AzureChat
from llm.api.deepinfra_models import DeepInfraChat


# Two models are shared by every later cell:
#   llm  - passed to ImageDataset below and to the subquestion-generation,
#          summarization and debate steps.
#   mllm - passed to the caption-generation and answering steps and to the
#          debate.
llm = AzureChat("gpt-4o-mini")
mllm = DeepInfraChat("meta-llama/Llama-3.2-11B-Vision-Instruct")

"""
Load the dataset through customized json file
"""
# NOTE(review): wildcard import. ImageDataset (presumably provided by this
# import) is the only such name used in the cells visible here, so an explicit
# `from dataset import ImageDataset` may be preferable - confirm nothing else
# relies on the star import first.
from dataset import *
dataset = ImageDataset(llm)   # the model passed here is the one used for "ENTITY EXTRACTION" !!!

# Shared filename prefix for the per-stage files the later cells load from
# save/{prefix}<stage>.json.
prefix = "mmvet_4omini+llama11b_"

In [None]:
""" Stage 1: Entity Extraction """
# Start from the raw dataset description.
dataset.load("load/mmvet.json")
# `mllm` is the model used for "CAPTION GENERATION". The second argument is the
# stage name; the next cell loads save/<stage name>.json, so presumably the
# result is written there - confirm against ImageDataset.
dataset.new_generate_caption(mllm, f"{prefix}1_entity_extraction")

In [None]:
""" Stage 2: Question Decomposition """
# Resume from the file named after the entity-extraction stage.
dataset.load(f"save/{prefix}1_entity_extraction.json")
# Subquestions are generated with the text model `llm`; the second argument is
# the name of this stage (the next cell loads save/<stage name>.json).
dataset.generate_subquestion_overall(llm, f"{prefix}2_test_subquestion")

In [None]:
"""
Stage 3: SubQuestion Answering (original image + cropped image)
"""
# Resume from the file named after the question-decomposition stage.
dataset.load(f"save/{prefix}2_test_subquestion.json")
# Subquestions are answered with the multimodal model `mllm`; the second
# argument is the name of this stage (the next cell loads save/<stage name>.json).
dataset.subquestion_answering_overall(mllm, f"{prefix}3_test_answer")

In [None]:
"""
Stage 4: Summarization (summarize the subquestion-answer pairs)
    - if there is no subquestion, the summary info is just the empty string
"""
# Resume from the file named after the subquestion-answering stage.
dataset.load(f"save/{prefix}3_test_answer.json")
# Summaries are produced with the text model `llm`; the second argument is the
# name of this stage (the next cell loads save/<stage name>.json).
dataset.subquestion_summarization_overall(llm, f"{prefix}4_test_summarize")


In [None]:
"""
Stage 5: Final Question Answering
"""
# Resume from the file named after the summarization stage.
dataset.load(f"save/{prefix}4_test_summarize.json")
# The final answer comes from the multimodal model `mllm`; the second argument
# is the name of this stage (later cells read save/<stage name>.json).
dataset.question_answering(mllm, f"{prefix}5_test_answer")

In [None]:
import json

# Export the final-answer stage as a flat {record id: model answer} mapping.
# NOTE(review): the post-debate export cell later in this notebook writes to
# the same "results.json", so running both leaves only the later result -
# confirm that this is intended.

# JSON files are UTF-8; state the encoding explicitly instead of relying on the
# platform's locale default.
with open(f"save/{prefix}5_test_answer.json", "r", encoding="utf-8") as f:
    data = json.load(f)

# Build the mapping BEFORE opening the output file: opening with "w" truncates
# it, so a record missing "id" or "mllm_answer" would otherwise raise KeyError
# and leave an emptied results.json behind.
results = {sol["id"]: sol["mllm_answer"] for sol in data}

with open("results.json", "w", encoding="utf-8") as output_f:
    json.dump(results, output_f, indent=4)

In [None]:
# Stage 6: debate over the final answers.
from debator.base import Debate

# The debate object is given both the text model and the multimodal model.
debator = Debate(llm=llm, mllm=mllm)

# Resume from the file named after the final-answer stage; the second argument
# of debate() is the name of this stage (the export cell below reads
# save/<stage name>.json).
dataset.load(f"save/{prefix}5_test_answer.json")
dataset.debate(debator, f"{prefix}6_debating")

In [None]:
import json

# Export the post-debate answers as a flat {record id: answer} mapping.
# NOTE(review): this writes to the same "results.json" as the earlier
# final-answer export cell and therefore replaces its content - confirm that
# this is intended.

# JSON files are UTF-8; state the encoding explicitly instead of relying on the
# platform's locale default.
with open(f"save/{prefix}6_debating.json", "r", encoding="utf-8") as f:
    data = json.load(f)

# Build the mapping BEFORE opening the output file: opening with "w" truncates
# it, so a record missing "id" or "answer_after_debating" would otherwise raise
# KeyError and leave an emptied results.json behind.
results = {sol["id"]: sol["answer_after_debating"] for sol in data}

with open("results.json", "w", encoding="utf-8") as output_f:
    json.dump(results, output_f, indent=4)