In [None]:
"""
Load the models
"""
from llm.api.openai_models import GPTChat
from llm.api.gemini_models import GeminiChat
from llm.api.azure_openai_models import AzureChat
from llm.api.deepinfra_models import DeepInfraChat
from llm.local.llava_model import LlavaChat

# Model handed to the dataset constructor and to the sub-question generation
# and summarization stages in the cells below.
llm = GPTChat("gpt-4o-mini")
# llm = GeminiChat("gemini-1.5-pro")

# Model handed to the captioning, sub-question answering and final answering
# stages — presumably the vision-capable model (TODO confirm).
mllm = GPTChat("gpt-4o-mini")
# mllm = GeminiChat("gemini-1.5-pro")
# mllm = DeepInfraChat("meta-llama/Llama-3.2-11B-Vision-Instruct")

# Tag embedded in every stage's file name ("mmhal_<model_names>_<stage>").
# It is a hand-written label, not derived from llm/mllm, so it must be edited
# by hand whenever either model above is switched.
# NOTE(review): looks like "<llm>+<mllm>" ordering — confirm.
model_names = "4o-mini+4o-mini"

"""
Load the dataset through customized json file
"""
import json
# NOTE(review): wildcard import — ImageDataset is the only name from `dataset`
# referenced in the visible cells; consider an explicit import once confirmed
# that nothing else relies on the star import.
from dataset import *
# `dataset` is the ImageDataset instance that every later cell calls into.
dataset = ImageDataset(llm)   # the model here used for "ENTITY EXTRACTION" !!!

In [None]:
""" Entity Extraction """
# Stage 1: start from the raw dataset file, then run the mllm-driven step.
# The run tag ends in "_1_entity_extraction"; the next stage reloads
# save/<tag>.json.
dataset.load("load/mmhal_data.json")
dataset.new_generate_caption(
    mllm,
    f"mmhal_{model_names}_1_entity_extraction",
)

In [None]:
""" Question Decomposition """
# Stage 2: reload the stage-1 file from save/ and generate sub-questions with
# the llm under the "_2_test_subquestion" run tag.
dataset.load(f"save/mmhal_{model_names}_1_entity_extraction.json")
dataset.generate_subquestion_overall(
    llm,
    f"mmhal_{model_names}_2_test_subquestion",
)

In [None]:
"""
SubQuestion Answering (original image + cropped image)
"""
# Stage 3: nothing is reloaded here, so this call works on whatever `dataset`
# currently holds in the kernel. Run tag: "_3_test_answer".
dataset.subquestion_answering_overall(
    mllm,
    f"mmhal_{model_names}_3_test_answer",
)

In [None]:
"""
Summarization (Summarize the subquestion-answer pairs)
    - if no subquestion, just returns empty string info ""
"""
# Stage 4: nothing is reloaded here either; the llm call runs on the in-kernel
# `dataset` state. Run tag: "_4_test_summarize".
dataset.subquestion_summarization_overall(
    llm,
    f"mmhal_{model_names}_4_test_summarize",
)

In [None]:
"""
Final Question Answering
"""
# Stage 5: final answering with the mllm. The run tag is built once so the
# stage call, the file read back and the derived results file cannot drift
# apart.
answer_run = f"mmhal_{model_names}_5_test_answer"
dataset.question_answering(mllm, answer_run)

# Read back the stage output; this cell expects save/<run>.json to exist after
# the call above. The encoding is pinned so decoding does not depend on the
# platform locale.
# NOTE(review): assumes the stage file is written as UTF-8 — confirm against
# the dataset writer.
with open(f"save/{answer_run}.json", "r", encoding="utf-8") as f:
    data = json.load(f)

# Build the {id: answer} mapping BEFORE opening the output in "w" mode, so a
# record missing "id"/"mllm_answer" raises without leaving an empty or
# truncated results file behind.
results = {sol["id"]: sol["mllm_answer"] for sol in data}

with open(f"save/{answer_run}_results.json", "w", encoding="utf-8") as output_f:
    json.dump(results, output_f, indent=4)

In [None]:
from debator.base import Debate

# Stage 6: debate. The Debate object receives both models and is passed to the
# dataset together with a run tag that is built once and reused for every path
# below.
debator = Debate(llm=llm, mllm=mllm)
debate_run = f"mmhal_{model_names}_6_debating"
dataset.debate(debator, debate_run)

# Read back the stage output; this cell expects save/<run>.json to exist after
# the call above. The encoding is pinned so decoding does not depend on the
# platform locale.
# NOTE(review): assumes the stage file is written as UTF-8 — confirm against
# the dataset writer.
with open(f"save/{debate_run}.json", "r", encoding="utf-8") as f:
    data = json.load(f)

# Build the {id: answer} mapping BEFORE opening the output in "w" mode, so a
# record missing "id"/"answer_after_debating" raises without leaving an empty
# or truncated results file behind.
results = {sol["id"]: sol["answer_after_debating"] for sol in data}

with open(f"save/{debate_run}_results.json", "w", encoding="utf-8") as output_f:
    json.dump(results, output_f, indent=4)