# In [None]:
import json
import pandas as pd
import os

# Models whose response files are merged into the combined spreadsheet.
model_names = [
    "claude3.5sonnet",
    "claude3.5",
    "claude3.7",
    "gpt4o-mini",
    "gpt4o",
]

# Output-column suffix -> key read from a model's parsed JSON response.
# The anomaly label is read from the capitalised "Type" key.
_RESPONSE_FIELDS = {
    "description": "description",
    "reasoning": "reasoning",
    "anomaly": "Type",
}


def _extract_fields(content):
    """Return the description/reasoning/anomaly values held in *content*.

    *content* is expected to be a string containing a JSON object. Anything
    else -- a non-string value, text that is not valid JSON, or valid JSON
    that is not an object (e.g. ``"[]"`` or ``"null"``) -- yields an empty
    string for every field, so a single malformed response cannot abort the
    whole merge. Keys missing from the object also default to ``""``.
    """
    parsed = None
    if isinstance(content, str):
        try:
            parsed = json.loads(content)
        except json.JSONDecodeError:
            parsed = None

    if not isinstance(parsed, dict):
        # json.loads may legally return a list, number, string or None;
        # none of those support .get(), so treat them like unparsable text.
        return {suffix: "" for suffix in _RESPONSE_FIELDS}

    return {suffix: parsed.get(key, "") for suffix, key in _RESPONSE_FIELDS.items()}


# Dictionary to hold results keyed by video_name
video_dict = {}

for name in model_names:
    file_path = f"responses_{name}_images_hyp_1.json"
    if not os.path.exists(file_path):
        # Best-effort merge: a model without a response file contributes
        # no columns instead of stopping the run.
        continue

    # JSON text is UTF-8; do not depend on the platform's default encoding.
    with open(file_path, "r", encoding="utf-8") as f:
        data = json.load(f)

    # The file is iterated as a sequence of mappings, each mapping
    # video_name -> raw response content for this model.
    for item in data:
        for video_name, content in item.items():
            row = video_dict.setdefault(video_name, {})
            for suffix, value in _extract_fields(content).items():
                row[f"{name}-{suffix}"] = value

# Convert to DataFrame: one row per video, one column per model/field pair
df = pd.DataFrame.from_dict(video_dict, orient='index')
df.index.name = "video_name"
df.reset_index(inplace=True)

# Save to Excel
df.to_excel("extracted_video_anomalies_all_models.xlsx", index=False)
