In [1]:
import os
import json
import pandas as pd
from pathlib import Path
from tqdm import tqdm

# === Paths ===
# Every input and output of this script lives under ~/Desktop/processed_subs.
base_path = Path.home().joinpath("Desktop", "processed_subs")
# Directory that is scanned for the per-movie *.json result files.
scored_dir = base_path.joinpath("scored_heroes_and_moral")
# Destination of the aggregated CSV.
output_path = base_path.joinpath("heroes_and_moral_movie.csv")

# === Helper function ===
def parse_filename(filename_stem):
    """Split a ``<year>_<title_with_underscores>`` stem into ``(year, title)``.

    The text before the first underscore is converted with ``int`` and the
    remaining underscores are turned into spaces. If the stem cannot be split
    or the prefix is not an integer (or anything else goes wrong), the result
    is ``(None, filename_stem)`` with the stem returned untouched.
    """
    try:
        prefix, remainder = filename_stem.split("_", 1)
        parsed = (int(prefix), remainder.replace("_", " "))
    except Exception:
        # Best effort: keep the raw stem as the title when parsing fails.
        parsed = (None, filename_stem)
    return parsed

# === Aggregation loop ===
# Column order of the output CSV. Declared up front so the file still gets a
# header row when no record could be collected.
CSV_COLUMNS = [
    "year",
    "title",
    "hero_name",
    "hero_nationality",
    "hero_entity_type",
    "hero_confidence",
    "villain_name",
    "villain_nationality",
    "villain_entity_type",
    "villain_confidence",
    "moral_category",
    "moral_confidence",
]


def _section(data, key):
    """Return ``data[key]`` when it is a dict, otherwise an empty dict.

    Covers a section that is missing, JSON ``null`` or of another type, so the
    ``.get`` lookups on the result never run against a non-dict value.
    """
    value = data.get(key)
    return value if isinstance(value, dict) else {}


records = []

# Sorted so the row order of the CSV is reproducible between runs.
files = sorted(scored_dir.glob("*.json"))
print(f"Found {len(files)} scored movie files.")

for file in tqdm(files, desc="Aggregating Heroes_and_Moral results"):
    # Both opening and parsing sit inside the try: a file that cannot be read
    # or decoded is reported and skipped instead of aborting the whole run.
    try:
        with open(file, "r", encoding="utf-8") as f:
            data = json.load(f)
    except (OSError, ValueError) as e:
        # json.JSONDecodeError and UnicodeDecodeError both derive from ValueError.
        print(f"⚠️ Failed to load {file.name}: {e}")
        continue

    if not isinstance(data, dict):
        # The lookups below need a JSON object at the top level.
        print(f"⚠️ Skipping {file.name}: top-level JSON value is not an object")
        continue

    # NOTE(review): this removes every "_scored" occurrence in the stem, not
    # only a trailing one — confirm no title contains that text.
    year, title = parse_filename(file.stem.replace("_scored", ""))

    hero = _section(data, "hero")
    villain = _section(data, "villain")
    moral = _section(data, "moral")

    records.append({
        "year": year,
        "title": title,
        "hero_name": hero.get("name", "UNKNOWN"),
        "hero_nationality": hero.get("nationality", "UNKNOWN"),
        "hero_entity_type": hero.get("entity_type", "UNKNOWN"),
        "hero_confidence": hero.get("confidence", None),
        "villain_name": villain.get("name", "UNKNOWN"),
        "villain_nationality": villain.get("nationality", "UNKNOWN"),
        "villain_entity_type": villain.get("entity_type", "UNKNOWN"),
        "villain_confidence": villain.get("confidence", None),
        "moral_category": moral.get("category", "UNKNOWN"),
        "moral_confidence": moral.get("confidence", None),
    })

# === Save to CSV ===
# Explicit columns keep the header stable even for an empty record list.
df = pd.DataFrame(records, columns=CSV_COLUMNS)
df.to_csv(output_path, index=False)

print(f"\n✅ Done! Aggregated results saved to:\n{output_path}")


Found 443 scored movie files.


Aggregating Heroes_and_Moral results: 100%|█| 443/443 [00:00<00:00, 4489.16it/s]


✅ Done! Aggregated results saved to:
/Users/cedricroetheli/Desktop/processed_subs/heroes_and_moral_movie.csv



