In [1]:
import os
import json
import time
import requests
import pandas as pd
from pathlib import Path
from tqdm import tqdm

# === OpenAI API setup ===
# The key is read from the OPENAI_API_KEY environment variable so that a real
# secret never has to be pasted into (and saved with) this notebook. The
# original placeholder is kept as the fallback when the variable is not set.
api_key = os.environ.get("OPENAI_API_KEY", "api-key")  # 🔒 Set OPENAI_API_KEY or replace the fallback
api_url = "https://api.openai.com/v1/chat/completions"
model_name = "gpt-4o"

# Headers sent with every request to the chat completions endpoint.
headers = {
    "Authorization": f"Bearer {api_key}",
    "Content-Type": "application/json",
}

# === Paths ===
# Everything lives under ~/Desktop/Benchmark.
base = Path.home().joinpath("Desktop", "Benchmark")
subs_dir = base.joinpath("json_benchmark")              # <name>.json subtitle files
summaries_dir = base.joinpath("summaries")              # <name>.srt_summary.txt files
output_dir = base.joinpath("scored_themes_openai")      # parsed model answers
prompts_dir = base.joinpath("prompts_themes_openai")    # prompts as sent

# Create the two write targets up front; existing directories are left alone.
output_dir.mkdir(exist_ok=True, parents=True)
prompts_dir.mkdir(exist_ok=True, parents=True)

# === Load match file ===
# Read from the current working directory; the main loop uses its
# "subtitle_filename" column.
matches_df = pd.read_csv("matches_benchmark.csv")

# === Prompt template ===
def create_theme_prompt(summary, chunk):
    """Build the theme-analysis prompt for one dialogue chunk.

    Args:
        summary: Plot summary text, inserted verbatim between triple quotes.
        chunk: Dialogue text, inserted verbatim between triple quotes.

    Returns:
        The full prompt string: task statement, theme definitions, numbered
        instructions, the required JSON answer format, then the summary and
        the dialogue.
    """
    task = "".join([
        "You are analyzing a film based on its dialogue and a summary of its plot.\n\n",
        "Your task is to identify the ideological, societal, or political themes expressed in the film.\n\n",
    ])

    definitions = "".join([
        "Definitions:\n",
        "- Ideological themes refer to ideas or values associated with a worldview or belief system, such as attitudes toward authority, individual rights, or tradition.\n",
        "- Societal themes reflect cultural norms, social roles, or commentary on how people relate to institutions, communities, or each other.\n",
        "- Political themes involve power structures, governance, laws, rights, or critiques of political institutions or ideologies.\n\n",
    ])

    instructions = "".join([
        "Please:\n",
        "1. Provide a list of 3 to 7 themes in the form of keywords or short phrases.\n",
        "2. Include a confidence score from 0.0 to 1.0 indicating how confident you are in your analysis.\n",
        "3. Briefly explain your reasoning in 1–2 sentences.\n\n",
    ])

    # Literal JSON skeleton the model is asked to fill in.
    answer_format = "".join([
        "Return your answer strictly in the following JSON format:\n\n",
        "{\n",
        '  "themes": ["...", "...", "..."],\n',
        '  "confidence": ...,\n',
        '  "explanation": "..."\n',
        "}\n\n",
    ])

    film_context = f'Film Summary:\n"""{summary}"""\n\nDialogue:\n"""{chunk}"""'

    return task + definitions + instructions + answer_format + film_context

# === OpenAI API call ===
def call_openai(prompt, max_retries=5, retry_wait=60, timeout=120):
    """Send one prompt to the chat completions endpoint and return the parsed JSON answer.

    Args:
        prompt: User message content to send.
        max_retries: Maximum number of attempts when the API answers 429.
        retry_wait: Seconds to sleep after a 429 before the next attempt.
        timeout: Seconds passed to ``requests.post`` so a stalled connection
            cannot block the batch indefinitely.

    Returns:
        The object decoded from the model's reply (expected to be the JSON
        structure requested in the prompt), or ``None`` when the request
        fails, the API returns an error status, the rate limit persists
        through all attempts, or the reply cannot be parsed as JSON.
    """
    payload = {
        "model": model_name,
        "temperature": 0,
        "max_tokens": 600,
        "messages": [
            {"role": "system", "content": "You are a careful film analyst. Always return exactly and only the required JSON."},
            {"role": "user", "content": prompt}
        ]
    }

    # Bounded loop instead of unbounded recursion: a persistent 429 (for
    # example an exhausted quota) must eventually give up rather than retry
    # forever.
    for _ in range(max_retries):
        try:
            response = requests.post(api_url, headers=headers, json=payload, timeout=timeout)
        except requests.RequestException as e:
            # Connection errors and timeouts are reported like any other
            # failed chunk instead of aborting the whole run.
            print(f"❌ Request failed: {e}")
            return None

        if response.status_code == 429:
            print("⚠️ Rate limit hit. Waiting...")
            time.sleep(retry_wait)
            continue
        if response.status_code != 200:
            print(f"❌ API Error: {response.status_code}")
            print(response.text)
            return None
        break
    else:
        print(f"❌ Rate limit persisted after {max_retries} attempts. Giving up.")
        return None

    try:
        content = response.json()["choices"][0]["message"]["content"]

        # Strip code fences if present
        if content.strip().startswith("```"):
            content = content.strip().strip("`")
            if content.lower().startswith("json"):
                content = content[4:].strip()

        return json.loads(content)
    except (ValueError, KeyError, IndexError, TypeError, AttributeError):
        # ValueError covers JSON decode errors; the others cover a response
        # body that lacks the expected structure or has no text content.
        print(f"⚠️ JSON parsing error:\n{response.text}")
        return None

# === Subtitle chunking ===
def chunk_dialogue(subs, chunk_size=5000, overlap=200):
    """Join subtitle lines into one string and cut it into overlapping chunks.

    Args:
        subs: Iterable of dict-like subtitle entries; entries with a truthy
            ``"text"`` value contribute that value (stripped), others are
            skipped.
        chunk_size: Maximum number of characters per chunk.
        overlap: Number of characters shared between consecutive chunks.

    Returns:
        List of chunk strings. Empty when there is no dialogue text. The last
        chunk ends at the end of the text; no extra chunk made only of
        already-covered overlap characters is produced.

    Raises:
        ValueError: If ``chunk_size`` is not positive or ``overlap`` is not
            smaller than ``chunk_size`` (the window would never advance).
    """
    if chunk_size <= 0:
        raise ValueError("chunk_size must be a positive integer")
    if overlap >= chunk_size:
        raise ValueError("overlap must be smaller than chunk_size")

    text_blocks = [line.get("text", "").strip() for line in subs if line.get("text")]
    full_text = " ".join(text_blocks)
    text_length = len(full_text)

    step = chunk_size - overlap
    chunks = []
    start = 0
    while start < text_length:
        end = start + chunk_size
        chunks.append(full_text[start:end])
        # This chunk already reached the end of the text; stepping back by
        # `overlap` would only re-emit its tail as a redundant chunk.
        if end >= text_length:
            break
        start += step
    return chunks

# === Main loop ===
# For every film in the match file: load its subtitles and summary, split the
# dialogue into chunks, and score each chunk that has no saved result yet.
for _, row in tqdm(matches_df.iterrows(), total=len(matches_df), desc="Scoring themes (OpenAI)"):
    filename = row["subtitle_filename"]
    json_path = subs_dir / f"{filename}.json"
    summary_path = summaries_dir / f"{filename}.srt_summary.txt"

    # Both inputs are required; skip the film if either is absent.
    if not (json_path.exists() and summary_path.exists()):
        print(f"⚠️ Missing files for: {filename}")
        continue

    subs = json.loads(json_path.read_text(encoding="utf-8"))
    summary = summary_path.read_text(encoding="utf-8").strip()

    chunks = chunk_dialogue(subs)

    # Chunk numbers in file names and messages are 1-based.
    for chunk_no, chunk in enumerate(chunks, start=1):
        stem = f"{filename}_chunk{chunk_no}_themes"
        output_path = output_dir / f"{stem}.json"
        prompt_path = prompts_dir / f"{stem}_prompt.txt"

        # An existing output file marks the chunk as done, so reruns resume.
        if output_path.exists():
            print(f"🟡 Already scored: {filename} chunk {chunk_no}")
            continue

        prompt = create_theme_prompt(summary, chunk)

        # The prompt is saved before the request, so it is kept even when the call fails.
        prompt_path.write_text(prompt, encoding="utf-8")

        result = call_openai(prompt)
        if not result:
            print(f"❌ Failed: {filename} chunk {chunk_no}")
        else:
            output_path.write_text(json.dumps(result, indent=2), encoding="utf-8")
            print(f"✅ Scored: {filename} chunk {chunk_no}")

        # Fixed pause between requests.
        time.sleep(1.5)


Scoring themes (OpenAI):   0%|                           | 0/20 [00:00<?, ?it/s]

✅ Scored: 2006Blood.Diamond chunk 1
✅ Scored: 2006Blood.Diamond chunk 2
✅ Scored: 2006Blood.Diamond chunk 3
✅ Scored: 2006Blood.Diamond chunk 4
✅ Scored: 2006Blood.Diamond chunk 5
✅ Scored: 2006Blood.Diamond chunk 6
✅ Scored: 2006Blood.Diamond chunk 7
✅ Scored: 2006Blood.Diamond chunk 8
✅ Scored: 2006Blood.Diamond chunk 9
✅ Scored: 2006Blood.Diamond chunk 10
✅ Scored: 2006Blood.Diamond chunk 11


Scoring themes (OpenAI):   5%|▉                  | 1/20 [01:26<27:14, 86.04s/it]

✅ Scored: 2005The.Constant.Gardener chunk 1
✅ Scored: 2005The.Constant.Gardener chunk 2
✅ Scored: 2005The.Constant.Gardener chunk 3
✅ Scored: 2005The.Constant.Gardener chunk 4
✅ Scored: 2005The.Constant.Gardener chunk 5
✅ Scored: 2005The.Constant.Gardener chunk 6
✅ Scored: 2005The.Constant.Gardener chunk 7
✅ Scored: 2005The.Constant.Gardener chunk 8
✅ Scored: 2005The.Constant.Gardener chunk 9
✅ Scored: 2005The.Constant.Gardener chunk 10
✅ Scored: 2005The.Constant.Gardener chunk 11
✅ Scored: 2005The.Constant.Gardener chunk 12
✅ Scored: 2005The.Constant.Gardener chunk 13


Scoring themes (OpenAI):  10%|█▉                 | 2/20 [02:25<21:08, 70.48s/it]

✅ Scored: 1981Indiana.Jones.And.The.Raiders.Of.The.Lost.Ark chunk 1
✅ Scored: 1981Indiana.Jones.And.The.Raiders.Of.The.Lost.Ark chunk 2
✅ Scored: 1981Indiana.Jones.And.The.Raiders.Of.The.Lost.Ark chunk 3
✅ Scored: 1981Indiana.Jones.And.The.Raiders.Of.The.Lost.Ark chunk 4
✅ Scored: 1981Indiana.Jones.And.The.Raiders.Of.The.Lost.Ark chunk 5
✅ Scored: 1981Indiana.Jones.And.The.Raiders.Of.The.Lost.Ark chunk 6


Scoring themes (OpenAI):  15%|██▊                | 3/20 [02:59<15:15, 53.87s/it]

✅ Scored: 2009Avatar chunk 1
✅ Scored: 2009Avatar chunk 2
✅ Scored: 2009Avatar chunk 3
✅ Scored: 2009Avatar chunk 4
✅ Scored: 2009Avatar chunk 5
✅ Scored: 2009Avatar chunk 6
✅ Scored: 2009Avatar chunk 7
✅ Scored: 2009Avatar chunk 8
✅ Scored: 2009Avatar chunk 9
✅ Scored: 2009Avatar chunk 10
✅ Scored: 2009Avatar chunk 11


Scoring themes (OpenAI):  20%|███▊               | 4/20 [03:53<14:18, 53.64s/it]

✅ Scored: 2012The.Hunger.Games chunk 1
✅ Scored: 2012The.Hunger.Games chunk 2
✅ Scored: 2012The.Hunger.Games chunk 3
✅ Scored: 2012The.Hunger.Games chunk 4
✅ Scored: 2012The.Hunger.Games chunk 5
✅ Scored: 2012The.Hunger.Games chunk 6
✅ Scored: 2012The.Hunger.Games chunk 7
✅ Scored: 2012The.Hunger.Games chunk 8


Scoring themes (OpenAI):  25%|████▊              | 5/20 [04:24<11:24, 45.63s/it]

✅ Scored: 1984Ghostbusters chunk 1
✅ Scored: 1984Ghostbusters chunk 2
✅ Scored: 1984Ghostbusters chunk 3
✅ Scored: 1984Ghostbusters chunk 4
✅ Scored: 1984Ghostbusters chunk 5
✅ Scored: 1984Ghostbusters chunk 6
✅ Scored: 1984Ghostbusters chunk 7
✅ Scored: 1984Ghostbusters chunk 8
✅ Scored: 1984Ghostbusters chunk 9
✅ Scored: 1984Ghostbusters chunk 10


Scoring themes (OpenAI):  30%|█████▋             | 6/20 [05:16<11:08, 47.74s/it]

✅ Scored: 1978Superman chunk 1
✅ Scored: 1978Superman chunk 2
✅ Scored: 1978Superman chunk 3
✅ Scored: 1978Superman chunk 4
✅ Scored: 1978Superman chunk 5
✅ Scored: 1978Superman chunk 6
✅ Scored: 1978Superman chunk 7
✅ Scored: 1978Superman chunk 8
✅ Scored: 1978Superman chunk 9
✅ Scored: 1978Superman chunk 10
✅ Scored: 1978Superman chunk 11


Scoring themes (OpenAI):  35%|██████▋            | 7/20 [06:04<10:23, 47.99s/it]

✅ Scored: 2008The.Hurt.Locker chunk 1
✅ Scored: 2008The.Hurt.Locker chunk 2
✅ Scored: 2008The.Hurt.Locker chunk 3
✅ Scored: 2008The.Hurt.Locker chunk 4
✅ Scored: 2008The.Hurt.Locker chunk 5
✅ Scored: 2008The.Hurt.Locker chunk 6
✅ Scored: 2008The.Hurt.Locker chunk 7
✅ Scored: 2008The.Hurt.Locker chunk 8
✅ Scored: 2008The.Hurt.Locker chunk 9


Scoring themes (OpenAI):  40%|███████▌           | 8/20 [06:49<09:23, 46.92s/it]

✅ Scored: 1977Star.Wars.Episode.IV.-.A.New.Hope chunk 1
✅ Scored: 1977Star.Wars.Episode.IV.-.A.New.Hope chunk 2
✅ Scored: 1977Star.Wars.Episode.IV.-.A.New.Hope chunk 3
✅ Scored: 1977Star.Wars.Episode.IV.-.A.New.Hope chunk 4
✅ Scored: 1977Star.Wars.Episode.IV.-.A.New.Hope chunk 5
✅ Scored: 1977Star.Wars.Episode.IV.-.A.New.Hope chunk 6
✅ Scored: 1977Star.Wars.Episode.IV.-.A.New.Hope chunk 7
✅ Scored: 1977Star.Wars.Episode.IV.-.A.New.Hope chunk 8
✅ Scored: 1977Star.Wars.Episode.IV.-.A.New.Hope chunk 9
✅ Scored: 1977Star.Wars.Episode.IV.-.A.New.Hope chunk 10
✅ Scored: 1977Star.Wars.Episode.IV.-.A.New.Hope chunk 11
✅ Scored: 1977Star.Wars.Episode.IV.-.A.New.Hope chunk 12


Scoring themes (OpenAI):  45%|████████▌          | 9/20 [07:47<09:13, 50.28s/it]

✅ Scored: 2003Pirates.of.the.Caribbean.The.Curse.of.the.Black.Pearl chunk 1
✅ Scored: 2003Pirates.of.the.Caribbean.The.Curse.of.the.Black.Pearl chunk 2
✅ Scored: 2003Pirates.of.the.Caribbean.The.Curse.of.the.Black.Pearl chunk 3
✅ Scored: 2003Pirates.of.the.Caribbean.The.Curse.of.the.Black.Pearl chunk 4
✅ Scored: 2003Pirates.of.the.Caribbean.The.Curse.of.the.Black.Pearl chunk 5
✅ Scored: 2003Pirates.of.the.Caribbean.The.Curse.of.the.Black.Pearl chunk 6
✅ Scored: 2003Pirates.of.the.Caribbean.The.Curse.of.the.Black.Pearl chunk 7
✅ Scored: 2003Pirates.of.the.Caribbean.The.Curse.of.the.Black.Pearl chunk 8
✅ Scored: 2003Pirates.of.the.Caribbean.The.Curse.of.the.Black.Pearl chunk 9
✅ Scored: 2003Pirates.of.the.Caribbean.The.Curse.of.the.Black.Pearl chunk 10


Scoring themes (OpenAI):  50%|█████████         | 10/20 [08:45<08:47, 52.78s/it]

✅ Scored: 2018Black.Panther chunk 1
✅ Scored: 2018Black.Panther chunk 2
✅ Scored: 2018Black.Panther chunk 3
✅ Scored: 2018Black.Panther chunk 4
✅ Scored: 2018Black.Panther chunk 5
✅ Scored: 2018Black.Panther chunk 6
✅ Scored: 2018Black.Panther chunk 7
✅ Scored: 2018Black.Panther chunk 8
✅ Scored: 2018Black.Panther chunk 9
✅ Scored: 2018Black.Panther chunk 10


Scoring themes (OpenAI):  55%|█████████▉        | 11/20 [09:39<07:59, 53.24s/it]

✅ Scored: 2021Dont.Look.Up chunk 1
✅ Scored: 2021Dont.Look.Up chunk 2
✅ Scored: 2021Dont.Look.Up chunk 3
✅ Scored: 2021Dont.Look.Up chunk 4
✅ Scored: 2021Dont.Look.Up chunk 5
✅ Scored: 2021Dont.Look.Up chunk 6
✅ Scored: 2021Dont.Look.Up chunk 7
✅ Scored: 2021Dont.Look.Up chunk 8
✅ Scored: 2021Dont.Look.Up chunk 9
✅ Scored: 2021Dont.Look.Up chunk 10
✅ Scored: 2021Dont.Look.Up chunk 11
✅ Scored: 2021Dont.Look.Up chunk 12
✅ Scored: 2021Dont.Look.Up chunk 13
✅ Scored: 2021Dont.Look.Up chunk 14
✅ Scored: 2021Dont.Look.Up chunk 15
✅ Scored: 2021Dont.Look.Up chunk 16
✅ Scored: 2021Dont.Look.Up chunk 17


Scoring themes (OpenAI):  60%|██████████▊       | 12/20 [11:20<09:00, 67.56s/it]

✅ Scored: 1982First.Blood chunk 1
✅ Scored: 1982First.Blood chunk 2
✅ Scored: 1982First.Blood chunk 3
✅ Scored: 1982First.Blood chunk 4
✅ Scored: 1982First.Blood chunk 5
✅ Scored: 1982First.Blood chunk 6
✅ Scored: 1982First.Blood chunk 7


Scoring themes (OpenAI):  65%|███████████▋      | 13/20 [12:00<06:55, 59.39s/it]

✅ Scored: 2019Joker chunk 1
✅ Scored: 2019Joker chunk 2
✅ Scored: 2019Joker chunk 3
✅ Scored: 2019Joker chunk 4
✅ Scored: 2019Joker chunk 5
✅ Scored: 2019Joker chunk 6
✅ Scored: 2019Joker chunk 7
✅ Scored: 2019Joker chunk 8
✅ Scored: 2019Joker chunk 9


Scoring themes (OpenAI):  70%|████████████▌     | 14/20 [13:01<05:58, 59.73s/it]

✅ Scored: 2006Night.at.the.Museum chunk 1
✅ Scored: 2006Night.at.the.Museum chunk 2
✅ Scored: 2006Night.at.the.Museum chunk 3
✅ Scored: 2006Night.at.the.Museum chunk 4
✅ Scored: 2006Night.at.the.Museum chunk 5
✅ Scored: 2006Night.at.the.Museum chunk 6
✅ Scored: 2006Night.at.the.Museum chunk 7
✅ Scored: 2006Night.at.the.Museum chunk 8
✅ Scored: 2006Night.at.the.Museum chunk 9
✅ Scored: 2006Night.at.the.Museum chunk 10


Scoring themes (OpenAI):  75%|█████████████▌    | 15/20 [13:50<04:42, 56.52s/it]

✅ Scored: 1976Rocky.I chunk 1
✅ Scored: 1976Rocky.I chunk 2
✅ Scored: 1976Rocky.I chunk 3
✅ Scored: 1976Rocky.I chunk 4
✅ Scored: 1976Rocky.I chunk 5
✅ Scored: 1976Rocky.I chunk 6
✅ Scored: 1976Rocky.I chunk 7
✅ Scored: 1976Rocky.I chunk 8
✅ Scored: 1976Rocky.I chunk 9
✅ Scored: 1976Rocky.I chunk 10
✅ Scored: 1976Rocky.I chunk 11
✅ Scored: 1976Rocky.I chunk 12
✅ Scored: 1976Rocky.I chunk 13


Scoring themes (OpenAI):  80%|██████████████▍   | 16/20 [14:55<03:56, 59.19s/it]

✅ Scored: 2005V.for.Vendetta chunk 1
✅ Scored: 2005V.for.Vendetta chunk 2
✅ Scored: 2005V.for.Vendetta chunk 3
✅ Scored: 2005V.for.Vendetta chunk 4
✅ Scored: 2005V.for.Vendetta chunk 5
✅ Scored: 2005V.for.Vendetta chunk 6
✅ Scored: 2005V.for.Vendetta chunk 7
✅ Scored: 2005V.for.Vendetta chunk 8
✅ Scored: 2005V.for.Vendetta chunk 9
✅ Scored: 2005V.for.Vendetta chunk 10
✅ Scored: 2005V.for.Vendetta chunk 11
✅ Scored: 2005V.for.Vendetta chunk 12
✅ Scored: 2005V.for.Vendetta chunk 13
✅ Scored: 2005V.for.Vendetta chunk 14
✅ Scored: 2005V.for.Vendetta chunk 15
✅ Scored: 2005V.for.Vendetta chunk 16


Scoring themes (OpenAI):  85%|███████████████▎  | 17/20 [16:14<03:15, 65.06s/it]

✅ Scored: 2017Paddington.2 chunk 1
✅ Scored: 2017Paddington.2 chunk 2
✅ Scored: 2017Paddington.2 chunk 3
✅ Scored: 2017Paddington.2 chunk 4
✅ Scored: 2017Paddington.2 chunk 5
✅ Scored: 2017Paddington.2 chunk 6
✅ Scored: 2017Paddington.2 chunk 7
✅ Scored: 2017Paddington.2 chunk 8
✅ Scored: 2017Paddington.2 chunk 9
✅ Scored: 2017Paddington.2 chunk 10
✅ Scored: 2017Paddington.2 chunk 11
✅ Scored: 2017Paddington.2 chunk 12


Scoring themes (OpenAI):  90%|████████████████▏ | 18/20 [17:14<02:07, 63.69s/it]

✅ Scored: 1985Back.To.The.Future chunk 1
✅ Scored: 1985Back.To.The.Future chunk 2
✅ Scored: 1985Back.To.The.Future chunk 3
✅ Scored: 1985Back.To.The.Future chunk 4
✅ Scored: 1985Back.To.The.Future chunk 5
✅ Scored: 1985Back.To.The.Future chunk 6
✅ Scored: 1985Back.To.The.Future chunk 7
✅ Scored: 1985Back.To.The.Future chunk 8
✅ Scored: 1985Back.To.The.Future chunk 9
✅ Scored: 1985Back.To.The.Future chunk 10
✅ Scored: 1985Back.To.The.Future chunk 11


Scoring themes (OpenAI):  95%|█████████████████ | 19/20 [18:09<01:00, 60.95s/it]

✅ Scored: 2013The.Purge chunk 1
✅ Scored: 2013The.Purge chunk 2
✅ Scored: 2013The.Purge chunk 3
✅ Scored: 2013The.Purge chunk 4
✅ Scored: 2013The.Purge chunk 5
✅ Scored: 2013The.Purge chunk 6
✅ Scored: 2013The.Purge chunk 7


Scoring themes (OpenAI): 100%|██████████████████| 20/20 [18:41<00:00, 56.10s/it]
