In [3]:
import json
import pandas as pd
from pathlib import Path

# Directory layout, all rooted at ~/Desktop/processed_subs:
#   processed_subs_dir -> per-movie chunk files (<name>.json)
#   summaries_dir      -> per-movie summaries (<name>_summary.txt)
#   output_dir         -> generated prompt files, created here if absent
processed_subs_dir = Path.home().joinpath("Desktop", "processed_subs")
summaries_dir = processed_subs_dir.joinpath("summaries")
output_dir = processed_subs_dir.joinpath("prompts")
output_dir.mkdir(parents=True, exist_ok=True)

# Table of matched movies; the relative path is resolved against the
# current working directory.
final_matches = pd.read_csv("final_matches.csv")

# Prompt template function
def build_prompt(summary, chunk_text):
    """Return the classifier prompt text for a single dialogue chunk.

    ``summary`` is embedded as movie context and ``chunk_text`` as the
    dialogue to score; each is wrapped in triple double quotes. The prompt
    asks for scores from -1 to 1 on universalism, egalitarianism and
    progress, answered as a JSON object.
    """
    fence = '"""'
    lines = [
        "You are a political sentiment classifier. Below is context for a movie followed by a chunk of its dialogue. Score the dialogue only, using the context for clarity.",
        "",
        "Movie Summary:",
        f"{fence}{summary}{fence}",
        "",
        "Dialogue Chunk:",
        f"{fence}{chunk_text}{fence}",
        "",
        "Score the chunk from -1 to 1 on the following dimensions:",
        "- Universalism",
        "- Egalitarianism",
        "- Progress",
        "",
        "Respond in JSON:",
        "{",
        '  "universalism": [score],',
        '  "egalitarianism": [score],',
        '  "progress": [score]',
        "}",
    ]
    # No trailing newline: the prompt ends on the closing brace.
    return "\n".join(lines)

# Generate one prompts file per movie listed in final_matches
for idx, row in final_matches.iterrows():
    subtitle_file = row["subtitle_filename"]

    # Every input and output location is derived from the subtitle file name
    summary_path = summaries_dir / f"{subtitle_file}_summary.txt"
    json_path = processed_subs_dir / f"{subtitle_file}.json"
    output_path = output_dir / f"{subtitle_file}_prompts.json"

    # Both the summary and the chunk file are required; otherwise report
    # the movie and move on to the next one
    if not (summary_path.exists() and json_path.exists()):
        print(f"⚠️ Missing file(s) for: {subtitle_file}")
        continue

    # Read the subtitle chunks, then the summary (surrounding whitespace removed)
    chunks = json.loads(json_path.read_text(encoding="utf-8"))
    summary = summary_path.read_text(encoding="utf-8").strip()

    # One prompt record per chunk, keyed by the chunk's own id
    prompts = [
        {
            "chunk_id": chunk["chunk_id"],
            "prompt": build_prompt(summary, chunk["text"]),
        }
        for chunk in chunks
    ]

    # Write this movie's prompts as pretty-printed JSON, keeping non-ASCII
    # characters unescaped
    with output_path.open("w", encoding="utf-8") as out_file:
        json.dump(prompts, out_file, indent=2, ensure_ascii=False)

    print(f"✅ Prompts created for: {subtitle_file}")


✅ Prompts created for: 2017_Logan
✅ Prompts created for: 1979_10
✅ Prompts created for: 1983_Tootsie
✅ Prompts created for: 2017_It
✅ Prompts created for: 1981_Excalibur
✅ Prompts created for: 1986_Top Gun
✅ Prompts created for: 1996_Ransom_EN
✅ Prompts created for: 1996_The First Wives Club
✅ Prompts created for: 1980_Stir Crazy
✅ Prompts created for: 2016_Deadpool
✅ Prompts created for: 2004_Shark Tale.en
✅ Prompts created for: 1998_Godzilla
✅ Prompts created for: 2018_Deadpool 2
✅ Prompts created for: 2020_1917
✅ Prompts created for: 1990_Driving Miss Daisy
✅ Prompts created for: 1985_Witness
✅ Prompts created for: 1985_Rocky IV
✅ Prompts created for: 2015_Jurassic World
✅ Prompts created for: 1997_Liar Liar
✅ Prompts created for: 1979_Rocky II
✅ Prompts created for: 1995_Toy Story
✅ Prompts created for: 1988_Cocktail_Eng
✅ Prompts created for: 1990_Ghost
✅ Prompts created for: 2007_300.eng
✅ Prompts created for: 2001_Shrek
✅ Prompts created for: 1995_Waterworld.en
✅ Prompts created