In [1]:
import os
import time
import json
import re
import sys
import csv
 
import gradio as gr
from openai import OpenAI
from dotenv import load_dotenv
from pydantic import BaseModel, ConfigDict, field_validator, Field, RootModel
from enum import Enum
from typing import List, Optional, Dict, Tuple, Any
from datetime import date
from concurrent import futures
from tqdm import tqdm
from pathlib import Path
from openai.lib._pydantic import to_strict_json_schema

# Add parent directory to path to import from implementation package
# Notebooks are in implementation/notebooks/, so we go up two levels to project root
sys.path.insert(0, str(Path().resolve().parent.parent))

from implementation.llms.query_understanding_methods import (
    extract_lexical_entities,
    create_channel_weights,
    extract_single_metadata_preference,
    extract_all_metadata_preferences,
    create_single_vector_subquery,
    create_all_vector_subqueries,
    create_single_vector_weight,
    create_all_vector_weights,
    create_overall_query_understanding,
)

# Load environment variables (for API key)
load_dotenv()

  from .autonotebook import tqdm as notebook_tqdm


True

In [6]:
OVERALL_TEST_QUERIES = [
    "The Godfather",
    "Tom Hanks movies",
    "90s comedies",
    "something feel-good and lighthearted",
    "that movie with the spinning top at the end",
    "films directed by Fincher starring Brad Pitt",
    "A24 horror movies",
    "movies with a character like Walter White",
    "Spielberg and Lucas collaborations",
    "that leandro dicaprio boat movie from 2001",
    "Christoph Nolan's space movie with Matt Damon",
    "shawshank movie prison escape morgan friedman",
    "horror movies but not slashers or torture porn",
    "80s action without Schwarzenegger or Stallone",
    "thrillers that aren't too stressful or dark",
    "critically acclaimed sci-fi from the last 5 years under 2 hours",
    "R-rated crime dramas from before 1980",
    "foreign language best picture nominees",
    "something my parents and kids can all watch together",
    "background movie while I work, nothing too demanding",
    "first date movie that's romantic but not cheesy",
    "movies about grief and learning to move on",
    "heist movies with a twist ending where the villain wins",
    "nonlinear storytelling like Pulp Fiction or Memento",
    "underrated 90s neo-noir thrillers with morally ambiguous protagonists, preferably under 2 hours, not directed by the usual suspects like Tarantino",
]

## Generic Kimmi K Calling Function

## Lexical Entity

In [2]:
TEST_LEXICAL_QUERIES = [
    "movies starring Tom Hanks",
    "films with Leandro Dicaprio and Bred Pit",
    "best LOTR movies",
    "action movies not starring Nicolas Cage",
    "Pixar movies directed by Brad Bird starring Samuel L Jackson",
    "movies like Star Wars",
    "movies featuring Darth Vader",
    "horror films from A24 or Blumhouse",
    "movies with Jack Sparrow",
    "funny scary movies from the 90s with great acting",
    "rocky 4 and rambo 2",
    "movies like se7en or the number 23",
    "films with schwarzenegger and stallone",
    "movies with the word 'death' in the title",
    "waner brothers movies with harry poter not directed by chris colombus",
    "best action movies with plot twists",
    "romantic comedies from the 2000s",
    "underrated foreign films with strong female leads",
]

In [3]:
# Run sample test cases (parallelized, up to 25 queries at a time)

lexical_entity_results = []

with futures.ThreadPoolExecutor(max_workers=25) as executor:
    # Submit extract_lexical_entities for each query
    future_to_query = {executor.submit(extract_lexical_entities, query): query for query in OVERALL_TEST_QUERIES}

    # Collect results as they complete, with progress bar
    for future in tqdm(
        futures.as_completed(future_to_query),
        total=len(OVERALL_TEST_QUERIES),
        desc="Processing lexical entity queries",
    ):
        query = future_to_query[future]
        try:
            results = future.result()
            lexical_entity_results.append((query, results))
        except Exception as e:
            print(f"Error processing query '{query}': {e}")

print(f"Generated {len(lexical_entity_results)} lexical entity results")

Processing lexical entity queries: 100%|██████████| 25/25 [00:03<00:00,  7.61it/s]

Generated 25 lexical entity results





In [None]:
lexical_entity_results

In [4]:
# Save to lexical_entity_results.csv

with open('../generated_data/lexical_entity_results.csv', 'w', newline='', encoding='utf-8') as csvfile:
    writer = csv.DictWriter(csvfile, fieldnames=["query", "results"])
    writer.writeheader()
    for query, results in lexical_entity_results:
        writer.writerow({"query": query, "results": results.model_dump_json()})

print(f"Saved {len(lexical_entity_results)} results to lexical_entity_results.csv")

Saved 25 results to lexical_entity_results.csv


## Metadata Preferences

In [15]:
TEST_METADATA_QUERIES = [
    "brisk 90s action flick, nothing plodding",
    "Portuguese thriller with English subtitles, won something at Sundance",
    "everyone saw it but critics were mixed, big summer tentpole",
    "leisurely paced drama, I have all afternoon",
    "Taiwanese coming-of-age, light and breezy, nothing heavy",
    "something trashy and fun, totally panned, perfect for wine night",
    "late 2010s superhero film, appropriate for my 12-year-old",
    "moody Nordic noir, could be Swedish or Danish",
    "tightly edited, under 100 minutes, no filler",
    "arthouse darling that flopped commercially",
    "streaming free on Tubi, campy 80s horror, the cheesier the better",
    "beautifully shot but narratively messy, visually stunning",
    "films from the silent era, slapstick preferred",
    "I can only rent tonight, nothing on my subscriptions has what I want",
    "British gangster film, stylish and quotable, Guy Ritchie vibes",
    "not looking for anything mainstream, obscure foreign gems only",
    "certified banger, everyone at work won't shut up about it",
    "exactly rated R, I want the hard stuff, uncut",
    "polarizing film, some call it genius others call it pretentious garbage",
    "Australian outback thriller, gritty and relentless, 2000s era",
]

In [16]:
metadata_preferences_results = []

with futures.ThreadPoolExecutor(max_workers=6) as executor:
    # Submit extract_lexical_entities for each query
    future_to_query = {executor.submit(extract_all_metadata_preferences, query): query for query in TEST_METADATA_QUERIES}

    # Collect results as they complete, with progress bar
    for future in tqdm(
        futures.as_completed(future_to_query),
        total=len(TEST_METADATA_QUERIES),
        desc="Processing metadata preference queries",
    ):
        query = future_to_query[future]
        try:
            results = future.result()
            metadata_preferences_results.append((query, results))
        except Exception as e:
            print(f"Error processing query '{query}': {e}")

print(f"Generated {len(metadata_preferences_results)} responses")

Processing metadata preference queries: 100%|██████████| 20/20 [00:12<00:00,  1.60it/s]

Generated 20 responses





In [6]:
metadata_preferences_results

[('brisk 90s action flick, nothing plodding',
  MetadataPreferencesResponse(release_date_preference=DatePreference(first_date='1990-01-01', match_operation='between', second_date='1999-12-31'), duration_preference=NumericalPreference(first_value=90.0, match_operation='exact', second_value=None), genres_preference=GenreListPreference(should_include=['Action'], should_exclude=[]), audio_languages_preference=ListPreference(should_include=[], should_exclude=[]), watch_providers_preference=WatchProvidersPreference(should_include=[], should_exclude=[], preferred_access_type=None), maturity_rating_preference=MaturityPreference(rating='PG-13', match_operation='less_than_or_equal'), popular_trending_preference=PopularTrendingPreference(prefers_trending_movies=False, prefers_popular_movies=False), reception_preference=ReceptionPreference(reception_type='no_preference')))]

In [18]:
# Save to metadata_preferences_results.csv

with open("../generated_data/metadata_preferences_results.csv", "w", newline="", encoding="utf-8") as f:
    writer = csv.DictWriter(f, fieldnames=["query", "results"])
    writer.writeheader()
    for query, results in metadata_preferences_results:
        writer.writerow({"query": query, "results": results.model_dump_json()})

print(f"Saved {len(metadata_preferences_results)} results to metadata_preferences_results.csv")

Saved 20 results to metadata_preferences_results.csv


## Vector Subqueries

In [2]:
TEST_VECTOR_QUERIES = [
  "manly action movies from the 80s",
  "movies like parasite but american and funnier not dumb",
  "1990s french psych thriller not slow not frantic nonlinear timeline unreliable narrator no gore but creepy critics said beautiful cinematography plot holes",
  "hand drawn animation not cgi spanish audio coming of age dramedy uplifting and hopeful iconic songs great dialogue",
  "two strangers handcuffed together escape the city in one night set in tokyo time loop twist ending",
  "date night movie to unwind after a long day funny but not gross no jump scares",
  "low budget indie filmed in new york directed by nolan?? (or similar vibe) mixed reviews overrated but still smart",
  "science fiction war epic intergalactic warfare morally gray lead ticking clock deadline red herrings",
  "love-to-hate villain redemption arc but also unreliable narrator fourth wall breaks",
  # "cozy sick day comfort watch background at a party not too loud not overstimulating ear bursting sound avoid that",
  "a goofy movie but i mean goofy like silly not the title, 90s vibe, witty dialogue, not slow",
  "romcom about two rival bakers, light and flirty, 00s vibe",
  # "doc about free solo climbers, inspiring but not preachy",
  "YA fantasy with a chosen one prophecy, not too dark, PG-13",
  "set in Boston during a blizzard, but filmed in Toronto",
  "something on Netflix under 90 minutes",
  "critics hated it but I love it anyway, fun guilty pleasure",
  "Oscar-winning cinematography, but the story is messy",
  "real-time thriller in one apartment, ticking clock deadline, no flashbacks",
  "found footage horror, no jump scares, creepy dread",
  # "multiple POVs, unreliable narrator, twist ending explained at the end",
  "movies with Jack Sparrow energy but not Pirates, witty swashbuckling",
  "directed by Quinten Tarantino, snappy dialogue, violent but funny",
  "Her but not the one with Joaquin Phoenix",
  "ultra-gory body horror, disgusting, make me squirm",
  "background while coding, dialogue not important, chill visuals, low volume",
  "family movie night with kids, not babyish, jokes for adults too",
  "Korean audio with English subtitles, critics called it a slow-burn masterpiece",
  "adapted from a video game, big studio blockbuster, mixed reviews, amazing fight choreography",
  "set in ancient Rome, political betrayal, ends on a bleak note",
  "A24 vibe, but I want it less depressing, more hopeful"
]

In [3]:
# TESTING HOW LONG IT TAKES TO GENERATE 7 RESPONSES

import statistics

times = []
mean_justification_lengths = []

for query in tqdm(TEST_VECTOR_QUERIES[:7], desc="Testing vector subquery speed"):
    start = time.perf_counter()
    result = create_all_vector_subqueries(query=query)
    elapsed = time.perf_counter() - start
    times.append(elapsed)

    # Track mean length of justification field across all pieces in the result
    if result is not None:
        justification_lengths = [
            len(getattr(result.plot_events_data, "justification", "") or ""),
            len(getattr(result.plot_analysis_data, "justification", "") or ""),
            len(getattr(result.viewer_experience_data, "justification", "") or ""),
            len(getattr(result.watch_context_data, "justification", "") or ""),
            len(getattr(result.narrative_techniques_data, "justification", "") or ""),
            len(getattr(result.production_data, "justification", "") or ""),
            len(getattr(result.reception_data, "justification", "") or ""),
        ]
        mean_justification_lengths.append(statistics.mean(justification_lengths))

print(f"Median: {statistics.median(times):.2f}s")
print(f"Mean: {statistics.mean(times):.2f}s")
print(f"Mean justification length (chars): {statistics.mean(mean_justification_lengths):.1f}" if mean_justification_lengths else "No justifications to measure")

Testing vector subquery speed: 100%|██████████| 7/7 [00:19<00:00,  2.77s/it]

Median: 2.83s
Mean: 2.77s
Mean justification length (chars): 66.7





In [7]:
# ACTUALLY GENERATING THE VECTOR SUBQUERIES

vector_subquery_results = []

with futures.ThreadPoolExecutor(max_workers=6) as executor:
    # Submit extract_lexical_entities for each query
    future_to_query = {executor.submit(create_all_vector_subqueries, query): query for query in TEST_VECTOR_QUERIES}

    # Collect results as they complete, with progress bar
    for future in tqdm(
        futures.as_completed(future_to_query),
        total=len(TEST_VECTOR_QUERIES),
        desc="Processing vector subquery queries",
    ):
        query = future_to_query[future]
        try:
            results = future.result()
            vector_subquery_results.append((query, results))
        except Exception as e:
            print(f"Error processing query '{query}': {e}")

print(f"Generated {len(vector_subquery_results)} responses")

Processing vector subquery queries: 100%|██████████| 28/28 [00:12<00:00,  2.24it/s]

Generated 28 responses





In [8]:
# SAVE GENERATED VECTOR SUBQUERIES AS CSV

with open('../generated_data/vector_subquery_results.csv', 'w', newline='', encoding='utf-8') as csvfile:
    writer = csv.DictWriter(csvfile, fieldnames=["query", "results"])
    writer.writeheader()
    for query, results in vector_subquery_results:
        writer.writerow({"query": query, "results": results.model_dump_json()})

print(f"Saved {len(vector_subquery_results)} results to vector_subquery_results.csv")

Saved 28 results to vector_subquery_results.csv


## Vector Weights

In [5]:
# TESTING HOW LONG IT TAKES TO GENERATE 7 RESPONSES

import statistics

times = []
for query in tqdm(TEST_VECTOR_QUERIES[:7], desc="Testing vector weights speed"):
    start = time.perf_counter()
    create_all_vector_weights(query=query)
    elapsed = time.perf_counter() - start
    times.append(elapsed)

print(f"Median: {statistics.median(times):.2f}s")
print(f"Mean: {statistics.mean(times):.2f}s")

Testing vector weights speed: 100%|██████████| 7/7 [00:10<00:00,  1.54s/it]

Median: 1.47s
Mean: 1.54s





In [13]:
# Process all queries in parallel (each query runs its prompts in parallel internally)

vector_weights_results = []

with futures.ThreadPoolExecutor(max_workers=6) as executor:
    # Submit extract_lexical_entities for each query
    future_to_query = {executor.submit(create_all_vector_weights, query): query for query in TEST_VECTOR_QUERIES}

    # Collect results as they complete, with progress bar
    for future in tqdm(
        futures.as_completed(future_to_query),
        total=len(TEST_VECTOR_QUERIES),
        desc="Processing vector weights queries",
    ):
        query = future_to_query[future]
        try:
            results = future.result()
            vector_weights_results.append((query, results))
        except Exception as e:
            print(f"Error processing query '{query}': {e}")

print(f"Generated {len(vector_weights_results)} responses")

Processing vector weights queries: 100%|██████████| 28/28 [00:10<00:00,  2.79it/s]

Generated 28 responses





In [14]:
# Write results to CSV file

with open('../generated_data/vector_weights_results.csv', 'w', newline='', encoding='utf-8') as csvfile:
    writer = csv.DictWriter(csvfile, fieldnames=["query", "results"])
    writer.writeheader()
    for query, results in vector_weights_results:
        writer.writerow({"query": query, "results": results.model_dump_json()})

print(f"Processed {len(vector_weights_results)} queries and saved results to vector_weights_results.csv")

Processed 28 queries and saved results to vector_weights_results.csv


## Channel Weights

In [14]:
TEST_CHANNEL_WEIGHTS_QUERIES = [
    "Tom Hanks",
    "movies from the 90s on Netflix under 2 hours",
    "something cozy for a rainy Sunday that won't stress me out",
    "Pixar movies that will make me cry",
    "leandro dicaprio boat movie",
    "The Matrix",
    "critically acclaimed horror from the 2010s",
    "movies with unreliable narrators and twist endings",
    "trending action comedy",
    "something like John Wick but less violent",
    "French New Wave films",
    "kids movies in Spanish",
    "overrated superhero blockbusters",
    "Brad Pitt and George Clooney heist movies from before 2010",
    "disturbing psychological thrillers that mess with your head",
    "not another Marvel movie, give me something like Nolan's work but more accessible and under 2 hours",
    "that one where the city folds in on itself, not the animated one",
    "prestige TV vibes but it's a movie, ensemble cast, slow burn"
]

In [15]:
channel_weights_results = []

with futures.ThreadPoolExecutor(max_workers=25) as executor:
    # Submit extract_lexical_entities for each query
    future_to_query = {executor.submit(create_channel_weights, query): query for query in TEST_CHANNEL_WEIGHTS_QUERIES}

    # Collect results as they complete, with progress bar
    for future in tqdm(
        futures.as_completed(future_to_query),
        total=len(TEST_CHANNEL_WEIGHTS_QUERIES),
        desc="Processing channel weights queries",
    ):
        query = future_to_query[future]
        try:
            results = future.result()
            channel_weights_results.append((query, results))
        except Exception as e:
            print(f"Error processing query '{query}': {e}")

print(f"Generated {len(channel_weights_results)} responses")

Processing channel weights queries: 100%|██████████| 18/18 [00:01<00:00,  9.78it/s]

Generated 18 responses





In [16]:
# Write results to CSV: query plus each response's relevance fields

with open('../generated_data/channel_weights_results.csv', 'w', newline='', encoding='utf-8') as csvfile:
    writer = csv.DictWriter(csvfile, fieldnames=["query", "results"])
    writer.writeheader()
    for query, results in channel_weights_results:
        writer.writerow({"query": query, "results": results.model_dump_json()})
    
print(f"Processed {len(vector_weights_results)} queries and saved results to channel_weights_results.csv")

Processed 28 queries and saved results to channel_weights_results.csv


# Putting it all together

In [None]:
# SINGLE QUERY TEST

query = OVERALL_TEST_QUERIES[0]

overall = create_overall_query_understanding(
    query=query
)

print(query)
print(json.dumps(overall.model_dump(), indent=4))

In [9]:
# TESTING HOW LONG IT TAKES TO GENERATE 7 RESPONSES

import statistics

times = []
for query in tqdm(OVERALL_TEST_QUERIES[:7], desc="Testing overall query speed"):
    start = time.perf_counter()
    create_overall_query_understanding(query=query)
    elapsed = time.perf_counter() - start
    times.append(elapsed)

print(f"Median: {statistics.median(times):.2f}s")
print(f"Mean: {statistics.mean(times):.2f}s")

Testing overall query speed: 100%|██████████| 7/7 [00:18<00:00,  2.63s/it]

Median: 2.39s
Mean: 2.63s





In [None]:
# PROCESS ALL TEST QUERIES (DO 8 PER MINUTE)

MIN_SECONDS_PER_QUERY = 60 / 8  # 7.5 seconds between iterations (8 per 60 seconds)


def _serialize_for_json(obj: Any) -> Any:
    """Convert Pydantic models and nested structures to JSON-serializable dicts."""
    if hasattr(obj, "model_dump"):
        return obj.model_dump(mode="json")
    if isinstance(obj, dict):
        return {k: _serialize_for_json(v) for k, v in obj.items()}
    if isinstance(obj, (list, tuple)):
        return [_serialize_for_json(v) for v in obj]
    return obj


overall_results = []
for query in tqdm(OVERALL_TEST_QUERIES, desc="Processing overall test queries"):
    start = time.perf_counter()
    result = create_overall_query_understanding(query)
    elapsed = time.perf_counter() - start

    if result is None:
        overall_results.append({"query": query, "results": json.dumps({"error": "Query understanding failed"})})
    else:
        # Build JSON-serializable dict compatible with Gradio viewer (vector_routing = vector_subqueries)
        vw_serialized = _serialize_for_json(result.vector_weights)
        # Gradio expects keys like "plot_events"; schema has "plot_events_data". Add aliases.
        for key in list(vw_serialized.keys()):
            if key.endswith("_data"):
                vw_serialized[key[:-5]] = vw_serialized[key]
        serialized = {
            "channel_weights": _serialize_for_json(result.channel_weights),
            "lexical_entities": _serialize_for_json(result.lexical_entities),
            "metadata_preferences": _serialize_for_json(result.metadata_preferences),
            "vector_routing": _serialize_for_json(result.vector_subqueries),
            "vector_weights": vw_serialized,
        }
        overall_results.append({"query": query, "results": json.dumps(serialized)})

    # Rate limit: ensure at least MIN_SECONDS_PER_QUERY between iterations
    sleep_time = MIN_SECONDS_PER_QUERY - elapsed
    if sleep_time > 0:
        time.sleep(sleep_time)

print(f"Generated {len(overall_results)} responses")

Processing overall test queries:   0%|          | 0/25 [00:00<?, ?it/s]

Processing overall test queries: 100%|██████████| 25/25 [03:34<00:00,  8.58s/it]

Generated 25 responses





In [5]:
csv_path = Path("../generated_data/overall_results.csv")
with open(csv_path, "w", newline="", encoding="utf-8") as f:
    writer = csv.DictWriter(f, fieldnames=["query", "results"])
    writer.writeheader()
    writer.writerows(overall_results)

print(f"Saved {len(overall_results)} results to {csv_path.resolve()}")

Saved 25 results to /Users/michaelkeohane/Documents/movie-finder-rag/implementation/generated_data/overall_results.csv


In [None]:
# Gradio interface for overall_results.csv

import pandas as pd
import json
import gradio as gr
from pathlib import Path


def _format_value(val, indent=0):
    """Recursively format a value for display (handles dicts, lists, None)."""
    if val is None:
        return "_none_"
    if isinstance(val, bool):
        return str(val)
    if isinstance(val, (int, float)):
        return str(val)
    if isinstance(val, str):
        return val
    if isinstance(val, list):
        if not val:
            return "[]"
        items = [_format_value(v, indent + 1) for v in val]
        return "\n" + "  " * (indent + 1) + ("\n" + "  " * (indent + 1)).join(f"- {x}" for x in items)
    if isinstance(val, dict):
        lines = []
        for k, v in val.items():
            if v is None or v == "" or v == [] or v == {}:
                continue
            lines.append(f"**{k.replace('_', ' ').title()}:** {_format_value(v, indent + 1)}")
        return "\n" + "  " * (indent + 1) + ("\n" + "  " * (indent + 1)).join(lines)
    return str(val)


def _format_metadata_preferences(mp: dict) -> str:
    """
    Format metadata preferences as a bulleted list grouped by high-level keys.
    Each top-level key (e.g., release_date_preference) becomes a bullet group
    with its sub-items as nested bullets.
    """
    lines = []
    for key, val in mp.items():
        label = key.replace("_", " ").title()
        if val is None:
            continue
        if isinstance(val, dict):
            sub_items = []
            for k, v in val.items():
                sub_label = k.replace("_", " ").title()
                if isinstance(v, list):
                    sub_val = ", ".join(str(x) for x in v) if v else "(none)"
                    sub_items.append(f"- **{sub_label}:** {sub_val}")
                elif v is None:
                    sub_items.append(f"- **{sub_label}:** (none)")
                else:
                    sub_items.append(f"- **{sub_label}:** {v}")
            if sub_items:
                lines.append(f"- **{label}**")
                lines.extend(f"  {s}" for s in sub_items)
        elif isinstance(val, list):
            lines.append(f"- **{label}:** {', '.join(str(x) for x in val) if val else '(none)'}")
        else:
            lines.append(f"- **{label}:** {val}")
    return "\n".join(lines) if lines else "_No metadata preferences._"


def _escape_html(text: str) -> str:
    """Escape HTML special chars so content doesn't break tags."""
    return text.replace("&", "&amp;").replace("<", "&lt;").replace(">", "&gt;").replace('"', "&quot;")

def _collapsible_section(title: str, content: str, open_by_default: bool = False) -> str:
    """Build HTML collapsible section for justifications."""
    open_attr = " open" if open_by_default else ""
    safe_content = _escape_html(content)
    return f'''
<details{open_attr}>
<summary><strong>{title}</strong></summary>
<p style="margin: 0.5em 0; line-height: 1.5;">{safe_content}</p>
</details>
'''


def format_overall_results(query: str, df: pd.DataFrame) -> str:
    """
    Format the query results from overall_results.csv for display.
    Justifications are placed in collapsible <details> sections.
    
    Args:
        query: The selected query string
        df: DataFrame loaded from overall_results.csv
        
    Returns:
        HTML/Markdown string with formatted results
    """
    row = df[df["query"] == query]
    if row.empty:
        return "Query not found."
    
    result_str = row.iloc[0]["results"]
    try:
        data = json.loads(result_str)
    except (json.JSONDecodeError, TypeError):
        return f"Error parsing results for query: {query}"
    
    parts = [f"# Query\n\n> **{query}**\n"]
    
    # Channel weights
    cw = data.get("channel_weights")
    if cw is not None:
        # Handle format: [query, dict] or just dict
        weights = cw[1] if isinstance(cw, list) and len(cw) > 1 else (cw if isinstance(cw, dict) else {})
        if weights:
            parts.append("## Channel Weights\n")
            parts.append(f"- **Lexical relevance:** {weights.get('lexical_relevance', 'N/A')}")
            parts.append(f"- **Metadata relevance:** {weights.get('metadata_relevance', 'N/A')}")
            parts.append(f"- **Vector relevance:** {weights.get('vector_relevance', 'N/A')}\n")
    
    # Lexical entities
    le = data.get("lexical_entities", {})
    entity_candidates = le.get("entity_candidates", [])
    parts.append("## Lexical Entities\n")
    if entity_candidates:
        for e in entity_candidates:
            parts.append(f"- **{e.get('corrected_and_normalized_entity', 'N/A')}** ({e.get('most_likely_category', '')})")
    else:
        parts.append("_No entities extracted._\n")
    
    # Metadata preferences (bulleted list grouped by high-level keys)
    mp = data.get("metadata_preferences", {})
    if mp:
        parts.append("## Metadata Preferences\n")
        parts.append(_format_metadata_preferences(mp) + "\n")
    
    # Vectors: combined weight + subquery + justifications per collection
    vr = data.get("vector_routing", {}) or {}
    vw = data.get("vector_weights", {}) or {}
    vector_collections = [
        ("plot_events_data", "plot_events", "Plot Events"),
        ("plot_analysis_data", "plot_analysis", "Plot Analysis"),
        ("viewer_experience_data", "viewer_experience", "Viewer Experience"),
        ("watch_context_data", "watch_context", "Watch Context"),
        ("narrative_techniques_data", "narrative_techniques", "Narrative Techniques"),
        ("production_data", "production", "Production"),
        ("reception_data", "reception", "Reception"),
    ]
    if vr or vw:
        parts.append("## Vectors\n")
        for routing_key, weight_key, label in vector_collections:
            routing_val = vr.get(routing_key)
            weight_val = vw.get(weight_key)
            if routing_val is None and weight_val is None:
                continue
            parts.append(f"### {label}\n")
            # Weight (relevance)
            if isinstance(weight_val, str):
                parts.append(f"**Weight:** _Error: {weight_val}_\n")
            elif isinstance(weight_val, dict):
                relevance = weight_val.get("relevance", "N/A")
                weight_just = weight_val.get("justification", "")
                parts.append(f"**Weight:** {relevance}\n")
                if weight_just:
                    parts.append(_collapsible_section("Weight justification", weight_just))
            else:
                parts.append("**Weight:** N/A\n")
            # Subquery
            if isinstance(routing_val, dict):
                subquery = routing_val.get("relevant_subquery_text") or "_none_"
                subquery_just = routing_val.get("justification", "")
                parts.append(f"**Subquery:** `{subquery}`\n")
                if subquery_just:
                    parts.append(_collapsible_section("Subquery justification", subquery_just))
            elif routing_val is None and isinstance(weight_val, dict):
                parts.append("**Subquery:** _none_\n")
            parts.append("\n")
    
    return "\n".join(parts)


# Load overall_results.csv
csv_path = Path("../generated_data/overall_results.csv")
if not csv_path.exists():
    raise FileNotFoundError(f"overall_results.csv not found at {csv_path.resolve()}")

df_overall = pd.read_csv(csv_path)
query_choices = df_overall["query"].tolist()

# Build Gradio interface
with gr.Blocks(title="Query Understanding Results", theme=gr.themes.Soft()) as overall_interface:
    gr.Markdown("# Query Understanding Results Viewer")
    gr.Markdown("Select a query to view its full extraction results. Justifications are in collapsible sections.")
    
    query_dropdown = gr.Dropdown(
        choices=query_choices,
        value=query_choices[0] if query_choices else None,
        label="Select Query",
        allow_custom_value=False,
    )
    
    results_output = gr.Markdown(
        value=format_overall_results(query_choices[0], df_overall) if query_choices else "No data loaded."
    )
    
    def on_query_change(query):
        if not query:
            return "Select a query."
        return format_overall_results(query, df_overall)
    
    query_dropdown.change(fn=on_query_change, inputs=[query_dropdown], outputs=[results_output])

# Launch (use share=False for local only)
overall_interface.launch()

* Running on local URL:  http://127.0.0.1:7864

To create a public link, set `share=True` in `launch()`.


