# Metadata Preferences Explorations

In [1]:
import sys
import json

from qdrant_client import AsyncQdrantClient
from pathlib import Path
from typing import Optional, Sequence
from tqdm.asyncio import tqdm

# Add parent directory to path to import from implementation package
# Notebooks are in implementation/notebooks/, so we go up two levels to project root
sys.path.insert(0, str(Path().resolve().parent.parent))

from implementation.classes.movie import BaseMovie
from implementation.classes.enums import (
    EntityCategory, 
    Genre,
    MaturityRating,
    MetadataPreferenceName,
)
from db.vector_search import run_vector_search
from db.vector_scoring import calculate_vector_scores
from implementation.llms.query_understanding_methods import extract_single_metadata_preference
from implementation.classes.languages import Language
from db.ingest_movie import ingest_movie_to_qdrant, ingest_movies_to_qdrant_batched
from implementation.classes.schemas import MetadataFilters, ExtractedEntitiesResponse, LexicalCandidate, ExtractedEntityData
from implementation.misc.helpers import tokenize_title_phrase, create_watch_provider_offering_key
from implementation.classes.watch_providers import FILTERABLE_WATCH_PROVIDER_IDS
from db.qdrant import qdrant_client
from db.search import search
from db.postgres import pool
from db.lexical_search import lexical_search

  from .autonotebook import tqdm as notebook_tqdm


In [5]:
# Sanity check: parse the raw string "unrated" into a MaturityRating member.
# The notebook displays the returned enum member as this cell's output.
MaturityRating.from_string_with_default(value="unrated")

<MaturityRating.UNRATED: 'unrated'>

In [8]:
# LOAD MOVIES

# The saved movie dump is addressed relative to the notebook's working
# directory (two levels up).
json_path = Path("../../saved_imdb_movies.json")
with json_path.open("r", encoding="utf-8") as f:
    movies_data = json.load(f)

# Build one BaseMovie per saved record; each record's keys are passed
# straight through as keyword arguments.
movies = [BaseMovie(**record) for record in movies_data]

# Print an index -> title (tmdb_id) listing so movies can be picked by position.
for i, movie in enumerate(movies):
    print(f"{i}: {movie.title} ({movie.tmdb_id})")

0: ferris bueller's day off (9377)
1: zootopia (269149)
2: school of rock (1584)
3: frozen (109445)
4: the princess bride (2493)
5: coco (354912)
6: klaus (508965)
7: up (14160)
8: mulan (10674)
9: shrek (808)
10: the year without a santa claus (13397)
11: mad max: fury road (76341)
12: raiders of the lost ark (85)
13: the dark knight (155)
14: john wick (245891)
15: captain america: the first avenger (1771)
16: spider-man: across the spider-verse (569094)
17: avengers: endgame (299534)
18: star wars (11)
19: harry potter and the philosopher's stone (671)
20: the lord of the rings: the fellowship of the ring (120)
21: gladiator (98)
22: inception (27205)
23: the matrix (603)
24: interstellar (157336)
25: blade runner 2049 (335984)
26: jurassic park (329)
27: arrival (329865)
28: hereditary (493922)
29: the shining (694)
30: insidious (49018)
31: terrifier 3 (1034541)
32: saw (176)
33: se7en (807)
34: parasite (496243)
35: get out (419430)
36: american psycho (1359)
37: fight club (550)

In [6]:
# Extract only the maturity-rating preference from a sample free-text query.
maturity_preferences = extract_single_metadata_preference(
    preference_name=MetadataPreferenceName.MATURITY_RATING,
    query="Good for the kids",
)

In [8]:
# Show the extracted preference as a MaturityRating enum plus its match
# operation, or say so when the extraction produced no result.
if maturity_preferences.result:
    maturity_enum = MaturityRating.from_string_with_default(maturity_preferences.result.rating)
    print(f"Maturity preference: {maturity_enum} ({maturity_preferences.result.match_operation})")
else:
    print("No maturity preference found")


Maturity preference: MaturityRating.PG (less_than_or_equal)
