# Qdrant Search Testing

In [1]:
import sys
import json

from qdrant_client import AsyncQdrantClient
from pathlib import Path
from typing import Optional, Sequence
from tqdm.asyncio import tqdm

# Make the project root importable so the project-local packages imported below resolve.
# Notebooks are in implementation/notebooks/, so the root is two levels above the current
# working directory (this relies on the kernel's cwd being the notebook's own folder).
PROJECT_ROOT = str(Path().resolve().parent.parent)
# Guard the insert so re-running this cell does not stack duplicate sys.path entries.
if PROJECT_ROOT not in sys.path:
    sys.path.insert(0, PROJECT_ROOT)

from implementation.classes.movie import BaseMovie
from implementation.classes.enums import EntityCategory, Genre
from db.vector_search import run_vector_search
from db.ingest_movie import ingest_movie_to_qdrant, ingest_movies_to_qdrant_batched
from implementation.classes.schemas import MetadataFilters, ExtractedEntitiesResponse, LexicalCandidate, ExtractedEntityData
from implementation.misc.helpers import tokenize_title_phrase, create_watch_provider_offering_key
from implementation.classes.watch_providers import FILTERABLE_WATCH_PROVIDER_IDS

# Qdrant Initialization
# Connection settings are named so the target instance is easy to spot and change.
QDRANT_HOST = "localhost"
QDRANT_PORT = 6333
qdrant_client = AsyncQdrantClient(host=QDRANT_HOST, port=QDRANT_PORT)

  from .autonotebook import tqdm as notebook_tqdm


In [2]:
# LOAD MOVIES

# The path is relative to the kernel's working directory.
json_path = Path("../../saved_imdb_movies.json")
movies_data = json.loads(json_path.read_text(encoding="utf-8"))

# Build one BaseMovie per saved record (each record's keys become keyword arguments).
movies = [BaseMovie(**record) for record in movies_data]

# Bare expression: the notebook displays how many movies were loaded.
len(movies)

50

## Ingesting

In [3]:
# Both ingestion options below are commented out, so this cell is a no-op when run.
# Uncomment ONE of them to ingest the loaded `movies`.

# # Batched
# await ingest_movies_to_qdrant_batched(movies)

# # Single
# # _ = await tqdm.gather(*[ingest_movie_to_qdrant(movie) for movie in movies], desc="Ingesting movies (Qdrant)")

## Running Searches

In [30]:
search_query = "Manly action movies from the 80s"
metadata_filters = MetadataFilters(
    watch_offer_keys=[40449,162],
)

vector_search_results = await run_vector_search(
    query=search_query,
    metadata_filters=metadata_filters,
    qdrant_client=qdrant_client,
    original_limit=10,
    subquery_limit=10,
    anchor_limit=10,
)

debug_data = vector_search_results.debug
print(f"Vector search produced {debug_data.total_candidates} candidates in {debug_data.wall_clock_ms}ms")

Subquery LLM returned relevant_subquery_text as None for plot_events, skipping subquery search.
Subquery LLM returned relevant_subquery_text as None for narrative_techniques, skipping subquery search.
Subquery LLM returned relevant_subquery_text as None for watch_context, skipping subquery search.
Weight LLM returned not_relevant for narrative_techniques, skipping original-query search.
Subquery LLM returned relevant_subquery_text as None for plot_analysis, skipping subquery search.
Vector search complete: 10 jobs, 22 unique candidates, 3023.03ms wall clock
Vector search produced 22 candidates in 3023.03ms


In [4]:
# Bare expression: the notebook displays the repr of the weights attached to the
# most recent `vector_search_results` (one weight field per vector).
vector_search_results.vector_weights

VectorWeights(plot_events_weight=<RelevanceSize.SMALL: 'small'>, plot_analysis_weight=<RelevanceSize.MEDIUM: 'medium'>, viewer_experience_weight=<RelevanceSize.MEDIUM: 'medium'>, watch_context_weight=<RelevanceSize.MEDIUM: 'medium'>, narrative_techniques_weight=<RelevanceSize.NOT_RELEVANT: 'not_relevant'>, production_weight=<RelevanceSize.LARGE: 'large'>, reception_weight=<RelevanceSize.MEDIUM: 'medium'>)

In [31]:
# Print each candidate's title followed by its non-zero vector scores.
# Index the loaded movies by tmdb_id once instead of rescanning the list per candidate.
# Iterating in reverse means the first movie carrying a given id is the one that is kept.
movies_by_tmdb_id = {m.tmdb_id: m for m in reversed(movies)}

for movie_id, scores in vector_search_results.candidates.items():
    movie = movies_by_tmdb_id.get(movie_id)
    # A candidate id may be absent from the locally loaded `movies`; print a
    # placeholder for it rather than failing the whole cell.
    print(movie.title if movie is not None else f"<unknown movie tmdb_id={movie_id}>")
    print(scores.to_nonzero_string())

the naked gun: from the files of police squad!
CandidateVectorScores(
    anchor_score_original: 0.40173694
    plot_events_score_original: 0.32224137
    production_score_original: 0.36678886
    production_score_subquery: 0.27679667
)
raiders of the lost ark
CandidateVectorScores(
    anchor_score_original: 0.3930955
    plot_events_score_original: 0.25136292
    plot_analysis_score_original: 0.29657632
    viewer_experience_score_original: 0.31627744
    viewer_experience_score_subquery: 0.681309
    watch_context_score_original: 0.42896044
    production_score_original: 0.36016834
    production_score_subquery: 0.2427434
    reception_score_original: 0.34599662
    reception_score_subquery: 0.52882093
)
mad max: fury road
CandidateVectorScores(
    anchor_score_original: 0.3666297
    plot_events_score_original: 0.27538878
    plot_analysis_score_original: 0.3441716
    viewer_experience_score_original: 0.34477052
    viewer_experience_score_subquery: 0.7306833
    watch_context_sc

In [10]:
def _format_ms(value) -> str:
    """Render a millisecond timing, or "n/a" when no value was recorded (None)."""
    return "n/a" if value is None else f"{value}ms"


# Detailed report for the most recent search: overall totals, then one entry per job,
# then any errors collected during the run.
debug_data = vector_search_results.debug
print(f"Vector search produced {debug_data.total_candidates} candidates in {debug_data.wall_clock_ms}ms")
print(f"Total jobs executed: {debug_data.total_jobs_executed}")
print("== Individual Job Data ==")
for job in debug_data.per_job_stats:
    print(f"   {job.score_field} generated {job.candidates_returned} candidates in {job.latency_ms}ms")
    print(f"   Query Ran: {job.query_string}")
    # Some jobs report no LLM time (None); format it explicitly so the line
    # does not read "Nonems".
    print(
        f"   LLM Time: {_format_ms(job.llm_generation_time_ms)}. "
        f"Embedding Time: {_format_ms(job.embedding_time_ms)}"
    )
    print("-" * 75)
print("== Errors ==")
for error in debug_data.errors:
    print(f"   {error}")

Vector search produced 5 candidates in 4092.22ms
Total jobs executed: 9
== Individual Job Data ==
   anchor_score_original generated 5 candidates in 119.17ms
   Query Ran: Manly action movies from the 80s
   LLM Time: Nonems. Embedding Time: 316.49ms
---------------------------------------------------------------------------
   plot_events_score_original generated 5 candidates in 15.17ms
   Query Ran: Manly action movies from the 80s
   LLM Time: 1413.11ms. Embedding Time: 316.49ms
---------------------------------------------------------------------------
   plot_analysis_score_original generated 5 candidates in 9.89ms
   Query Ran: Manly action movies from the 80s
   LLM Time: 2322.68ms. Embedding Time: 316.49ms
---------------------------------------------------------------------------
   viewer_experience_score_original generated 5 candidates in 27.56ms
   Query Ran: Manly action movies from the 80s
   LLM Time: 2050.65ms. Embedding Time: 316.49ms
----------------------------------