In [None]:
import json
from pathlib import Path
from classes import IMDBMovie
from vectorize import (
    create_dense_anchor_vector_text,
    create_and_save_dense_anchor_vector,
    create_and_save_dense_content_vector,
    create_and_save_dense_vibe_vector,
    fetch_all_vectors_from_chroma,
    search_similar_vectors,
    create_dense_content_vector_text,
    create_dense_vibe_vector_text
)
from visualize import visualize_vectors_tsne
from llm_generations import generate_plot_summary, generate_vibe_summary

# Load movies from JSON file (relative to notebook location)
json_path = Path("../saved_imdb_movies.json")
with open(json_path, "r", encoding="utf-8") as f:
    movies_data = json.load(f)

# Convert each dictionary to an IMDBMovie object
movies = [IMDBMovie(**movie_dict) for movie_dict in movies_data]

print(f"Loaded {len(movies)} movies")

Loaded 16 movies


In [2]:
# print(create_dense_content_vector_text(movies[0]))
movie = movies[0]
print(f"Generating vibe text for {movie.title}")

create_dense_vibe_vector_text(movie)

Generating vibe text for hereditary


'Vibe summary: a tense, engulfing experience that gnaws at you with slow-building dread and unsettling, sensory horror.\n\nVibe keywords: slow-burn, claustrophobic, haunting, disturbing, psychological, oppressive, visceral, cinematic, surreal, unsettling\n\nWatch context: late-night watch, intense family drama, quietly terrifying, requires attention, solo viewing\n\nMaturity guidance: Rated r. rated r for horror violence, disturbing images, language, drug use and brief graphic nudity\nDuration: long\nGenres: drama, horror, mystery, thriller'

In [3]:
print(f"vibe_summary: {summary.vibe_summary}")
print(f"vibe_keywords: {summary.vibe_keywords}")
print(f"watch_context_tags: {summary.watch_context_tags}")

vibe_summary: a moodily enchanted, immersive journey that tightens focus and grows with you from whimsy to edge-of-seat intensity.
vibe_keywords: ['dreamlike', 'slow-burn', 'adventurous', 'emotional', 'visual feast', 'imaginative', 'mysterious', 'whimsical', 'tense', 'transformative']
watch_context_tags: ['family movie night', 'cozy but tense', 'visual feast', 'requires attention', 'imaginative world']


In [2]:
movie = movies[3]

print(movie.title)
print(create_dense_content_vector_text(movie))

everything everywhere all at once
Movie: everything everywhere all at once (everything everywhere all at once)

Synopsis: Evelyn Wang, a middle‑aged Chinese American laundromat owner, faces an IRS audit, a failing marriage to Waymond, a demanding father, and a fractured relationship with daughter Joy. During the audit, a multiverse rift pulls Evelyn into parallel lives where she makes different choices: she could be a kung fu master, a film star, or a survivor of other fates. Alpha‑Waymond explains verse‑jumping—exploiting improbably timed actions to access alternate selves. Jobu Tupaki, a splintered Joy, wields an everything bagel that could swallow the multiverse. In the alphaverse, the two entangled selves must stop Jobu and her minions from destroying reality. Evelyn’s original body remains in the IRS building; her mind hops across universes to gain skills and insight. After a confrontation with alpha‑Dierdre and a chase by alpha‑Gong Gong, Evelyn’s mind fractures and she experienc

In [None]:
# EMBEDDING ALL VECTORS

for movie in movies:
    # create_and_save_dense_anchor_vector(movie)
    # create_and_save_dense_content_vector(movie)
    creae

Processing: the dark knight (ID: tt0468569)
  Creating embedding...
  Embedding created: [-0.05128815025091171, 0.04320494830608368, -0.05114633962512016, -0.006363748572766781, -0.03840702399611473, -0.008591355755925179, 0.014760115183889866, 0.018778080120682716, 0.003944105934351683, 0.014736480079591274, 0.023576004430651665, -0.04587571322917938, -0.056440599262714386, -0.007829124107956886, 0.03360909968614578, -0.0025274776853621006, -0.06230210140347481, 0.0654219314455986, -0.010570795275270939, 0.024025071412324905, -0.012751132249832153, 0.021933365613222122, 0.01753723807632923, -0.016840003430843353, -0.06230210140347481, 0.017702683806419373, -0.029331149533391, -0.019498951733112335, 0.04540301114320755, 0.04698656126856804, 0.010830780491232872, -0.013200199231505394, 0.018754445016384125, 0.013448366895318031, -0.015811877325177193, -0.04112505912780762, -0.003991376142948866, -0.01870717480778694, 0.019971651956439018, -0.029331149533391, 0.001660365262068808, -0.068

In [3]:
# FETCHING ALL VECTORS
# vector_collection = fetch_all_vectors_from_chroma(collection_name="dense_anchor_vectors")
vector_collection = fetch_all_vectors_from_chroma(collection_name="dense_content_vectors")

In [4]:
visualize_vectors_tsne(vector_collection, n_components=3)

Reducing 16 vectors of dimension 1536 to 3D using t-SNE...
  Adjusted perplexity from 30.0 to 15 (n_vectors=16)
✓ t-SNE reduction complete


In [7]:
index = 14
n_results = 5

searching_name = vector_collection.metadatas[index].get("title")
print(f"Getting closest vectors to {searching_name}")
searching_vector = vector_collection.embeddings[index]
results = search_similar_vectors(searching_vector, 
    collection_name="dense_anchor_vectors", 
    n_results=n_results,
    ids_to_filter_out=[vector_collection.ids[index]]
)

print(f"The {n_results} closest vector(s) to {searching_name} are:")
for i, result in enumerate(results.metadatas):
    title = result.get('title')
    # Print distance if available (use getattr to handle cases where attribute might not exist)
    distances = getattr(results, 'distances', None)
    if distances is not None and i < len(distances):
        distance = distances[i]
        print(f"{i+1}. {title} (distance: {distance:.4f})")
    else:
        print(f"{i+1}. {title}")

# search_similar_vectors()

Getting closest vectors to poltergeist
The 5 closest vector(s) to poltergeist are:
1. hereditary (distance: 0.8723)
2. everything everywhere all at once (distance: 1.1252)
3. spirited away (distance: 1.1717)
4. the iron giant (distance: 1.1917)
5. spider-man: across the spider-verse (distance: 1.2172)
