In [1]:
import json
from pathlib import Path
from classes import IMDBMovie
from vectorize import (
    create_dense_anchor_vector_text,
    create_and_save_dense_anchor_vector,
    create_and_save_dense_content_vector,
    clear_collections_from_chroma,
    create_and_save_dense_vibe_vector,
    fetch_all_vectors_from_chroma,
    search_similar_vectors,
    create_dense_content_vector_text,
    create_dense_vibe_vector_text
)
from visualize import visualize_vectors_tsne
from llm_generations import generate_plot_summary, generate_vibe_summary

# Load movies from JSON file (relative to notebook location)
json_path = Path("../saved_imdb_movies.json")
with open(json_path, "r", encoding="utf-8") as f:
    movies_data = json.load(f)

# Convert each dictionary to an IMDBMovie object
movies = [IMDBMovie(**movie_dict) for movie_dict in movies_data]

print(f"Loaded {len(movies)} movies")

Loaded 49 movies


In [2]:
# print(create_dense_content_vector_text(movies[0]))
movie = movies[4]
print(f"Generating vibe text for {movie.title}")

create_dense_vibe_vector_text(movie)

Generating vibe text for klaus


'Vibe keywords: mood_atmosphere: whimsical, tonal_valence: hopeful, pacing_momentum: steady, kinetic_intensity: mellow, tension_pressure: uneasy, unpredictability_twistiness: straightforward, scariness_level: harmless, fear_mode: jumpy, humor_level: funny, humor_flavor: wholesome, violence_intensity: nonviolent, gore_body_grossness: clean, romance_prominence: present, romance_tone: tender, emotional_heaviness: moving, emotional_volatility: even-keeled, weirdness_surrealism: grounded, attention_demand: easywatch, narrative_complexity: straightforward, ambiguity_interpretive_ness: clear, sense_of_scale: grand\n\nGenres: animation, adventure, comedy, family, fantasy'

In [3]:
movie = movies[3]

print(movie.title)
print(create_dense_content_vector_text(movie))

the dark knight
Movie: the dark knight (the dark knight)

Synopsis: Gotham endures a mob bank heist gone wrong, the Joker covertly exploiting the chaos to incite anarchy; Batman confronts him, while Harvey Dent and Jim Gordon coordinate a crackdown on crime. Batman tracks Lau in Hong Kong to recover mob funds and force Dent’s team to leverage a Rico case against the mob. Lau’s testimony leads to mass arrests, but the Joker retaliates with assassinations and public threats, forcing Batman to reveal his secret identity to protect those he loves. A hospital bombing and a two-firth finale push Dent toward vigilante justice as Two-Face, while Batman grapples with whether to expose himself, or let Gotham choose its own protector. The Joker destabilizes Gotham’s institutions, kills Rachel, disfigures Dent, and engineers a crisis that forces Gordon to sacrifice truth for the city’s fragile order; Batman ultimately captures the Joker, Dent’s legend complicates heroism, and Gotham’s leadership c

In [3]:
# EMBEDDING ALL VECTORS

for movie in movies:
    # create_and_save_dense_anchor_vector(movie)
    # create_and_save_dense_content_vector(movie)
    create_and_save_dense_vibe_vector(movie)

Processing: spirited away (ID: tt0245429)
  Creating embedding...
  Embedding created: [0.017001964151859283, 0.07082083821296692, -0.0683387964963913, -0.005474274046719074, -0.03800277039408684, -0.0029232900124043226, 0.019484004005789757, 0.012292985804378986, 0.03171493858098984, 0.009824736043810844, 0.0029905117116868496, -0.02098701521754265, -0.022903701290488243, -0.005091626662760973, 0.01074860617518425, 0.042360126972198486, -0.01856013387441635, 0.021055961027741432, -0.01624356396496296, 0.007177228108048439, 0.010445245541632175, -0.0037196106277406216, -0.038581911474466324, 0.026254452764987946, -0.01839466392993927, 0.003068075515329838, -0.015388639643788338, 0.04200160875916481, 0.05824517458677292, 0.027123166248202324, -0.0016176342032849789, -0.021097328513860703, 0.020862912759184837, -0.04478700831532478, -0.004757240880280733, -0.019373690709471703, -0.06679441779851913, -0.02162131480872631, 0.013823576271533966, -0.07506787776947021, 0.016629658639431, -0.0

In [8]:
# CLEARS ALL VECTORS BE CAREFUL!!!
clear_collections_from_chroma(collection_names=["dense_vibe_vectors"])

✓ Cleared 49 vector(s) from collection 'dense_vibe_vectors'


In [6]:
# FETCHING ALL VECTORS
# vector_collection = fetch_all_vectors_from_chroma(collection_name="dense_anchor_vectors")
vector_collection = fetch_all_vectors_from_chroma(collection_name="dense_vibe_vectors")

In [4]:
visualize_vectors_tsne(vector_collection, n_components=3)

ValueError: vector_collection.embeddings cannot be empty

In [8]:
index = 14
n_results = 5

searching_name = vector_collection.metadatas[index].get("title")
print(f"Getting closest vectors to {searching_name}")
searching_vector = vector_collection.embeddings[index]
results = search_similar_vectors(searching_vector, 
    collection_name="dense_vibe_vectors", 
    n_results=n_results,
    ids_to_filter_out=[vector_collection.ids[index]]
)

print(f"The {n_results} closest vector(s) to {searching_name} are:")
for i, result in enumerate(results.metadatas):
    title = result.get('title')
    # Print distance if available (use getattr to handle cases where attribute might not exist)
    distances = getattr(results, 'distances', None)
    if distances is not None and i < len(distances):
        distance = distances[i]
        print(f"{i+1}. {title} (distance: {distance:.4f})")
    else:
        print(f"{i+1}. {title}")

# search_similar_vectors()

Getting closest vectors to the lego movie
The 5 closest vector(s) to the lego movie are:
1. inside out (distance: 0.1341)
2. toy story (distance: 0.1589)
3. the wild robot (distance: 0.2181)
4. spirited away (distance: 0.2212)
5. the iron giant (distance: 0.2218)
