In [9]:
#  Install & Import Libraries

# Install required libraries
!pip install -q transformers scikit-learn joblib

import os
import re
import ast
import numpy as np
import pandas as pd

from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.metrics.pairwise import cosine_similarity

from transformers import pipeline
import joblib


In [10]:
#  Download MovieLens & Clean Movie Data

# 1. Download & unzip MovieLens "latest small" dataset
!wget -q http://files.grouplens.org/datasets/movielens/ml-latest-small.zip -O ml-latest-small.zip
!unzip -o -q ml-latest-small.zip

# 2. Load movies & ratings
raw_movies_path = "ml-latest-small/movies.csv"
raw_ratings_path = "ml-latest-small/ratings.csv"

movies_df = pd.read_csv(raw_movies_path)
ratings_df = pd.read_csv(raw_ratings_path)

print("Original movies shape:", movies_df.shape)
print("Original ratings shape:", ratings_df.shape)

# 3. Helper to clean title & extract year
def clean_title(title: str):
    """
    Split a title such as 'Toy Story (1995)' into its name and release year.

    The release year is taken from the LAST four-digit group in brackets, and
    only that group (with the whitespace in front of it) is removed.  Earlier
    bracketed numbers therefore stay part of the name, e.g.
    'Foo (1984) (2005)' -> ('Foo (1984)', 2005).

    Parameters:
        title: raw title text; any non-string value is treated as missing.

    Returns:
        (clean_title, year) where year is an int, or None when the title has
        no '(dddd)' group.  Non-string input yields ("", None).
    """
    if not isinstance(title, str):
        return "", None

    # Every "(dddd)" group together with the whitespace that precedes it.
    year_matches = list(re.finditer(r"\s*\((\d{4})\)", title))
    if not year_matches:
        return title.strip(), None

    # Use the same match for both the year and the removal, so the two can
    # never disagree when a title contains more than one bracketed number.
    last_match = year_matches[-1]
    year = int(last_match.group(1))
    title_cleaned = (title[:last_match.start()] + title[last_match.end():]).strip()
    return title_cleaned, year

# Apply cleaning to all titles.
# Each title is cleaned exactly once, and the two result columns are assigned
# directly: re-running this code overwrites them instead of appending
# duplicate columns (which pd.concat would do), and no per-row pd.Series
# objects are created.
cleaned_pairs = [clean_title(raw_title) for raw_title in movies_df["title"]]
movies_df["title_cleaned"] = [name for name, _ in cleaned_pairs]
movies_df["release_year"] = [year for _, year in cleaned_pairs]

# 4. Clean genres
# Replace "(no genres listed)" with NaN, then split on '|'
movies_df["genres"] = movies_df["genres"].replace("(no genres listed)", np.nan)
movies_df["genres"] = movies_df["genres"].apply(
    lambda x: x.split("|") if pd.notna(x) else []
)

# 5. Handle missing values & normalize types
# Titles without a year carry None; they are stored as 0 so the column is a
# plain integer column.
movies_df["release_year"] = (
    pd.to_numeric(movies_df["release_year"], errors="coerce").fillna(0).astype(int)
)

# 6. Save cleaned versions to a /data folder
os.makedirs("data", exist_ok=True)

movies_clean_path = "data/movies_cleaned.csv"
ratings_clean_path = "data/ratings_cleaned.csv"

movies_df.to_csv(movies_clean_path, index=False)
ratings_df.to_csv(ratings_clean_path, index=False)

print("Saved cleaned movies to:", movies_clean_path)
print("Saved cleaned ratings to:", ratings_clean_path)


Original movies shape: (9742, 3)
Original ratings shape: (100836, 4)
Saved cleaned movies to: data/movies_cleaned.csv
Saved cleaned ratings to: data/ratings_cleaned.csv


In [25]:
# Build TF-IDF Features, Similarity Matrix & Save Artifacts

# Reload the cleaned movies and ratings from disk so this cell does not depend
# on in-memory state left behind by earlier cells.  After the CSV round-trip
# the genres column holds text rather than Python lists, which is why it is
# parsed again below.
movies_df = pd.read_csv("data/movies_cleaned.csv")
ratings_df = pd.read_csv("data/ratings_cleaned.csv")

# If genres were saved as list-like strings, convert safely
def safe_parse_genres(val):
    """
    Coerce a genres cell into a Python list of genre names.

    Accepted inputs:
      - a list: returned unchanged;
      - a list-like string such as "['Action', 'Sci-Fi']": parsed with
        ast.literal_eval (never eval, so no code is executed);
      - a pipe-separated string such as "Action|Adventure": split on '|';
      - anything else (NaN, None, numbers): treated as "no genres".

    Empty or whitespace-only strings yield [] rather than [""], and blank
    pieces between pipes are dropped.

    Returns:
        list: the genre names (possibly empty).
    """
    if isinstance(val, list):
        return val
    if not isinstance(val, str):
        return []

    text = val.strip()
    if text.startswith("[") and "]" in text:
        try:
            parsed = ast.literal_eval(text)  # SAFE (no eval)
        except (ValueError, SyntaxError, TypeError, MemoryError, RecursionError):
            # Not a valid literal (e.g. a title-like "[REC]"): fall through
            # to the plain-string handling below.
            parsed = None
        if isinstance(parsed, list):
            return parsed

    # Fallback: plain string like "Action|Adventure"
    return [part.strip() for part in text.split("|") if part.strip()]

# Restore real Python lists in the genres column (the CSV stores them as text).
movies_df["genres"] = movies_df["genres"].apply(safe_parse_genres)


def _genres_to_text(genre_list):
    """Lower-case each genre, drop its inner spaces, and join with single spaces."""
    return " ".join(genre.lower().replace(" ", "") for genre in genre_list)


# Text version of the genres, used as TF-IDF input.
movies_df["genres_str"] = movies_df["genres"].apply(_genres_to_text)

# Combined text = cleaned title + genres_str (missing titles become "").
title_text = movies_df["title_cleaned"].fillna("")
movies_df["title_cleaned"] = title_text
movies_df["combined_features"] = title_text.astype(str) + " " + movies_df["genres_str"]

# 1. Fit the TF-IDF vectorizer on the combined text.
tfidf_vectorizer = TfidfVectorizer(stop_words="english")
tfidf_matrix = tfidf_vectorizer.fit_transform(movies_df["combined_features"])

# 2. Pairwise cosine similarity between all movies.
cosine_sim_matrix = cosine_similarity(tfidf_matrix, tfidf_matrix)

print(f"TF-IDF matrix shape: {tfidf_matrix.shape}")
print(f"Cosine similarity matrix shape: {cosine_sim_matrix.shape}")

# 3. Mean rating per movie, joined onto the movies by movieId.
ratings_by_movie = ratings_df.groupby("movieId")["rating"]
average_ratings = ratings_by_movie.mean().rename("avg_rating")

movies_with_ratings = movies_df.set_index("movieId").join(average_ratings)
# Movies without any rating receive the mean of the per-movie averages.
overall_mean_rating = movies_with_ratings["avg_rating"].mean()
movies_with_ratings["avg_rating"] = movies_with_ratings["avg_rating"].fillna(
    overall_mean_rating
)

# 4. Persist every artifact under /models.
os.makedirs("models", exist_ok=True)

joblib.dump(tfidf_vectorizer, "models/tfidf_vectorizer.pkl")
np.save("models/tfidf_matrix.npy", tfidf_matrix.toarray())
np.save("models/cosine_sim_matrix.npy", cosine_sim_matrix)

movies_with_ratings.reset_index().to_csv("models/loaded_movies_df.csv", index=False)

movies_with_ratings["avg_rating"].to_csv("models/average_movie_ratings.csv")

print("Saved vectorizer, matrices, and movie data into /models")


TF-IDF matrix shape: (9742, 8969)
Cosine similarity matrix shape: (9742, 9742)
Saved vectorizer, matrices, and movie data into /models


In [26]:
#  KMeans Vibe Clustering

import os
import numpy as np
import pandas as pd
import joblib
from sklearn.cluster import KMeans

# Folder where model files are stored
MODELS_DIR = "models"

# Input artifacts (TF-IDF matrix, movies table) and the output model file.
TFIDF_MATRIX_PATH, MOVIES_DF_PATH, KMEANS_MODEL_PATH = (
    os.path.join(MODELS_DIR, file_name)
    for file_name in ("tfidf_matrix.npy", "loaded_movies_df.csv", "kmeans_vibe_model.pkl")
)

# 1) Read the saved TF-IDF matrix and the movies table
print("ðŸ”¹ Loading TF-IDF matrix and movies dataframe...")
tfidf_matrix = np.load(TFIDF_MATRIX_PATH)
movies_df = pd.read_csv(MOVIES_DF_PATH)

print(f"   TF-IDF shape : {tfidf_matrix.shape}")
print(f"   Movies shape : {movies_df.shape}")

# 2) Fit KMeans to find 'vibe clusters'
#    n_clusters can be changed if wanted (e.g., 6, 8, 10)
n_clusters = 8

print(f"ðŸ”¹ Training KMeans with {n_clusters} clusters...")
kmeans = KMeans(n_clusters=n_clusters, random_state=42, n_init=10)

# Labels of the training rows, one per movie.
cluster_labels = kmeans.fit(tfidf_matrix).labels_
print(f"   Sample cluster labels: {cluster_labels[:10]}")

# 3) Store each movie's cluster label next to it
movies_df["vibe_cluster"] = cluster_labels

# 4) Overwrite the movies file with the extra column
movies_df.to_csv(MOVIES_DF_PATH, index=False)

# 5) Persist the fitted KMeans model
joblib.dump(kmeans, KMEANS_MODEL_PATH)

print("âœ… Done!")
print(f"   Added 'vibe_cluster' column to: {MOVIES_DF_PATH}")
print(f"   Saved KMeans model to: {KMEANS_MODEL_PATH}")


ðŸ”¹ Loading TF-IDF matrix and movies dataframe...
   TF-IDF shape : (9742, 8969)
   Movies shape : (9742, 8)
ðŸ”¹ Training KMeans with 8 clusters...
   Sample cluster labels: [3 3 2 2 7 0 2 3 0 0]
âœ… Done!
   Added 'vibe_cluster' column to: models/loaded_movies_df.csv
   Saved KMeans model to: models/kmeans_vibe_model.pkl


In [23]:
# Create backend/ai/emotion_detection.py

import os

# Location of the generated backend module; its package folder must exist
# before the file is written.
script_path = os.path.join("backend", "ai", "emotion_detection.py")

os.makedirs("backend/ai", exist_ok=True)

script_code = '''"""Emotion detection and movie recommendation utilities for Vyber.

This module exposes helpers for the FastAPI backend:

- load_movies()          -> returns the movies DataFrame with ratings and genres
- detect_mood(text)      -> maps free-text input to one of our moods
- recommend(mood, ...)   -> returns a list of recommended movies for a mood
- surprise_me(mood, ...) -> returns one "surprise" movie using vibe clusters
"""

import os
import ast
import random
import numpy as np
import pandas as pd
from transformers import pipeline
import joblib

# --- Load precomputed artifacts (vectorizer, similarity matrix, movies) ---
# Everything in this section runs once, when the module is imported.

# The models folder is resolved relative to this file: two directories above
# the folder containing this module, then "models".
MODELS_DIR = os.path.join(os.path.dirname(__file__), "..", "..", "models")
MODELS_DIR = os.path.abspath(MODELS_DIR)

TFIDF_VECTORIZER_PATH = os.path.join(MODELS_DIR, "tfidf_vectorizer.pkl")
COSINE_SIM_MATRIX_PATH = os.path.join(MODELS_DIR, "cosine_sim_matrix.npy")
MOVIES_DF_PATH = os.path.join(MODELS_DIR, "loaded_movies_df.csv")

# Load TF-IDF vectorizer (kept for future use / compatibility)
# NOTE: joblib.load unpickles the file, so only load artifacts you produced yourself.
tfidf_vectorizer = joblib.load(TFIDF_VECTORIZER_PATH)

# Load cosine similarity matrix
# (recommend() indexes its rows and columns with the row indices of movies_df)
cosine_sim_matrix = np.load(COSINE_SIM_MATRIX_PATH)

# Load movies with ratings
movies_df = pd.read_csv(MOVIES_DF_PATH)

# Ensure genres are a proper Python list
def _ensure_genres_list(val):
    if isinstance(val, list):
        return val
    if isinstance(val, str) and val.startswith("[") and "]" in val:
        try:
            parsed = ast.literal_eval(val)
            if isinstance(parsed, list):
                return parsed
        except Exception:
            pass
    if isinstance(val, str):
        return val.split("|")
    return []

# Turn the genres text read from the CSV back into Python lists.
if "genres" in movies_df.columns:
    parsed_genres = movies_df["genres"].apply(_ensure_genres_list)
    movies_df["genres"] = parsed_genres

# Guarantee an integer vibe_cluster column (for KMeans clustering):
# -1 stands for "no cluster", both for missing values and a missing column.
if "vibe_cluster" not in movies_df.columns:
    movies_df["vibe_cluster"] = -1
else:
    filled_clusters = movies_df["vibe_cluster"].fillna(-1)
    movies_df["vibe_cluster"] = filled_clusters.astype(int)

# --- Emotion model and mapping ---

# Pretrained HuggingFace model for emotion classification.
# The pipeline is created at import time and called by detect_mood().
emotion_pipeline = pipeline(
    "text-classification",
    model="j-hartmann/emotion-english-distilroberta-base"
)

# Map fine-grained emotions -> 6 Vyber moods.
# Keys are lower-case emotion labels; detect_mood() lower-cases the model's
# label before looking it up, and any label missing here falls back to
# DEFAULT_MOOD.
# NOTE(review): confirm which of these labels the model above actually emits.
emotion_to_mood_map = {
    "joy": "happy",
    "optimism": "happy",
    "admiration": "happy",
    "amusement": "happy",
    "surprise": "happy",
    "trust": "happy",
    "contentment": "happy",
    "love": "romantic",
    "caring": "romantic",
    "sadness": "sad",
    "grief": "sad",
    "disappointment": "sad",
    "anger": "action",
    "annoyance": "action",
    "disgust": "scary",
    "fear": "scary",
    "nervousness": "scary",
    "anticipation": "fantasy",
    "curiosity": "fantasy",
    "excitement": "fantasy",
}

# Fallback if nothing matches
DEFAULT_MOOD = "happy"

# Mood -> genres mapping.
# A movie is a candidate for a mood when it has at least one of the listed
# genres (compared case-insensitively by the recommender). "Sci-Fi" and
# "Adventure" are listed under both "action" and "fantasy".
# NOTE(review): confirm every genre name here exists in the dataset (e.g. "Family").
mood_to_genres_map = {
    "happy": ["Comedy", "Family", "Animation", "Romance"],
    "sad": ["Drama"],
    "romantic": ["Romance"],
    "action": ["Action", "Adventure", "Crime", "Sci-Fi"],
    "scary": ["Horror", "Thriller"],
    "fantasy": ["Fantasy", "Sci-Fi", "Adventure"]
}

def load_movies():
    """Give callers their own copy (DataFrame.copy()) of the recommender's movies table."""
    movies_snapshot = movies_df.copy()
    return movies_snapshot

# --- Helpers for emotion detection & explanations ---

def _extract_top_dict(results):
    """Safely pull out the top {label, score} dict from any nested list structure."""
    obj = results
    if isinstance(obj, dict):
        return obj
    while isinstance(obj, list) and len(obj) > 0:
        obj = obj[0]
        if isinstance(obj, dict):
            return obj
    return None


def detect_mood(text: str) -> str:
    """Map free-text input to one of the coarse moods.

    The text is classified with emotion_pipeline, the top label is
    lower-cased and translated through emotion_to_mood_map.  DEFAULT_MOOD is
    returned for empty or non-string input, when the pipeline raises, when no
    result dict can be extracted, or when the label is not in the map.
    """
    has_text = isinstance(text, str) and bool(text.strip())
    if not has_text:
        return DEFAULT_MOOD

    try:
        raw_output = emotion_pipeline(text)
    except Exception:
        # Best effort: any failure of the classifier falls back to the default.
        return DEFAULT_MOOD

    best = _extract_top_dict(raw_output)
    if best is None:
        return DEFAULT_MOOD

    emotion_label = str(best.get("label", "")).lower()
    return emotion_to_mood_map.get(emotion_label, DEFAULT_MOOD)


def build_explanation(
    title: str,
    mood: str,
    genres_list,
    avg_rating: float = None,
    rank: int = 1,
    user_text: str = None,
) -> str:
    """Compose a one-sentence, human-readable reason for a recommendation.

    The sentence is built from a randomly chosen template and mentions the
    lower-cased mood, the title, a genre phrase, the rating (one decimal, only
    when avg_rating is given) and a rank phrase (ranks 1-3 only).  When
    user_text is non-blank, two extra templates quoting it (whitespace
    collapsed, cut to 80 characters) join the pool.
    """
    mood = (mood or "").lower()

    # Genre phrase: first genre is the headline, the rest are "elements".
    if not genres_list:
        genre_phrase = "movie"
    elif len(genres_list) == 1:
        genre_phrase = f"{genres_list[0]} movie"
    else:
        extra_genres = ", ".join(genres_list[1:])
        genre_phrase = f"{genres_list[0]} movie with {extra_genres} elements"

    # Rating phrase
    rating_phrase = (
        f", rated about {avg_rating:.1f} by other viewers" if avg_rating is not None else ""
    )

    # Rank phrase (empty beyond the third position)
    rank_phrase = ""
    for position, phrase in (
        (1, " as a top pick for your mood"),
        (2, " as another strong choice"),
        (3, " as a good option to try next"),
    ):
        if rank == position:
            rank_phrase = phrase
            break

    # Optional snippet quoted from the user's own words
    snippet = None
    if isinstance(user_text, str) and user_text.strip():
        condensed = " ".join(user_text.split())
        snippet = condensed if len(condensed) <= 80 else condensed[:77] + "..."

    # Template pool
    templates = [
        "Since your mood is {mood}, we picked {title}, a {genre_phrase}{rating_phrase}{rank_phrase}.",
        "Because you are feeling {mood}, {title} â€” a {genre_phrase}{rating_phrase} â€” should fit your vibe{rank_phrase}.",
        "To go with your {mood} mood, we suggest {title}, which is a {genre_phrase}{rating_phrase}{rank_phrase}.",
        "For this {mood} mood, {title} stands out as a {genre_phrase}{rating_phrase}{rank_phrase}.",
        "Given that you are feeling {mood}, {title} is a {genre_phrase} that many people enjoy{rating_phrase}{rank_phrase}.",
        "We matched your {mood} mood with {title}, a {genre_phrase}{rating_phrase}{rank_phrase}.",
    ]
    if snippet:
        templates += [
            'You mentioned "{snippet}", so we chose {title}, a {genre_phrase}{rating_phrase}{rank_phrase}.',
            'Based on what you said ("{snippet}"), {title} â€” a {genre_phrase}{rating_phrase} â€” should suit your mood{rank_phrase}.',
        ]

    chosen_template = random.choice(templates)
    return chosen_template.format(
        mood=mood,
        title=title,
        genre_phrase=genre_phrase,
        rating_phrase=rating_phrase,
        rank_phrase=rank_phrase,
        snippet=snippet or "",
    )


#Recommendation logic

def _resolve_mood(mood):
    """Lower-case the mood; unknown or empty moods become DEFAULT_MOOD."""
    mood = (mood or "").lower()
    return mood if mood in mood_to_genres_map else DEFAULT_MOOD


def _mood_genre_mask(mood):
    """Boolean Series: True for movies having at least one genre mapped to the mood."""
    wanted = {genre.lower() for genre in mood_to_genres_map[mood]}

    def has_genre(genres):
        if not isinstance(genres, (list, tuple, set)):
            return False
        return any(str(g).lower() in wanted for g in genres)

    return movies_df["genres"].apply(has_genre)


def _genres_as_str_list(genres_val):
    """Turn a genres cell (list, list-like string, plain string, missing) into a list of str."""
    # Missing values mean "no genres" (not a list holding the text "nan").
    if genres_val is None or (isinstance(genres_val, float) and pd.isna(genres_val)):
        return []
    if isinstance(genres_val, str):
        try:
            parsed = ast.literal_eval(genres_val)
        except Exception:
            # Best effort: anything that is not a literal is kept as one genre name.
            parsed = None
        genres_val = parsed if isinstance(parsed, list) else [genres_val]
    elif not isinstance(genres_val, (list, tuple, set)):
        genres_val = [str(genres_val)]
    return [str(g) for g in genres_val if g is not None]


def _safe_vibe_cluster(row):
    """Return the row's vibe_cluster as int, or None when absent, missing or not convertible."""
    if "vibe_cluster" not in row:
        return None
    value = row["vibe_cluster"]
    try:
        if value is None or pd.isna(value):
            return None
        return int(value)
    except (TypeError, ValueError, OverflowError):
        return None


def _movie_result(row, mood, rank, user_text):
    """Build the result dict (title, genres, mood, rating, cluster, explanation) for one row."""
    genres_list = _genres_as_str_list(row.get("genres", []))

    avg_rating = None
    if "avg_rating" in row and not pd.isna(row["avg_rating"]):
        avg_rating = float(row["avg_rating"])

    explanation = build_explanation(
        title=row["title"],
        mood=mood,
        genres_list=genres_list,
        avg_rating=avg_rating,
        rank=rank,
        user_text=user_text,
    )

    return {
        "title": row["title"],
        "genres": genres_list,
        "mood": mood,
        "avg_rating": avg_rating,
        "vibe_cluster": _safe_vibe_cluster(row),
        "explanation": explanation,
    }


def _min_max(values):
    """Scale values to [0, 1]; a constant input maps to all ones."""
    x = np.asarray(values, dtype=float)
    low, high = x.min(), x.max()
    if high == low:
        return np.ones_like(x)
    return (x - low) / (high - low)


def recommend(
    mood: str,
    top_n: int = 5,
    weight_sim: float = 0.7,
    weight_rating: float = 0.3,
    user_text: str = None,
):
    """Recommend movies for a given mood.

    Candidates are the movies sharing at least one genre with the mood (all
    movies when none match).  Each candidate is scored as

        weight_sim * normalized mean cosine similarity to the other candidates
        + weight_rating * normalized average rating

    and the top_n best are returned, best first.

    Parameters:
        mood: mood name; unknown or empty values fall back to DEFAULT_MOOD.
        top_n: maximum number of results (None = all candidates); negative
            values are treated as 0.
        weight_sim: weight of the similarity score.
        weight_rating: weight of the rating score.
        user_text: optional user input, forwarded to the explanation builder.

    Returns:
        list of dicts with keys title, genres, mood, avg_rating, vibe_cluster,
        explanation.  Empty when there are no movies or top_n is 0.
    """
    mood = _resolve_mood(mood)

    # A negative top_n would otherwise slice from the end of the ranking.
    if top_n is not None and top_n < 0:
        top_n = 0

    # Row POSITIONS of the candidates: the similarity matrix is addressed by
    # position, so positions are used throughout instead of index labels.
    positions = np.flatnonzero(_mood_genre_mask(mood).to_numpy(dtype=bool))
    if positions.size == 0:
        # Fallback: if no movie matches, just take all movies
        positions = np.arange(len(movies_df))
    if positions.size == 0 or top_n == 0:
        return []

    # Average similarity of each candidate to all candidates
    sim_scores = cosine_sim_matrix[np.ix_(positions, positions)].mean(axis=1)

    # Use avg_rating column if present, else fall back to ones
    if "avg_rating" in movies_df.columns:
        ratings = movies_df["avg_rating"].to_numpy()[positions]
    else:
        ratings = np.ones(len(positions))

    final_scores = weight_sim * _min_max(sim_scores) + weight_rating * _min_max(ratings)

    # Best scores first, then keep the requested number
    order = np.argsort(final_scores)[::-1][:top_n]

    return [
        _movie_result(movies_df.iloc[pos], mood, rank, user_text)
        for rank, pos in enumerate(positions[order], start=1)
    ]


def surprise_me(mood: str, user_text: str = None):
    """
    Surprise-me recommender that uses mood + vibe clusters.

    - Starts from the same genre filter as recommend()
    - Prefers movies whose vibe_cluster differs from those of the top 5
      regular recommendations, so the pick feels fresh but still relevant
    - Picks ONE movie at random and returns it as a single result dict
      (same keys as the items returned by recommend())

    Raises ValueError (from DataFrame.sample) when there are no movies at all.
    """
    mood = _resolve_mood(mood)

    # 1) Clusters already covered by the regular top picks
    base_recs = recommend(mood=mood, top_n=5, user_text=user_text)
    base_clusters = {
        r.get("vibe_cluster")
        for r in base_recs
        if r.get("vibe_cluster") is not None
    }

    # 2) Candidate set: the mood's genre filter, or every movie if nothing matches
    candidates_df = movies_df[_mood_genre_mask(mood)]
    if candidates_df.empty:
        candidates_df = movies_df

    # 3) Prefer movies from a *different* vibe_cluster than the main recs
    if "vibe_cluster" in candidates_df.columns and base_clusters:
        alt_candidates = candidates_df[~candidates_df["vibe_cluster"].isin(base_clusters)]
        if not alt_candidates.empty:
            candidates_df = alt_candidates

    # 4) Randomly pick ONE surprise movie
    surprise_row = candidates_df.sample(n=1, random_state=None).iloc[0]

    # 5) Same result shape and safe conversions as recommend()
    return _movie_result(surprise_row, mood, 1, user_text)
'''

# Write the module source to disk as UTF-8 text.
with open(script_path, "w", encoding="utf-8") as script_file:
    script_file.write(script_code)

print("Written backend AI script to:", script_path)


Written backend AI script to: backend/ai/emotion_detection.py


In [27]:
#  Full Vyber AI Pipeline Test

import importlib

# 1) Import the AI module and reload it so the latest file on disk is used
import backend.ai.emotion_detection as emo
importlib.reload(emo)

from backend.ai.emotion_detection import load_movies, detect_mood, recommend

print("âœ… Loaded module from:", emo.__file__)
print("Functions available:")
for helper_name in ("load_movies", "detect_mood", "recommend"):
    print(f" - {helper_name:<12}:", hasattr(emo, helper_name))

# 2) Inspect the movies dataframe and its vibe_cluster column
movies = load_movies()
print("\n Movies dataframe info:")
print("Shape:", movies.shape)
print("Columns:", list(movies.columns))

has_vibe_column = "vibe_cluster" in movies.columns
if not has_vibe_column:
    print("\n 'vibe_cluster' column NOT found. ")
else:
    print("\nâœ… 'vibe_cluster' column found.")
    print(movies[["title", "vibe_cluster"]].head())

# 3) Sample user texts, one per mood we want to exercise
test_texts = [
    "I feel very low and lonely today. Nothing seems exciting.",
    "I am so happy and relaxed, just want something light and fun to watch.",
    "I am in the mood for love and a cosy romantic movie.",
    "I want something thrilling and full of action.",
    "I want something a bit dark and scary tonight.",
    "I want to escape into a magical fantasy world.",
]

# 4) End-to-end run: text -> mood -> recommendations
divider = "\n" + "=" * 80
for case_number, user_text in enumerate(test_texts, start=1):
    print(divider)
    print(f"Test case #{case_number}")
    print("User text:", user_text)

    # Detect mood
    mood = detect_mood(user_text)
    print(" Detected mood:", mood)

    # Top 3 recommendations for that mood
    recs = recommend(mood=mood, top_n=3, user_text=user_text)

    print(f"\nðŸŽ¬ Top 3 recommendations for mood = '{mood}':")
    for position, rec in enumerate(recs, start=1):
        if rec["genres"]:
            genre_text = ", ".join(rec["genres"])
        else:
            genre_text = "N/A"
        print(f"\n  #{position}: {rec['title']}")
        print("     Genres      :", genre_text)
        print("     Avg rating  :", rec["avg_rating"])
        print("     Vibe cluster:", rec.get("vibe_cluster"))
        print("     Explanation :", rec["explanation"])

print(divider)
print("âœ… Full AI pipeline test finished.")


Device set to use cpu


âœ… Loaded module from: /content/backend/ai/emotion_detection.py
Functions available:
 - load_movies : True
 - detect_mood : True
 - recommend   : True

 Movies dataframe info:
Shape: (9742, 9)
Columns: ['movieId', 'title', 'genres', 'title_cleaned', 'release_year', 'genres_str', 'combined_features', 'avg_rating', 'vibe_cluster']

âœ… 'vibe_cluster' column found.
                                title  vibe_cluster
0                    Toy Story (1995)             3
1                      Jumanji (1995)             3
2             Grumpier Old Men (1995)             2
3            Waiting to Exhale (1995)             2
4  Father of the Bride Part II (1995)             7

Test case #1
User text: I feel very low and lonely today. Nothing seems exciting.
 Detected mood: sad

ðŸŽ¬ Top 3 recommendations for mood = 'sad':

  #1: L.I.E. (2001)
     Genres      : Drama
     Avg rating  : 4.333333333333333
     Vibe cluster: 6
     Explanation : To go with your sad mood, we suggest L.I.E. (2001)

In [None]:
#  trying different similarity vs rating weights

import os, sys, importlib

# Make sure the project root is importable and that we use the latest
# version of the module.  The path is added only once, so re-running this
# cell does not keep growing sys.path with duplicate entries.
project_root = os.path.abspath(".")
if project_root not in sys.path:
    sys.path.append(project_root)
import backend.ai.emotion_detection as emo
importlib.reload(emo)

test_mood = "happy"   # you can change to "sad", "romantic", etc.

# (similarity weight, rating weight) pairs to compare
weight_pairs = [
    (0.5, 0.5),
    (0.7, 0.3),
    (0.8, 0.2)
]

# Print the top 5 recommendations for each weighting
for w_sim, w_rat in weight_pairs:
    print(f"\n=== Testing weights: similarity={w_sim}, rating={w_rat} ===")
    recs = emo.recommend(test_mood, top_n=5, weight_sim=w_sim, weight_rating=w_rat)
    for i, rec in enumerate(recs, start=1):
        print(f"{i}. {rec['title']} | Genres: {rec['genres']} | Rating: {rec['avg_rating']}")
    print("-" * 60)


Device set to use cpu



=== Testing weights: similarity=0.5, rating=0.5 ===
1. All Yours (2016) | Genres: ['Comedy', 'Drama', 'Romance'] | Rating: 5.0
2. It Can't Be! (1975) | Genres: ['Comedy'] | Rating: 4.5
3. Ten, The (2007) | Genres: ['Comedy'] | Rating: 4.5
4. Last Detail, The (1973) | Genres: ['Comedy', 'Drama'] | Rating: 4.25
5. P.S. (2004) | Genres: ['Comedy', 'Drama', 'Fantasy', 'Romance'] | Rating: 4.25
------------------------------------------------------------

=== Testing weights: similarity=0.7, rating=0.3 ===
1. All Yours (2016) | Genres: ['Comedy', 'Drama', 'Romance'] | Rating: 5.0
2. You Can't Take It with You (1938) | Genres: ['Comedy', 'Romance'] | Rating: 3.8333333333333335
3. Ten, The (2007) | Genres: ['Comedy'] | Rating: 4.5
4. It Can't Be! (1975) | Genres: ['Comedy'] | Rating: 4.5
5. Last Detail, The (1973) | Genres: ['Comedy', 'Drama'] | Rating: 4.25
------------------------------------------------------------

=== Testing weights: similarity=0.8, rating=0.2 ===
1. All Yours (2016) |

In [None]:
!zip -r vyber_ai.zip backend data models vyber_ai_mood_recommender_grifith.ipynb


In [None]:
!ls
!ls *.ipynb
