# Testing

In [1]:
import pandas as pd
from sentence_transformers import SentenceTransformer
import numpy as np
from sklearn.metrics.pairwise import cosine_similarity
from typing import List, Dict, Union




In [3]:
df = pd.read_csv('final_dataset.csv')

In [4]:
df['genres_str'] = df['genres'].apply(lambda x: ' '.join(x))

model = SentenceTransformer('all-MiniLM-L6-v2')

combined_emb = np.load('movie_embeddings.npy')

In [5]:
def recommend_movies(user_query, top_n=5, verbose=True):
    query_emb = model.encode([user_query], convert_to_tensor=True)
    query_emb = query_emb / np.linalg.norm(query_emb)
    
    sim_scores = cosine_similarity(query_emb, combined_emb)
    
    top_indices = np.argsort(sim_scores[0])[-top_n:][::-1]
    recommendations = df.iloc[top_indices].copy()
    recommendations['similarity_score'] = sim_scores[0][top_indices]
    
    if verbose:
        print(f"\nRecommendations for query: '{user_query}'\n")
        print("{:<5} {:<60} {:<15} {}".format(
            "#", "Movie Title", "Similarity", "Genres"))
        print("-"*100)
        
        for i, (idx, row) in enumerate(recommendations.iterrows(), 1):
            genres = ', '.join(row['genres']) if row['genres'] else 'no data'
            print("{:<5} {:<60} {:<15.3f} {}".format(
                i, 
                row['title'][:55] + "..." if len(row['title']) > 55 else row['title'],
                row['similarity_score'],
                genres))
    
    return recommendations[['title', 'genres', 'similarity_score']]

In [6]:
user_query = "Crime drama about drug cartels and their violence"
recommendations = recommend_movies(user_query)
print(recommendations)


Recommendations for query: 'Crime drama about drug cartels and their violence'

#     Movie Title                                                  Similarity      Genres
----------------------------------------------------------------------------------------------------
1     This Rebel Breed                                             0.595           [, ', c, r, i, m, e,  , d, r, a, m, a, ', ]
2     Sinaloa Foot Soldier: Inside a Mexican Narco-Militia         0.574           [, ', ', ]
3     Traffic                                                      0.573           [, ', r, e, a, l, i, s, m, ', ,,  , ', m, u, r, d, e, r, ', ,,  , ', v, i, o, l, e, n, c, e, ', ,,  , ', f, l, a, s, h, b, a, c, k, ', ,,  , ', h, u, m, o, r, ', ,,  , ', s, u, s, p, e, n, s, e, f, u, l, ', ]
4     Buying Time                                                  0.572           [, ', c, r, i, m, e,  , d, r, a, m, a, ', ]
5     Snow                                                         0.568           [, ',

### Ranking of recommendation lists

**`MetricsCalculator`** class is designed for evaluating our recommendation system's results with **Precision@k**, **Recall@k**, **MAP**, and **NDCG** metrics.

In [7]:
class MetricsCalculator:
    @staticmethod
    def precision_at_k(relevant_items: List[int], recommended_items: List[int], k: int) -> float:
        top_k = recommended_items[:k]
        relevant_in_top_k = len(set(top_k) & set(relevant_items))
        return relevant_in_top_k / k if k > 0 else 0.0
    
    @staticmethod
    def recall_at_k(relevant_items: List[int], recommended_items: List[int], k: int) -> float:
        top_k = recommended_items[:k]
        relevant_in_top_k = len(set(top_k) & set(relevant_items))
        return relevant_in_top_k / len(relevant_items) if relevant_items else 0.0
    
    @staticmethod
    def average_precision(relevant_items: List[int], recommended_items: List[int]) -> float:
        ap = 0.0
        num_relevant = len(relevant_items)
        relevant_positions = [i+1 for i, item in enumerate(recommended_items) if item in relevant_items]
        
        for i, pos in enumerate(relevant_positions):
            ap += (i+1) / pos
        
        return ap / num_relevant if num_relevant > 0 else 0.0
    
    @staticmethod
    def mean_average_precision(relevant_items_list: List[List[int]], recommended_items_list: List[List[int]]) -> float:
        ap_scores = [
            MetricsCalculator.average_precision(relevant, recommended)
            for relevant, recommended in zip(relevant_items_list, recommended_items_list)
        ]
        return np.mean(ap_scores) if ap_scores else 0.0
    
    @staticmethod
    def ndcg_at_k(relevant_items: List[int], recommended_items: List[int], k: int, relevance_scores: Dict[int, float] = None) -> float:
        top_k = recommended_items[:k]
        if relevance_scores is None:
            relevance_scores = {item: 1.0 for item in relevant_items}
        
        dcg = sum(
            (relevance_scores.get(item, 0) / np.log2(i + 2) 
            for i, item in enumerate(top_k))
        )
        
        ideal_relevance = sorted([relevance_scores.get(item, 0) for item in relevant_items], reverse=True)[:k]
        idcg = sum(rel / np.log2(i + 2) for i, rel in enumerate(ideal_relevance))
        
        return dcg / idcg if idcg > 0 else 0.0

    @staticmethod
    def calculate_all_metrics(
        relevant_items_list: List[List[int]],
        recommended_items_list: List[List[int]],
        k_values: List[int] = [1, 3, 5, 10],
        relevance_scores_list: List[Dict[int, float]] = None
    ) -> Dict[str, Union[float, Dict[int, float]]]:
        results = {
            'MAP': MetricsCalculator.mean_average_precision(relevant_items_list, recommended_items_list),
            'Precision@k': {},
            'Recall@k': {},
            'NDCG@k': {}
        }
        
        for k in k_values:
            results['Precision@k'][k] = np.mean([
                MetricsCalculator.precision_at_k(relevant, recommended, k)
                for relevant, recommended in zip(relevant_items_list, recommended_items_list)
            ])
            
            results['Recall@k'][k] = np.mean([
                MetricsCalculator.recall_at_k(relevant, recommended, k)
                for relevant, recommended in zip(relevant_items_list, recommended_items_list)
            ])
            
            if relevance_scores_list:
                results['NDCG@k'][k] = np.mean([
                    MetricsCalculator.ndcg_at_k(relevant, recommended, k, rel_scores)
                    for relevant, recommended, rel_scores in zip(relevant_items_list, recommended_items_list, relevance_scores_list)
                ])
        
        return results

In [8]:
queries = [
  {
    "query": "Looking for a psychological horror film with supernatural elements and a strong female lead.",
    "relevant_movies": ["The Babadook", "Gothika", "The Exorcism of Emily Rose", "Janghwa, Hongryeon", "Penny Dreadful", "Crimson Peak"]
  },
  {
    "query": "I want a gritty crime drama that explores corruption, violence, and justice in urban settings.",
    "relevant_movies": ["American History X", "L.A. Confidential", "Brooklyn's Finest", "Marked for Death", "Harry Brown", "Nightcrawler"]
  },
  {
    "query": "Give me a fantasy or sci-fi movie that involves futuristic or post-apocalyptic worlds.",
    "relevant_movies": ["Star Trek: Nemesis", "Pandorum", "The Golden Compass", "Titan A.E.", "X-Men: Days of Future Past", "Arrival"]
  },
  {
    "query": "I'm in the mood for a romantic drama set against war or political unrest.",
    "relevant_movies": ["In the Land of Blood and Honey", "Partition", "Becoming Jane", "Jane Eyre", "The Constant Gardener"]
  },
  {
    "query": "Looking for a film about exorcism, possession, or demonic forces.",
    "relevant_movies": ["The Rite", "The Exorcism of Emily Rose", "Taste the Blood of Dracula", "Dracula Has Risen from the Grave", "Fallen", "The Devil Bat"]
  },
  {
    "query": "Show me movies about journalists, writers, or people uncovering secrets and conspiracies.",
    "relevant_movies": ["The Ghost Writer", "The Constant Gardener", "The Interpreter", "L.A. Confidential", "The Game"]
  },
  {
    "query": "I want a feel-good comedy with awkward romance or unexpected love stories.",
    "relevant_movies": ["Trainwreck", "The Princess Diaries", "Maid in Manhattan", "Failure to Launch", "We're the Millers", "Friends with Kids"]
  },
  {
    "query": "I'm looking for action-packed spy thrillers with political or military intrigue.",
    "relevant_movies": ["London Has Fallen", "Patriot Games", "The Taking of Pelham 123", "X-Men: Apocalypse", "The Assignment", "Dr. No"]
  },
  {
    "query": "Show me a coming-of-age story or a teen movie with humor and heart.",
    "relevant_movies": ["13 Going on 30", "The Spectacular Now", "Prom", "We're the Millers", "The Girl Next Door", "Open Season 3"]
  },
  {
    "query": "Movies about mental illness, emotional trauma, or identity struggles.",
    "relevant_movies": ["Spider", "Peacock", "The Jacket", "Eternal Sunshine of the Spotless Mind", "Gothika", "Nightcrawler"]
  },
  {
    "query": "I want a creepy mystery involving an old house or a haunting.",
    "relevant_movies": ["House on Haunted Hill", "The Babadook", "Crimson Peak", "Carnival of Souls", "The Ring"]
  },
  {
    "query": "Give me war films or military dramas that show the cost of conflict.",
    "relevant_movies": ["Hacksaw Ridge", "Three Kings", "La battaglia di Algeri", "Empire of the Sun", "Santa Fe Trail"]
  },
  {
    "query": "Looking for animated or fantasy movies suitable for families and kids.",
    "relevant_movies": ["Scooby-Doo! And the Legend of the Vampire", "Frozen", "Ella Enchanted", "The Princess Diaries", "Bedtime Stories", "The Simpsons: Hit & Run"]
  },
  {
    "query": "A film where science or technology leads to unexpected consequences.",
    "relevant_movies": ["Chappie", "Antitrust", "Spider", "Super 8", "Star Trek Into Darkness", "Arrival"]
  },
  {
    "query": "Movies that explore dystopian or oppressive societies and rebellion.",
    "relevant_movies": ["V: The Final Battle", "Pandorum", "Trainspotting", "The Lord of the Rings: The Two Towers", "X-Men: Days of Future Past", "Children of the Corn"]
  },
  {
    "query": "I want a movie about undercover operations, secret identities, and double-crosses.",
    "relevant_movies": ["The Assignment", "The Ghost Writer", "The Game", "L.A. Confidential", "London Has Fallen"]
  },
  {
    "query": "Looking for a comedy with absurd humor and a quirky cast of characters.",
    "relevant_movies": ["Trainwreck", "We're the Millers", "Joe Dirt", "Young Frankenstein", "Super Troopers"]
  },
  {
    "query": "Give me a tragic love story with historical or social barriers.",
    "relevant_movies": ["Becoming Jane", "Jane Eyre", "Partition", "The Color Purple", "Madrasapattinam"]
  },
  {
    "query": "I want a musical or film with strong musical themes and performances.",
    "relevant_movies": ["Hair", "Bedtime Stories", "Pump Up the Volume", "Trainwreck", "Ella Enchanted"]
  },
  {
    "query": "Show me a film about an intense psychological game or mind-bending mystery.",
    "relevant_movies": ["The Game", "Spider", "The Jacket", "Trance", "Eternal Sunshine of the Spotless Mind"]
  },
  {
    "query": "Looking for a dark comedy that involves crime or inappropriate behavior.",
    "relevant_movies": ["Very Bad Things", "Bad Santa", "Trainwreck", "The Perfect Host", "The Babadook"]
  },
  {
    "query": "I want a movie where someone fights against a powerful system or government.",
    "relevant_movies": ["La battaglia di Algeri", "Trainspotting", "The Constant Gardener", "Arrival", "Nightcrawler"]
  },
  {
    "query": "Show me sci-fi or action films that explore time, memory, or identity loss.",
    "relevant_movies": ["The Jacket", "Trance", "Spider", "Arrival", "X-Men: Days of Future Past"]
  },
  {
    "query": "Give me a war or combat film with heroic themes and sacrifice.",
    "relevant_movies": ["Hacksaw Ridge", "Three Kings", "Empire of the Sun", "Santa Fe Trail", "Malcolm X"]
  },
  {
    "query": "Looking for a horror film that mixes supernatural with psychological terror.",
    "relevant_movies": ["The Babadook", "Gothika", "Crimson Peak", "The Ring", "Penny Dreadful"]
  },
  {
    "query": "I want a movie featuring vampires or the undead with a horror tone.",
    "relevant_movies": ["Dracula Has Risen from the Grave", "Taste the Blood of Dracula", "Phantasm", "Carnival of Souls", "The Return of the Living Dead"]
  },
  {
    "query": "Show me an emotional drama about illness, family, and difficult choices.",
    "relevant_movies": ["My Sister's Keeper", "Peacock", "The Color Purple", "Eternal Sunshine of the Spotless Mind", "Spider"]
  },
  {
    "query": "Looking for a romantic comedy with mistaken identities or fake relationships.",
    "relevant_movies": ["Maid in Manhattan", "We're the Millers", "The Princess Diaries", "Failure to Launch", "Friends with Kids"]
  },
  {
    "query": "I want a mystery or thriller set in an isolated or confined location.",
    "relevant_movies": ["Penny Dreadful", "The Jacket", "Pandorum", "The Ghost Writer", "The Babadook"]
  },
  {
    "query": "Give me an animated or family-friendly film with magical elements.",
    "relevant_movies": ["Frozen", "Ella Enchanted", "Bedtime Stories", "Scooby-Doo! And the Legend of the Vampire", "The Princess Diaries"]
  },
  {
    "query": "Show me a coming-of-age film that involves social awkwardness or youth discovery.",
    "relevant_movies": ["13 Going on 30", "The Spectacular Now", "Trainwreck", "The Girl Next Door", "Prom"]
  },
  {
    "query": "I'm in the mood for a biographical or historical drama based on real events.",
    "relevant_movies": ["Malcolm X", "Empire of the Sun", "Hacksaw Ridge", "The Iron Lady", "The Constant Gardener"]
  },
  {
    "query": "Looking for a film about addiction, self-destruction, or personal downfall.",
    "relevant_movies": ["Trainspotting", "Nightcrawler", "Spider", "Trance", "Very Bad Things"]
  },
  {
    "query": "I want a movie with a female action lead in a supernatural or fantasy world.",
    "relevant_movies": ["Lara Croft: Tomb Raider", "Ella Enchanted", "Chappie", "Arrival", "Crimson Peak"]
  },
  {
    "query": "Show me a film where a journalist or investigator exposes a major secret.",
    "relevant_movies": ["The Constant Gardener", "The Interpreter", "The Ghost Writer", "L.A. Confidential", "The Game"]
  }
]