# Testing

In [2]:
import pandas as pd
from sentence_transformers import SentenceTransformer
import numpy as np
from sklearn.metrics.pairwise import cosine_similarity
from typing import List, Dict, Union




In [3]:
df = pd.read_csv('final_dataset.csv')

In [4]:
df['genres_str'] = df['genres'].apply(lambda x: ' '.join(x))

model = SentenceTransformer('all-MiniLM-L6-v2')

combined_emb = np.load('movie_embeddings.npy')

In [12]:
# def recommend_movies(user_query, top_n=5):
#     query_emb = model.encode([user_query], convert_to_tensor=True)
#     query_emb = query_emb / np.linalg.norm(query_emb)
    
#     sim_scores = cosine_similarity(query_emb, combined_emb)
#     top_indices = np.argsort(sim_scores[0])[-top_n:][::-1]
    
#     return df.iloc[top_indices]


def recommend_movies(user_query, top_n=5, verbose=True):
    query_emb = model.encode([user_query], convert_to_tensor=True)
    query_emb = query_emb / np.linalg.norm(query_emb)
    sim_scores = cosine_similarity(query_emb, combined_emb)
    
    top_indices = np.argsort(sim_scores[0])[-top_n:][::-1]
    recommendations = df.iloc[top_indices].copy()
    
    recommendations['similarity_score'] = sim_scores[0][top_indices]
    
    if verbose:
        print(f"\nРекомендации для запроса: '{user_query}'\n")
        print("{:<5} {:<60} {:<15} {}".format(
            "№", "Название фильма", "Схожесть", "Жанры"))
        print("-"*100)
        
        for i, (idx, row) in enumerate(recommendations.iterrows(), 1):
            genres = ', '.join(row['genres']) if row['genres'] else 'нет данных'
            print("{:<5} {:<60} {:<15.3f} {}".format(
                i, 
                row['title'][:55] + "..." if len(row['title']) > 55 else row['title'],
                row['similarity_score'],
                genres))
    
    return recommendations[['title', 'genres', 'similarity_score']]



In [13]:
user_query = "Crime drama about drug cartels and their violence"
recommendations = recommend_movies(user_query)
print(recommendations)


Рекомендации для запроса: 'Crime drama about drug cartels and their violence'

№     Название фильма                                              Схожесть        Жанры
----------------------------------------------------------------------------------------------------
1     This Rebel Breed                                             0.595           [, ', c, r, i, m, e,  , d, r, a, m, a, ', ]
2     Sinaloa Foot Soldier: Inside a Mexican Narco-Militia         0.574           [, ', ', ]
3     Traffic                                                      0.573           [, ', r, e, a, l, i, s, m, ', ,,  , ', m, u, r, d, e, r, ', ,,  , ', v, i, o, l, e, n, c, e, ', ,,  , ', f, l, a, s, h, b, a, c, k, ', ,,  , ', h, u, m, o, r, ', ,,  , ', s, u, s, p, e, n, s, e, f, u, l, ', ]
4     Buying Time                                                  0.572           [, ', c, r, i, m, e,  , d, r, a, m, a, ', ]
5     Snow                                                         0.568           [, ', d

### Ranking of recommendation lists

**`MetricsCalculator`** class is designed for evaluating our recommendation system's results with **Precision@k**, **Recall@k**, **MAP**, and **NDCG** metrics.

In [14]:
class MetricsCalculator:
    @staticmethod
    def precision_at_k(relevant_items: List[int], recommended_items: List[int], k: int) -> float:
        """
        Compute Precision@k.
        
        Args:
            relevant_items: List of ground-truth relevant item IDs.
            recommended_items: Ranked list of recommended item IDs.
            k: Number of top results to consider.
        
        Returns:
            Precision@k score.
        """
        top_k = recommended_items[:k]
        relevant_in_top_k = len(set(top_k) & set(relevant_items))
        return relevant_in_top_k / k if k > 0 else 0.0
    
    @staticmethod
    def recall_at_k(relevant_items: List[int], recommended_items: List[int], k: int) -> float:
        """
        Compute Recall@k.
        
        Args:
            relevant_items: List of ground-truth relevant item IDs.
            recommended_items: Ranked list of recommended item IDs.
            k: Number of top results to consider.
        
        Returns:
            Recall@k score.
        """
        top_k = recommended_items[:k]
        relevant_in_top_k = len(set(top_k) & set(relevant_items))
        return relevant_in_top_k / len(relevant_items) if relevant_items else 0.0
    
    @staticmethod
    def average_precision(relevant_items: List[int], recommended_items: List[int]) -> float:
        """
        Compute Average Precision (AP) for a single query.
        
        Args:
            relevant_items: List of ground-truth relevant item IDs.
            recommended_items: Ranked list of recommended item IDs.
        
        Returns:
            AP score.
        """
        ap = 0.0
        num_relevant = len(relevant_items)
        relevant_positions = [i+1 for i, item in enumerate(recommended_items) if item in relevant_items]
        
        for i, pos in enumerate(relevant_positions):
            ap += (i+1) / pos
        
        return ap / num_relevant if num_relevant > 0 else 0.0
    
    @staticmethod
    def mean_average_precision(relevant_items_list: List[List[int]], recommended_items_list: List[List[int]]) -> float:
        """
        Compute MAP (Mean Average Precision) across multiple queries.
        
        Args:
            relevant_items_list: List of relevant item IDs for each query.
            recommended_items_list: List of ranked recommendations for each query.
        
        Returns:
            MAP score.
        """
        ap_scores = [
            MetricsCalculator.average_precision(relevant, recommended)
            for relevant, recommended in zip(relevant_items_list, recommended_items_list)
        ]
        return np.mean(ap_scores) if ap_scores else 0.0
    
    @staticmethod
    def ndcg_at_k(relevant_items: List[int], recommended_items: List[int], k: int, relevance_scores: Dict[int, float] = None) -> float:
        """
        Compute NDCG@k.
        
        Args:
            relevant_items: List of ground-truth relevant item IDs.
            recommended_items: Ranked list of recommended item IDs.
            k: Number of top results to consider.
            relevance_scores: Dictionary of item relevance scores (default: binary relevance).
        
        Returns:
            NDCG@k score.
        """
        top_k = recommended_items[:k]
        if relevance_scores is None:
            relevance_scores = {item: 1.0 for item in relevant_items}
        
        dcg = sum(
            (relevance_scores.get(item, 0) / np.log2(i + 2) 
            for i, item in enumerate(top_k))
        )
        
        ideal_relevance = sorted([relevance_scores.get(item, 0) for item in relevant_items], reverse=True)[:k]
        idcg = sum(rel / np.log2(i + 2) for i, rel in enumerate(ideal_relevance))
        
        return dcg / idcg if idcg > 0 else 0.0

    @staticmethod
    def calculate_all_metrics(
        relevant_items_list: List[List[int]],
        recommended_items_list: List[List[int]],
        k_values: List[int] = [1, 3, 5, 10],
        relevance_scores_list: List[Dict[int, float]] = None
    ) -> Dict[str, Union[float, Dict[int, float]]]:
        """
        Compute all metrics for a set of queries.
        
        Args:
            relevant_items_list: List of relevant item IDs for each query.
            recommended_items_list: List of ranked recommendations for each query.
            k_values: List of k values for Precision@k and Recall@k.
            relevance_scores_list: List of relevance score dictionaries for NDCG.
        
        Returns:
            Dictionary containing all metrics:
            - MAP
            - Precision@k (for each k)
            - Recall@k (for each k)
            - NDCG@k (for each k, if relevance_scores provided).
        """
        results = {
            'MAP': MetricsCalculator.mean_average_precision(relevant_items_list, recommended_items_list),
            'Precision@k': {},
            'Recall@k': {},
            'NDCG@k': {}
        }
        
        for k in k_values:
            results['Precision@k'][k] = np.mean([
                MetricsCalculator.precision_at_k(relevant, recommended, k)
                for relevant, recommended in zip(relevant_items_list, recommended_items_list)
            ])
            
            results['Recall@k'][k] = np.mean([
                MetricsCalculator.recall_at_k(relevant, recommended, k)
                for relevant, recommended in zip(relevant_items_list, recommended_items_list)
            ])
            
            if relevance_scores_list:
                results['NDCG@k'][k] = np.mean([
                    MetricsCalculator.ndcg_at_k(relevant, recommended, k, rel_scores)
                    for relevant, recommended, rel_scores in zip(relevant_items_list, recommended_items_list, relevance_scores_list)
                ])
        
        return results