In [1]:
import pandas as pd
import numpy as np
from scipy.stats import pearsonr
from scipy.sparse import csr_matrix
from collections import defaultdict
import warnings
warnings.filterwarnings('ignore')

movies_df = pd.read_csv("dataset/HW3/movie.csv")
ratings_df = pd.read_csv("dataset/HW3/rating.csv")

In [2]:
movies_df

Unnamed: 0,movieId,title,genres
0,1,Toy Story (1995),Adventure|Animation|Children|Comedy|Fantasy
1,2,Jumanji (1995),Adventure|Children|Fantasy
2,3,Grumpier Old Men (1995),Comedy|Romance
3,4,Waiting to Exhale (1995),Comedy|Drama|Romance
4,5,Father of the Bride Part II (1995),Comedy
...,...,...,...
27273,131254,Kein Bund für's Leben (2007),Comedy
27274,131256,"Feuer, Eis & Dosenbier (2002)",Comedy
27275,131258,The Pirates (2014),Adventure
27276,131260,Rentun Ruusu (2001),(no genres listed)


In [3]:
movies_df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 27278 entries, 0 to 27277
Data columns (total 3 columns):
 #   Column   Non-Null Count  Dtype 
---  ------   --------------  ----- 
 0   movieId  27278 non-null  int64 
 1   title    27278 non-null  object
 2   genres   27278 non-null  object
dtypes: int64(1), object(2)
memory usage: 639.5+ KB


In [4]:
ratings_df

Unnamed: 0,userId,movieId,rating,timestamp
0,1,2,3.5,2005-04-02 23:53:47
1,1,29,3.5,2005-04-02 23:31:16
2,1,32,3.5,2005-04-02 23:33:39
3,1,47,3.5,2005-04-02 23:32:07
4,1,50,3.5,2005-04-02 23:29:40
...,...,...,...,...
20000258,138493,68954,4.5,2009-11-13 15:42:00
20000259,138493,69526,4.5,2009-12-03 18:31:48
20000260,138493,69644,3.0,2009-12-07 18:10:57
20000261,138493,70286,5.0,2009-11-13 15:42:24


In [5]:
ratings_df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 20000263 entries, 0 to 20000262
Data columns (total 4 columns):
 #   Column     Dtype  
---  ------     -----  
 0   userId     int64  
 1   movieId    int64  
 2   rating     float64
 3   timestamp  object 
dtypes: float64(1), int64(2), object(1)
memory usage: 610.4+ MB


In [6]:
ratings_df.isna().sum()

userId       0
movieId      0
rating       0
timestamp    0
dtype: int64

In [7]:
ratings_df.describe()

Unnamed: 0,userId,movieId,rating
count,20000260.0,20000260.0,20000260.0
mean,69045.87,9041.567,3.525529
std,40038.63,19789.48,1.051989
min,1.0,1.0,0.5
25%,34395.0,902.0,3.0
50%,69141.0,2167.0,3.5
75%,103637.0,4770.0,4.0
max,138493.0,131262.0,5.0


# Data preprocessing

In [8]:
# Map userId and movieId to index
user_to_id = {user: id for id, user in enumerate(ratings_df['userId'].unique())}
movie_to_id = {movie: id for id, movie in enumerate(ratings_df['movieId'].unique())}

# Reverse maps
id_to_user = {id: user for user, id in user_to_id.items()}
id_to_movie = {id: movie for movie, id in movie_to_id.items()}

# Add mapped indices
ratings_df['user_index'] = ratings_df['userId'].map(user_to_id)
ratings_df['movie_index'] = ratings_df['movieId'].map(movie_to_id)

# Create sparse matrix [users x movies]
num_users = len(user_to_id)
num_movies = len(movie_to_id)

# Sparse rating matrix
print(f"Creating sparse matrix: {num_users} users x {num_movies} movies")
user_item_sparse = csr_matrix((ratings_df['rating'], (ratings_df['user_index'], ratings_df['movie_index'])), shape=(num_users, num_movies))

Creating sparse matrix: 138493 users x 26744 movies


# USER-BASED

In [9]:
def calculate_user_means(user_item_sparse, num_users, num_movies):
    user_means = {}
    for user_id in range(num_users):
        user_ratings = user_item_sparse[user_id].data
        if len(user_ratings) > 0:
            user_means[user_id] = np.mean(user_ratings)
        else:
            user_means[user_id] = 2.5  # Default mean

    sparsity = (1 - (user_item_sparse.nnz / (num_users * num_movies))) * 100
    print(f"Sparse matrix sparsity: {sparsity:.2f}%")
    return user_means

In [10]:
def calculate_user_pearson_correlation(user1_id, user2_id):
    """
    Calculate Pearson correlation coefficient between two users
    """
    # Get ratings for both users
    user1_ratings = user_item_sparse[user1_id].toarray().flatten()
    user2_ratings = user_item_sparse[user2_id].toarray().flatten()

    # Find common rated movies (non-zero ratings)
    user1_mask = user1_ratings > 0
    user2_mask = user2_ratings > 0
    common_mask = user1_mask & user2_mask

    if np.sum(common_mask) < 2:
        return 0.0
    
    user1_common = user1_ratings[common_mask]
    user2_common = user2_ratings[common_mask]

    # Calculate Pearson correlation
    if len(user1_common) > 1 and len(user2_common) > 1:
        correlation = np.corrcoef(user1_common, user2_common)[0, 1]
        return correlation if not np.isnan(correlation) else 0.0
    
    return 0.0

In [11]:
def find_similar_users(target_user_id, threshold=0.5, top_k=10, max_sample_size=5000):
    """
    Find similar users using Pearson Correlation Coefficient with sampling for large datasets
    """
    if target_user_id not in user_to_id:
        print(f"User {target_user_id} not found in filtered dataset!")
        return []
    
    target_user_idx = user_to_id[target_user_id]
    num_users = user_item_sparse.shape[0]

    # For very large datasets, sample users to check
    if num_users > 10000:
        # Sample users randomly for similarity calculation
        sample_size = min(max_sample_size, num_users - 1)
        candidate_indices = np.random.choice(
            [i for i in range(num_users) if i != target_user_idx], 
            sample_size, 
            replace=False
        )
    else:
        candidate_indices = [i for i in range(num_users) if i != target_user_idx]
    
    similarities = {}

    # Calculate similarities
    for other_user_idx in candidate_indices:
        correlation = calculate_user_pearson_correlation(target_user_idx, other_user_idx)
        
        if correlation >= threshold:
            other_user_id = id_to_user[other_user_idx]
            similarities[other_user_id] = correlation

    # Sort by similarity and get top K
    sorted_similarities = sorted(similarities.items(), key=lambda x: x[1], reverse=True)[:top_k]
    
    print(f"Found {len(sorted_similarities)} similar users (threshold >= {threshold})")
    for user_id, sim in sorted_similarities[:5]:  # Show top 5
        print(f"  User {user_id}: PCC = {sim:.3f}")
    
    return sorted_similarities

In [12]:
def predict_rating_user_based(target_user_id, movie_id, similar_users, user_means):
    """
    Predict rating for a movie using weighted average of similar users' ratings
    """
    if not similar_users or movie_id not in movie_to_id:
        return None
    
    target_user_idx = user_to_id[target_user_id]
    movie_idx = movie_to_id[movie_id]
    target_user_mean = user_means[target_user_idx]
    
    weighted_sum = 0.0
    similarity_sum = 0.0
    
    for similar_user_id, similarity in similar_users:
        if similar_user_id in user_to_id:
            similar_user_idx = user_to_id[similar_user_id]
            
            # Check if similar user rated this movie
            if user_item_sparse[similar_user_idx, movie_idx] > 0:
                similar_user_rating = user_item_sparse[similar_user_idx, movie_idx]
                similar_user_mean = user_means[similar_user_idx]
                
                # Mean-centered rating prediction
                weighted_sum += similarity * (similar_user_rating - similar_user_mean)
                similarity_sum += abs(similarity)
    
    if similarity_sum == 0:
        return target_user_mean
    
    predicted_rating = target_user_mean + (weighted_sum / similarity_sum)
    
    # Ensure rating is within valid range
    predicted_rating = max(1, min(5, predicted_rating))
    
    return predicted_rating

In [13]:
def get_candidate_movies_user_based(target_user_id, similar_users, max_candidates=500):
    """
    Get candidate movies (movies rated by similar users but not by target user)
    """   
    if target_user_id not in user_to_id:
        return []
    
    target_user_idx = user_to_id[target_user_id]
    
    # Get movies already rated by target user
    target_user_movies = set(user_item_sparse[target_user_idx].nonzero()[1])
    
    candidate_movies = set()
    
    # Get movies from similar users
    for similar_user_id, _ in similar_users:
        if similar_user_id in user_to_id:
            similar_user_idx = user_to_id[similar_user_id]
            similar_user_movies = set(user_item_sparse[similar_user_idx].nonzero()[1])
            
            # Movies rated by similar user but not by target user
            new_candidates = similar_user_movies - target_user_movies
            candidate_movies.update(new_candidates)
            
            # Limit candidates to avoid excessive computation
            if len(candidate_movies) > max_candidates:
                break
    
    # Convert back to movie IDs
    candidate_movie_ids = [id_to_movie[movie_idx] for movie_idx in candidate_movies]
    
    return candidate_movie_ids[:max_candidates]

In [14]:
def explain_recommendation_user_based(movie_id, predicted_rating, similar_users):
    """
    Explain why a movie was recommended using user-based collaborative filtering
    """  
    print(f"\nExplanation for Movie {movie_id}:")
    print(f"Predicted Rating: {predicted_rating:.2f}")
    
    contributing_users = []
    movie_idx = movie_to_id[movie_id]
    
    for similar_user_id, similarity in similar_users:
        if similar_user_id in user_to_id:
            similar_user_idx = user_to_id[similar_user_id]
            if user_item_sparse[similar_user_idx, movie_idx] > 0:
                actual_rating = user_item_sparse[similar_user_idx, movie_idx]
                contributing_users.append((similar_user_id, similarity, actual_rating))
    
    print("Contributing similar users:")
    for user_id, sim, rating in contributing_users[:3]:  # Show top 3 contributors
        print(f"  User {user_id} (PCC: {sim:.3f}) rated this movie: {rating:.1f}")
    
    return contributing_users

In [15]:
def recommend_movies_user_based(target_user_id, user_means, top_n=5, threshold=0.5, top_k=10):
    """
    Generate top N movie recommendations using user-based collaborative filtering
    """
    # Find similar users
    similar_users = find_similar_users(target_user_id, threshold, top_k)
    
    if not similar_users:
        print(f"No similar users found for User {target_user_id} with threshold {threshold}")
        return [], []
    
    # Get candidate movies
    candidate_movies = get_candidate_movies_user_based(target_user_id, similar_users)
    
    if not candidate_movies:
        print(f"No candidate movies found for User {target_user_id}")
        return [], similar_users
    
    print(f"Evaluating {len(candidate_movies)} candidate movies...")
    
    # Predict ratings for candidate movies
    movie_predictions = []
    
    for movie_id in candidate_movies:
        predicted_rating = predict_rating_user_based(target_user_id, movie_id, similar_users, user_means)
        if predicted_rating is not None:
            movie_predictions.append((movie_id, predicted_rating))
    
    # Sort by predicted rating and get top N
    movie_predictions.sort(key=lambda x: x[1], reverse=True)
    top_recommendations = movie_predictions[:top_n]
    
    return top_recommendations, similar_users

# Item-based

In [16]:
def calculate_item_means(item_user_sparse, num_users, num_movies):
    item_means = {}
    for movie_id in range(num_movies):
        movie_ratings = item_user_sparse[movie_id].data
        if len(movie_ratings) > 0:
            item_means[movie_id] = np.mean(movie_ratings)
        else:
            item_means[movie_id] = 2.5  # Default mean

    sparsity = (1 - (user_item_sparse.nnz / (num_users * num_movies))) * 100
    print(f"Sparse matrix sparsity: {sparsity:.2f}%")
    return item_means

In [17]:
def calculate_item_pearson_correlation(item1_id, item2_id):
    """
    Calculate Pearson correlation coefficient between two items (movies)
    """
    # Get ratings for both items across all users
    item1_ratings = user_item_sparse[item1_id].toarray().flatten()
    item2_ratings = user_item_sparse[item2_id].toarray().flatten()

    # Find common users who rated both items (non-zero ratings)
    item1_mask = item1_ratings > 0
    item2_mask = item2_ratings > 0
    common_mask = item1_mask & item2_mask

    if np.sum(common_mask) < 2:
        return 0.0
    
    item1_common = item1_ratings[common_mask]
    item2_common = item2_ratings[common_mask]

    # Calculate Pearson correlation
    if len(item1_common) > 1 and len(item2_common) > 1:
        correlation = np.corrcoef(item1_common, item2_common)[0, 1]
        return correlation if not np.isnan(correlation) else 0.0
    
    return 0.0

In [None]:
def find_similar_items(target_movie_id, threshold=0.5, top_k=10, max_sample_size=5000):
    """
    Find similar items (movies) using Pearson Correlation Coefficient
    """
    if target_movie_id not in movie_to_id:
        print(f"Movie {target_movie_id} not found in filtered dataset!")
        return []
    
    target_movie_idx = movie_to_id[target_movie_id]
    num_movies = user_item_sparse.shape[1]

    # For very large datasets, sample movies to check
    if num_movies > 10000:
        # Sample movies randomly for similarity calculation
        sample_size = min(max_sample_size, num_movies - 1)
        candidate_indices = np.random.choice(
            [i for i in range(num_movies) if i != target_movie_idx], 
            sample_size, 
            replace=False
        )
    else:
        candidate_indices = [i for i in range(num_movies) if i != target_movie_idx]
    
    similarities = {}

    # Calculate similarities
    for other_movie_idx in candidate_indices:
        correlation = calculate_item_pearson_correlation(target_movie_idx, other_movie_idx)
        
        if correlation >= threshold:
            other_movie_id = id_to_movie[other_movie_idx]
            similarities[other_movie_id] = correlation

    # Sort by similarity and get top K
    sorted_similarities = sorted(similarities.items(), key=lambda x: x[1], reverse=True)[:top_k]
    
    return sorted_similarities

In [19]:
def predict_rating_item_based(target_user_id, movie_id, similar_items, item_means):
    """
    Predict rating for a movie using item-based collaborative filtering
    """
    if not similar_items or target_user_id not in user_to_id:
        return None
    
    target_user_idx = user_to_id[target_user_id]
    movie_idx = movie_to_id[movie_id]
    
    weighted_sum = 0.0
    similarity_sum = 0.0
    
    for similar_movie_id, similarity in similar_items:
        if similar_movie_id in movie_to_id:
            similar_movie_idx = movie_to_id[similar_movie_id]
            
            # Check if target user rated this similar movie
            if user_item_sparse[target_user_idx, similar_movie_idx] > 0:
                user_rating = user_item_sparse[target_user_idx, similar_movie_idx]
                
                weighted_sum += similarity * user_rating
                similarity_sum += abs(similarity)
    
    if similarity_sum == 0:
        return item_means[movie_idx]
    
    predicted_rating = weighted_sum / similarity_sum
    
    # Ensure rating is within valid range
    predicted_rating = max(1, min(5, predicted_rating))
    
    return predicted_rating

In [20]:
def get_candidate_movies_item_based(target_user_id, num_candidates=500):
    """
    Get candidate movies for item-based recommendations
    (movies not rated by target user)
    """   
    if target_user_id not in user_to_id:
        return []
    
    target_user_idx = user_to_id[target_user_id]
    
    # Get movies already rated by target user
    target_user_movies = set(user_item_sparse[target_user_idx].nonzero()[1])
    
    # Get all movies not rated by target user
    all_movies = set(range(num_movies))
    candidate_movies = all_movies - target_user_movies
    
    # Convert back to movie IDs and limit candidates
    candidate_movie_ids = [id_to_movie[movie_idx] for movie_idx in candidate_movies]
    
    return candidate_movie_ids[:num_candidates]

In [21]:
def get_user_rated_movies(target_user_id, min_rating=4.0):
    """
    Get movies rated highly by the target user (for finding similar items)
    """
    if target_user_id not in user_to_id:
        return []
    
    target_user_idx = user_to_id[target_user_id]
    
    # Get all movies rated by user
    user_movie_indices = user_item_sparse[target_user_idx].nonzero()[1]
    
    highly_rated_movies = []
    for movie_idx in user_movie_indices:
        rating = user_item_sparse[target_user_idx, movie_idx]
        if rating >= min_rating:
            movie_id = id_to_movie[movie_idx]
            highly_rated_movies.append((movie_id, rating))
    
    return highly_rated_movies

In [22]:
def explain_recommendation_item_based(target_user_id, movie_id, predicted_rating, similar_items):
    """
    Explain why a movie was recommended using item-based collaborative filtering
    """  
    print(f"\nExplanation for Movie {movie_id} (Item-based):")
    print(f"Predicted Rating: {predicted_rating:.2f}")
    
    contributing_items = []
    target_user_idx = user_to_id[target_user_id]
    
    for similar_movie_id, similarity in similar_items:
        if similar_movie_id in movie_to_id:
            similar_movie_idx = movie_to_id[similar_movie_id]
            if user_item_sparse[target_user_idx, similar_movie_idx] > 0:
                user_rating = user_item_sparse[target_user_idx, similar_movie_idx]
                contributing_items.append((similar_movie_id, similarity, user_rating))
    
    if contributing_items:
        print("Based on similar movies this user rated:")
        for movie_id, sim, rating in contributing_items[:3]:  # Show top 3 contributors
            print(f"  Movie {movie_id} (PCC: {sim:.3f}, user's rating: {rating:.1f})")
    else:
        print("Based on content similarity analysis:")
        print("This movie is similar to movies in user's profile, but specific contributing movies weren't found in user's rating history.")
        print("Top similar movies found:")
        for similar_movie_id, similarity in similar_items[:3]:
            print(f"  Movie {similar_movie_id}: (PCC: {similarity:.3f})")
    
    return contributing_items

In [23]:
def recommend_movies_item_based(target_user_id, item_means, top_n=5, threshold=0.5, top_k=10):
    """
    Generate movie recommendations using item-based collaborative filtering
    """
    # Get highly rated movies by the user
    user_rated_movies = get_user_rated_movies(target_user_id, min_rating=4.0)
    
    if not user_rated_movies:
        print(f"User {target_user_id} has no highly rated movies (>= 4.0)")
        return [], {}
    
    print(f"Found {len(user_rated_movies)} highly rated movies by user {target_user_id}")
    
    # Find movies similar to ones the user liked
    candidate_movies = set()
    movie_to_similar_items = {}
    
    # For each highly rated movie, find similar movies
    for rated_movie_id, rating in user_rated_movies[:10]:  # Limit to top 10 rated movies
        similar_items = find_similar_items(rated_movie_id, threshold, top_k)
        
        for similar_movie_id, similarity in similar_items:
            # Only add if user hasn't rated this similar movie
            if target_user_id in user_to_id:
                target_user_idx = user_to_id[target_user_id]
                
                if similar_movie_id in movie_to_id:
                    similar_movie_idx = movie_to_id[similar_movie_id]
                    
                    if user_item_sparse[target_user_idx, similar_movie_idx] == 0:
                        candidate_movies.add(similar_movie_id)
                        
                        # Store which movies this candidate is similar to
                        if similar_movie_id not in movie_to_similar_items:
                            movie_to_similar_items[similar_movie_id] = []
                        movie_to_similar_items[similar_movie_id].append((rated_movie_id, similarity))
    
    candidate_movies = list(candidate_movies)
    
    if not candidate_movies:
        print(f"No candidate movies found for User {target_user_id}")
        return [], {}
    
    print(f"Evaluating {len(candidate_movies)} candidate movies...")
    
    # Predict ratings for candidate movies
    movie_predictions = []
    all_similar_items = {}
    
    for movie_id in candidate_movies:
        # Use the similar items we found during candidate generation
        if movie_id in movie_to_similar_items:
            # Convert back to the format expected by predict_rating_item_based
            similar_items = [(item_id, sim) for item_id, sim in movie_to_similar_items[movie_id]]
            similar_items = sorted(similar_items, key=lambda x: x[1], reverse=True)[:top_k]
            
            predicted_rating = predict_rating_item_based(target_user_id, movie_id, similar_items, item_means)
            
            if predicted_rating is not None:
                movie_predictions.append((movie_id, predicted_rating))
                all_similar_items[movie_id] = similar_items
    
    # Sort by predicted rating and get top N
    movie_predictions.sort(key=lambda x: x[1], reverse=True)
    top_recommendations = movie_predictions[:top_n]
    
    return top_recommendations, all_similar_items

# Display recommendations

In [24]:
def get_movie_details(movie_id):
    """
    Get movie title and genres from movie database
    """
    movie_info = movies_df[movies_df['movieId'] == movie_id]
    title = movie_info.iloc[0]['title']
    genres = movie_info.iloc[0]['genres']
    return title, genres

In [25]:
def display_recommendations(user_id, recommendations, similar_target, type="user-based"):
    """
    Display recommendations in a formatted way
    """
    if recommendations:
        print(f"\nTop {len(recommendations)} Movie Recommendations for User {user_id}:")
        
        for i, (movie_id, predicted_rating) in enumerate(recommendations, 1):
            title, genres = get_movie_details(movie_id)
            print("-" * 60)
            print(f"{i}. Movie {movie_id}")
            print(f"   Title: {title}")
            print(f"   Predicted Rating: {predicted_rating:.2f}")
            print(f"   Genres: {genres}")
            
            # Show explanation
            if (type == "user-based"):
                explain_recommendation_user_based(movie_id, predicted_rating, similar_target)
            else:
                explain_recommendation_item_based(user_id, movie_id, predicted_rating, similar_target[movie_id])
            print()
    else:
        print(f"No recommendations could be generated for User {user_id}")

In [26]:
def generate_recommendations_for_multiple_users(sparse_matrix, num_users, num_items, target_users, top_n=5, threshold=0.5, top_k=10, type="user-based"):
    """
    Generate recommendations for multiple users
    """ 
    print(f"{'='*80}")

    if (type == "user-based"):
        print("USER-BASED COLLABORATIVE FILTERING RECOMMENDATIONS")
        user_means = calculate_user_means(sparse_matrix, num_users, num_items)
    elif (type == "item-based"):
        print("ITEM-BASED COLLABORATIVE FILTERING RECOMMENDATIONS")
        item_means = calculate_item_means(sparse_matrix, num_users, num_items)
    else:
        print("Please choose 'user-based' or 'item-based' for parameter `type`")
        return None
        
    print(f"{'='*80}")
    
    # Filter target users to only those in the dataset
    available_users = [user for user in target_users if user in user_to_id]
    if not available_users:
        print("None of the target users are available in the filtered dataset!")
        # Use first 5 users from the dataset instead
        available_users = [id_to_user[i] for i in range(min(5, len(id_to_user)))]
        print(f"Using first {len(available_users)} users from dataset: {available_users}")
    
    all_recommendations = {}
    
    # Generate recommendations for each available user
    if (type == "user-based"):
        for user_id in available_users:
            print(f"RECOMMENDATIONS FOR USER {user_id}")
            print(f"{'='*60}")

            recommendations, similar_users = recommend_movies_user_based(user_id, user_means, top_n, threshold, top_k)
            display_recommendations(user_id, recommendations, similar_users, type)
            all_recommendations[user_id] = {"recommendations": recommendations, "similar_users": similar_users}
    else:
        for user_id in available_users:
            print(f"RECOMMENDATIONS FOR USER {user_id}")
            print(f"{'='*60}")

            recommendations, similar_items = recommend_movies_item_based(user_id, item_means, top_n, threshold, top_k)
            display_recommendations(user_id, recommendations, similar_items, type)
            all_recommendations[user_id] = {"recommendations": recommendations, "similar_items": similar_items}
        
    return all_recommendations

In [27]:
user_based_results = generate_recommendations_for_multiple_users(user_item_sparse, num_users, num_movies, target_users=[1, 2, 3, 4, 5], top_n=5, threshold=0.5, top_k=10, type="user-based")

USER-BASED COLLABORATIVE FILTERING RECOMMENDATIONS
Sparse matrix sparsity: 99.46%
RECOMMENDATIONS FOR USER 1
Found 10 similar users (threshold >= 0.5)
  User 66796: PCC = 1.000
  User 19125: PCC = 1.000
  User 69959: PCC = 1.000
  User 71040: PCC = 1.000
  User 36855: PCC = 1.000
Evaluating 346 candidate movies...

Top 5 Movie Recommendations for User 1:
------------------------------------------------------------
1. Movie 728
   Title: Cold Comfort Farm (1995)
   Predicted Rating: 5.00
   Genres: Comedy

Explanation for Movie 728:
Predicted Rating: 5.00
Contributing similar users:
  User 29193 (PCC: 1.000) rated this movie: 5.0

------------------------------------------------------------
2. Movie 2324
   Title: Life Is Beautiful (La Vita è bella) (1997)
   Predicted Rating: 5.00
   Genres: Comedy|Drama|Romance|War

Explanation for Movie 2324:
Predicted Rating: 5.00
Contributing similar users:
  User 122619 (PCC: 1.000) rated this movie: 5.0

------------------------------------------

In [28]:
# Create transpose for efficient item-based operations (movies x users)
item_user_sparse = user_item_sparse.T.tocsr()

item_based_results = generate_recommendations_for_multiple_users(item_user_sparse, num_users, num_movies, target_users=[1, 2, 3, 4, 5], top_n=5, threshold=0.5, top_k=10, type="item-based")

ITEM-BASED COLLABORATIVE FILTERING RECOMMENDATIONS
Sparse matrix sparsity: 99.46%
RECOMMENDATIONS FOR USER 1
Found 88 highly rated movies by user 1
Evaluating 97 candidate movies...

Top 5 Movie Recommendations for User 1:
------------------------------------------------------------
1. Movie 1041
   Title: Secrets & Lies (1996)
   Predicted Rating: 4.00
   Genres: Drama

Explanation for Movie 1041 (Item-based):
Predicted Rating: 4.00
Based on similar movies this user rated:
  Movie 260 (PCC: 1.000, user's rating: 4.0)

------------------------------------------------------------
2. Movie 26130
   Title: Docking the Boat (Att angöra en brygga) (1965)
   Predicted Rating: 4.00
   Genres: Comedy|Drama|Romance

Explanation for Movie 26130 (Item-based):
Predicted Rating: 4.00
Based on similar movies this user rated:
  Movie 253 (PCC: 1.000, user's rating: 4.0)

------------------------------------------------------------
3. Movie 77336
   Title: Cellar, The (Huset vid vägens ände) (2003)
  