In [4]:
import pandas as pd
import numpy as np
from sklearn.metrics.pairwise import pairwise_distances

# --- STEP 1: SETUP (Run this once to define data_matrix and movie_prediction) ---

# Load Data
r_cols = ['user_id', 'movie_id', 'rating', 'unix_timestamp']
ratings = pd.read_csv('ml-100k/u.data', sep='\t', names=r_cols, encoding='latin-1')

# Create the user-item matrix
data_matrix = ratings.pivot_table(index='user_id', columns='movie_id', values='rating').fillna(0)

# Calculate Item-Based Similarity
# Transpose matrix to compare movies (Items)
movie_similarity = pairwise_distances(data_matrix.T, metric='cosine')

# Calculate Item-Based Predictions
# This creates a matrix of (Users x Movies) containing predicted scores
movie_prediction_values = data_matrix.dot(movie_similarity) / np.array([np.abs(movie_similarity).sum(axis=1)])
movie_prediction = pd.DataFrame(movie_prediction_values, index=data_matrix.index, columns=data_matrix.columns)

# --- STEP 2: THE FUNCTION ---

def recommend_users_for_movie(movie_id, top_n=5):
    """
    Takes a Movie ID and returns the top N User IDs 
    who are most likely to enjoy this movie but haven't seen it yet.
    """
    if movie_id not in data_matrix.columns:
        return f"Error: Movie ID {movie_id} not found in the dataset."
    
    # 1. Get predicted scores for this specific movie across all users
    movie_scores = movie_prediction[movie_id]
    
    # 2. Identify users who have ALREADY watched this movie
    # We look at the original data_matrix
    watched_mask = data_matrix[movie_id] > 0
    
    # 3. Filter out those users by setting their score to a very low value
    filtered_scores = movie_scores.copy()
    filtered_scores[watched_mask] = -1e9
    
    # 4. Get the User IDs of the top N highest scores
    recommended_users = filtered_scores.sort_values(ascending=False).head(top_n).index.tolist()
    
    return recommended_users



In [7]:
# --- STEP 3: USAGE ---

target_movie_id = 34  
top_users = recommend_users_for_movie(target_movie_id)

print(f"Top 5 Users to recommend Movie ID {target_movie_id} to:")
print(top_users)

Top 5 Users to recommend Movie ID 34 to:
[450, 655, 13, 416, 303]
