In [2]:
pip install pandas scikit-learn

Note: you may need to restart the kernel to use updated packages.


In [3]:
import pandas as pd
from sklearn.metrics.pairwise import cosine_similarity
from sklearn.impute import SimpleImputer
import numpy as np

In [4]:
import pandas as pd

ratings = pd.read_csv("ratings.csv")
movies = pd.read_csv("movies.csv")

In [5]:
# Create a user-item matrix
user_movie_matrix = ratings.pivot_table(index='userId', columns='movieId', values='rating')

In [6]:
# Fill NaNs with 0s (or use imputation strategy)
imputer = SimpleImputer(strategy='constant', fill_value=0)
user_movie_filled = pd.DataFrame(imputer.fit_transform(user_movie_matrix),
                                 index=user_movie_matrix.index,
                                 columns=user_movie_matrix.columns)

In [7]:
# Compute cosine similarity between users
user_similarity = cosine_similarity(user_movie_filled)
user_similarity_df = pd.DataFrame(user_similarity, index=user_movie_matrix.index, columns=user_movie_matrix.index)

In [8]:
# Function to get recommendations
def get_knn_recommendations(user_id, num_neighbors=5, num_recommendations=5):
    if user_id not in user_similarity_df.index:
        return []

In [9]:
def get_knn_recommendations(user_id, num_neighbors=5, num_recommendations=5):
    if user_id not in user_similarity_df.index:
        return []

    # Get similarity scores for this user
    similar_users = user_similarity_df[user_id].sort_values(ascending=False)
    similar_users = similar_users.drop(user_id)  # Remove self
    top_users = similar_users.head(num_neighbors).index

    # Get movies rated by similar users
    similar_users_ratings = user_movie_matrix.loc[top_users]

    # Average the ratings from neighbors
    avg_ratings = similar_users_ratings.mean(axis=0)

    # Drop movies the target user has already rated
    watched = user_movie_matrix.loc[user_id].dropna().index
    avg_ratings = avg_ratings.drop(watched, errors='ignore')

    # Get top N recommendations
    top_movie_ids = avg_ratings.sort_values(ascending=False).head(num_recommendations).index

    # Map movie IDs to titles
    recommended_titles = movies[movies['movieId'].isin(top_movie_ids)]['title'].values.tolist()
    return recommended_titles

In [10]:
user_ids = [1, 2, 3]
for uid in user_ids:
    recs = get_knn_recommendations(uid)
    print(f"\nTop recommendations for User {uid}:")
    for i, title in enumerate(recs, 1):
        print(f"{i}. {title}")



Top recommendations for User 1:
1. Life Is Beautiful (La Vita è bella) (1997)
2. Name of the Rose, The (Name der Rose, Der) (1986)
3. High Plains Drifter (1973)
4. Black Hawk Down (2001)
5. Brotherhood of the Wolf (Pacte des loups, Le) (2001)

Top recommendations for User 2:
1. Eyes Wide Shut (1999)
2. Pursuit of Happyness, The (2006)
3. Prestige, The (2006)
4. Limitless (2011)
5. Intouchables (2011)

Top recommendations for User 3:
1. 12 Angry Men (1957)
2. Lawrence of Arabia (1962)
3. Full Metal Jacket (1987)
4. Bug's Life, A (1998)
5. Emperor's New Groove, The (2000)
