In [None]:
# This recommender could alternatively be built with the SVD algorithm from the scikit-surprise package

import pandas as pd
import numpy as np
from sklearn.metrics.pairwise import cosine_similarity
from sklearn.preprocessing import StandardScaler
import warnings
# Suppress every warning raised after this point.
# NOTE(review): this blanket filter also hides deprecation warnings from
# pandas / scikit-learn — consider narrowing it to specific categories.
warnings.filterwarnings('ignore')

# Load the two input tables from CSV files given as relative paths (resolved
# against the current working directory).
# Later cells read the userId, movieId and rating columns of `ratings` and the
# movieId, title and genres columns of `movies`.
ratings = pd.read_csv('../data/ratings.csv')
movies = pd.read_csv('../data/movies.csv')

In [2]:
# Build the user-item rating matrix: one row per userId, one column per movieId,
# each cell holding the rating for that (user, movie) pair. pivot_table uses its
# default aggregation (mean) if a pair appears more than once in `ratings`.
# Pairs with no rating are filled with 0, so 0 stands for "not rated" here.
user_item_matrix = ratings.pivot_table(index='userId', columns='movieId', values='rating').fillna(0)
print("User-Item Matrix Shape:", user_item_matrix.shape)

User-Item Matrix Shape: (6040, 3706)


In [4]:
# Pairwise cosine similarity between the rows (users) of the user-item matrix.
# The zeros stored for unrated movies take part in the computation as zeros.
user_similarity = cosine_similarity(user_item_matrix)
# Wrap the raw similarity array in a DataFrame labelled by userId on both axes,
# so one user's similarities to all others can be looked up by id.
user_similarity_df = pd.DataFrame(user_similarity, index=user_item_matrix.index, columns=user_item_matrix.index)
print("User Similarity Matrix Shape:", user_similarity_df.shape)

User Similarity Matrix Shape: (6040, 6040)


In [7]:
# Creating the function to get movie recommendations
def get_user_recomendations(user_id, num_recomendations=10):
    """Recommend unseen movies to a user with user-based collaborative filtering.

    Every movie is scored with the similarity-weighted average of the ratings
    given by the users who rated it, the weights being each rater's similarity
    to ``user_id``. Movies the target user has already rated are excluded.

    Relies on three notebook-level objects: ``user_item_matrix`` (users x
    movies, 0 meaning "not rated"), ``user_similarity_df`` (users x users) and
    the ``movies`` table.

    Parameters
    ----------
    user_id : int
        Label of the target user in ``user_item_matrix.index``.
    num_recomendations : int, default 10
        Maximum number of movies to return.

    Returns
    -------
    pandas.DataFrame or str
        The ``title`` and ``genres`` columns of the recommended rows of
        ``movies``, ordered from highest to lowest predicted score and keeping
        the row labels of ``movies``. If ``user_id`` is not in the matrix, an
        explanatory message string is returned instead.
    """
    if user_id not in user_item_matrix.index:
        return f"User ID {user_id} not found in the dataset."

    # Similarity of every user to the target user (Series indexed by userId).
    sim_scores = user_similarity_df[user_id]

    # Numerator: per-movie sum of similarity * rating. Unrated cells are 0 and
    # therefore add nothing.
    weighted_ratings = user_item_matrix.T.dot(sim_scores)

    # Denominator: per-movie sum of the similarities of the users who actually
    # rated that movie. A single matrix product against the "has rated" mask
    # replaces a Python-level loop over every movie column.
    has_rated = (user_item_matrix > 0).astype(float)
    sim_sums = has_rated.T.dot(sim_scores)

    # The small constant keeps the division defined when no rater has a
    # non-zero similarity to the target user.
    scores = weighted_ratings / (sim_sums + 1e-9)

    # Keep only movies the target user has not rated yet.
    user_rated = user_item_matrix.loc[user_id]
    user_unrated = scores[user_rated == 0]

    # Best-scoring movie ids first; the stable sort makes ties deterministic.
    top_ids = (
        user_unrated
        .sort_values(ascending=False, kind='mergesort')
        .head(num_recomendations)
        .index
    )

    # isin() alone yields rows in the order they appear in `movies`, which
    # throws the ranking away, so reorder the matches by their rank.
    rank_of = pd.Series(np.arange(len(top_ids)), index=top_ids)
    matched = movies[movies['movieId'].isin(top_ids)]
    by_rank = np.argsort(matched['movieId'].map(rank_of).to_numpy(), kind='mergesort')

    return matched.iloc[by_rank][['title', 'genres']]

In [8]:
# Example usage: prompt for a user id and print that user's recommendations.
# int() raises ValueError if the typed text is not a valid integer.
# NOTE(review): input() blocks until a value is typed, so this cell cannot run
# unattended — consider a constant user id for reproducible runs.
user_id = int(input("Enter User ID for recommendations: "))
recommendations = get_user_recomendations(user_id)
# Prints either the recommendations DataFrame or the "not found" message string.
print(recommendations)

                                          title                genres
52                              Lamerica (1994)                 Drama
777          Gate of Heavenly Peace, The (1995)           Documentary
977   Schlafes Bruder (Brother of Sleep) (1995)                 Drama
1762                    Follow the Bitch (1998)                Comedy
3103                    Ulysses (Ulisse) (1954)             Adventure
3164                       Smashing Time (1967)                Comedy
3313                     Song of Freedom (1936)                 Drama
3538                   One Little Indian (1973)  Comedy|Drama|Western
3587                               Lured (1947)                 Crime
3811                   Bittersweet Motel (2000)           Documentary
