In [9]:
import pandas as pd
import numpy as np
from sklearn.metrics.pairwise import cosine_similarity

rating = pd.read_csv(r'D:\FastSemesters\semester6\Recommender_System(E)\A1\ml-latest-small\ratings.csv')

# Load the sparse userID-movieID pivot table
ratings_df = rating.pivot_table(index='userId', columns='movieId', values='rating')

# Compute the mean ratings for each user
user_means = ratings_df.mean(axis=1, skipna=True)

# Center the ratings around the user mean
centered_ratings_df = ratings_df.sub(user_means, axis=0).fillna(0)

# Compute the cosine similarity between each pair of users
user_similarities = pd.DataFrame(cosine_similarity(centered_ratings_df), index=centered_ratings_df.index, columns=centered_ratings_df.index)

# Define a function to predict the rating of a movie for a given user
def predict_rating(user_id, movie_id):
    # Find the 10 most similar users to the given user
    similar_users = user_similarities[user_id].nlargest(10).index
    # Get the ratings of those users for the given movie
    ratings = centered_ratings_df.loc[similar_users, movie_id].dropna()
    
    if ratings.empty:
        # If none of the similar users have rated the movie, return the global mean rating
        return user_means[user_id]
    else:
        # Otherwise, predict the rating as the weighted average of the similar users' ratings
        similarities = user_similarities.loc[user_id, ratings.index]
        weighted_ratings = similarities * ratings
        return user_means[user_id] + weighted_ratings.sum() / similarities.sum()

# Example usage: predict the top 5 recommended movies for user 1
user_id = 606
rated_movies = ratings_df.loc[user_id].dropna().index
unrated_movies = ratings_df.columns.difference(rated_movies)

predicted_ratings = [predict_rating(user_id, movie_id) for movie_id in unrated_movies]

top_movies = pd.Series(predicted_ratings, index=unrated_movies).nlargest(5).index
print(top_movies)


Int64Index([608, 4011, 2529, 919, 4878], dtype='int64', name='movieId')


In [2]:
movie = pd.read_csv(r'D:\FastSemesters\semester6\Recommender_System(E)\A1\ml-latest-small\movies.csv')

for i in range(len(movie)):
    if movie['movieId'][i] in top_movies:
        print(movie['movieId'][i]," ",movie['title'][i])

608   Fargo (1996)
919   Wizard of Oz, The (1939)
2529   Planet of the Apes (1968)
4011   Snatch (2000)
4878   Donnie Darko (2001)


In [4]:
user_similarities

userId,1,2,3,4,5,6,7,8,9,10,...,601,602,603,604,605,606,607,608,609,610
userId,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
1,1.000000,0.001265,0.000553,0.048419,0.021847,-0.045497,-0.006200,0.047013,0.019510,-0.008754,...,0.018127,-0.017172,-0.015221,-0.037059,-0.029121,0.012016,0.055261,0.075224,-0.025713,0.010932
2,0.001265,1.000000,0.000000,-0.017164,0.021796,-0.021051,-0.011114,-0.048085,0.000000,0.003012,...,-0.050551,-0.031581,-0.001688,0.000000,0.000000,0.006226,-0.020504,-0.006001,-0.060091,0.024999
3,0.000553,0.000000,1.000000,-0.011260,-0.031539,0.004800,0.000000,-0.032471,0.000000,0.000000,...,-0.004904,-0.016117,0.017749,0.000000,-0.001431,-0.037289,-0.007789,-0.013001,0.000000,0.019550
4,0.048419,-0.017164,-0.011260,1.000000,-0.029620,0.013956,0.058091,0.002065,-0.005874,0.051590,...,-0.037687,0.063122,0.027640,-0.013782,0.040037,0.020590,0.014628,-0.037569,-0.017884,-0.000995
5,0.021847,0.021796,-0.031539,-0.029620,1.000000,0.009111,0.010117,-0.012284,0.000000,-0.033165,...,0.015964,0.012427,0.027076,0.012461,-0.036272,0.026319,0.031896,-0.001751,0.093829,-0.000278
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
606,0.012016,0.006226,-0.037289,0.020590,0.026319,-0.009137,0.028326,0.022277,0.031633,-0.039946,...,0.053683,0.016384,0.098011,0.061078,0.019678,1.000000,0.017927,0.056676,0.038422,0.075464
607,0.055261,-0.020504,-0.007789,0.014628,0.031896,0.045501,0.030981,0.048822,-0.012161,-0.017656,...,0.049059,0.038197,0.049317,0.002355,-0.029381,0.017927,1.000000,0.044514,0.019049,0.021860
608,0.075224,-0.006001,-0.013001,-0.037569,-0.001751,0.021727,0.028414,0.071759,0.032783,-0.052000,...,0.069198,0.051388,0.012801,0.006319,-0.007978,0.056676,0.044514,1.000000,0.050714,0.054454
609,-0.025713,-0.060091,0.000000,-0.017884,0.093829,0.053017,0.008754,0.077180,0.000000,-0.040090,...,0.043465,0.062400,0.015334,0.094038,-0.054722,0.038422,0.019049,0.050714,1.000000,-0.012471


In [5]:
ratings_df

movieId,1,2,3,4,5,6,7,8,9,10,...,193565,193567,193571,193573,193579,193581,193583,193585,193587,193609
userId,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
1,4.0,,4.0,,,4.0,,,,,...,,,,,,,,,,
2,,,,,,,,,,,...,,,,,,,,,,
3,,,,,,,,,,,...,,,,,,,,,,
4,,,,,,,,,,,...,,,,,,,,,,
5,4.0,,,,,,,,,,...,,,,,,,,,,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
606,2.5,,,,,,2.5,,,,...,,,,,,,,,,
607,4.0,,,,,,,,,,...,,,,,,,,,,
608,2.5,2.0,2.0,,,,,,,4.0,...,,,,,,,,,,
609,3.0,,,,,,,,,4.0,...,,,,,,,,,,


In [6]:
user_means

userId
1      4.366379
2      3.948276
3      2.435897
4      3.555556
5      3.636364
         ...   
606    3.657399
607    3.786096
608    3.134176
609    3.270270
610    3.688556
Length: 610, dtype: float64

In [7]:
similar_users

NameError: name 'similar_users' is not defined