In [1]:
import pandas as pd
import numpy as np
from sklearn.metrics.pairwise import cosine_similarity

In [2]:
# Read the two raw CSV tables from the local Dataset/ directory.
ratings = pd.read_csv(filepath_or_buffer="Dataset/ratings.csv")
movies = pd.read_csv(filepath_or_buffer="Dataset/movies.csv")

In [3]:
# Show the first five rows of each table as a quick sanity check on the load.
print("Ratings:\n", ratings.head(5))
print("\nMovies:\n", movies.head(5))

Ratings:
    userId  movieId  rating  timestamp
0       1        1     4.0  964982703
1       1        3     4.0  964981247
2       1        6     4.0  964982224
3       1       47     5.0  964983815
4       1       50     5.0  964982931

Movies:
    movieId                               title  \
0        1                    Toy Story (1995)   
1        2                      Jumanji (1995)   
2        3             Grumpier Old Men (1995)   
3        4            Waiting to Exhale (1995)   
4        5  Father of the Bride Part II (1995)   

                                        genres  
0  Adventure|Animation|Children|Comedy|Fantasy  
1                   Adventure|Children|Fantasy  
2                               Comedy|Romance  
3                         Comedy|Drama|Romance  
4                                       Comedy  


In [4]:
# Attach the movie columns to every rating row. The default inner join keeps
# only ratings whose movieId also appears in `movies`.
data = ratings.merge(movies, on="movieId")
print("\nMerged Data:\n", data.head(5))


Merged Data:
    userId  movieId  rating  timestamp                        title  \
0       1        1     4.0  964982703             Toy Story (1995)   
1       1        3     4.0  964981247      Grumpier Old Men (1995)   
2       1        6     4.0  964982224                  Heat (1995)   
3       1       47     5.0  964983815  Seven (a.k.a. Se7en) (1995)   
4       1       50     5.0  964982931   Usual Suspects, The (1995)   

                                        genres  
0  Adventure|Animation|Children|Comedy|Fantasy  
1                               Comedy|Romance  
2                        Action|Crime|Thriller  
3                             Mystery|Thriller  
4                       Crime|Mystery|Thriller  


In [5]:
# Wide table: one row per userId, one column per title, cell = rating.
# pivot_table's default aggregation (mean) is applied when a user has more
# than one rating under the same title; user/title pairs with no rating are
# filled with 0.
user_item_matrix = (
    data
    .pivot_table(values="rating", index="userId", columns="title")
    .fillna(0)
)
print("\nUser-Item Matrix Shape:", user_item_matrix.shape)


User-Item Matrix Shape: (610, 9719)


In [6]:
# Pairwise cosine similarity between the user rows of the rating matrix,
# then wrapped in a DataFrame labelled by userId on both axes so that users
# can be looked up by id rather than by position.
similarity_matrix = cosine_similarity(user_item_matrix)
similarity_df = pd.DataFrame(
    data=similarity_matrix,
    index=user_item_matrix.index,
    columns=user_item_matrix.index,
)
print("\nUser Similarity Matrix:\n", similarity_df.head(5))


User Similarity Matrix:
 userId       1         2         3         4         5         6         7    \
userId                                                                         
1       1.000000  0.027283  0.059720  0.194395  0.129080  0.128152  0.158744   
2       0.027283  1.000000  0.000000  0.003726  0.016614  0.025333  0.027585   
3       0.059720  0.000000  1.000000  0.002251  0.005020  0.003936  0.000000   
4       0.194395  0.003726  0.002251  1.000000  0.128659  0.088491  0.115120   
5       0.129080  0.016614  0.005020  0.128659  1.000000  0.300349  0.108342   

userId       8         9         10   ...       601       602       603  \
userId                                ...                                 
1       0.136968  0.064263  0.016875  ...  0.080554  0.164455  0.221486   
2       0.027257  0.000000  0.067445  ...  0.202671  0.016866  0.011997   
3       0.004941  0.000000  0.000000  ...  0.005048  0.004892  0.024992   
4       0.062969  0.011361  0.031163  

In [7]:
def recommend_movies(user_id, top_n=5, min_rating=4, ratings_data=None, similarity=None):
    """Recommend titles liked by the single user most similar to ``user_id``.

    The nearest neighbour is the user with the highest score in
    ``similarity[user_id]`` once ``user_id`` itself has been removed. Titles
    that neighbour rated at least ``min_rating`` and that ``user_id`` has not
    rated are returned in the order they appear in ``ratings_data``.

    Parameters
    ----------
    user_id : hashable
        Label of the target user; must be a column of ``similarity``.
    top_n : int, default 5
        Maximum number of titles to return. Values <= 0 yield an empty list.
    min_rating : float, default 4
        Lowest neighbour rating that counts as a recommendation.
    ratings_data : pandas.DataFrame, optional
        Frame with ``userId``, ``title`` and ``rating`` columns. Defaults to
        the notebook-level ``data`` frame.
    similarity : pandas.DataFrame, optional
        Square user-by-user similarity frame. Defaults to the notebook-level
        ``similarity_df``.

    Returns
    -------
    list
        At most ``top_n`` distinct titles. Empty when there is no other user
        or the neighbour has no qualifying unseen titles.

    Raises
    ------
    KeyError
        If ``user_id`` is not a column of ``similarity``.
    """
    # Fall back to the notebook globals so existing calls keep working.
    if ratings_data is None:
        ratings_data = data
    if similarity is None:
        similarity = similarity_df

    if user_id not in similarity.columns:
        raise KeyError(f"user_id {user_id!r} is not present in the similarity matrix")
    # A negative top_n would otherwise slice as "all but the last k".
    if top_n <= 0:
        return []

    neighbours = (
        similarity[user_id]
        .sort_values(ascending=False)
        .drop(user_id, errors="ignore")
    )
    if neighbours.empty:
        # No other user to borrow recommendations from.
        return []
    most_similar_user = neighbours.index[0]

    seen_titles = set(ratings_data.loc[ratings_data["userId"] == user_id, "title"])
    liked_by_neighbour = ratings_data.loc[
        (ratings_data["userId"] == most_similar_user)
        & (ratings_data["rating"] >= min_rating),
        "title",
    ]
    # Titles are not guaranteed unique per movie, so de-duplicate while
    # keeping first-seen order.
    candidates = liked_by_neighbour[~liked_by_neighbour.isin(seen_titles)].drop_duplicates()
    return candidates.head(top_n).tolist()

In [8]:
# Example run: request up to five recommendations for user 10.
print("\nRecommended Movies for User 10:")
print(recommend_movies(user_id=10, top_n=5))


Recommended Movies for User 10:
['Toy Story (1995)', 'Apollo 13 (1995)', 'Shawshank Redemption, The (1994)', 'Ghost (1990)', 'Princess Bride, The (1987)']
