In [1]:
# importing libraries
import numpy as np
import pandas as pd
from sklearn.metrics.pairwise import cosine_similarity

In [2]:
# Read the raw ratings file from the working directory.
# NOTE(review): later cells use userId, movieId, rating and timestamp columns;
# which of these come from this file is presumed, not checked here.
ratings = pd.read_csv(filepath_or_buffer="ratings.csv")

In [3]:
# Drop the timestamp column (not needed for the recommender).
# `ratings` is reassigned to its own result, so on a second run of this cell
# the column is already gone; errors='ignore' makes the re-run a no-op instead
# of raising KeyError.
ratings = ratings.drop(columns=['timestamp'], errors='ignore')

In [4]:
# Read the movie metadata file from the working directory.
# Later cells rely on its movieId (merge key) and title columns.
movies = pd.read_csv(filepath_or_buffer="movies.csv")

In [5]:
# Attach movie metadata to every rating by joining on movieId.
# Inner join (the pandas default, spelled out): ratings whose movieId has no
# match in `movies` are dropped.
data = ratings.merge(movies, how='inner', on='movieId')

In [6]:
# Keep only the columns the recommender uses, in this order.
data = data.loc[:, ['userId', 'movieId', 'rating', 'title']]

In [7]:
# Preview the first five rows of the merged ratings/movies table.
data.head(5)

Unnamed: 0,userId,movieId,rating,title
0,1,1,4.0,Toy Story (1995)
1,1,3,4.0,Grumpier Old Men (1995)
2,1,6,4.0,Heat (1995)
3,1,47,5.0,Seven (a.k.a. Se7en) (1995)
4,1,50,5.0,"Usual Suspects, The (1995)"


# Create User–Item Matrix (Pivot Table)

In [8]:
# User-item matrix: one row per userId, one column per movieId, cells hold the
# rating. Movies a user has not rated come out as NaN.
# aggfunc='mean' is the pandas default, spelled out: if a (user, movie) pair
# ever appeared more than once, its ratings would be averaged.
user_item_matrix = pd.pivot_table(
    data,
    values='rating',
    index='userId',
    columns='movieId',
    aggfunc='mean',
)

In [9]:
# Preview the first five users of the (mostly empty) user-item matrix.
user_item_matrix.head(n=5)

movieId,1,2,3,4,5,6,7,8,9,10,...,193565,193567,193571,193573,193579,193581,193583,193585,193587,193609
userId,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
1,4.0,,4.0,,,4.0,,,,,...,,,,,,,,,,
2,,,,,,,,,,,...,,,,,,,,,,
3,,,,,,,,,,,...,,,,,,,,,,
4,,,,,,,,,,,...,,,,,,,,,,
5,4.0,,,,,,,,,,...,,,,,,,,,,


# Fill Missing Values

In [10]:
# Replace missing ratings with 0 so every user has a complete numeric vector.
# The original matrix (with NaN) is kept: later cells use it to tell
# "not rated" apart from an actual rating.
user_item_filled = user_item_matrix.fillna(value=0)

# Compute Cosine Similarity

In [11]:
# Pairwise cosine similarity between users' rating vectors (one row per user).
# `cosine_similarity` comes from the notebook's import cell at the top.
# Caveat: the 0s written by fillna are treated as real values here.
user_similarity = cosine_similarity(user_item_filled)

# Label both axes with userId so the matrix can be indexed by user. The labels
# are taken from the frame that was actually passed to cosine_similarity.
user_similarity_df = pd.DataFrame(
    user_similarity,
    index=user_item_filled.index,
    columns=user_item_filled.index,
)
user_similarity_df

userId,1,2,3,4,5,6,7,8,9,10,...,601,602,603,604,605,606,607,608,609,610
userId,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
1,1.000000,0.027283,0.059720,0.194395,0.129080,0.128152,0.158744,0.136968,0.064263,0.016875,...,0.080554,0.164455,0.221486,0.070669,0.153625,0.164191,0.269389,0.291097,0.093572,0.145321
2,0.027283,1.000000,0.000000,0.003726,0.016614,0.025333,0.027585,0.027257,0.000000,0.067445,...,0.202671,0.016866,0.011997,0.000000,0.000000,0.028429,0.012948,0.046211,0.027565,0.102427
3,0.059720,0.000000,1.000000,0.002251,0.005020,0.003936,0.000000,0.004941,0.000000,0.000000,...,0.005048,0.004892,0.024992,0.000000,0.010694,0.012993,0.019247,0.021128,0.000000,0.032119
4,0.194395,0.003726,0.002251,1.000000,0.128659,0.088491,0.115120,0.062969,0.011361,0.031163,...,0.085938,0.128273,0.307973,0.052985,0.084584,0.200395,0.131746,0.149858,0.032198,0.107683
5,0.129080,0.016614,0.005020,0.128659,1.000000,0.300349,0.108342,0.429075,0.000000,0.030611,...,0.068048,0.418747,0.110148,0.258773,0.148758,0.106435,0.152866,0.135535,0.261232,0.060792
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
606,0.164191,0.028429,0.012993,0.200395,0.106435,0.102123,0.200035,0.099388,0.075898,0.088963,...,0.178084,0.116534,0.300669,0.066032,0.148141,1.000000,0.153063,0.262558,0.069622,0.201104
607,0.269389,0.012948,0.019247,0.131746,0.152866,0.162182,0.186114,0.185142,0.011844,0.010451,...,0.092525,0.199910,0.203540,0.137834,0.118780,0.153063,1.000000,0.283081,0.149190,0.139114
608,0.291097,0.046211,0.021128,0.149858,0.135535,0.178809,0.323541,0.187233,0.100435,0.077424,...,0.158355,0.197514,0.232771,0.155306,0.178142,0.262558,0.283081,1.000000,0.121993,0.322055
609,0.093572,0.027565,0.000000,0.032198,0.261232,0.214234,0.090840,0.423993,0.000000,0.021766,...,0.035653,0.335231,0.061941,0.236601,0.097610,0.069622,0.149190,0.121993,1.000000,0.053225


# Choose a target user

In [12]:
# userId of the user we generate recommendations for.
target_user: int = 4

# Find similar users

In [13]:
# Similarity of every user to the target user, most similar first.
# The target user's own entry is still included at this point.
similar_users = (
    user_similarity_df.loc[:, target_user]
    .sort_values(ascending=False)
)
similar_users

userId
4      1.000000
391    0.317541
603    0.307973
156    0.293321
275    0.281351
         ...   
252    0.000000
578    0.000000
175    0.000000
92     0.000000
53     0.000000
Name: 4, Length: 610, dtype: float64

# Remove the user itself

In [14]:
# Remove the target user's own entry so they cannot be picked as their own
# neighbour.
# `similar_users` is reassigned to its own result, so on a second run of this
# cell the label is already gone; errors='ignore' makes the re-run a no-op
# instead of raising KeyError.
similar_users = similar_users.drop(labels=target_user, errors='ignore')

# Select top-N similar users

In [16]:
# Neighbourhood used for prediction: the three most similar users.
# similar_users is already sorted by descending similarity, so the first
# three entries are the top three.
top_similar_users = similar_users.iloc[:3]
top_similar_users

userId
391    0.317541
603    0.307973
156    0.293321
Name: 4, dtype: float64

# Find movies the target user has NOT rated

In [17]:
# The target user's row of the user-item matrix (NaN = not rated).
target_user_ratings = user_item_matrix.loc[target_user]

# User-based recommendation only suggests items the target user has NOT rated
# yet, so collect the movieIds whose rating is missing.
# (Item-based recommendation usually starts from a specific item the user
# liked, so unrated items are handled differently there.)
unrated_movies = target_user_ratings.loc[target_user_ratings.isna()].index

# Predict ratings

In [18]:
# Predict a rating for every movie the target user has not rated, as the
# similarity-weighted average of the ratings given by the top similar users:
#     prediction = sum(similarity * rating) / sum(similarity)
# taken over the neighbours who actually rated the movie.
# Vectorised: one DataFrame slice instead of a scalar .loc lookup per
# (movie, neighbour) pair.

# Rows: top similar users; columns: unrated movies; NaN = neighbour did not rate.
neighbour_ratings = user_item_matrix.loc[top_similar_users.index, unrated_movies]

# Weighted rating sum per movie. sum() skips NaN by default, so neighbours
# without a rating contribute nothing.
numerator = neighbour_ratings.mul(top_similar_users, axis=0).sum(axis=0)

# Total similarity of exactly those neighbours who rated each movie.
denominator = (
    neighbour_ratings.notna()
    .astype(float)
    .mul(top_similar_users, axis=0)
    .sum(axis=0)
)

# Keep only movies with a non-zero total weight (at least one neighbour with
# non-zero similarity rated them). Column order of unrated_movies is preserved,
# so the resulting dict has the same key order as the loop-based version.
has_prediction = denominator != 0
predicted_ratings = (
    numerator[has_prediction] / denominator[has_prediction]
).to_dict()


# Sort and recommend top movies

In [19]:
# (movieId, predicted rating) pairs ordered from highest to lowest prediction.
# list.sort is stable, so movies with equal predictions keep their original
# relative order.
recommended_movies = list(predicted_ratings.items())
recommended_movies.sort(key=lambda pair: pair[1], reverse=True)

# Show movie titles instead of IDs

In [20]:
# Print the five highest-scoring recommendations with their predicted rating.
for movie_id, score in recommended_movies[:5]:
    # Every merged row for a movie carries the same title; take the first one.
    title = data.loc[data['movieId'] == movie_id, 'title'].iloc[0]
    print(title, "→ predicted rating:", round(score, 2))

Dr. Strangelove or: How I Learned to Stop Worrying and Love the Bomb (1964) → predicted rating: 5.0
Maltese Falcon, The (1941) → predicted rating: 5.0
My Man Godfrey (1936) → predicted rating: 5.0
Duck Soup (1933) → predicted rating: 5.0
Breaking the Waves (1996) → predicted rating: 5.0


In [21]:
# Print only the titles of the five highest-scoring recommendations.
for movie_id, score in recommended_movies[:5]:
    # Every merged row for a movie carries the same title; take the first one.
    title = data.loc[data['movieId'] == movie_id, 'title'].iloc[0]
    print(title)

Dr. Strangelove or: How I Learned to Stop Worrying and Love the Bomb (1964)
Maltese Falcon, The (1941)
My Man Godfrey (1936)
Duck Soup (1933)
Breaking the Waves (1996)
