In [1]:
# importing libraries
import pandas as pd
import numpy as np

In [2]:
# Read the raw ratings table from ratings.csv (path is relative to the working directory).
ratings = pd.read_csv(filepath_or_buffer="ratings.csv")

In [3]:
# Drop the timestamp column (not needed).
# errors='ignore' keeps this cell re-runnable: the result is assigned back to
# `ratings`, so a second execution would otherwise raise KeyError because the
# column is already gone.
ratings = ratings.drop(columns=['timestamp'], errors='ignore')

In [4]:
# Read the movie metadata table from movies.csv (path is relative to the working directory).
movies = pd.read_csv(filepath_or_buffer="movies.csv")

In [5]:
# Attach the movie columns to every rating row by joining on the shared movieId key.
data = ratings.merge(movies, on='movieId')

In [6]:
# Keep only the columns used from here on; every row is retained.
data = data.loc[:, ['userId', 'movieId', 'rating', 'title']]

In [7]:
# Build the user x movie rating matrix: one row per userId, one column per
# movieId. Each cell holds the rating for that pair (pivot_table's default
# aggregation is the mean, should a pair occur more than once); pairs with no
# rating come out as NaN.
user_item_matrix = pd.pivot_table(
    data,
    values='rating',
    index='userId',
    columns='movieId',
)
user_item_matrix

movieId,1,2,3,4,5,6,7,8,9,10,...,193565,193567,193571,193573,193579,193581,193583,193585,193587,193609
userId,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
1,4.0,,4.0,,,4.0,,,,,...,,,,,,,,,,
2,,,,,,,,,,,...,,,,,,,,,,
3,,,,,,,,,,,...,,,,,,,,,,
4,,,,,,,,,,,...,,,,,,,,,,
5,4.0,,,,,,,,,,...,,,,,,,,,,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
606,2.5,,,,,,2.5,,,,...,,,,,,,,,,
607,4.0,,,,,,,,,,...,,,,,,,,,,
608,2.5,2.0,2.0,,,,,,,4.0,...,,,,,,,,,,
609,3.0,,,,,,,,,4.0,...,,,,,,,,,,


In [8]:
# Flip to movie x user, so that each row is one movie's vector of user ratings.
item_user_matrix = user_item_matrix.transpose()

In [9]:
# Replace NaN (no rating) with 0 to obtain a fully numeric matrix.
item_user_filled = item_user_matrix.fillna(value=0)

In [10]:
# Item-item cosine similarity
from sklearn.metrics.pairwise import cosine_similarity

# Pairwise cosine similarity between the rows (movies) of the zero-filled matrix.
item_similarity = cosine_similarity(item_user_filled)

# Wrap the raw array in a DataFrame so both axes are labelled by movieId.
item_similarity_df = pd.DataFrame(
    data=item_similarity,
    index=item_user_filled.index,
    columns=item_user_filled.index,
)
item_similarity

array([[1.        , 0.41056206, 0.2969169 , ..., 0.        , 0.        ,
        0.        ],
       [0.41056206, 1.        , 0.28243799, ..., 0.        , 0.        ,
        0.        ],
       [0.2969169 , 0.28243799, 1.        , ..., 0.        , 0.        ,
        0.        ],
       ...,
       [0.        , 0.        , 0.        , ..., 1.        , 1.        ,
        0.        ],
       [0.        , 0.        , 0.        , ..., 1.        , 1.        ,
        0.        ],
       [0.        , 0.        , 0.        , ..., 0.        , 0.        ,
        1.        ]], shape=(9724, 9724))

In [11]:
# movieId of the movie to find neighbours for; used as a column label of item_similarity_df.
target_movie = 8

In [12]:
# Find similar movies: take the target movie's similarity column and rank it
# from most to least similar.
similar_movies = (
    item_similarity_df[target_movie]
    .sort_values(ascending=False)
)


In [13]:
# Remove the target movie itself (its self-similarity is 1.0, so it is not a
# useful recommendation).
# errors='ignore' keeps this cell re-runnable: the result is assigned back to
# `similar_movies`, so a second execution would otherwise raise KeyError
# because the label has already been dropped.
similar_movies = similar_movies.drop(target_movie, errors='ignore')

In [14]:
# Recommend the top-N similar movies: the series is already sorted from most
# to least similar, so the first five entries are the recommendations.
top_similar_movies = similar_movies.iloc[:5]
top_similar_movies

movieId
271    0.644253
174    0.642321
502    0.602709
217    0.591864
542    0.583448
Name: 8, dtype: float64

In [15]:
# List the movieIds of the recommended movies. Iterating top_similar_movies
# (rather than re-slicing similar_movies with a second hard-coded 5) keeps this
# cell in sync with the Top-N selection if N is ever changed.
for movie_id in top_similar_movies.index:
    print(movie_id)

271
174
502
217
542


In [16]:
# Show movie titles for the recommended movies.
# Build the movieId -> title lookup once instead of boolean-filtering the whole
# merged ratings frame for every recommendation. Keeping the first row per
# movieId matches taking the first matching title.
title_by_movie_id = data.drop_duplicates(subset='movieId').set_index('movieId')['title']

# Only the movieIds are needed here, so iterate the index rather than unpacking
# (and discarding) the similarity scores.
for movie_id in top_similar_movies.index:
    print(movie_id, title_by_movie_id.loc[movie_id])

271 Losing Isaiah (1995)
174 Jury Duty (1995)
502 Next Karate Kid, The (1994)
217 Babysitter, The (1995)
542 Son in Law (1993)
