# Importing required packages

In [3]:
import pandas as pd
import numpy as np
from sklearn.metrics.pairwise import cosine_similarity

# Load the movie ratings dataset

In [4]:
ratings_df = pd.read_csv('rating.csv')

In [5]:
ratings_df1=ratings_df.head(200)

In [7]:
ratings_df1

Unnamed: 0,userId,movieId,rating,timestamp
0,1,2,3.5,2005-04-02 23:53:47
1,1,29,3.5,2005-04-02 23:31:16
2,1,32,3.5,2005-04-02 23:33:39
3,1,47,3.5,2005-04-02 23:32:07
4,1,50,3.5,2005-04-02 23:29:40
...,...,...,...,...
195,2,1270,5.0,2000-11-21 15:36:54
196,2,1327,5.0,2000-11-21 15:34:06
197,2,1356,5.0,2000-11-21 15:29:58
198,2,1544,5.0,2000-11-21 15:35:43


# Load the movies dataset

In [8]:
movies_df = pd.read_csv('movie.csv')

In [9]:
movies_df1=movies_df.head(200)

In [10]:
movies_df1

Unnamed: 0,movieId,title,genres
0,1,Toy Story (1995),Adventure|Animation|Children|Comedy|Fantasy
1,2,Jumanji (1995),Adventure|Children|Fantasy
2,3,Grumpier Old Men (1995),Comedy|Romance
3,4,Waiting to Exhale (1995),Comedy|Drama|Romance
4,5,Father of the Bride Part II (1995),Comedy
...,...,...,...
195,197,"Stars Fell on Henrietta, The (1995)",Drama
196,198,Strange Days (1995),Action|Crime|Drama|Mystery|Sci-Fi|Thriller
197,199,"Umbrellas of Cherbourg, The (Parapluies de Che...",Drama|Musical|Romance
198,200,"Tie That Binds, The (1995)",Thriller


In [11]:
df = pd.merge(movies_df1, ratings_df1, on='movieId')

# Create a pivot table

In [12]:
ratings_matrix = df.pivot_table(index='userId', columns='title', values='rating')

In [15]:
ratings_matrix

title,Braveheart (1995),"City of Lost Children, The (Cité des enfants perdus, La) (1995)",From Dusk Till Dawn (1996),Grumpier Old Men (1995),Jumanji (1995),Mr. Holland's Opus (1995),Rob Roy (1995),Rumble in the Bronx (Hont faan kui) (1995),Seven (a.k.a. Se7en) (1995),Twelve Monkeys (a.k.a. 12 Monkeys) (1995),"Usual Suspects, The (1995)"
userId,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1
1,0.0,3.5,0.0,0.0,3.5,0.0,4.0,3.5,3.5,3.5,3.5
2,4.0,0.0,5.0,4.0,0.0,5.0,0.0,0.0,0.0,0.0,0.0


# Fill missing values with zeros

In [13]:
ratings_matrix = ratings_matrix.fillna(0)

In [14]:
ratings_matrix

title,Braveheart (1995),"City of Lost Children, The (Cité des enfants perdus, La) (1995)",From Dusk Till Dawn (1996),Grumpier Old Men (1995),Jumanji (1995),Mr. Holland's Opus (1995),Rob Roy (1995),Rumble in the Bronx (Hont faan kui) (1995),Seven (a.k.a. Se7en) (1995),Twelve Monkeys (a.k.a. 12 Monkeys) (1995),"Usual Suspects, The (1995)"
userId,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1
1,0.0,3.5,0.0,0.0,3.5,0.0,4.0,3.5,3.5,3.5,3.5
2,4.0,0.0,5.0,4.0,0.0,5.0,0.0,0.0,0.0,0.0,0.0


# Calculate pairwise similarity matrix

In [16]:
movie_similarity = cosine_similarity(ratings_matrix.T)

# Function to get top n similar movies

In [18]:
def get_similar_movies(movie_name, n):
    movie_index = list(ratings_matrix.columns).index(movie_name)
    movie_similarity_score = movie_similarity[movie_index]
    movie_similarity_score = list(enumerate(movie_similarity_score))
    movie_similarity_score = sorted(movie_similarity_score, key=lambda x: x[1], reverse=True)
    movie_similarity_score = movie_similarity_score[1:n+1]
    movie_indices = [i[0] for i in movie_similarity_score]
    return list(ratings_matrix.columns[movie_indices])

# Test the function

In [19]:
get_similar_movies('Jumanji (1995)', 5)

['Jumanji (1995)',
 'Rob Roy (1995)',
 'Rumble in the Bronx (Hont faan kui) (1995)',
 'Seven (a.k.a. Se7en) (1995)',
 'Twelve Monkeys (a.k.a. 12 Monkeys) (1995)']