In [1]:
#Import modules

import pandas as pd
import numpy as np
from sklearn.metrics.pairwise import cosine_similarity 

In [3]:
# Ratings: tab-separated file. Column names are supplied explicitly, so the
# first line of the file is read as data rather than as a header.
rating_columns = ['user_id', 'movie_id', 'rating', 'timestamp']
ratings = pd.read_csv('u.data', sep='\t', names=rating_columns)

# Movie titles: pipe-separated file decoded as latin-1. Only the first two
# columns (id and title) are loaded; the rest of each row is ignored.
movies = pd.read_csv(
    'u.item',
    sep='|',
    encoding='latin-1',
    names=['movie_id', 'title'],
    usecols=[0, 1],
)

# Attach a title to every rating. An inner join (the pandas default, spelled
# out here) keeps only movie_ids that appear in both tables.
df = ratings.merge(movies, on='movie_id', how='inner')

# Explore: number of ratings per title, most-rated first.
print("Top 5 rated movies:")
ratings_per_title = df.groupby('title')['rating'].count()
print(ratings_per_title.sort_values(ascending=False).head())

Top 5 rated movies:
title
Star Wars (1977)             583
Contact (1997)               509
Fargo (1996)                 508
Return of the Jedi (1983)    507
Liar Liar (1997)             485
Name: rating, dtype: int64


In [4]:
# Build the user x title rating matrix.
# - One row per user_id, one column per title.
# - aggfunc='mean' (the pandas default, made explicit) averages the ratings if
#   a user has more than one row for the same title.
# - User/title pairs with no rating are filled with 0.
user_movie_matrix = pd.pivot_table(
    df,
    values='rating',
    index='user_id',
    columns='title',
    aggfunc='mean',
    fill_value=0,
)

print("Matrix shape:", user_movie_matrix.shape)

Matrix shape: (943, 1664)


In [5]:
# Item-item similarity between MOVIES.
# cosine_similarity compares the rows of its input, so the user x movie matrix
# is transposed to movie x user first.
# The score is the cosine of the angle between two movies' rating vectors:
# 0 means no users in common, 1 means the vectors point in the same direction
# (assuming ratings are non-negative, scores stay within [0, 1]).
movie_user_matrix = user_movie_matrix.T
movie_similarity = cosine_similarity(movie_user_matrix)

# Wrap the raw array in a DataFrame labelled by movie title on both axes so
# that scores can be looked up by title.
movie_titles = user_movie_matrix.columns
movie_similarity_df = pd.DataFrame(
    movie_similarity,
    index=movie_titles,
    columns=movie_titles,
)

In [6]:
def recommend_movies(movie_title, num_recommendations=5, similarity_df=None):
    """Return the titles most similar to ``movie_title``.

    Parameters
    ----------
    movie_title : str
        Title to find neighbours for; must be a column of the similarity matrix.
    num_recommendations : int, default 5
        Maximum number of titles to return. Must be non-negative.
    similarity_df : pandas.DataFrame, optional
        Square title x title similarity matrix. When omitted, the notebook-level
        ``movie_similarity_df`` is used.

    Returns
    -------
    list
        Up to ``num_recommendations`` titles, ordered from most to least
        similar. The queried title itself is never included.

    Raises
    ------
    KeyError
        If ``movie_title`` is not present in the similarity matrix.
    ValueError
        If ``num_recommendations`` is negative.
    """
    if similarity_df is None:
        # Fall back to the matrix built earlier in the notebook.
        similarity_df = movie_similarity_df

    if num_recommendations < 0:
        raise ValueError("num_recommendations must be non-negative")

    if movie_title not in similarity_df.columns:
        raise KeyError(f"Movie title not found in similarity matrix: {movie_title!r}")

    scores = similarity_df[movie_title]

    # Remove the queried movie by label instead of assuming it sorts to
    # position 0: another movie can also score 1.0, in which case slicing off
    # the first row could drop the wrong title and return the query itself.
    candidates = scores.drop(labels=movie_title, errors="ignore")

    # Stable sort keeps tied scores in their original (matrix) order, so the
    # result is deterministic.
    ranked = candidates.sort_values(ascending=False, kind="mergesort")

    return ranked.head(num_recommendations).index.tolist()

In [7]:
# Example usage: look up the five nearest neighbours of "Toy Story (1995)"
# and show them as a plain list of titles.
print("Recommendations for 'Toy Story (1995)':")
toy_story_recommendations = recommend_movies('Toy Story (1995)', num_recommendations=5)
print(toy_story_recommendations)

Recommendations for 'Toy Story (1995)':
['Star Wars (1977)', 'Return of the Jedi (1983)', 'Independence Day (ID4) (1996)', 'Rock, The (1996)', 'Mission: Impossible (1996)']
