Assignment 1 : Building a movie recommendation system

Name: Md. Riad Hossain

ID: 2125051029

Batch: 50

Section: 8A

In [1]:
from google.colab import drive
drive.mount('/content/drive')

Mounted at /content/drive


In [16]:
# import necessary library
import pandas as pd
import numpy as np
from sklearn.metrics.pairwise import cosine_similarity
from google.colab import drive

In [18]:
# ✅ Step 2: Load datasets from Google Drive
ratings = pd.read_csv("/content/drive/MyDrive/UITS/Data Mining Lab/u.data", sep='\t', names=['user_id', 'movie_id', 'rating', 'timestamp'])
movies = pd.read_csv("/content/drive/MyDrive/UITS/Data Mining Lab/u.item", sep='|', encoding='latin-1', header=None, usecols=[0, 1], names=['movie_id', 'title'])

In [20]:
# ✅ Step 3: Create the user-movie rating matrix
user_movie_matrix = ratings.pivot(index='user_id', columns='movie_id', values='rating').fillna(0)

# ✅ Step 4: Transpose to get movie-feature matrix (movies as rows)
movie_features = user_movie_matrix.T

# ✅ Step 5: Compute cosine similarity between movies
similarity_matrix = cosine_similarity(movie_features)
similarity_matrix = pd.DataFrame(similarity_matrix, index=movie_features.index, columns=movie_features.index)

In [21]:
# ✅ Step 6: Function to recommend movies similar to a given movie ID
def recommend_movies(movie_id, top_n=5):
    if movie_id not in similarity_matrix.index:
        print(f"Movie ID {movie_id} not found.")
        return pd.DataFrame()
    similar_scores = similarity_matrix.loc[movie_id].drop(labels=[movie_id])
    top_similar = similar_scores.sort_values(ascending=False).head(top_n)
    return movies[movies['movie_id'].isin(top_similar.index)]

In [22]:
# ✅ Step 7: Function to generate personalized recommendations for a user
def personalized_recommendations(user_id, top_n=10):
    user_ratings = ratings[ratings['user_id'] == user_id].sort_values(by='rating', ascending=False)
    top_movies = user_ratings['movie_id'].head(3)
    seen_movies = set(user_ratings['movie_id'])
    unseen_movies = set(movies['movie_id']) - seen_movies

    recommendations = {}
    for movie in top_movies:
        if movie not in similarity_matrix.index:
            continue
        similar_movies = similarity_matrix.loc[movie].drop(labels=[movie])
        for sim_movie, score in similar_movies.items():
            if sim_movie in unseen_movies:
                if sim_movie in recommendations:
                    recommendations[sim_movie] += score
                else:
                    recommendations[sim_movie] = score

    sorted_recommendations = sorted(recommendations.items(), key=lambda x: x[1], reverse=True)
    top_recs = [movie_id for movie_id, _ in sorted_recommendations[:top_n]]
    avg_ratings = ratings.groupby('movie_id')['rating'].mean()
    top_recs_sorted = sorted(top_recs, key=lambda x: avg_ratings.get(x, 0), reverse=True)
    return movies[movies['movie_id'].isin(top_recs_sorted)]



In [23]:
# ✅ Step 8: Example Usage
print("Similar movies to movie ID 1:")
display(recommend_movies(1))

print("Personalized recommendations for user ID 10:")
display(personalized_recommendations(10))

Similar movies to movie ID 1:


Unnamed: 0,movie_id,title
49,50,Star Wars (1977)
116,117,"Rock, The (1996)"
120,121,Independence Day (ID4) (1996)
180,181,Return of the Jedi (1983)
404,405,Mission: Impossible (1996)


Personalized recommendations for user ID 10:


Unnamed: 0,movie_id,title
88,89,Blade Runner (1982)
130,131,Breakfast at Tiffany's (1961)
426,427,To Kill a Mockingbird (1962)
480,481,"Apartment, The (1960)"
491,492,East of Eden (1955)
493,494,His Girl Friday (1940)
505,506,Rebel Without a Cause (1955)
506,507,"Streetcar Named Desire, A (1951)"
513,514,Annie Hall (1977)
606,607,Rebecca (1940)
