# Task 5: Building a Product Recommendation System (Movie Recommender)
# Author: Syed Huzaifa Bin Khamis | Nexus AI Digital Internship

# 1. Import Libraries

In [1]:
import pandas as pd
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.metrics.pairwise import cosine_similarity


# 2. Load Dataset

In [2]:
# Load the TMDB movies dataset from the working directory.
movies = pd.read_csv("tmdb_5000_movies.csv")

# Keep only the columns the recommender needs. The explicit .copy() makes
# `movies` an independent frame rather than a selection derived from the raw
# table, so later column assignments (e.g. filling missing overviews) do not
# raise SettingWithCopyWarning.
movies = movies[['title', 'overview']].copy()
movies.head()


Unnamed: 0,title,overview
0,Avatar,"In the 22nd century, a paraplegic Marine is di..."
1,Pirates of the Caribbean: At World's End,"Captain Barbossa, long believed to be dead, ha..."
2,Spectre,A cryptic message from Bond’s past sends him o...
3,The Dark Knight Rises,Following the death of District Attorney Harve...
4,John Carter,"John Carter is a war-weary, former military ca..."


# 3. Handle Missing Values

In [3]:
# Replace missing overviews with empty strings so the text vectorizer receives
# only str values. `assign` returns a new frame instead of writing into the
# existing one, which keeps this cell idempotent and avoids
# SettingWithCopyWarning when `movies` is a column selection of another frame.
movies = movies.assign(overview=movies['overview'].fillna(''))


# 4. TF-IDF Vectorization

In [4]:
# Text to vectorize: one plot overview per movie.
overviews = movies['overview']

# Learn the vocabulary (English stop words removed) and encode every overview
# as a TF-IDF weighted sparse row in a single pass.
tfidf = TfidfVectorizer(stop_words='english')
tfidf_matrix = tfidf.fit_transform(overviews)

# (number of movies, number of distinct terms kept in the vocabulary)
print(tfidf_matrix.shape)


(4803, 20978)


# 5. Compute Cosine Similarity

In [5]:
# Pairwise cosine similarity of every movie against every other movie.
# With a single argument the second operand defaults to the first, so this is
# the same movies-by-movies matrix as passing `tfidf_matrix` twice.
cosine_sim = cosine_similarity(tfidf_matrix)


# 6. Build Recommender Function

In [6]:
# Make sure row labels are the positions 0..n-1 so they line up with the rows
# of the similarity matrix. drop=True discards the old index instead of
# inserting it as a new column; without it every run of this cell adds another
# column to `movies`, so the cell is not idempotent.
movies = movies.reset_index(drop=True)

# Lookup table: movie title -> row position.
indices = pd.Series(movies.index, index=movies['title'])

# Titles can repeat. Series.drop_duplicates() compares *values* (the row
# positions, which are always distinct), so it never removes a repeated title.
# Deduplicate on the index labels instead, keeping the first occurrence, so
# that indices[title] always yields a single position.
indices = indices[~indices.index.duplicated(keep='first')]

def recommend_movies(title, num_recommendations=5):
    """Return the titles most similar to `title` by overview cosine similarity.

    Parameters
    ----------
    title : str
        Exact movie title to look up in the module-level `indices` table.
    num_recommendations : int, default 5
        Maximum number of titles to return. Values <= 0 give an empty result.

    Returns
    -------
    pandas.Series
        Titles taken from `movies['title']`, ordered from most to least
        similar, labelled by their row position. The queried movie itself is
        never included.

    Raises
    ------
    KeyError
        If `title` is not present in the lookup table.
    """
    try:
        idx = indices[title]
    except KeyError:
        raise KeyError(f"Movie title not found in dataset: {title!r}") from None

    # A title that occurs more than once in the lookup yields a Series of
    # positions rather than a scalar; use the first occurrence so the row
    # selection below stays one-dimensional.
    if isinstance(idx, pd.Series):
        idx = idx.iloc[0]
    idx = int(idx)

    # Similarity of the chosen movie against every movie, as (position, score)
    # pairs. The movie itself is removed explicitly rather than assumed to sort
    # first: a movie with an empty overview has self-similarity 0, and another
    # movie with an identical overview ties at the top.
    candidates = [
        (position, score)
        for position, score in enumerate(cosine_sim[idx])
        if position != idx
    ]

    # sorted() is stable, so equally scored movies keep dataset order.
    candidates.sort(key=lambda pair: pair[1], reverse=True)

    # Clamp so a negative request cannot turn into an end-relative slice.
    limit = max(num_recommendations, 0)
    movie_indices = [position for position, _ in candidates[:limit]]

    return movies['title'].iloc[movie_indices]


# 7. Test the Recommender

In [7]:
# Spot-check the recommender on a few well-known titles.
demo_titles = ["The Dark Knight", "Avatar", "Inception"]

for position, demo_title in enumerate(demo_titles):
    # Every header after the first is preceded by a blank line.
    separator = "\n" if position else ""
    print(f"{separator}🎬 Recommendations for '{demo_title}':")
    print(recommend_movies(demo_title))


🎬 Recommendations for 'The Dark Knight':
3                         The Dark Knight Rises
428                              Batman Returns
3854    Batman: The Dark Knight Returns, Part 2
299                              Batman Forever
1359                                     Batman
Name: title, dtype: object

🎬 Recommendations for 'Avatar':
3604               Apollo 18
2130            The American
634               The Matrix
1341    The Inhabited Island
529         Tears of the Sun
Name: title, dtype: object

🎬 Recommendations for 'Inception':
2897                                Cypher
134     Mission: Impossible - Rogue Nation
1930                            Stone Cold
914                   Central Intelligence
1683                       Pitch Perfect 2
Name: title, dtype: object
