In [1]:
# Movie Recommendation System using Content-Based Filtering

import json

import pandas as pd
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.metrics.pairwise import cosine_similarity

# ----------------------------------------
# STEP 1: Load Dataset
# ----------------------------------------
# Download 'tmdb_5000_movies.csv' from https://www.kaggle.com/datasets/tmdb/tmdb-movie-metadata
# and place it in the same folder as this script (the path below is relative).

movies = pd.read_csv("tmdb_5000_movies.csv")

# Display basic information
print("🎬 Dataset Loaded Successfully!")
print(movies.head())

# ----------------------------------------
# STEP 2: Select and Preprocess Data
# ----------------------------------------
def _genre_names(raw):
    """Turn one raw `genres` cell into a space-separated string of genre tokens.

    The TMDB file stores genres as a JSON list of ``{"id": ..., "name": ...}``
    objects. Only the names are kept, so the JSON keys and numeric ids do not
    end up as TF-IDF terms. Each name is lowercased and its internal spaces
    are replaced with underscores so that a multi-word genre stays a single
    token.

    Cells that are not a JSON list of such objects are returned as the
    lowercased raw text, which is what the vectorizer would have seen without
    this step.
    """
    try:
        entries = json.loads(raw)
    except (TypeError, ValueError):
        return str(raw).lower()

    is_genre_list = isinstance(entries, list) and all(
        isinstance(entry, dict) and "name" in entry for entry in entries
    )
    if not is_genre_list:
        return str(raw).lower()

    return " ".join(
        str(entry["name"]).lower().replace(" ", "_") for entry in entries
    )


# We'll use movie titles and genres.
# dropna() returns a new frame (no mutation of a slice of the original), and
# reset_index() keeps index labels equal to row positions, so they line up
# with the rows of the TF-IDF and similarity matrices built below.
movies = movies[['title', 'genres']].dropna().reset_index(drop=True)

# Reduce each genres cell to lowercase genre-name tokens
movies['genres'] = movies['genres'].map(_genre_names)

# ----------------------------------------
# STEP 3: TF-IDF Vectorization
# ----------------------------------------
# One row per movie, in the same order as `movies`
vectorizer = TfidfVectorizer(stop_words='english')
tfidf_matrix = vectorizer.fit_transform(movies['genres'])

# ----------------------------------------
# STEP 4: Compute Cosine Similarity
# ----------------------------------------
# similarity_matrix[i][j] is the similarity between movie rows i and j
similarity_matrix = cosine_similarity(tfidf_matrix, tfidf_matrix)

# ----------------------------------------
# STEP 5: Build Recommendation Function
# ----------------------------------------
def recommend_movie(title, n=5):
    """Print the `n` movies most similar to `title`.

    Reads the module-level `movies` frame and `similarity_matrix`, whose rows
    are in the same order as the rows of `movies`.

    Args:
        title: Exact movie title to look up in ``movies['title']``. If several
            rows share the title, the first one is used.
        n: Maximum number of recommendations to print. Values below 1 print
            only the header.

    Returns:
        None. Results (or a "not found" message) are printed to stdout.
    """
    # Look the movie up by row position, not index label: similarity_matrix
    # is indexed positionally, and labels differ from positions whenever the
    # frame's index is not a fresh 0..N-1 range.
    matches = (movies['title'].values == title).nonzero()[0]
    if matches.size == 0:
        print("❌ Movie not found in the dataset.")
        return

    idx = int(matches[0])

    # Drop the queried movie explicitly. Slicing off the first sorted entry is
    # not reliable: other movies can tie with it at the top score, and the
    # stable sort then puts whichever row comes first in front.
    scores = [
        (pos, score)
        for pos, score in enumerate(similarity_matrix[idx])
        if pos != idx
    ]

    # Highest similarity first; ties keep dataset order
    top_scores = sorted(scores, key=lambda x: x[1], reverse=True)[:max(n, 0)]

    print(f"\n🎥 Top {n} movies similar to '{title}':\n")
    for rank, (movie_pos, score) in enumerate(top_scores, start=1):
        print(f"{rank}. {movies.iloc[movie_pos]['title']}  (Similarity: {score:.2f})")

# ----------------------------------------
# STEP 6: Try the Recommendation System
# ----------------------------------------
# Example query: print the five closest matches for one title
recommend_movie(title="Toy Story", n=5)


🎬 Dataset Loaded Successfully!
      budget                                             genres  \
0  237000000  [{"id": 28, "name": "Action"}, {"id": 12, "nam...   
1  300000000  [{"id": 12, "name": "Adventure"}, {"id": 14, "...   
2  245000000  [{"id": 28, "name": "Action"}, {"id": 12, "nam...   
3  250000000  [{"id": 28, "name": "Action"}, {"id": 80, "nam...   
4  260000000  [{"id": 28, "name": "Action"}, {"id": 12, "nam...   

                                       homepage      id  \
0                   http://www.avatarmovie.com/   19995   
1  http://disney.go.com/disneypictures/pirates/     285   
2   http://www.sonypictures.com/movies/spectre/  206647   
3            http://www.thedarkknightrises.com/   49026   
4          http://movies.disney.com/john-carter   49529   

                                            keywords original_language  \
0  [{"id": 1463, "name": "culture clash"}, {"id":...                en   
1  [{"id": 270, "name": "ocean"}, {"id": 726, "na...           