# In [None]:

# 🎬 Movie Recommendation System in Google Colab
# ===============================================

# Step 1: Import libraries
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn.metrics.pairwise import cosine_similarity

# Step 2: Load dataset (fixed GitHub source)
movies_url = "https://raw.githubusercontent.com/vamshi121/TMDB-5000-Movie-Dataset/main/tmdb_5000_movies.csv"

movies = pd.read_csv(movies_url)
print("Movies data:")
print(movies.head())

# Step 3: Select and preprocess features
# .copy() makes the column subset an independent DataFrame, so adding the
# 'combined_features' column below does not raise SettingWithCopyWarning.
movies = movies[['id', 'title', 'genres', 'overview', 'keywords', 'tagline']].copy()
# Missing text becomes '' so the string concatenation below never yields NaN.
movies = movies.fillna('')

# Combine text features
# NOTE(review): in the TMDB 5000 CSV, 'genres' and 'keywords' appear to be
# JSON-encoded lists of {"id": ..., "name": ...} objects; if so, the literal
# tokens "id"/"name" and the numeric ids also end up in the bag of words.
# Confirm against the data and consider extracting only the names.
movies['combined_features'] = (
    movies['overview'] + ' ' + movies['tagline'] + ' '
    + movies['keywords'] + ' ' + movies['genres']
)

# Step 4: Convert text to feature vectors
from sklearn.feature_extraction.text import CountVectorizer
cv = CountVectorizer(stop_words='english', max_features=5000)
count_matrix = cv.fit_transform(movies['combined_features'])

# Step 5: Compute cosine similarity
# Row i / column j holds the similarity between movies i and j.
cosine_sim = cosine_similarity(count_matrix)

# Step 6: Create a Series with movie titles and indices
# Maps title -> row label. Series.drop_duplicates() would de-duplicate the
# *values* (row labels, which are already unique) and leave repeated titles in
# the index, making indices[title] return a Series for those titles. Filter on
# the index instead so each title maps to exactly one row (its first occurrence).
indices = pd.Series(movies.index, index=movies['title'])
indices = indices[~indices.index.duplicated(keep='first')]

# Step 7: Recommendation function
def recommend_movies(title, num_recommendations=5):
    """Return the titles of the movies most similar to *title*.

    Similarity is read from the module-level ``cosine_sim`` matrix; the row
    for *title* is looked up through the module-level ``indices`` Series.

    Args:
        title: Exact movie title to look up in ``indices``.
        num_recommendations: Maximum number of titles to return. Values
            below zero are treated as zero.

    Returns:
        A pandas Series of titles taken from ``movies['title']``, ordered from
        most to least similar, or the string
        ``"Movie not found in the dataset!"`` when *title* is unknown.
    """
    if title not in indices:
        return "Movie not found in the dataset!"

    idx = indices[title]
    # If several rows share this title the lookup yields a Series of row
    # labels; use the first occurrence so idx is always a single row.
    if isinstance(idx, pd.Series):
        idx = idx.iloc[0]

    scores = np.asarray(cosine_sim[idx])
    # Stable sort on the negated scores: descending similarity, ties kept in
    # original row order.
    ranked = np.argsort(-scores, kind="stable")
    # Drop the query movie explicitly rather than assuming it sorts first;
    # another row can tie with it (identical features) or even outrank it
    # (an all-zero feature vector has similarity 0 with itself).
    ranked = ranked[ranked != idx][:max(num_recommendations, 0)]
    return movies['title'].iloc[ranked]

# Step 8: Example usage
movie_name = "Avatar"
# Compute the recommendations once and reuse them for printing and plotting.
similar_movies = recommend_movies(movie_name)
print(f"Movies similar to '{movie_name}':\n")
print(similar_movies)

# Step 9: Visualization
# recommend_movies returns a plain message string when the title is unknown;
# only a Series of titles can be plotted, so skip the chart in that case.
if isinstance(similar_movies, pd.Series):
    query_idx = indices[movie_name]
    # A duplicated title yields a Series of row labels; use the first one.
    if isinstance(query_idx, pd.Series):
        query_idx = query_idx.iloc[0]

    # The result's index carries the row labels of the recommended movies
    # (equal to row positions with the default integer index produced by
    # read_csv), so scores are read by row instead of by title, which would be
    # ambiguous for duplicated titles.
    scores = [cosine_sim[query_idx][row] for row in similar_movies.index]

    sns.barplot(x=scores, y=similar_movies)
    plt.title(f"Movies similar to '{movie_name}'")
    plt.xlabel("Similarity Score")
    plt.show()
