In [14]:
#Import Libraries
import pandas as pd
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.metrics.pairwise import linear_kernel

# Load the Dataset
data = pd.read_csv("/content/tmdb_5000_movies.csv")
# Missing overviews become empty strings so the vectorizer only sees text.
data['overview'] = data['overview'].fillna('')

# Create TF-IDF Matrix (one row per movie, English stop words removed)
tfidf = TfidfVectorizer(stop_words='english')
tfidf_matrix = tfidf.fit_transform(data['overview'])

# Compute Cosine Similarity Matrix
# TfidfVectorizer L2-normalises each row by default, so the plain dot product
# computed by linear_kernel is the cosine similarity between overviews.
cosine_sim = linear_kernel(tfidf_matrix, tfidf_matrix)

# Create Reverse Mapping of Indices (movie title -> row label in `data`)
# Series.drop_duplicates() compares the *values* (the row labels, which are
# already unique), so it never removed repeated titles. De-duplicate on the
# index instead, keeping the first row per title, so that a title lookup
# always yields a single scalar rather than a Series.
indices = pd.Series(data.index, index=data['title'])
indices = indices[~indices.index.duplicated(keep='first')]

#Recommendation Function
def get_recommendations(title, cosine_sim=cosine_sim):
    """Return the titles of the movies most similar to ``title``.

    Args:
        title: Exact movie title to look up in the module-level ``indices``
            mapping.
        cosine_sim: Square similarity matrix indexed by row position in
            ``data``; defaults to the module-level matrix.

    Returns:
        A list of up to 10 titles ordered from most to least similar, never
        including the queried row itself. If ``title`` is not present, the
        single-element list ``["Movie not found."]`` is returned.
    """
    idx = indices.get(title)
    if idx is None:
        return ["Movie not found."]
    # When several rows share one title the lookup yields a Series of rows
    # instead of a scalar; fall back to the first matching row.
    if isinstance(idx, pd.Series):
        idx = idx.iloc[0]
    idx = int(idx)
    # Exclude the query row by position. Skipping "the first sorted entry"
    # is wrong when the query is not ranked first, e.g. an empty overview
    # (all scores 0) or another row with an identical overview.
    candidates = (
        (pos, score)
        for pos, score in enumerate(cosine_sim[idx])
        if pos != idx
    )
    top_matches = sorted(candidates, key=lambda pair: pair[1], reverse=True)[:10]
    movie_indices = [pos for pos, _ in top_matches]
    return data['title'].iloc[movie_indices].tolist()

# Choose the query movie and fetch its recommendations
movie_title = "Inception"  # Change to any title in the dataset
recommendations = get_recommendations(movie_title)

# Print a heading framed by blank lines, then the results numbered from 1
header = f"\nTop 10 movies similar to '{movie_title}':\n"
print(header)
for i, rec in enumerate(recommendations, start=1):
    line = f"{i}. {rec}"
    print(line)



Top 10 movies similar to 'Inception':

1. Cypher
2. Mission: Impossible - Rogue Nation
3. Stone Cold
4. Central Intelligence
5. Pitch Perfect 2
6. At First Sight
7. A History of Violence
8. Renaissance
9. Blood and Wine
10. Duplex
