In [6]:
# Movie Recommendation System using Content-Based Filtering

import pandas as pd
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.metrics.pairwise import linear_kernel

# Load dataset
# NOTE(review): absolute, machine-specific path — point this at your own copy of movies.csv.
MOVIES_CSV_PATH = r"F:\project.py\data set predection\Movie Recommendation system\movies.csv"
movies = pd.read_csv(MOVIES_CSV_PATH)  # Make sure this file is from MovieLens 100K dataset

# Genres are the only text feature used here; replace missing values with an
# empty string so the vectorizer receives a string for every row.
movies['genres'] = movies['genres'].fillna('')

# TF-IDF Vectorizer to convert text to numeric matrix (one row per movie)
tfidf = TfidfVectorizer(stop_words='english')
tfidf_matrix = tfidf.fit_transform(movies['genres'])

# TfidfVectorizer L2-normalises each row by default, so the plain dot product
# computed by linear_kernel equals the cosine similarity.
cosine_sim = linear_kernel(tfidf_matrix, tfidf_matrix)

# Reverse mapping of movie titles to row indices.
# Series.drop_duplicates() compares the *values* (the row labels, which are
# already unique), so it never removes repeated titles. De-duplicate on the
# index instead, keeping the first row for each title, so that indices[title]
# is always a single scalar.
title_to_row = pd.Series(movies.index, index=movies['title'])
indices = title_to_row[~title_to_row.index.duplicated(keep='first')]

# Recommendation function
def recommend(title, num_recommendations=5):
    """Return the titles of the movies most similar to ``title``.

    Similarity is read from the module-level ``cosine_sim`` matrix, using the
    row that ``indices`` maps ``title`` to.

    Parameters
    ----------
    title : str
        Exact title to look up in ``indices``.
    num_recommendations : int, default 5
        Maximum number of titles to return; values below zero are treated as 0.

    Returns
    -------
    pandas.Series or str
        The ``title`` entries of the best-matching rows of ``movies``, ordered
        from most to least similar, or a "not found" message string when
        ``title`` is not a key of ``indices``.
    """
    if title not in indices:
        return f"'{title}' not found in the movie list."

    idx = indices[title]
    # A title that occurs more than once in the lookup yields a Series of row
    # positions rather than a scalar; fall back to the first occurrence.
    if isinstance(idx, pd.Series):
        idx = idx.iloc[0]

    # Exclude the query movie by position instead of assuming it sorts first:
    # when an earlier row ties at the top score, slicing off element 0 would
    # drop that row and leave the query movie in its own recommendations.
    candidates = [
        (pos, score) for pos, score in enumerate(cosine_sim[idx]) if pos != idx
    ]
    # list.sort is stable, so rows with equal scores keep their dataset order.
    candidates.sort(key=lambda pair: pair[1], reverse=True)
    top_matches = candidates[:max(num_recommendations, 0)]
    movie_indices = [pos for pos, _ in top_matches]

    return movies['title'].iloc[movie_indices]

# Example usage
# Strip surrounding whitespace so a stray space typed at the prompt does not
# turn an otherwise valid title into a failed lookup.
movie_to_search = input("Enter the movie name:").strip()
result = recommend(movie_to_search)
if isinstance(result, str):
    # recommend() reports an unknown title by returning a message string;
    # print it alone instead of underneath a misleading "similar to" header.
    print(result)
else:
    print(f"\nMovies similar to '{movie_to_search}':\n")
    print(result)


Enter the movie name: Toy Story (1995)



Movies similar to 'Toy Story (1995)':

1706                                       Antz (1998)
2355                                Toy Story 2 (1999)
2809    Adventures of Rocky and Bullwinkle, The (2000)
3000                  Emperor's New Groove, The (2000)
3568                             Monsters, Inc. (2001)
Name: title, dtype: object
