In [1]:
import pandas as pd
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.metrics.pairwise import cosine_similarity

In [2]:
def load_data(path='/content/drive/MyDrive/tmdb_5000_movies.csv'):
    """Read the movie metadata CSV into a DataFrame.

    Parameters
    ----------
    path : str or path-like, optional
        Location of the CSV file. Defaults to the path this notebook
        originally hard-coded, so a bare ``load_data()`` call behaves
        exactly as before.

    Returns
    -------
    pandas.DataFrame
        The parsed file, as returned by ``pd.read_csv``.
    """
    return pd.read_csv(path)

In [3]:
def create_similarity_matrix(movie):
    """Build a genre-based cosine-similarity matrix and a title lookup.

    Parameters
    ----------
    movie : pandas.DataFrame
        Must contain the columns ``genres`` and ``original_title``.
        The frame is left unmodified.

    Returns
    -------
    cosine_sim : array-like
        Pairwise cosine similarities between the TF-IDF vectors of each
        row's ``genres`` text (rows/columns follow the row order of
        ``movie``).
    indices : pandas.Series
        Maps ``original_title`` to the row label in ``movie``. When a title
        occurs more than once only its first occurrence is kept, so a
        lookup by title yields a single label.
    tfidf : TfidfVectorizer
        The vectorizer after fitting on the ``genres`` text.
    """
    # Fill missing genres on a local Series instead of assigning back into
    # the caller's DataFrame, so calling this function has no side effects.
    genre_text = movie['genres'].fillna('')

    tfidf = TfidfVectorizer(stop_words='english')
    tfidf_matrix = tfidf.fit_transform(genre_text)
    cosine_sim = cosine_similarity(tfidf_matrix, tfidf_matrix)

    indices = pd.Series(movie.index, index=movie['original_title'])
    # Series.drop_duplicates() compares the *values* (row labels), which
    # leaves repeated titles in place. Deduplicate on the title index instead.
    indices = indices[~indices.index.duplicated(keep='first')]
    # NOTE(review): cosine_sim is positional while `indices` holds row
    # labels; the two line up only if `movie` has a default 0..n-1 index.
    return cosine_sim, indices, tfidf

In [4]:
def get_movie_by_genre(movie, genre, regex=False):
    """Return the rows of ``movie`` whose ``genres`` text contains ``genre``.

    Parameters
    ----------
    movie : pandas.DataFrame
        Must contain a string-valued ``genres`` column.
    genre : str
        Text to look for; matching is case-insensitive.
    regex : bool, optional
        When False (the default) ``genre`` is matched as a literal
        substring, so input such as ``"("`` or ``"c++"`` cannot raise
        ``re.error`` or match unintended rows. Pass True to interpret
        ``genre`` as a regular expression.

    Returns
    -------
    pandas.DataFrame
        The matching subset of ``movie``; rows with a missing ``genres``
        value never match.
    """
    is_match = movie['genres'].str.contains(
        genre, case=False, na=False, regex=regex
    )
    return movie[is_match]

In [5]:
movie = load_data()
# Strip surrounding whitespace so input like "comedy " still matches.
user_genre = input("Enter a genre: ").strip()
if user_genre:
    matched_movie = get_movie_by_genre(movie, user_genre)
else:
    # An empty string is contained in every genre text, so it would "match"
    # every movie; treat it as no match instead.
    matched_movie = movie.iloc[0:0]
if not matched_movie.empty:
    print(f"movie matching the genre '{user_genre}':")
    print(matched_movie[['original_title', 'genres']])
else:
    print(f"No movie found for the genre '{user_genre}'.")

Enter a genre: comedy
movie matching the genre 'comedy':
                    original_title  \
18                  Men in Black 3   
40                          Cars 2   
42                     Toy Story 3   
55                           Brave   
58                     Rush Hour 3   
...                            ...   
4786              Breaking Upwards   
4788                Pink Flamingos   
4794  Sanctuary: Quite a Conundrum   
4799                     Newlyweds   
4800     Signed, Sealed, Delivered   

                                                 genres  
18    [{"id": 28, "name": "Action"}, {"id": 35, "nam...  
40    [{"id": 16, "name": "Animation"}, {"id": 10751...  
42    [{"id": 16, "name": "Animation"}, {"id": 10751...  
55    [{"id": 16, "name": "Animation"}, {"id": 12, "...  
58    [{"id": 28, "name": "Action"}, {"id": 35, "nam...  
...                                                 ...  
4786  [{"id": 35, "name": "Comedy"}, {"id": 10749, "...  
4788  [{"id": 27, "nam