In [None]:
import pandas as pd
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.metrics.pairwise import cosine_similarity

# Load the movie dataset into a DataFrame. The code below reads the "Genre"
# column here and the "Title" column inside recommend_movies.
file_path = "IMDB-Movie-Data.csv"  # Change this if the file path is different
df = pd.read_csv(file_path)

# Replace missing genres with an empty string so the vectorizer receives only
# strings (it cannot process NaN values).
df["Genre"] = df["Genre"].fillna("")

# Turn each movie's genre text into a TF-IDF weighted term vector
# (one row per movie, in the same order as df).
# NOTE(review): the default tokenizer splits on punctuation, so a hyphenated
# genre such as "Sci-Fi" would presumably become two tokens ("sci", "fi");
# confirm against the data and consider a comma-based token_pattern.
tfidf = TfidfVectorizer(stop_words="english")
genre_matrix = tfidf.fit_transform(df["Genre"])

# Pairwise cosine similarity between every pair of movies' genre vectors;
# cosine_sim[i][j] is the similarity between the movies in rows i and j.
cosine_sim = cosine_similarity(genre_matrix, genre_matrix)

# Function to get movie recommendations based on title
def recommend_movies(title, num_recommendations=5):
    """Return titles of the movies whose genres are most similar to *title*.

    The lookup is case-insensitive and ignores surrounding whitespace. When
    several rows share the title, the first one is used.

    Args:
        title: Movie title to look up in ``df["Title"]``.
        num_recommendations: Maximum number of titles to return. Values
            below zero are treated as zero.

    Returns:
        A list of titles ordered by descending genre similarity (ties keep
        dataset order), never containing the queried row itself, or the
        string ``"Movie not found!"`` when no row matches *title*.
    """
    wanted = title.strip().lower()
    is_match = df["Title"].str.strip().str.lower() == wanted

    # Work with row *positions*, not index labels: cosine_sim rows and
    # .iloc are positional, so this stays correct for a non-default index.
    positions = is_match.to_numpy().nonzero()[0]
    if len(positions) == 0:
        return "Movie not found!"
    query_pos = int(positions[0])

    # Drop the queried movie explicitly. Movies with identical genres tie
    # with it at the top score, so it is not guaranteed to sort first and
    # skipping "the first result" could discard a genuine recommendation.
    candidates = [
        (pos, score)
        for pos, score in enumerate(cosine_sim[query_pos])
        if pos != query_pos
    ]

    # list.sort is stable, so equally similar movies keep dataset order.
    candidates.sort(key=lambda pair: pair[1], reverse=True)

    limit = max(num_recommendations, 0)
    top_positions = [pos for pos, _ in candidates[:limit]]

    return df["Title"].iloc[top_positions].tolist()

# Usage
if __name__ == "__main__":
    # Ask for a title and print the genre-based recommendations for it.
    movie_name = input("Enter a movie name: ")  # User input for movie name
    recommendations = recommend_movies(movie_name)
    if isinstance(recommendations, str):
        # recommend_movies reports an unknown title as a message string;
        # print it as-is instead of iterating over its characters.
        print(recommendations)
    else:
        print(f"Movies similar to '{movie_name}':")
        for movie in recommendations:
            print("-", movie)


Enter a movie name: la la land
Movies similar to 'la la land':
- Sing Street
- Youth
- Magic Mike XXL
- Footloose
- Pitch Perfect 2
