In [1]:
# Import required libraries for data processing and machine learning 
import numpy as np  # For numerical operations (optional for future use)
import pandas as pd  # For handling and analyzing structured data
import difflib  # For fuzzy matching of movie titles
from sklearn.feature_extraction.text import TfidfVectorizer  # Convert text to feature vectors
from sklearn.metrics.pairwise import cosine_similarity  # Measure similarity between vectors


In [None]:
# Read the movie metadata table from the CSV file in the working directory
movies_df = pd.read_csv(filepath_or_buffer="movies.csv")

In [3]:
# Text columns that feed the content-based similarity model
feature_columns = ["genres", "keywords", "tagline", "cast", "director"]

# Replace missing entries in all of those columns at once with empty strings,
# so that the later string concatenation never meets a NaN
movies_df[feature_columns] = movies_df[feature_columns].fillna("")

In [4]:
# Build one space-separated text blob per movie: start from the genres and
# append each remaining metadata column in turn, left to right
combined_features_series = movies_df["genres"]
for metadata_column in ("keywords", "tagline", "cast", "director"):
    combined_features_series = combined_features_series + " " + movies_df[metadata_column]


In [5]:
# Learn the vocabulary and IDF weights from the combined metadata text,
# then encode every movie as a TF-IDF feature vector
tfidf_vectorizer = TfidfVectorizer()
tfidf_vectorizer.fit(combined_features_series)
tfidf_matrix = tfidf_vectorizer.transform(combined_features_series)

In [6]:
# Pairwise cosine similarity of every movie vector against every other one;
# entry [i, j] scores how alike the movies in rows i and j are
cosine_similarity_matrix = cosine_similarity(tfidf_matrix, tfidf_matrix)

In [1]:
# Ask interactively for the movie to base the recommendations on,
# then echo the raw answer back so it is recorded in the cell output
title_prompt = "🎬 Enter the movie name: "
user_input_title = input(title_prompt)
print(user_input_title)

iron man


In [8]:
# Extract all titles and find the closest matches to the user's input
all_movie_titles = movies_df["title"].tolist()

# difflib compares characters case-sensitively, so a lower-case query such as
# "up" scores below the default cutoff against the stored title "Up".
# Compare case-folded text instead and map every hit back to the title exactly
# as it is stored in the dataset.
title_by_folded = {}
for title in all_movie_titles:
    if isinstance(title, str):  # skip non-string entries (e.g. missing titles)
        # Keep the first spelling seen when two titles differ only by case
        title_by_folded.setdefault(title.casefold(), title)

# Stray leading/trailing whitespace in the typed query would only lower the
# similarity ratio, so strip it before matching
folded_matches = difflib.get_close_matches(
    user_input_title.strip().casefold(), list(title_by_folded)
)

# Best match first, expressed with the dataset's own title strings
matching_titles = [title_by_folded[folded_title] for folded_title in folded_matches]


In [None]:
# If a matching title is found, recommend the movies most similar to it
if matching_titles:
    reference_title = matching_titles[0]

    # Rows of cosine_similarity_matrix follow the row order of movies_df, so
    # locate the reference movie by row position instead of relying on an
    # "index" column in the CSV staying present and aligned with that order.
    # argmax on the boolean mask yields the position of the first matching row.
    reference_index = int((movies_df["title"] == reference_title).to_numpy().argmax())

    # Pair every row position with its similarity to the reference movie and
    # rank the pairs from most to least similar
    similarity_scores = list(enumerate(cosine_similarity_matrix[reference_index]))
    sorted_recommendations = sorted(similarity_scores, key=lambda x: x[1], reverse=True)

    print(f"\n 🔎 Best match found: {reference_title}")
    print("\n📽️ Based on your interest, here are some movies you may like:\n")

    # Number of recommendations to display
    max_recommendations = 7

    # Drop the reference movie explicitly: it is not guaranteed to sort first
    # (ties with identical metadata, or an all-zero feature vector), so
    # skipping "whatever is at position 0" could hide the wrong movie.
    top_recommendations = [
        (idx, score) for idx, score in sorted_recommendations if idx != reference_index
    ][:max_recommendations]

    # Only the displayed movies are looked up, by row position, instead of
    # filtering the whole DataFrame once per movie in the dataset
    for recommendation_count, (idx, _score) in enumerate(top_recommendations, start=1):
        recommended_title = movies_df["title"].iloc[idx]
        print(f"{recommendation_count}: {recommended_title}")

# If no close match found, show a message
else:
    print("No close match found. Please try a different title.")



 🔎 Best match found: Iron Man

📽️ Based on your interest, here are some movies you may like:

1: Iron Man 2
2: Iron Man 3
3: Avengers: Age of Ultron
4: The Avengers
5: Captain America: Civil War
6: Captain America: The Winter Soldier
7: Ant-Man
