In [None]:
import difflib
import pandas as pd
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.metrics.pairwise import cosine_similarity

# Load the movie metadata
df = pd.read_csv('movies.csv')

# Descriptive text columns; missing entries become empty strings so the
# concatenation below never propagates NaN
text_features = ['overview', 'keywords', 'genres', 'cast', 'director']
df[text_features] = df[text_features].fillna('')

# Build one space-separated document per movie from the descriptive columns
df['combined_features'] = df[text_features[0]].str.cat(df[text_features[1:]], sep=' ')

# Vectorise the documents with TF-IDF, ignoring English stop words
tfidf = TfidfVectorizer(stop_words='english')
tfidf_matrix = tfidf.fit_transform(df['combined_features'])

# Pairwise cosine similarity between every pair of movies
# (with a single argument the matrix is compared against itself)
cosine_sim = cosine_similarity(tfidf_matrix)

# Function to get recommendations based on movie title (with partial matching)
def get_recommendations(title, cosine_sim=cosine_sim):
    """Interactively print the movies most similar to a fuzzy-matched title.

    The title is compared case-insensitively against ``df['title']`` using
    ``difflib``. Up to five candidate movies are listed and the user picks
    one by number; the ten most similar other movies are then printed.

    Args:
        title: Free-text movie title typed by the user.
        cosine_sim: Similarity matrix indexed by row position, where row and
            column ``i`` refer to the ``i``-th row of ``df``.

    Returns:
        None. All output is printed. The function returns early after
        printing a message when nothing matches or the selection is invalid.
    """
    max_candidates = 5
    max_recommendations = 10

    # Clean the input title
    query = title.strip().lower()

    # Map each lowercased title to the row positions that carry it. Positions
    # (not index labels) are recorded because cosine_sim is indexed by
    # position, and non-string titles (e.g. NaN) are skipped because difflib
    # cannot compare them.
    positions_by_title = {}
    for position, raw_title in enumerate(df['title']):
        if isinstance(raw_title, str):
            positions_by_title.setdefault(raw_title.lower(), []).append(position)

    # Find close matches among the distinct titles
    close_matches = difflib.get_close_matches(
        query, list(positions_by_title), n=max_candidates, cutoff=0.4
    )

    if not close_matches:
        print(f"Sorry, no movies found matching '{query}'.")
        return

    # Expand every matched title to its rows so that movies sharing a title
    # (e.g. a remake) are separate candidates that can each be selected.
    candidates = [
        position
        for match in close_matches
        for position in positions_by_title[match]
    ][:max_candidates]

    # Prompt the user to select the correct movie
    print("\nDid you mean one of these movies?")
    for number, position in enumerate(candidates, 1):
        print(f"{number}. {df['title'].iloc[position]}")

    try:
        choice = int(input("\nEnter the number of the correct movie (1, 2, 3, etc.): "))
    except ValueError:
        print("Invalid selection.")
        return

    # Reject 0 and negatives explicitly: as list indices they would wrap
    # around and silently select a candidate from the end of the list.
    if not 1 <= choice <= len(candidates):
        print("Invalid selection.")
        return

    idx = candidates[choice - 1]
    selected_title = df['title'].iloc[idx]

    # Rank every other movie by similarity to the selected one. The selected
    # movie is excluded by position rather than by assuming it sorts first,
    # because a movie with tied scores could otherwise displace it and the
    # selected movie would end up recommending itself.
    sim_scores = [
        (position, score)
        for position, score in enumerate(cosine_sim[idx])
        if position != idx
    ]
    sim_scores.sort(key=lambda pair: pair[1], reverse=True)
    movie_indices = [position for position, _ in sim_scores[:max_recommendations]]

    # Print recommendations based on the selected movie
    print(f"\nBased on the movie '{selected_title}', here are some recommended movies for you:\n")
    recommendations = df['title'].iloc[movie_indices]

    for i, movie in enumerate(recommendations, 1):
        print(f"{i}. {movie}")

# Prompt for the movie the user wants recommendations for
user_movie = input("Enter a movie title for recommendations: ")

# Run the interactive lookup, which prints the suggestions for that title
get_recommendations(title=user_movie)


Enter a movie title for recommendations: batman

Did you mean one of these movies?
1. batman
2. batman
3. ant-man
4. catwoman
5. taxman

Enter the number of the correct movie (1, 2, 3, etc.): 3

Based on the movie 'ant-man', here are some recommended movies for you:

1. X-Men: Apocalypse
2. X-Men
3. The Avengers
4. X-Men: Days of Future Past
5. Man of Steel
6. Iron Man 2
7. Kick-Ass
8. The Incredible Hulk
9. Deadpool
10. X-Men: The Last Stand
