In [13]:
# libraries
import pandas as pd
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.metrics.pairwise import cosine_similarity


In [14]:
# Load movie data
# Cap on the number of rows used; the dense similarity matrix built later
# grows quadratically with this value.
MAX_MOVIES = 5000

# nrows stops parsing after the cap instead of reading the whole file and
# discarding the remainder with .head().
movies = pd.read_csv("movies.csv", nrows=MAX_MOVIES)

# Missing genres become empty strings so the text vectorizer receives only str.
movies['genres'] = movies['genres'].fillna('')


In [15]:
# Create TF-IDF vector for genres
# NOTE(review): the default token pattern splits on every non-word character,
# so a hyphenated genre label (e.g. "Sci-Fi") would become two tokens.
# Presumably genres are '|'-separated labels -- confirm against movies.csv and
# consider a custom token_pattern if that matters.
tfidf = TfidfVectorizer(stop_words='english')
tfidf_matrix = tfidf.fit_transform(movies['genres'])

# Compute cosine similarity between movies (dense n_movies x n_movies matrix)
cosine_sim = cosine_similarity(tfidf_matrix, tfidf_matrix)

# Map movie titles to indices.
# Series.drop_duplicates() compares the *values* (row labels, already unique),
# so it never removed repeated titles. De-duplicate on the index instead,
# keeping the first occurrence, so indices[title] is always a single scalar.
indices = pd.Series(movies.index, index=movies['title'])
indices = indices[~indices.index.duplicated(keep='first')]


In [18]:
# closest match
def get_closest_match(title, titles):
    """Find the catalogue title that best matches a user-typed query.

    Matching is case-insensitive. An exact match wins; otherwise the first
    title (in iteration order) that contains the query as a substring is
    returned.

    Args:
        title: Query string typed by the user.
        titles: Iterable of candidate titles. Non-string entries (such as
            NaN for a missing value) are skipped.

    Returns:
        The matching title exactly as it appears in ``titles``, or ``None``
        when the query is blank or nothing matches.
    """
    query = title.strip().lower()
    # An empty string is a substring of every title; treat it as "no match"
    # instead of returning whichever title happens to come first.
    if not query:
        return None

    first_partial = None
    for candidate in titles:
        if not isinstance(candidate, str):
            continue
        lowered = candidate.lower()
        if lowered == query:
            return candidate
        # Remember only the earliest partial hit, but keep scanning in case
        # an exact match appears later.
        if first_partial is None and query in lowered:
            first_partial = candidate
    return first_partial

# Recommendation Function
def recommend(title, top_n=5):
    """Return up to ``top_n`` titles most similar to ``title``.

    Similarity is read from the module-level ``cosine_sim`` matrix, using the
    row position looked up in ``indices``; titles come from ``movies``.

    Args:
        title: Title to look up; must be a key of ``indices``.
        top_n: Maximum number of recommendations to return.

    Returns:
        A list of titles ordered by descending similarity; ties keep their
        original row order. The queried movie is never included. Returns an
        empty list when ``title`` is unknown or ``top_n`` is not positive.
    """
    if top_n <= 0 or title not in indices:
        return []

    idx = indices[title]
    # A title that occurs more than once in the lookup yields a Series;
    # fall back to its first entry so the row lookup below stays 1-D.
    if isinstance(idx, pd.Series):
        idx = idx.iloc[0]
    idx = int(idx)

    scores = cosine_sim[idx]
    # Drop the queried movie by position rather than assuming it sorts first:
    # other movies can tie with it on score, and a stable sort would then put
    # an earlier row ahead of it.
    candidates = (pos for pos in range(len(scores)) if pos != idx)
    ranked = sorted(candidates, key=lambda pos: scores[pos], reverse=True)
    return movies['title'].iloc[ranked[:top_n]].tolist()

# Main Program
# Interactive entry point: prompt for a title, resolve it to a catalogue
# entry, and print the recommendations for it.
if __name__ == '__main__':
    user_input = input("Enter a movie title: ").strip()

    if not user_input:
        # A blank query must not reach the substring matcher: an empty string
        # is contained in every title, so it would resolve to an arbitrary movie.
        print("No movie title entered.")
    else:
        # Try to match title
        matched_title = get_closest_match(user_input, movies['title'])

        if matched_title:
            results = recommend(matched_title)
            if results:
                print(f"\nBecause you liked '{matched_title}', you might also like:")
                for i, movie in enumerate(results, 1):
                    print(f"{i}. {movie}")
            else:
                print("No recommendations found.")
        else:
            print(f"Movie '{user_input}' not found in the database.")

Enter a movie title: Father of the Bride Part II

Because you liked 'Father of the Bride Part II (1995)', you might also like:
1. Four Rooms (1995)
2. Ace Ventura: When Nature Calls (1995)
3. Bio-Dome (1996)
4. Friday (1995)
5. Black Sheep (1996)
