In [3]:
import pandas as pd
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.metrics.pairwise import cosine_similarity

# Step 1: Sample dataset, written as (title, description) pairs so each
# movie's description sits right next to its title
_MOVIE_CATALOG = [
    ('The Matrix',
     'A computer hacker learns about the true nature of his reality and his role in the war against its controllers.'),
    ('John Wick',
     'An ex-hitman comes out of retirement to track down the gangsters that killed his dog.'),
    ('Avengers: Endgame',
     'Superheroes unite to battle the powerful Thanos and save the universe.'),
    ('The Dark Knight',
     'Batman sets out to dismantle the remaining criminal organizations that plague Gotham.'),
    ('Inception',
     'A thief who steals corporate secrets through use of dream-sharing technology.'),
    ('Interstellar',
     'A team travels through a wormhole in space in an attempt to ensure humanity’s survival.'),
    ('Gladiator',
     'A former Roman general sets out to exact vengeance against the corrupt emperor.'),
    ('The Godfather',
     'The aging patriarch of an organized crime dynasty transfers control of his empire to his son.'),
    ('The Shawshank Redemption',
     'Two imprisoned men bond over a number of years, finding solace and redemption.'),
    ('Fight Club',
     'An insomniac office worker and a soap maker form an underground fight club.'),
]

# Column-oriented view of the catalog: two parallel lists keyed by column name
movies = {
    'title': [title for title, _ in _MOVIE_CATALOG],
    'description': [description for _, description in _MOVIE_CATALOG],
}

# Step 2: Load the columns into a DataFrame (one row per movie)
df = pd.DataFrame(movies)

# Step 3: Turn every description into a TF-IDF vector, dropping English stop words
vectorizer = TfidfVectorizer(stop_words='english')
tfidf_matrix = vectorizer.fit_transform(df.loc[:, 'description'])

# Step 4: Pairwise cosine similarity between all movies
# (with the second argument omitted, the matrix rows are compared against themselves)
cosine_sim = cosine_similarity(tfidf_matrix)

# Step 5: Lookup table from movie title to its DataFrame index label
title_to_index = pd.Series(data=df.index, index=df['title'])

# Step 6: Recommendation function
def recommend_movie(title, sim_matrix=None, top_n=5):
    """Return the titles of the movies most similar to *title*.

    Relies on the module-level ``title_to_index`` (title -> row index) and
    ``df`` (movie table with a ``'title'`` column).

    Args:
        title: Movie title to look up in ``title_to_index``.
        sim_matrix: Similarity matrix in which row ``i`` holds the scores of
            movie ``i`` against every movie. When omitted, the module-level
            ``cosine_sim`` is used; it is resolved at call time so that it
            stays in sync with ``title_to_index`` and ``df`` if the data is
            rebuilt after this function was defined.
        top_n: Maximum number of recommendations to return (default 5).

    Returns:
        A list of at most ``top_n`` titles ordered from most to least
        similar (ties keep DataFrame order), or an error-message string
        when *title* is not a known movie.

    Raises:
        ValueError: If ``top_n`` is negative.
    """
    if top_n < 0:
        raise ValueError("top_n must be non-negative")
    if sim_matrix is None:
        sim_matrix = cosine_sim
    if title not in title_to_index:
        return "Oops! Movie not found. Please check the title and try again."

    idx = title_to_index[title]
    scores = sim_matrix[idx]

    # Exclude the queried movie explicitly instead of assuming it sorts first:
    # a description with no usable terms has self-similarity 0, and an earlier
    # movie with an identical description ties at 1.0 -- in both cases dropping
    # "the first result" would discard a real recommendation and keep the
    # queried movie itself.
    candidates = [i for i in range(len(scores)) if i != idx]
    # sorted() is stable, so equally-scored movies stay in DataFrame order.
    candidates.sort(key=lambda i: scores[i], reverse=True)

    return df['title'].iloc[candidates[:top_n]].tolist()

# Example usage
user_input = "Inception"
recommendations = recommend_movie(user_input)

# recommend_movie returns an error-message string for an unknown title.
# Print that message as a whole; iterating over it would list it one
# character per line under the "you might also enjoy" header.
if isinstance(recommendations, str):
    print(f"\n{recommendations}")
else:
    print(f"\n🎥 Because you liked *{user_input}*, you might also enjoy:")
    for i, movie in enumerate(recommendations, 1):
        print(f"{i}. {movie}")



🎥 Because you liked *Inception*, you might also enjoy:
1. The Matrix
2. John Wick
3. Avengers: Endgame
4. The Dark Knight
5. Interstellar
