In [3]:
import pandas as pd
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.metrics.pairwise import linear_kernel

# Sample movie catalogue, one (title, description) record per film
_MOVIE_RECORDS = [
    ('The Matrix', 'A hacker discovers reality is a simulation'),
    ('Inception', 'A thief enters dreams to steal secrets'),
    ('Interstellar', 'Astronauts travel through a wormhole in space'),
    ('The Dark Knight', 'A vigilante fights crime in Gotham City'),
    ('Pulp Fiction', 'Stories of crime and redemption in LA'),
    ('The Lord of the Rings', 'A quest to destroy a powerful ring'),
    ('Avengers: Endgame', 'Superheroes unite to battle a galactic enemy'),
    ('The Social Network', 'Rise of Facebook and tech rivalry'),
]

# Pivot the records into the column-oriented mapping the DataFrame is built from
_titles, _descriptions = zip(*_MOVIE_RECORDS)
data = {
    'title': list(_titles),
    'description': list(_descriptions),
}

df = pd.DataFrame(data)

# Turn each description into a TF-IDF vector, ignoring English stop words
tfidf = TfidfVectorizer(stop_words='english')
tfidf_matrix = tfidf.fit_transform(df['description'])

# Pairwise similarity between every pair of descriptions.
# TfidfVectorizer L2-normalises rows by default, so the plain dot product
# computed by linear_kernel is the cosine similarity.
cosine_sim = linear_kernel(tfidf_matrix, tfidf_matrix)

# Function to get recommendations
def recommend(title, cosine_sim=cosine_sim, top_n=3):
    """Return the titles most similar to ``title``.

    Args:
        title: Exact value from ``df['title']`` to find neighbours for. If
            the title occurs more than once, the first occurrence is used.
        cosine_sim: Square similarity matrix whose rows and columns follow
            the row order of ``df``. Defaults to the module-level matrix.
        top_n: Maximum number of recommendations to return.

    Returns:
        A pandas Series holding up to ``top_n`` titles, ordered from most to
        least similar. The queried movie itself is never included.

    Raises:
        IndexError: If ``title`` is not present in ``df['title']``.
        ValueError: If ``top_n`` is negative.
    """
    if top_n < 0:
        raise ValueError(f"top_n must be non-negative, got {top_n}")

    # Work with row *positions*, not index labels: cosine_sim is addressed
    # positionally and the final lookup uses .iloc, so this stays correct
    # even when df does not have a default 0..n-1 index.
    positions = (df['title'] == title).to_numpy().nonzero()[0]
    if len(positions) == 0:
        raise IndexError(f"movie {title!r} not found in the dataset")
    idx = int(positions[0])

    # Exclude the queried movie explicitly instead of assuming it sorts
    # first: a duplicate description (tie at 1.0) or a description made only
    # of stop words (self-similarity 0) would otherwise leak it into the
    # results and drop a genuine neighbour.
    candidates = [
        (pos, score)
        for pos, score in enumerate(cosine_sim[idx])
        if pos != idx
    ]
    # Stable sort: equally similar movies keep their dataset order.
    candidates.sort(key=lambda pair: pair[1], reverse=True)

    movie_indices = [pos for pos, _ in candidates[:top_n]]
    return df['title'].iloc[movie_indices]

# Example usage
# Strip surrounding whitespace and compare case-insensitively so that input
# such as "interstellar " still resolves to the catalogue entry.
movie = input("Enter a movie title: ").strip()
known_titles = {name.casefold(): name for name in df['title']}
matched_title = known_titles.get(movie.casefold())
if matched_title is not None:
    # Report and query with the canonical spelling stored in the dataset.
    print("\nBecause you liked:", matched_title)
    print("You might also like:")
    print(recommend(matched_title))
else:
    print("Sorry, movie not found in the dataset.")


Enter a movie title: Interstellar

Because you liked: Interstellar
You might also like:
0         The Matrix
1          Inception
3    The Dark Knight
Name: title, dtype: object
