In [1]:
import pandas as pd
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.metrics.pairwise import cosine_similarity

# Toy catalogue: five movies, each described by genre, director and keywords.
data = {
    'movie_id': [1, 2, 3, 4, 5],
    'title': ['Movie A', 'Movie B', 'Movie C', 'Movie D', 'Movie E'],
    'genre': [
        'Action, Adventure',
        'Action, Comedy',
        'Comedy, Drama',
        'Action, Thriller',
        'Comedy, Romance',
    ],
    'director': [
        'Director X',
        'Director Y',
        'Director Z',
        'Director X',
        'Director W',
    ],
    'keywords': [
        'fight, adventure, hero',
        'comedy, action, fun',
        'romance, drama, life',
        'thriller, action, fight',
        'comedy, romance, love',
    ],
}

# One row per movie, columns as keyed above.
df = pd.DataFrame(data)

# Build a single free-text field per movie: "<genre> <director> <keywords>".
df['description'] = df['genre'].str.cat(
    [df['director'], df['keywords']], sep=' '
)

# Turn each description into a TF-IDF weighted term vector, ignoring
# common English stop words.
# NOTE(review): with the default token pattern, single-character tokens
# (the "X"/"Y" in "Director X") are presumably discarded, so the director
# column may not actually distinguish movies here -- confirm if that matters.
tfidf = TfidfVectorizer(stop_words='english')
tfidf_matrix = tfidf.fit_transform(df['description'])

# Pairwise cosine similarity of every movie against every movie; passing a
# single matrix compares it with itself. Row/column i corresponds to row i
# of df.
cosine_sim = cosine_similarity(tfidf_matrix)

# Recommend movies similar to one the user already likes
def recommend_movies(movie_title, cosine_sim=cosine_sim, top_n=3):
    """Return the titles of the movies most similar to ``movie_title``.

    Args:
        movie_title: Exact title to look up in ``df['title']``. If several
            rows share the title, the first one is used.
        cosine_sim: Square pairwise-similarity matrix whose row/column
            order matches the row order of ``df``.
        top_n: Maximum number of recommendations to return (default 3).
            Values below zero are treated as zero.

    Returns:
        A pandas Series of titles ordered from most to least similar. The
        queried movie itself is never included.

    Raises:
        IndexError: If no row in ``df`` has the given title.
    """
    # Locate the movie by *position*, not by index label: cosine_sim is a
    # plain positional matrix, so labels would be wrong whenever df does
    # not carry the default 0..n-1 index.
    positions = (df['title'] == movie_title).to_numpy().nonzero()[0]
    if positions.size == 0:
        raise IndexError(f"No movie titled {movie_title!r} was found")
    idx = int(positions[0])

    # Drop the queried movie explicitly instead of assuming it sorts first;
    # another movie with an identical description would tie at 1.0 and
    # could otherwise push the query itself into the results.
    candidates = [
        (pos, score)
        for pos, score in enumerate(cosine_sim[idx])
        if pos != idx
    ]

    # Highest similarity first; sorted() is stable, so ties keep row order.
    candidates.sort(key=lambda pair: pair[1], reverse=True)

    movie_indices = [pos for pos, _ in candidates[:max(top_n, 0)]]

    # Return movie titles
    return df['title'].iloc[movie_indices]

# Smoke test: ask for the movies most similar to 'Movie A' and show them
recommendations = recommend_movies(movie_title='Movie A')
print("Recommended Movies:", recommendations)


Recommended Movies: 3    Movie D
1    Movie B
4    Movie E
Name: title, dtype: object
