In [1]:
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.metrics.pairwise import linear_kernel

# Toy catalogue: each movie title maps to a single genre label.
# Insertion order is significant because the recommender derives row
# positions from the order in which the titles are iterated.
movies_data = {
    "The Shawshank Redemption": "drama",
    "The Godfather": "drama",
    "The Dark Knight": "action",
    "Pulp Fiction": "drama",
    "Forrest Gump": "drama",
    "The Lord of the Rings: The Return of the King": "fantasy",
    "Inception": "action",
    "Titanic": "romance",
    "Interstellar": "sci-fi",
    "The Matrix": "sci-fi",
}

# Function to recommend movies based on user's preferences
def recommend_movies(user_likes, movies_data, top_n=5):
    """Recommend movies whose genre text best matches the movies a user liked.

    Every genre string in ``movies_data`` is turned into a TF-IDF vector. Each
    candidate movie is scored by its average similarity to the liked movies,
    and the best-scoring movies that the user has not already liked are
    returned.

    Args:
        user_likes: Iterable of liked movie titles. Every title must be a key
            of ``movies_data``. A title listed more than once contributes
            once per occurrence to the average.
        movies_data: Mapping of movie title to genre text. Its iteration
            order defines the catalogue order used for tie-breaking.
        top_n: Maximum number of titles to return. Defaults to 5.

    Returns:
        A list of at most ``top_n`` titles, ordered by descending average
        similarity and excluding everything in ``user_likes``. An empty list
        is returned when ``user_likes`` is empty (there is nothing to compare
        against) or when ``top_n`` is not positive.

    Raises:
        KeyError: If any title in ``user_likes`` is not a key of
            ``movies_data``.
    """
    # Materialise once so a one-shot iterable can be validated and reused.
    liked_titles = list(user_likes)
    titles = list(movies_data)
    title_to_index = {title: position for position, title in enumerate(titles)}

    # Fail early, before any vectorisation work, and name the offending titles.
    unknown_titles = [title for title in liked_titles if title not in title_to_index]
    if unknown_titles:
        raise KeyError(f"Unknown movie title(s) in user_likes: {unknown_titles}")

    # Without liked movies the mean below would be taken over zero rows and
    # produce NaN scores, so there is no meaningful ranking to return.
    if not liked_titles or top_n <= 0:
        return []

    liked_indices = [title_to_index[title] for title in liked_titles]
    liked_index_set = set(liked_indices)  # O(1) membership when filtering

    # Transform movie genres into TF-IDF vectors (one row per movie).
    tfidf = TfidfVectorizer()
    genres_matrix = tfidf.fit_transform(list(movies_data.values()))

    # Only the rows belonging to liked movies are needed, so compare just
    # those rows against the whole catalogue instead of building the full
    # movie-by-movie matrix. The linear kernel is a plain dot product; it
    # equals cosine similarity when the rows are L2-normalised, which is
    # TfidfVectorizer's documented default.
    liked_similarities = linear_kernel(genres_matrix[liked_indices], genres_matrix)

    # Average similarity of every movie to the liked movies.
    avg_similarities = liked_similarities.mean(axis=0)

    # Stable ascending sort, then reversed: highest score first, and movies
    # with equal scores appear in reverse catalogue order, deterministically.
    ranked_indices = avg_similarities.argsort(kind="stable")[::-1]

    # Exclude movies the user has already liked.
    recommended_movies = [
        titles[index] for index in ranked_indices if index not in liked_index_set
    ]

    return recommended_movies[:top_n]

# Titles the user has already enjoyed; these seed the recommendation.
user_likes = ['The Shawshank Redemption', 'The Godfather']

# Rank the rest of the catalogue against those seeds.
recommendations = recommend_movies(user_likes, movies_data)

# Emit the heading followed by one recommended title per line.
report_lines = ["Recommended movies:", *map(str, recommendations)]
print("\n".join(report_lines))


Recommended movies:
Forrest Gump
Pulp Fiction
The Matrix
Interstellar
Titanic
