In [9]:
import pandas as pd
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.metrics.pairwise import linear_kernel

In [10]:
# Toy catalogue used throughout the notebook: three parallel lists, where
# position i in each list describes the same movie.
movies_data = dict(
    Title=['Movie 1', 'Movie 2', 'Movie 3', 'Movie 4', 'Movie 5'],
    Genre=['Action', 'Drama', 'Comedy', 'Action', 'Drama'],
    Description=[
        'Exciting action-packed movie with lots of explosions.',
        'A touching drama about human relationships.',
        'Laugh-out-loud comedy with hilarious moments.',
        'Thrilling action movie with intense car chases.',
        'Heartwarming drama that explores the meaning of family.',
    ],
)

# One row per movie; columns follow the key order above
# (Title, Genre, Description) and the index is the default 0..4 range.
movies_df = pd.DataFrame(data=movies_data)

In [11]:
def content_based_recommendation(user_preference, df, vectorizer, similarity_matrix, top_n=None):
    """Rank the titles in ``df`` by similarity to a free-text preference.

    Parameters
    ----------
    user_preference : str
        Free-text description of what the user is looking for.
    df : pandas.DataFrame
        Frame with a ``'Title'`` column. Row ``i`` (by position) must describe
        the same movie as row ``i`` of ``similarity_matrix``.
    vectorizer
        Object whose ``transform([text])`` returns the preference as a row
        vector in the same feature space as ``similarity_matrix`` (in this
        notebook, the fitted ``TfidfVectorizer``).
    similarity_matrix
        Despite its name, this is the per-movie *feature* matrix (the notebook
        passes the TF-IDF matrix of the descriptions), not a precomputed
        movie-to-movie similarity matrix. The name is kept for compatibility
        with existing callers.
    top_n : int or None, optional
        Maximum number of titles to return. ``None`` (the default) returns the
        full ranking, which is the original behaviour.

    Returns
    -------
    pandas.Series
        Titles ordered from most to least similar, keeping the index labels
        they have in ``df``.

    Raises
    ------
    ValueError
        If ``top_n`` is negative.
    """
    if top_n is not None and top_n < 0:
        raise ValueError("top_n must be a non-negative integer or None")

    # Project the preference text into the vectorizer's feature space.
    user_vector = vectorizer.transform([user_preference])

    # linear_kernel is a plain dot product; it equals cosine similarity only
    # when both operands are L2-normalised (TfidfVectorizer's default output).
    scores = linear_kernel(user_vector, similarity_matrix).flatten()

    # Positions of the movies from highest to lowest score. Kept as
    # argsort()[::-1] so that the order among tied scores is unchanged.
    ranked_positions = scores.argsort()[::-1]

    # Optionally keep only the best top_n positions.
    if top_n is not None:
        ranked_positions = ranked_positions[:top_n]

    # The positions are positional, hence iloc rather than loc.
    return df['Title'].iloc[ranked_positions]

In [12]:
# Build the (still unfitted) TF-IDF vectorizer. Passing stop_words='english'
# selects scikit-learn's built-in English stop-word list.
tfidf_vectorizer = TfidfVectorizer(
    stop_words='english',
)



In [13]:
# Learn the vocabulary from the movie descriptions and encode them in one
# step. The result is passed to content_based_recommendation below as its
# per-movie feature matrix.
movie_descriptions = movies_df['Description']
tfidf_matrix = tfidf_vectorizer.fit_transform(movie_descriptions)

In [14]:
# Free-text query describing what the user wants to watch.
user_preference = 'Exciting action movie with car chases.'

# Rank every movie title against the query. Arguments are passed by keyword
# to make clear that `similarity_matrix` receives the TF-IDF feature matrix.
recommendations = content_based_recommendation(
    user_preference=user_preference,
    df=movies_df,
    vectorizer=tfidf_vectorizer,
    similarity_matrix=tfidf_matrix,
)


In [15]:
# Show the query followed by the ranked titles, one item per line.
print(
    f"User Preference: {user_preference}",
    "Recommended Movies:",
    recommendations,
    sep="\n",
)

User Preference: Exciting action movie with car chases.
Recommended Movies:
3    Movie 4
0    Movie 1
4    Movie 5
2    Movie 3
1    Movie 2
Name: Title, dtype: object
