In [5]:
import pandas as pd
from sklearn.metrics.pairwise import cosine_similarity
from sklearn.feature_extraction.text import CountVectorizer

In [6]:
# Toy catalogue: five movies, each with an id, a title and pipe-separated genres.
# Column order (ItemID, Title, Genre) follows the keyword order below.
data = dict(
    ItemID=[1, 2, 3, 4, 5],
    Title=['The Matrix', 'Inception', 'Interstellar', 'The Dark Knight', 'The Prestige'],
    Genre=['Action|Sci-Fi', 'Action|Thriller', 'Drama|Sci-Fi', 'Action|Crime', 'Drama|Mystery'],
)

# One row per movie, one column per key of `data`.
df = pd.DataFrame.from_dict(data)

In [7]:
# Build the single text field that the vectorizer will tokenize for each item
def combine_features(row):
    """Join a row's ``Title`` and ``Genre`` values into one space-separated string.

    ``row`` is one row of the catalogue (anything supporting ``row['Title']``
    and ``row['Genre']`` lookups that return strings).
    """
    return " ".join((row['Title'], row['Genre']))

# Apply row by row and store the result as a new column on the same frame.
df['CombinedFeatures'] = df.apply(combine_features, axis="columns")

In [8]:
# Bag-of-words step: one row per item, one column per token found in the
# combined title/genre text.
count_vectorizer = CountVectorizer()
corpus = df['CombinedFeatures']
count_matrix = count_vectorizer.fit_transform(corpus)

# Item-to-item cosine similarity. With a single argument the matrix is compared
# against itself, so cosine_sim[i][j] is the similarity of item i and item j.
cosine_sim = cosine_similarity(count_matrix)

In [9]:
# Recommendation function
def recommend(item_title, df, cosine_sim, top_n=3):
    """Return the titles of the items most similar to ``item_title``.

    Parameters
    ----------
    item_title : str
        Value to look up in ``df['Title']`` (exact match). If several rows
        carry the same title, the first one is used as the query item.
    df : pandas.DataFrame
        Catalogue with a ``'Title'`` column. Rows are addressed by *position*,
        so row ``i`` of ``df`` must correspond to row ``i`` of ``cosine_sim``;
        the index labels of ``df`` are irrelevant.
    cosine_sim : 2-D array-like
        Similarity scores; ``cosine_sim[i]`` holds the scores of item ``i``
        against every item.
    top_n : int, default 3
        Maximum number of recommendations to return.

    Returns
    -------
    pandas.Series
        Slice of ``df['Title']`` ordered from most to least similar. Items with
        equal scores keep their catalogue order. The query item itself is never
        included.

    Raises
    ------
    IndexError
        If no row of ``df`` has the requested title.
    ValueError
        If ``top_n`` is negative.
    """
    if top_n < 0:
        raise ValueError(f"top_n must be non-negative, got {top_n}")

    # Resolve the title to a row *position*. Using the index label here would
    # address the wrong row of cosine_sim whenever df has a non-default index
    # (e.g. after filtering or set_index).
    pos = next(
        (i for i, title in enumerate(df['Title']) if title == item_title),
        None,
    )
    if pos is None:
        raise IndexError(f"Title {item_title!r} not found in df['Title']")

    # Similarity of the query item to every item in the catalogue
    scores = cosine_sim[pos]

    # Exclude the query item explicitly instead of assuming it sorts first:
    # a duplicate entry (or a tie at 1.0) could otherwise push it out of
    # first place and leak it into the results.
    candidates = [i for i in range(len(scores)) if i != pos]

    # Stable sort: highest score first, ties stay in catalogue order
    candidates.sort(key=lambda i: scores[i], reverse=True)

    # Positional lookup of the best `top_n` candidates
    return df['Title'].iloc[candidates[:top_n]]

In [10]:
# Demo: look up the nearest neighbours of one title and print them
item_to_recommend = 'The Matrix'
recommendations = recommend(item_to_recommend, df, cosine_sim)

# Header line followed by the resulting Series, each on its own line
print(
    f"Recommendations for '{item_to_recommend}':",
    recommendations,
    sep="\n",
)

Recommendations for 'The Matrix':
2       Interstellar
3    The Dark Knight
1          Inception
Name: Title, dtype: object
