In [None]:
import pandas as pd
import numpy as np


In [None]:
# Load the raw event log; later cells read its 'event', 'visitorid' and 'itemid' columns.
events_path = 'events.csv'
df = pd.read_csv(events_path)

In [None]:
# Implicit-feedback strength per event type:
# a view signals weak interest, an add-to-cart medium, a transaction strong.
event_weight = dict(view=1, addtocart=2, transaction=3)

# Translate each event label into its weight and store it on the frame.
# Labels that are not keys of event_weight come out of .map() as NaN.
interaction_scores = df['event'].map(event_weight)
df['interaction'] = interaction_scores


In [None]:
# Collapse repeated events for the same (visitor, item) pair into a single row
# whose 'interaction' value is the sum of the individual event weights.
pair_groups = df.groupby(['visitorid', 'itemid'])
pair_totals = pair_groups['interaction'].sum()
interaction_df = pair_totals.reset_index()


In [None]:
# interaction_df holds one row per (visitor, item) pair, so these counts are
# the number of distinct items per visitor and distinct visitors per item.
user_counts = interaction_df['visitorid'].value_counts()
item_counts = interaction_df['itemid'].value_counts()

# Visitors / items that reach the minimum of 20 distinct counterparts
active_users = user_counts.loc[user_counts.ge(20)].index
popular_items = item_counts.loc[item_counts.ge(20)].index

# Keep only the rows where both the visitor and the item pass their threshold.
# The thresholds are evaluated once, on the unfiltered counts.
is_active_user = interaction_df['visitorid'].isin(active_users)
is_popular_item = interaction_df['itemid'].isin(popular_items)
filtered_df = interaction_df.loc[is_active_user & is_popular_item]


In [None]:
# Build the user-item matrix: one row per visitor, one column per item,
# cell value = interaction strength, 0 where the pair never occurred.
# pivot_table aggregates with its default (mean); the input already has a
# single row per (visitor, item) pair, so each cell is just that row's value.
user_item_matrix = pd.pivot_table(
    filtered_df,
    values='interaction',
    index='visitorid',
    columns='itemid',
    fill_value=0,
)


In [None]:
from sklearn.metrics.pairwise import cosine_similarity

# Item-item cosine similarity.
# cosine_similarity compares rows, so flip the matrix to put items on the rows.
item_vectors = user_item_matrix.T
item_similarity = cosine_similarity(item_vectors)


In [None]:
# Wrap the raw similarity array in a DataFrame labelled by item id on both
# axes, so scores can be looked up by item id instead of by position.
item_ids = user_item_matrix.columns
item_similarity_df = pd.DataFrame(data=item_similarity, index=item_ids, columns=item_ids)


In [None]:
# Function to recommend similar items
def recommend_items(item_id, top_n=5, similarity_df=None):
    """Return the ``top_n`` items most similar to ``item_id``.

    Parameters
    ----------
    item_id : hashable
        Column label of the query item in the similarity matrix.
    top_n : int, default 5
        Maximum number of recommendations to return. Must be >= 0.
    similarity_df : pandas.DataFrame, optional
        Item-item similarity matrix labelled by item id on both axes.
        Defaults to the notebook-level ``item_similarity_df``.

    Returns
    -------
    pandas.Series
        Similarity scores indexed by item id, sorted from most to least
        similar. The query item itself is never included. Fewer than
        ``top_n`` entries are returned if not enough other items exist.

    Raises
    ------
    KeyError
        If ``item_id`` is not a column of the similarity matrix.
    ValueError
        If ``top_n`` is negative.
    """
    if top_n < 0:
        raise ValueError(f"top_n must be non-negative, got {top_n}")
    if similarity_df is None:
        similarity_df = item_similarity_df

    scores = similarity_df[item_id]
    # Remove the query item by label rather than by position: when another
    # item ties with it at the top score, the query item is not guaranteed to
    # be first after sorting, so slicing off position 0 could drop a real
    # neighbour and return the query item as its own recommendation.
    candidates = scores.drop(labels=item_id, errors='ignore')
    # A stable sort keeps tied scores in their original order, so the
    # recommendations are deterministic across runs.
    ranked = candidates.sort_values(ascending=False, kind='mergesort')
    return ranked.head(top_n)


In [None]:
# Use the first item id in the similarity matrix as the demo query
sample_item = item_similarity_df.columns[0]
# Display its five nearest items
recommend_items(item_id=sample_item, top_n=5)


Unnamed: 0_level_0,6
itemid,Unnamed: 1_level_1
345004,0.57735
79057,0.57735
287572,0.57735
424932,0.57735
242380,0.57735


In [None]:
# Same query item with a longer list (10 results), to compare against the top-5 output
recommend_items(item_id=sample_item, top_n=10)


Unnamed: 0_level_0,6
itemid,Unnamed: 1_level_1
345004,0.57735
79057,0.57735
287572,0.57735
424932,0.57735
242380,0.57735
295168,0.471405
291036,0.408248
345755,0.408248
272144,0.408248
47353,0.408248
