In [1]:
# ===== 1. Imports =====
import pandas as pd
import numpy as np
from sklearn.preprocessing import LabelEncoder
from scipy.sparse import coo_matrix
from sklearn.neighbors import NearestNeighbors
from sklearn.model_selection import train_test_split

In [3]:
# ===== 2. Load and Filter Data =====
df = pd.read_csv('events.csv')

In [5]:
# Filter events to only 'view' and 'transaction'
df = df[df['event'].isin(['view', 'transaction'])]

# Map interaction weights: transaction = 5, view = 1
df['weight'] = df['event'].map({'view': 1, 'transaction': 5})

In [7]:
# ===== 3. Encode Users and Items =====
user_encoder = LabelEncoder()
item_encoder = LabelEncoder()

df['user'] = user_encoder.fit_transform(df['visitorid'])
df['item'] = item_encoder.fit_transform(df['itemid'])

In [9]:
# ===== 4. Filter Active Users and Popular Items =====
min_user_interactions = 10
min_item_interactions = 20

active_users = df['user'].value_counts()[lambda x: x >= min_user_interactions].index
popular_items = df['item'].value_counts()[lambda x: x >= min_item_interactions].index

df = df[df['user'].isin(active_users) & df['item'].isin(popular_items)]

In [11]:
# ===== 5. Train-Test Split =====
train_df, test_df = train_test_split(df, test_size=0.2, random_state=42)

In [13]:
# ===== 6. Build Sparse User-Item Matrix =====
train_matrix = coo_matrix(
    (train_df['weight'], (train_df['user'], train_df['item']))
).tocsr()

In [15]:
# ===== 7. Fit Nearest Neighbors Model =====
nn_model = NearestNeighbors(metric='cosine', algorithm='brute', n_neighbors=20, n_jobs=-1)
nn_model.fit(train_matrix)    

In [17]:
# ===== 8. Define Recommendation Function =====
def recommend_items_ann(user_id, k=5, model=None, interactions_matrix=None):
    distances, indices = model.kneighbors(interactions_matrix[user_id], n_neighbors=6)
    similar_users = indices.flatten()[1:]  # Exclude self

    user_interactions = set(interactions_matrix[user_id].indices)
    recommendations = {}

    for sim_user in similar_users:
        for item in interactions_matrix[sim_user].indices:
            if item not in user_interactions:
                recommendations[item] = recommendations.get(item, 0) + 1

    recommended_items = sorted(recommendations.items(), key=lambda x: -x[1])
    return [item for item, score in recommended_items[:k]]

In [19]:
# ===== 9. Create Ground Truth for Evaluation =====
test_group = test_df.groupby('user')['item'].apply(set)
test_group = test_group[test_group.map(len) >= 2]  # keep only users with ≥2 test items

In [21]:
# ===== 10. Evaluation Functions =====
def precision_at_k(pred, actual, k):
    return len(set(pred[:k]) & set(actual)) / k if actual else 0

def recall_at_k(pred, actual, k):
    return len(set(pred[:k]) & set(actual)) / len(actual) if actual else 0

In [27]:
# ===== 11. Run Evaluation =====
precision_list = []
recall_list = []

sample_users = test_group[test_group.map(len) >= 1].sample(n=100, random_state=42).index

for user in sample_users:
    actual = test_group[user]
    pred = recommend_items_ann(user, k=5, model=nn_model, interactions_matrix=train_matrix)

    if len(actual) == 0 or len(pred) == 0:
        continue  # skip users with no test items or no recommendations

    precision_list.append(precision_at_k(pred, actual, 5))
    recall_list.append(recall_at_k(pred, actual, 5))

In [29]:
# ===== 12. Show Results =====
print(f'Average Precision@5: {np.mean(precision_list):.4f}')
print(f'Average Recall@5: {np.mean(recall_list):.4f}')

Average Precision@5: 0.0323
Average Recall@5: 0.0577


In [31]:
print(f"Evaluated {len(precision_list)} users with valid predictions.")


Evaluated 99 users with valid predictions.
