In [1]:
import pandas as pd
import numpy as np
import json
from scipy import sparse as sp
import implicit
from tqdm import tqdm

In [2]:
interactions_df = pd.read_csv('interactions.csv')

In [3]:
def average_precision(actual, recommended, k=30):
    ap_sum = 0
    hits = 0
    for i in range(k):
        product_id = recommended[i] if i < len(recommended) else None
        if product_id is not None and product_id in actual:
            hits += 1
            ap_sum += hits / (i + 1)
    return ap_sum / k


def normalized_average_precision(actual, recommended, k=30):
    actual = set(actual)
    if len(actual) == 0:
        return 0.0

    ap = average_precision(actual, recommended, k=k)
    ap_ideal = average_precision(actual, list(actual)[:k], k=k)
    return ap / ap_ideal

In [4]:
def make_coo_row(value):
    values = [1.0 for _ in value]
    return sp.coo_matrix(
        (np.array(values).astype(np.float32), ([0] * len(value), value)), shape=(1, 18495),
    )

In [5]:
interactions_df.head()

Unnamed: 0,row,col,data
0,0,3568,1.0
1,0,3827,1.0
2,0,4844,1.0
3,0,5734,1.0
4,0,6518,1.0


In [11]:
tops = train.groupby('col').count().sort_values('data', ascending=False)
tops

Unnamed: 0_level_0,row,data
col,Unnamed: 1_level_1,Unnamed: 2_level_1
8638,722,722
17955,709,709
5113,698,698
10227,651,651
8982,640,640
...,...,...
14955,1,1
4173,1,1
4150,1,1
14965,1,1


In [13]:
top_items = list(np.array(tops.index)[:30])
top_items

[8638,
 17955,
 5113,
 10227,
 8982,
 197,
 10466,
 4657,
 12469,
 4361,
 3922,
 10067,
 6289,
 5297,
 2301,
 7581,
 5645,
 4634,
 15892,
 3806,
 8666,
 3565,
 3572,
 187,
 9461,
 4844,
 5562,
 1212,
 8483,
 8481]

In [7]:
msk = np.random.rand(len(interactions_df)) < 0.8
train = interactions_df[msk]
test = interactions_df[~msk]

In [14]:
gt_items = {}
for item in test.values:
    if item[0] in gt_items:
        gt_items[item[0]].append(item[1])
    else:
        gt_items[item[0]] = [item[1]]

In [16]:
scores = []
for key, value in gt_items.items():
    items = value
    ap = normalized_average_precision(items, top_items)
    scores.append(ap)
np.mean(scores)    

0.009042792326834397

In [21]:
rows = []
gt_items = {}
for item in train.values:
    if item[0] in gt_items:
        gt_items[item[0]].append(item[1])
    else:
        gt_items[item[0]] = [item[1]]
for i in range(1, 30911):
    rows.append(make_coo_row(gt_items.get(i, [])))
rows[0]

<1x18495 sparse matrix of type '<class 'numpy.float32'>'
	with 5 stored elements in COOrdinate format>

In [22]:
X_sparse = sp.vstack(rows).tocsr()

In [23]:
X_sparse.shape

(30910, 18495)

In [44]:
model = implicit.als.AlternatingLeastSquares(factors=16, regularization=0.0, iterations=8)
model.fit(X_sparse)

  0%|          | 0/8 [00:00<?, ?it/s]

In [26]:
rows = []
gt_items_test = {}
for item in test.values:
    if item[0] in gt_items_test:
        gt_items_test[item[0]].append(item[1])
    else:
        gt_items_test[item[0]] = [item[1]]
for i in range(1, 30911):
    rows.append(make_coo_row(gt_items_test.get(i, [])))
    
rows = []
gt_items_train = {}
for item in train.values:
    if item[0] in gt_items_train:
        gt_items_train[item[0]].append(item[1])
    else:
        gt_items_train[item[0]] = [item[1]]
for i in range(1, 30911):
    rows.append(make_coo_row(gt_items_train.get(i, [])))

In [46]:
m_ap = []
for key, value in tqdm(gt_items_test.items()):
    row_sparse = make_coo_row(gt_items_train.get(key, [])).tocsr()
    recommended_items = model.recommend(int(key - 1), row_sparse, N=30, filter_already_liked_items=False, recalculate_user=False)
    m_ap.append(normalized_average_precision(value, recommended_items[0], k=30))
print(np.mean(m_ap))

100%|███████████████████████████████████| 20363/20363 [00:15<00:00, 1350.70it/s]

0.015480355866375032





In [53]:
model = implicit.nearest_neighbours.CosineRecommender(K=3)
model.fit(X_sparse)



  0%|          | 0/18495 [00:00<?, ?it/s]

In [54]:
m_ap = []
for key, value in tqdm(gt_items_test.items()):
    row_sparse = make_coo_row(gt_items_train.get(key, [])).tocsr()
    recommended_items = model.recommend(int(key - 1), row_sparse, N=30, filter_already_liked_items=True, recalculate_user=False)
    m_ap.append(normalized_average_precision(value, recommended_items[0], k=30))
print(np.mean(m_ap))

100%|███████████████████████████████████| 20363/20363 [00:03<00:00, 5223.66it/s]

0.027595435181740343



