In [1]:
import numpy as np
import cornac
from cornac.data import Reader
from cornac.datasets import netflix
from cornac.eval_methods import RatioSplit
from cornac.models import MF

## Train a base recommender model

In [2]:
# Load the "small" Netflix feedback through a Reader configured with
# bin_threshold=1.0 (the recorded run reports min = max rating = 1.0,
# i.e. the loaded feedback is binary).
feedback_reader = Reader(bin_threshold=1.0)
data = netflix.load_feedback(variant="small", reader=feedback_reader)

# Hold out 10% of the feedback for testing; the split is seeded so the
# partition is reproducible across runs.
ratio_split = RatioSplit(
    data=data,
    test_size=0.1,
    rating_threshold=1.0,
    exclude_unknowns=True,
    seed=123,
    verbose=True,
)

# Base recommender: matrix factorization with 50 latent factors and no bias
# terms. Its learned embeddings are what the ANN indexes below are built on.
mf = MF(
    k=50,
    max_iter=25,
    learning_rate=0.01,
    lambda_reg=0.02,
    use_bias=False,
    seed=123,
    verbose=True,
)

# Ranking metrics reported for the base model.
auc = cornac.metrics.AUC()
rec_20 = cornac.metrics.Recall(k=20)

# Running the experiment trains `mf` and evaluates it on the held-out split.
experiment = cornac.Experiment(
    eval_method=ratio_split,
    models=[mf],
    metrics=[auc, rec_20],
    user_based=True,
)
experiment.run()

rating_threshold = 1.0
exclude_unknowns = True
---
Training data:
Number of users = 9986
Number of items = 4921
Number of ratings = 547022
Max rating = 1.0
Min rating = 1.0
Global mean = 1.0
---
Test data:
Number of users = 9986
Number of items = 4921
Number of ratings = 60747
Number of unknown users = 0
Number of unknown items = 0
---
Total users = 9986
Total items = 4921

[MF] Training started!


  0%|          | 0/25 [00:00<?, ?it/s]

Optimization finished!

[MF] Evaluation started!


Ranking:   0%|          | 0/8233 [00:00<?, ?it/s]


TEST:
...
   |    AUC | Recall@20 | Train (s) | Test (s)
-- + ------ + --------- + --------- + --------
MF | 0.8530 |    0.0669 |    0.9041 |   6.4182



## Test setting

In [3]:
# Benchmark configuration: K items are retrieved per query and N queries are
# issued in total.
K = 20
N = 10000

# Draw the N query users from the trained model's user ids with a dedicated,
# seeded generator so the benchmark is reproducible. `choice` samples with
# replacement by default, so the same user may appear more than once.
user_sampler = np.random.RandomState(123)
test_users = user_sampler.choice(mf.user_ids, size=N)

### Time taken by the base model

In [4]:
%%time
mf_recs = []
for uid in test_users:
    mf_recs.append(mf.recommend(uid, k=K))

CPU times: user 1min 17s, sys: 15.3 ms, total: 1min 17s
Wall time: 1.63 s


In [5]:
def compute_recall(retrieved_neighbors, true_neighbors):
    """Return the fraction of ground-truth neighbors that were retrieved.

    The two arguments are paired element-wise: the i-th retrieved list is
    compared against the i-th ground-truth list. The result is the total
    number of ground-truth items found, divided by the total number of
    ground-truth items across all pairs.

    The denominator is derived from the inputs themselves rather than from
    notebook-level constants, so the function gives a correct answer for any
    number of queries and any list length (e.g. a 10-user subset).

    Parameters
    ----------
    retrieved_neighbors : iterable of iterables
        Items returned by the approximate search, one collection per query.
    true_neighbors : iterable of iterables
        Ground-truth items, one collection per query, in the same order.

    Returns
    -------
    float
        Recall in [0, 1]; 0.0 when there are no ground-truth items at all.
    """
    hits = 0
    relevant = 0
    for retrieved, true in zip(retrieved_neighbors, true_neighbors):
        true_items = set(true)
        hits += len(true_items.intersection(retrieved))
        relevant += len(true_items)
    # Guard against empty input instead of raising ZeroDivisionError.
    if relevant == 0:
        return 0.0
    return hits / relevant

## Test performance

In [40]:
import time
from cornac.models import AnnoyANN
from cornac.models import FaissANN
from cornac.models import HNSWLibANN
from cornac.models import ScaNNANN

# Every index wraps the same trained MF model and shares the same seed and
# thread setting; only the index-specific tuning parameters differ.
shared_ann_kwargs = {"model": mf, "seed": 123, "num_threads": -1}

anns = [
    AnnoyANN(n_trees=20, search_k=500, **shared_ann_kwargs),
    FaissANN(nlist=100, nprobe=50, use_gpu=False, **shared_ann_kwargs),
    HNSWLibANN(M=16, ef_construction=100, ef=50, **shared_ann_kwargs),
    ScaNNANN(
        partition_params={"num_leaves": 100, "num_leaves_to_search": 50},
        score_params={
            "dimensions_per_block": 2,
            "anisotropic_quantization_threshold": 0.2,
        },
        rescore_params={"reordering_num_neighbors": 100},
        **shared_ann_kwargs,
    ),
]

# Benchmark every ANN index: time the index build, time top-K retrieval for
# all query users, and measure how many of the base model's recommendations
# (mf_recs) the index recovers.
#
# NOTE(review): the recorded run reports Recall=0.01299 for AnnoyANN while the
# other three indexes are above 0.998. That gap looks more like a
# configuration/metric mismatch than ordinary approximation error -- confirm
# before drawing conclusions from the AnnoyANN row.
for ann in anns:
    # perf_counter is a monotonic, high-resolution clock intended for measuring
    # short durations; time.time() can jump if the system clock is adjusted.
    start = time.perf_counter()
    ann.build_index()
    index_secs = time.perf_counter() - start

    start = time.perf_counter()
    ann_recs = [ann.recommend(uid, k=K) for uid in test_users]
    retrieval_secs = time.perf_counter() - start

    recall = compute_recall(ann_recs, mf_recs)
    print(f"{ann.name}\t\tIndexing={int(index_secs*1000)}ms\t\tRetrieval={int(retrieval_secs*1000)}ms\t\tRecall={recall:.5f}")

AnnoyANN		Indexing=30ms		Retrieval=590ms		Recall=0.01299
FaissANN		Indexing=16ms		Retrieval=897ms		Recall=0.99938
HNSWLibANN		Indexing=91ms		Retrieval=215ms		Recall=0.99874
ScaNNANN		Indexing=107ms		Retrieval=512ms		Recall=0.99997


## Test save/load

In [7]:
# Round-trip check: save each index, load it back, and verify the reloaded
# index returns exactly the same recommendations for a small batch of users.
sample_users = test_users[:10]
for ann in anns:
    saved_path = ann.save("save_dir")
    loaded_ann = ann.load(saved_path)

    recs_before = ann.recommend_batch(sample_users, k=K)
    recs_after = loaded_ann.recommend_batch(sample_users, k=K)

    print(ann.name, np.array_equal(recs_before, recs_after))

AnnoyANN True
FaissANN True
HNSWLibANN True
ScaNNANN True
