In [1]:
from recommenders.mfi import MatrixFactorizationImplicit

In [2]:
import numpy as np
from tqdm.auto import tqdm
import scipy.sparse as sps
import matplotlib.pyplot as plt
%matplotlib inline

In [3]:
# Load the pre-split cross-validation data. Files are numbered 1..5 on disk
# (./data/train1.npz ... ./data/train5.npz, and likewise for test), and the
# resulting lists are kept in that order so index k holds fold k + 1.
fold_ids = range(1, 6)
train_sets = [sps.load_npz(f'./data/train{fold}.npz') for fold in fold_ids]
test_sets = [sps.load_npz(f'./data/test{fold}.npz') for fold in fold_ids]

In [4]:
# One separately constructed model per fold (five in total), built with the
# constructor's default arguments.
recommenders = []
for fold_idx in range(5):
    recommenders.append(MatrixFactorizationImplicit())

In [5]:
# Per-fold loss histories; only filled in by the (currently commented-out)
# training loop further down.
train_losses, test_losses = [], []

In [6]:
# Training step, currently disabled: the fitted models are restored from
# ./data/model{i}.pkl in a later cell instead of being retrained here.
# Uncomment to train each fold's recommender on its train split (the matching
# test split is passed alongside) and to record the two losses that train()
# returns in train_losses / test_losses.
# for rec, train, test in zip(recommenders, train_sets, test_sets):
#     train_loss, test_loss = rec.train(train, test)
#     train_losses.append(train_loss)
#     test_losses.append(test_loss)

In [7]:
# Persistence step, currently disabled: saves each fold's model to
# ./data/model{i}.pkl (numbered from 1), the same paths the load cell reads.
# Only needed after the training loop has been re-run.
# for i, rec in enumerate(recommenders, start=1):
#     rec.save(f'./data/model{i}.pkl')

In [8]:
# Restore every fold's previously saved model. Model files are numbered from 1
# while list positions start at 0, hence the + 1 when building the path.
# NOTE(review): assumes load() restores the saved state into the existing
# object; its return value is ignored here - confirm.
for position, model in enumerate(recommenders):
    model_path = f'./data/model{position + 1}.pkl'
    model.load(model_path)

In [9]:
# Containers for the evaluation results. The evaluation loop appends one inner
# list of per-user scores to each of these for every fold.
rec_at_5, rec_at_10 = [], []
ndcg_at_5, ndcg_at_10 = [], []

In [10]:
import utils
from recommenders.mfi import utils as mfi_utils

In [11]:
# Evaluate every fold's model on its held-out test matrix.
#
# For each fold, one score per user row is collected for recall@5, recall@10,
# NDCG@5 and NDCG@10. Afterwards each of the four result lists holds one inner
# list per fold, in fold order.
#
# The result lists are (re)created here rather than relying on an earlier cell,
# so re-running this cell replaces the scores instead of appending a duplicate
# set of folds (which would silently skew every average computed below).
rec_at_5, rec_at_10, ndcg_at_5, ndcg_at_10 = [], [], [], []

for fold, (rec, test) in enumerate(zip(recommenders, test_sets), start=1):
    # Convert to CSR because a single row is fetched per user below.
    # NOTE(review): build_conf_mat presumably returns a user x item matrix
    # derived from the raw test interactions - confirm against mfi utils.
    fold_test = mfi_utils.build_conf_mat(test).tocsr()

    fold_rec_5, fold_rec_10 = [], []
    fold_ndcg_5, fold_ndcg_10 = [], []
    for user in tqdm(range(fold_test.shape[0]), desc=f'fold {fold}'):
        # Request the top 10 once and slice it for the @5 metrics.
        topk = rec.recommend(k=10, user=user)
        actual = fold_test[user]
        fold_rec_5.append(utils.recall_at_k(topk[:5], actual))
        fold_rec_10.append(utils.recall_at_k(topk, actual))
        fold_ndcg_5.append(utils.ndcg_at_k(topk[:5], actual))
        fold_ndcg_10.append(utils.ndcg_at_k(topk, actual))

    # Publish the fold's scores only once the whole fold has been evaluated,
    # so an interrupted run never leaves a partially filled fold behind.
    rec_at_5.append(fold_rec_5)
    rec_at_10.append(fold_rec_10)
    ndcg_at_5.append(fold_ndcg_5)
    ndcg_at_10.append(fold_ndcg_10)

  0%|          | 0/148438 [00:00<?, ?it/s]

  0%|          | 0/148438 [00:00<?, ?it/s]

  0%|          | 0/148438 [00:00<?, ?it/s]

  0%|          | 0/148438 [00:00<?, ?it/s]

  0%|          | 0/148438 [00:00<?, ?it/s]

In [19]:
import pickle

# Persist the raw per-user scores so the summaries below can be recomputed
# without repeating the evaluation. The pickled object is a list in the fixed
# order [recall@5, recall@10, ndcg@5, ndcg@10].
all_metrics = [rec_at_5, rec_at_10, ndcg_at_5, ndcg_at_10]
with open('./data/metrics.pkl', 'wb') as metrics_file:
    pickle.dump(all_metrics, metrics_file)

In [15]:
# Recall@5 summary: the mean over users within each fold, followed by the mean
# and spread of those per-fold means.
print('AVG. RECALL @ 5')
rec_5_avgs = []
for fold, fold_scores in enumerate(rec_at_5, start=1):
    fold_mean = np.mean(fold_scores)
    print(f'\tFOLD {fold}: {fold_mean}')
    rec_5_avgs.append(fold_mean)
# np.std is used with its default ddof=0, i.e. the population standard deviation.
overall_mean, overall_std = np.mean(rec_5_avgs), np.std(rec_5_avgs)
print(f'AVG. OVER FOLDS: {overall_mean} \u00B1 {overall_std}')

AVG. RECALL @ 5
	FOLD 1: 0.17638614366079036
	FOLD 2: 0.17616829631984882
	FOLD 3: 0.1871915122314225
	FOLD 4: 0.17272039592558183
	FOLD 5: 0.18201576306917258
AVG. OVER FOLDS: 0.1788964222413632 ± 0.005107372627724864


In [16]:
# Recall@10 summary: the mean over users within each fold, followed by the mean
# and spread of those per-fold means.
print('AVG. RECALL @ 10')
rec_10_avgs = []
for fold, fold_scores in enumerate(rec_at_10, start=1):
    fold_mean = np.mean(fold_scores)
    print(f'\tFOLD {fold}: {fold_mean}')
    rec_10_avgs.append(fold_mean)
# np.std is used with its default ddof=0, i.e. the population standard deviation.
overall_mean, overall_std = np.mean(rec_10_avgs), np.std(rec_10_avgs)
print(f'AVG. OVER FOLDS: {overall_mean} \u00B1 {overall_std}')

AVG. RECALL @ 10
	FOLD 1: 0.24681410601864756
	FOLD 2: 0.24431465847574663
	FOLD 3: 0.2577641470850341
	FOLD 4: 0.2402187986697141
	FOLD 5: 0.25002586470338695
AVG. OVER FOLDS: 0.24782751499050587 ± 0.0059131907043481185


In [17]:
# NDCG@5 summary: the mean over users within each fold, followed by the mean
# and spread of those per-fold means.
print('AVG. NDCG @ 5')
ndcg_5_avgs = []
for fold, fold_scores in enumerate(ndcg_at_5, start=1):
    fold_mean = np.mean(fold_scores)
    print(f'\tFOLD {fold}: {fold_mean}')
    ndcg_5_avgs.append(fold_mean)
# np.std is used with its default ddof=0, i.e. the population standard deviation.
overall_mean, overall_std = np.mean(ndcg_5_avgs), np.std(ndcg_5_avgs)
print(f'AVG. OVER FOLDS: {overall_mean} \u00B1 {overall_std}')

AVG. NDCG @ 5
	FOLD 1: 0.3315721916927829
	FOLD 2: 0.3361803089961092
	FOLD 3: 0.3476064521306807
	FOLD 4: 0.33267275557732723
	FOLD 5: 0.33961901669804556
AVG. OVER FOLDS: 0.3375301450189891 ± 0.005775964806239298


In [18]:
# NDCG@10 summary: the mean over users within each fold, followed by the mean
# and spread of those per-fold means.
print('AVG. NDCG @ 10')
ndcg_10_avgs = []
for fold, fold_scores in enumerate(ndcg_at_10, start=1):
    fold_mean = np.mean(fold_scores)
    print(f'\tFOLD {fold}: {fold_mean}')
    ndcg_10_avgs.append(fold_mean)
# np.std is used with its default ddof=0, i.e. the population standard deviation.
overall_mean, overall_std = np.mean(ndcg_10_avgs), np.std(ndcg_10_avgs)
print(f'AVG. OVER FOLDS: {overall_mean} \u00B1 {overall_std}')

AVG. NDCG @ 10
	FOLD 1: 0.36782502961300007
	FOLD 2: 0.37296343555370093
	FOLD 3: 0.38314175391352934
	FOLD 4: 0.3677694763559074
	FOLD 5: 0.3756627568133843
AVG. OVER FOLDS: 0.3734724904499044 ± 0.005709102262528303
