In [10]:
from src.KNN.ItemKNNCFRecommender import ItemKNNCFRecommender
from src.Base.Evaluation.Evaluator import EvaluatorHoldout
from src.MatrixFactorization.PureSVDRecommender import PureSVDRecommender
from src.KNN.ItemKNNCBFRecommender import ItemKNNCBFRecommender
from src.KNN.ItemKNNSimilarityHybridRecommender import ItemKNNSimilarityHybridRecommender

import numpy as np
from operator import itemgetter

import pandas as pd  # data processing, CSV file I/O (e.g. pd.read_csv)

data = pd.read_csv("in/data_train.csv")

import scipy.sparse as sps

userList = data['row'].tolist()
itemList = data['col'].tolist()
ratingList = data['data'].tolist()

URM_all = sps.coo_matrix((ratingList, (userList, itemList)))
URM_all = URM_all.tocsr()

from src.Data_manager.split_functions.split_train_validation_random_holdout import split_train_in_two_percentage_global_sample

In [2]:
URM_train, URM_validation = split_train_in_two_percentage_global_sample(URM_all, train_percentage = 0.80)

evaluator_validation = EvaluatorHoldout(URM_validation, cutoff_list=[10])




In [3]:
MAP=[]

for topK in [10, 50, 100, 200, 500]:
    for shrink in [0, 10, 50, 100, 200, 500]:
        recommender = ItemKNNCFRecommender(URM_train)
        recommender.fit(topK=topK, shrink=shrink)

        result_dict, _ = evaluator_validation.evaluateRecommender(recommender)

        MAP.append(('topK={}'.format(topK),'shrink={}'.format(shrink),result_dict[10]["MAP"]))

ItemKNNCFRecommender: URM Detected 69 (0.87 %) cold users.
ItemKNNCFRecommender: URM Detected 2339 (9.00 %) cold items.
Similarity column 25975 ( 100 % ), 7115.86 column/sec, elapsed time 0.06 min
EvaluatorHoldout: Processed 5655 ( 100.00% ) in 3.36 sec. Users per second: 1682
ItemKNNCFRecommender: URM Detected 69 (0.87 %) cold users.
ItemKNNCFRecommender: URM Detected 2339 (9.00 %) cold items.
Similarity column 25975 ( 100 % ), 7403.97 column/sec, elapsed time 0.06 min
EvaluatorHoldout: Processed 5655 ( 100.00% ) in 3.35 sec. Users per second: 1688
ItemKNNCFRecommender: URM Detected 69 (0.87 %) cold users.
ItemKNNCFRecommender: URM Detected 2339 (9.00 %) cold items.
Similarity column 25975 ( 100 % ), 7218.11 column/sec, elapsed time 0.06 min
EvaluatorHoldout: Processed 5655 ( 100.00% ) in 3.25 sec. Users per second: 1738
ItemKNNCFRecommender: URM Detected 69 (0.87 %) cold users.
ItemKNNCFRecommender: URM Detected 2339 (9.00 %) cold items.
Similarity column 25975 ( 100 % ), 7543.62 col

In [4]:
max_tuple = max(MAP, key=itemgetter(2))
print('The best combination for the ICF recommender is ({},{}), with a MAP = {}'.format(max_tuple[0],max_tuple[1], max_tuple[2]))

The best combination for the ICF recommender is (topK=100,shrink=50), with a MAP = 0.052242857053749196


In [12]:
MAP=[]

for num_factors in np.logspace(1,15,10, base=2).astype(int):
    recommender = PureSVDRecommender(URM_train)
    recommender.fit(num_factors = num_factors)

    result_dict, _ = evaluator_validation.evaluateRecommender(recommender)

    MAP.append(('number of factors={}'.format(num_factors),result_dict[10]["MAP"]))

PureSVDRecommender: URM Detected 69 (0.87 %) cold users.
PureSVDRecommender: URM Detected 2339 (9.00 %) cold items.
PureSVDRecommender: Computing SVD decomposition...
PureSVDRecommender: Computing SVD decomposition... Done!
EvaluatorHoldout: Processed 5655 ( 100.00% ) in 4.66 sec. Users per second: 1213
PureSVDRecommender: URM Detected 69 (0.87 %) cold users.
PureSVDRecommender: URM Detected 2339 (9.00 %) cold items.
PureSVDRecommender: Computing SVD decomposition...
PureSVDRecommender: Computing SVD decomposition... Done!
EvaluatorHoldout: Processed 5655 ( 100.00% ) in 4.38 sec. Users per second: 1292
PureSVDRecommender: URM Detected 69 (0.87 %) cold users.
PureSVDRecommender: URM Detected 2339 (9.00 %) cold items.
PureSVDRecommender: Computing SVD decomposition...
PureSVDRecommender: Computing SVD decomposition... Done!
EvaluatorHoldout: Processed 5655 ( 100.00% ) in 4.46 sec. Users per second: 1269
PureSVDRecommender: URM Detected 69 (0.87 %) cold users.
PureSVDRecommender: URM Dete

In [14]:
max_tuple = max(MAP, key=itemgetter(1))
print('The best combination for the PureSVD recommender is ({}), with a MAP = {}'.format(max_tuple[0],max_tuple[1]))

The best combination for the PureSVD recommender is (number of factors=1290), with a MAP = 0.03242241064648013


In [3]:
metadata = pd.read_csv("in/data_ICM_title_abstract.csv")

itemICMList = metadata['row'].tolist()
featureList = metadata['col'].tolist()
weightList = metadata['data'].tolist()

ICM_all = sps.coo_matrix((weightList, (itemICMList, featureList)))
ICM_all = ICM_all.tocsr()



In [4]:
MAP=[]

for topK in [10, 50, 100, 200, 500]:
    for shrink in [0, 10, 50, 100, 200, 500]:
        recommender = ItemKNNCBFRecommender(URM_train=URM_train, ICM_train=ICM_all)
        recommender.fit(topK=topK, shrink=shrink)

        result_dict, _ = evaluator_validation.evaluateRecommender(recommender)

        MAP.append(('topK={}'.format(topK),'shrink={}'.format(shrink),result_dict[10]["MAP"]))

ItemKNNCBFRecommender: URM Detected 59 (0.74 %) cold users.
ItemKNNCBFRecommender: URM Detected 2355 (9.07 %) cold items.
Similarity column 25975 ( 100 % ), 4912.75 column/sec, elapsed time 0.09 min
EvaluatorHoldout: Processed 5623 ( 100.00% ) in 3.14 sec. Users per second: 1792
ItemKNNCBFRecommender: URM Detected 59 (0.74 %) cold users.
ItemKNNCBFRecommender: URM Detected 2355 (9.07 %) cold items.
Similarity column 25975 ( 100 % ), 4869.25 column/sec, elapsed time 0.09 min
EvaluatorHoldout: Processed 5623 ( 100.00% ) in 3.04 sec. Users per second: 1849
ItemKNNCBFRecommender: URM Detected 59 (0.74 %) cold users.
ItemKNNCBFRecommender: URM Detected 2355 (9.07 %) cold items.
Similarity column 25975 ( 100 % ), 5128.03 column/sec, elapsed time 0.08 min
EvaluatorHoldout: Processed 5623 ( 100.00% ) in 3.15 sec. Users per second: 1788
ItemKNNCBFRecommender: URM Detected 59 (0.74 %) cold users.
ItemKNNCBFRecommender: URM Detected 2355 (9.07 %) cold items.
Similarity column 25975 ( 100 % ), 496

In [5]:
max_tuple = max(MAP, key=itemgetter(2))
print('The best combination for the CBF recommender is ({},{}), with a MAP = {}'.format(max_tuple[0],max_tuple[1], max_tuple[2]))

The best combination for the CBF recommender is (topK=500,shrink=10), with a MAP = 0.026158480099214582


In [8]:
MAP=[]

Similarity_1 = ItemKNNCFRecommender(URM_train=URM_train)
Similarity_1.fit(topK=100, shrink=50)
Similarity_1 = Similarity_1.W_sparse
Similarity_2 = ItemKNNCBFRecommender(URM_train=URM_train, ICM_train=ICM_all)
Similarity_2.fit(topK=500, shrink=10)
Similarity_2 = Similarity_2.W_sparse

for topK in [10, 50, 100, 200, 500]:
    for alpha in np.linspace(start=0.0,stop=1.0,num=10):
        recommender = ItemKNNSimilarityHybridRecommender(URM_train=URM_train, Similarity_1=Similarity_1, Similarity_2=Similarity_2)
        recommender.fit(topK=topK, alpha=alpha)

        result_dict, _ = evaluator_validation.evaluateRecommender(recommender)

        MAP.append(('topK={}'.format(topK),'alpha={}'.format(alpha),result_dict[10]["MAP"]))


ItemKNNCFRecommender: URM Detected 59 (0.74 %) cold users.
ItemKNNCFRecommender: URM Detected 2355 (9.07 %) cold items.
Similarity column 25975 ( 100 % ), 7225.40 column/sec, elapsed time 0.06 min
ItemKNNCBFRecommender: URM Detected 59 (0.74 %) cold users.
ItemKNNCBFRecommender: URM Detected 2355 (9.07 %) cold items.
Similarity column 25975 ( 100 % ), 4197.59 column/sec, elapsed time 0.10 min
ItemKNNSimilarityHybridRecommender: URM Detected 59 (0.74 %) cold users.
ItemKNNSimilarityHybridRecommender: URM Detected 2355 (9.07 %) cold items.
EvaluatorHoldout: Processed 5623 ( 100.00% ) in 3.06 sec. Users per second: 1838
ItemKNNSimilarityHybridRecommender: URM Detected 59 (0.74 %) cold users.
ItemKNNSimilarityHybridRecommender: URM Detected 2355 (9.07 %) cold items.
EvaluatorHoldout: Processed 5623 ( 100.00% ) in 3.24 sec. Users per second: 1736
ItemKNNSimilarityHybridRecommender: URM Detected 59 (0.74 %) cold users.
ItemKNNSimilarityHybridRecommender: URM Detected 2355 (9.07 %) cold items

In [9]:
max_tuple = max(MAP, key=itemgetter(2))
print('The best combination for the hybrid recommender is ({},{}), with a MAP = {}'.format(max_tuple[0],max_tuple[1], max_tuple[2]))

The best combination for the hybrid recommender is (topK=500,alpha=0.6666666666666666), with a MAP = 0.06221448623349726
