In [2]:
from src.Base.Similarity.Compute_Similarity import SimilarityFunction
from src.KNN.ItemKNNCBFRecommender import ItemKNNCBFRecommender
from src.KNN.ItemKNNCFRecommender import ItemKNNCFRecommender
from src.KNN.ItemKNNCBFCFSimilarityHybridRecommender import ItemKNNCBFCFSimilarityHybridRecommender
from src.Base.Evaluation.Evaluator import EvaluatorHoldout
from src.Utils.load_URM import load_URM
from src.Utils.load_ICM import load_ICM
from operator import itemgetter
import numpy as np

URM_all = load_URM("../in/data_train.csv")
ICM_all = load_ICM("../in/data_ICM_title_abstract.csv")
from src.Data_manager.split_functions.split_train_validation_random_holdout import split_train_in_two_percentage_global_sample

In [3]:
URM_train, URM_validation = split_train_in_two_percentage_global_sample(URM_all, train_percentage = 0.80)

evaluator_validation = EvaluatorHoldout(URM_validation, cutoff_list=[10])




In [4]:
MAP_CBF=[]
MAP_CF=[]

for similarity in ['cosine','pearson','jaccard','tanimoto','adjusted']:

    for topK in [10, 50, 100, 200, 500]:
        for shrink in [0, 10, 50, 100, 200, 500]:
            recommender = ItemKNNCFRecommender(URM_train)
            recommender.fit(topK=topK, shrink=shrink, similarity=similarity, normalize=False)

            result_dict, _ = evaluator_validation.evaluateRecommender(recommender)

            MAP_CF.append(('similarity={}'.format(similarity),'topK={}'.format(topK),'shrink={}'.format(shrink),result_dict[10]["MAP"]))

            print(MAP_CF[-1])

    for topK in [10, 50, 100, 200, 500]:
        for shrink in [0, 10, 50, 100, 200, 500]:
            recommender = ItemKNNCBFRecommender(URM_train=URM_train, ICM_train=ICM_all)
            recommender.fit(topK=topK, shrink=shrink, similarity=similarity, normalize=False)

            result_dict, _ = evaluator_validation.evaluateRecommender(recommender)

            MAP_CBF.append(('similarity={}'.format(similarity),'topK={}'.format(topK),'shrink={}'.format(shrink),result_dict[10]["MAP"]))
            print(MAP_CBF[-1])

ItemKNNCFRecommender: URM Detected 64 (0.81 %) cold users.
ItemKNNCFRecommender: URM Detected 2397 (9.23 %) cold items.
Similarity column 25975 ( 100 % ), 10486.34 column/sec, elapsed time 0.04 min
EvaluatorHoldout: Processed 5663 ( 100.00% ) in 3.71 sec. Users per second: 1528
('similarity=cosine', 'topK=10', 'shrink=0', 0.0442636510510313)
ItemKNNCFRecommender: URM Detected 64 (0.81 %) cold users.
ItemKNNCFRecommender: URM Detected 2397 (9.23 %) cold items.
Similarity column 25975 ( 100 % ), 7712.81 column/sec, elapsed time 0.06 min
EvaluatorHoldout: Processed 5663 ( 100.00% ) in 3.58 sec. Users per second: 1581
('similarity=cosine', 'topK=10', 'shrink=10', 0.0441807830548129)
ItemKNNCFRecommender: URM Detected 64 (0.81 %) cold users.
ItemKNNCFRecommender: URM Detected 2397 (9.23 %) cold items.
Similarity column 25975 ( 100 % ), 7649.20 column/sec, elapsed time 0.06 min
EvaluatorHoldout: Processed 5663 ( 100.00% ) in 3.67 sec. Users per second: 1545
('similarity=cosine', 'topK=10', '

In [5]:
max_tuple = max(MAP_CF, key=itemgetter(3))
print('The best combination for the ICF recommender is ({},{},{}), with a MAP = {}'.format(max_tuple[0],max_tuple[1], max_tuple[2], max_tuple[3]))

max_tuple = max(MAP_CBF, key=itemgetter(3))
print('The best combination for the CBF recommender is ({},{},{}), with a MAP = {}'.format(max_tuple[0],max_tuple[1], max_tuple[2], max_tuple[3]))



The best combination for the ICF recommender is (similarity=jaccard,topK=200,shrink=200), with a MAP = 0.05086721307642614
The best combination for the CBF recommender is (similarity=jaccard,topK=200,shrink=10), with a MAP = 0.03583682448787383


In [9]:
from src.Hybrid.GeneralizedMergedHybridRecommender import GeneralizedMergedHybridRecommender


cf_recommender = ItemKNNCFRecommender(URM_train)
cf_recommender.fit(topK=200, shrink=200, similarity='jaccard', normalize=False)


cbf_recommender = ItemKNNCBFRecommender(URM_train=URM_train, ICM_train=ICM_all)
cbf_recommender.fit(topK=200, shrink=10, similarity='jaccard', normalize=False)

recommender = GeneralizedMergedHybridRecommender(
    URM_train=URM_train,
    recommenders=[
        cf_recommender,
        cbf_recommender
    ],
    verbose=False
)

ItemKNNCFRecommender: URM Detected 64 (0.81 %) cold users.
ItemKNNCFRecommender: URM Detected 2397 (9.23 %) cold items.
Similarity column 25975 ( 100 % ), 5459.32 column/sec, elapsed time 0.08 min
ItemKNNCBFRecommender: URM Detected 64 (0.81 %) cold users.
ItemKNNCBFRecommender: URM Detected 2397 (9.23 %) cold items.
Similarity column 25975 ( 100 % ), 3619.26 column/sec, elapsed time 0.12 min


In [12]:
MAP=[]

for alpha in np.linspace(start=0.0,stop=1.0,num=15):
    recommender.fit(alphas=[alpha, 1-alpha])

    result_dict, _ = evaluator_validation.evaluateRecommender(recommender)

    MAP.append({'alpha':alpha,'MAP':result_dict[10]["MAP"]})
    print(MAP[-1])

EvaluatorHoldout: Processed 5663 ( 100.00% ) in 5.21 sec. Users per second: 1087
{'alpha': 0.0, 'MAP': 0.03583682448787383}
EvaluatorHoldout: Processed 5663 ( 100.00% ) in 5.21 sec. Users per second: 1086
{'alpha': 0.07142857142857142, 'MAP': 0.03607396361399994}
EvaluatorHoldout: Processed 5663 ( 100.00% ) in 5.35 sec. Users per second: 1058
{'alpha': 0.14285714285714285, 'MAP': 0.036522578084739035}
EvaluatorHoldout: Processed 5663 ( 100.00% ) in 5.36 sec. Users per second: 1056
{'alpha': 0.21428571428571427, 'MAP': 0.03719232211723671}
EvaluatorHoldout: Processed 5663 ( 100.00% ) in 5.38 sec. Users per second: 1052
{'alpha': 0.2857142857142857, 'MAP': 0.03775833022563568}
EvaluatorHoldout: Processed 5663 ( 100.00% ) in 5.21 sec. Users per second: 1086
{'alpha': 0.3571428571428571, 'MAP': 0.03873053036919037}
EvaluatorHoldout: Processed 5663 ( 100.00% ) in 5.33 sec. Users per second: 1062
{'alpha': 0.42857142857142855, 'MAP': 0.04026685798582835}
EvaluatorHoldout: Processed 5663 ( 10

In [14]:
max_tuple = max(MAP, key= lambda x: x['MAP'])
print('The best combination for the hybrid recommender is ({}), with a MAP = {}'.format(max_tuple['alpha'], max_tuple['MAP']))


The best combination for the hybrid recommender is (0.9285714285714285), with a MAP = 0.06664686415962436
