In [None]:
# Import relevant libraries and creates access to library seen in class

import matplotlib.pyplot as plt #Visualization
import numpy as np # linear algebra
import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)
import scipy.sparse as sps # explicit alias used by ScoresHybridRecommender
from scipy.sparse import * # COO, CSR and CSC matrices


# Access to professor library
from os import sys

# Directories that must be on the import path for the course library imports below
package_paths = ['../input/mauriziofd/']

# Register every directory in one call instead of appending one at a time
sys.path.extend(package_paths)
    
from Recommenders.Similarity.Compute_Similarity_Python import Compute_Similarity_Python

In [None]:
# Read data for the URM and ICM
# Interactions (URM) and the list of users to produce recommendations for
URM_df = pd.read_csv(
    '../input/recommender-system-2021-challenge-polimi/data_train.csv',
    dtype={0:int, 1:int, 2:float},
)
target_users = pd.read_csv(
    '../input/recommender-system-2021-challenge-polimi/data_target_users_test.csv',
    dtype={0:int},
)

# Item feature tables (one ICM per feature family)
genre_matrix = pd.read_csv(
    '../input/recommender-system-2021-challenge-polimi/data_ICM_genre.csv',
    dtype={0:int, 1:int, 2:int},
)
subgenre_matrix = pd.read_csv(
    '../input/recommender-system-2021-challenge-polimi/data_ICM_subgenre.csv',
    dtype={0:int, 1:int, 2:int},
)
channel_matrix = pd.read_csv(
    '../input/recommender-system-2021-challenge-polimi/data_ICM_channel.csv',
    dtype={0:int, 1:int, 2:int},
)

# Replace the header names of each file with short, uniform column names
URM_df.columns = ['user', 'item', 'interaction']
target_users.columns = ['user_id']
genre_matrix.columns = ['item', 'genre', 'hasgenre']
subgenre_matrix.columns = ['item', 'subgenre', 'hassubgenre']
channel_matrix.columns = ['item', 'channel', 'onchannel']

# Join the three feature tables on the item id (inner joins: an item must
# appear in all three tables to be kept, and it gets one row per combination
# of its matching genre / subgenre / channel rows).
genre_subgenre_ICM = genre_matrix.merge(subgenre_matrix, on='item')
ICM_df = (
    genre_subgenre_ICM
    .merge(channel_matrix, on='item')
    .drop(columns=['hasgenre', 'hassubgenre'])  # keep only the channel flag as value column
    .rename(columns={'onchannel':'data'})
)

# Build the sparse matrices from the dataframes. Each one is assembled in COO
# format first and then converted to the compressed layout(s) needed later.
# No explicit shape is passed, so every dimension is inferred from the largest
# id found in the corresponding column.
def _frame_to_coo(frame, row_col, col_col, value_col):
    """Return a COO matrix whose entries are frame[value_col] at (frame[row_col], frame[col_col])."""
    return coo_matrix((frame[value_col].values, (frame[row_col].values, frame[col_col].values)))


# User-item interactions
URM_all = _frame_to_coo(URM_df, 'user', 'item', 'interaction')
URM_csr = URM_all.tocsr()

# Item-feature matrices
genre_coo = _frame_to_coo(genre_matrix, 'item', 'genre', 'hasgenre')
genre_csc = genre_coo.tocsc()

subgenre_coo = _frame_to_coo(subgenre_matrix, 'item', 'subgenre', 'hassubgenre')
subgenre_csc = subgenre_coo.tocsc()

channel_coo = _frame_to_coo(channel_matrix, 'item', 'channel', 'onchannel')
channel_csc = channel_coo.tocsc()
channel_csr = channel_coo.tocsr()

# Number of genres, subgenres and channels = number of columns of each ICM
n_of_genres, n_of_subgenres, n_of_channels = (
    feature_matrix.shape[1] for feature_matrix in (genre_coo, subgenre_coo, channel_coo)
)

print(n_of_genres, n_of_subgenres, n_of_channels)

In [None]:
# Split in training and validation to assess how good the predictions will be at test time
from Evaluation.Evaluator import EvaluatorHoldout
from Data_manager.split_functions.split_train_validation_random_holdout import split_train_in_two_percentage_global_sample

# The hold-out split is random. Seed NumPy's global RNG first so that the
# train/validation partition, and every MAP value computed from it in the
# cells below, is the same on each Restart & Run All.
# NOTE(review): this assumes the split helper draws from np.random's global
# state; confirm against the installed version of the course library.
SPLIT_SEED = 42
np.random.seed(SPLIT_SEED)

# 80% of the interactions go to training, the remaining 20% to validation
URM_train, URM_validation = split_train_in_two_percentage_global_sample(URM_csr, train_percentage = 0.80)

# Metrics are computed on the held-out interactions at cutoff 10
evaluator_validation = EvaluatorHoldout(URM_validation, cutoff_list=[10])

In [None]:
from Recommenders.KNN.ItemKNNCFRecommender import ItemKNNCFRecommender
# Item-based KNN CF: measure validation MAP for several neighbourhood sizes
recommender_ItemKNNCF = ItemKNNCFRecommender(URM_train)

x_tick = [10, 50, 100, 200, 500]
MAP_per_k = []

for topK in x_tick:
    # Re-fit the same recommender object with the current topK (no shrinkage)
    recommender_ItemKNNCF.fit(shrink=0.0, topK=topK)
    result_df, _ = evaluator_validation.evaluateRecommender(recommender_ItemKNNCF)

    # Row 10 of the result frame is the cutoff requested from the evaluator
    map_at_10 = result_df.loc[10]["MAP"]
    MAP_per_k.append(map_at_10)
    print("topK = {}, MAP = {}".format(topK, map_at_10))

fig, ax = plt.subplots()
ax.plot(x_tick, MAP_per_k)
ax.set_ylabel('MAP')
ax.set_xlabel('TopK')
plt.show()


In [None]:
# Item-based KNN CF: measure validation MAP for several shrink values at topK=200
shrink_tick = [0, 5, 10, 15, 20, 50, 100]
MAP_per_shrinkage = []

for shrink in shrink_tick:
    # Reuses the recommender object created in the topK sweep cell
    recommender_ItemKNNCF.fit(shrink=shrink, topK=200)
    result_df, _ = evaluator_validation.evaluateRecommender(recommender_ItemKNNCF)

    map_at_10 = result_df.loc[10]["MAP"]
    MAP_per_shrinkage.append(map_at_10)
    print("shrink = {}, MAP = {}".format(shrink, map_at_10))

fig, ax = plt.subplots()
ax.plot(shrink_tick, MAP_per_shrinkage)
ax.set_ylabel('MAP')
ax.set_xlabel('Shrink')
plt.show()

In [None]:
from Recommenders.MatrixFactorization.PureSVDRecommender import PureSVDRecommender

# PureSVD: measure validation MAP for several numbers of latent factors
pureSVD = PureSVDRecommender(URM_train)

factors_tick = [15, 20, 25, 30, 35,40]
# NOTE: despite its name this list holds the MAP per number of latent factors;
# the name is kept because it is a notebook-global variable.
MAP_per_shrinkage = []

for factor in factors_tick:
    pureSVD.fit(num_factors=factor)
    result_df, _ = evaluator_validation.evaluateRecommender(pureSVD)

    map_at_10 = result_df.loc[10]["MAP"]
    MAP_per_shrinkage.append(map_at_10)
    print("latent factors = {}, MAP = {}".format(factor, map_at_10))

fig, ax = plt.subplots()
ax.plot(factors_tick, MAP_per_shrinkage)
ax.set_ylabel('MAP')
ax.set_xlabel('Latent factors')
plt.show()

In [None]:
from Recommenders.KNN.UserKNNCFRecommender import UserKNNCFRecommender
# User-based KNN CF with a single configuration (no shrinkage, 300 neighbours)
recommender_userKNN = UserKNNCFRecommender(URM_train)
recommender_userKNN.fit(topK=300, shrink=0)

# Evaluate on the validation split and report MAP at cutoff 10
result_df, _ = evaluator_validation.evaluateRecommender(recommender_userKNN)
map_at_10 = result_df.loc[10]["MAP"]
print("MAP = {}".format(map_at_10))

In [None]:
from Recommenders.BaseRecommender import BaseRecommender

class ScoresHybridRecommender(BaseRecommender):
    """Hybrid that linearly blends the item scores of two recommenders.

    The blended score is ``R = R1 * alpha + R2 * (1 - alpha)``, where ``R1`` and
    ``R2`` are the score matrices returned by ``recommender_1`` and
    ``recommender_2``. The two recommenders must already be fitted; this class
    never fits them.

    Parameters
    ----------
    URM_train : sparse matrix
        User-item training matrix, forwarded to ``BaseRecommender`` and stored
        as CSR.
    recommender_1, recommender_2 : recommender objects
        Objects exposing ``_compute_item_score(user_id_array)``.
    """

    RECOMMENDER_NAME = "ScoresHybridRecommender"

    def __init__(self, URM_train, recommender_1, recommender_2):
        super(ScoresHybridRecommender, self).__init__(URM_train)

        self.URM_train = sps.csr_matrix(URM_train)
        self.recommender_1 = recommender_1
        self.recommender_2 = recommender_2

        # Same default as fit(), so scoring before fit() does not fail with
        # an AttributeError.
        self.alpha = 0.5


    def fit(self, alpha = 0.5):
        """Set the blending weight: ``alpha`` for recommender_1, ``1 - alpha`` for recommender_2."""
        self.alpha = alpha


    def _compute_item_score(self, user_id_array, items_to_compute = None):
        """Return the blended score matrix for the given users.

        Parameters
        ----------
        user_id_array : array-like
            User ids to score, passed unchanged to both recommenders.
        items_to_compute : array-like of item ids, optional
            When given, only these items keep their blended score and every
            other item is set to ``-inf``. Defaults to ``None`` (all items),
            which also lets another hybrid call this method with a single
            argument, exactly as this class calls its own sub-recommenders.

        Returns
        -------
        Blended scores with the same shape as the sub-recommenders' output
        (presumably a dense ``(n_users_in_batch, n_items)`` array; confirm
        against the recommenders used).
        """

        # In a simple extension this could be a loop over a list of pretrained recommender objects
        item_weights_1 = self.recommender_1._compute_item_score(user_id_array)
        item_weights_2 = self.recommender_2._compute_item_score(user_id_array)

        item_weights = item_weights_1*self.alpha + item_weights_2*(1-self.alpha)

        if items_to_compute is not None:
            # Mask after blending rather than forwarding the restriction:
            # -inf coming from a sub-recommender would turn into NaN when
            # multiplied by a weight of 0 (alpha = 0 or alpha = 1).
            restricted = np.full(
                item_weights.shape,
                -np.inf,
                dtype=np.result_type(item_weights.dtype, np.float32),
            )
            restricted[:, items_to_compute] = item_weights[:, items_to_compute]
            item_weights = restricted

        return item_weights

In [None]:
import scipy.sparse as sps
# Hybrid between item-based and user-based KNN CF.
# Both base recommenders are (re)fitted here with fixed hyperparameters so the
# cell does not depend on whatever the sweep cells left them fitted with.
recommender_ItemKNNCF = ItemKNNCFRecommender(URM_train)
recommender_ItemKNNCF.fit(shrink=0.0, topK=200)
recommender_userKNN = UserKNNCFRecommender(URM_train)
recommender_userKNN.fit(shrink=0, topK=300)

# alpha weights the first recommender (ItemKNNCF), 1 - alpha the second (UserKNNCF)
scoreshybridrecommender = ScoresHybridRecommender(URM_train, recommender_ItemKNNCF, recommender_userKNN)

alpha_tick = [0, 0.05, 0.1, 0.15, 0.2]

# Holds the MAP per alpha value (the name is kept from the earlier sweep cells)
MAP_per_k = []

for alpha in alpha_tick:
    scoreshybridrecommender.fit(alpha = alpha)
    result_df, _ = evaluator_validation.evaluateRecommender(scoreshybridrecommender)

    map_at_10 = result_df.loc[10]["MAP"]
    MAP_per_k.append(map_at_10)
    print("alpha = {}, MAP = {}".format(alpha, map_at_10))

fig, ax = plt.subplots()
ax.plot(alpha_tick, MAP_per_k)
ax.set_ylabel('MAP')
# The swept quantity is alpha, not topK
ax.set_xlabel('Alpha')
plt.show()

Best **alpha = 0.1** (weight of ItemKNNCF; UserKNNCF gets 1 - alpha), best **MAP = 0.23448**

In [None]:
# Hybrid between pureSVD and user-based KNN CF
import scipy.sparse as sps
# Fit both base recommenders with fixed hyperparameters
recommender_pureSVD = PureSVDRecommender(URM_train)
recommender_pureSVD.fit(num_factors=35)

recommender_userKNN = UserKNNCFRecommender(URM_train)
recommender_userKNN.fit(shrink=0, topK=300)

# alpha weights the first recommender (PureSVD), 1 - alpha the second (UserKNNCF)
scoreshybridrecommender = ScoresHybridRecommender(URM_train, recommender_pureSVD, recommender_userKNN)

alpha_tick = [0.8, 0.9, 0.95, 1]

# Holds the MAP per alpha value (the name is kept from the earlier sweep cells)
MAP_per_k = []

for alpha in alpha_tick:
    scoreshybridrecommender.fit(alpha = alpha)
    result_df, _ = evaluator_validation.evaluateRecommender(scoreshybridrecommender)

    map_at_10 = result_df.loc[10]["MAP"]
    MAP_per_k.append(map_at_10)
    print("alpha = {}, MAP = {}".format(alpha, map_at_10))

fig, ax = plt.subplots()
ax.plot(alpha_tick, MAP_per_k)
ax.set_ylabel('MAP')
# The swept quantity is alpha, not topK
ax.set_xlabel('Alpha')
plt.show()

Best **alpha = 0.9** (weight of PureSVD; UserKNNCF gets 1 - alpha), best **MAP = 0.2422**

In [None]:
# SLIM ElasticNet: sweep over topK (RP3beta is not used in this cell)
from Recommenders.SLIM.SLIMElasticNetRecommender import SLIMElasticNetRecommender

# SLIM ElasticNet: measure validation MAP for several topK values with fixed
# regularisation settings (l1_ratio=0.1, alpha=1)
recommender_Slim = SLIMElasticNetRecommender(URM_train)
topK_tick = [5, 10, 50, 100, 200]

MAP_per_k = []

for topK in topK_tick:
    recommender_Slim.fit(l1_ratio=0.1, alpha = 1, topK=topK)
    result_df, _ = evaluator_validation.evaluateRecommender(recommender_Slim)

    map_at_10 = result_df.loc[10]["MAP"]
    MAP_per_k.append(map_at_10)
    # The value that changes per iteration is topK (alpha is fixed at 1)
    print("topK = {}, MAP = {}".format(topK, map_at_10))

fig, ax = plt.subplots()
ax.plot(topK_tick, MAP_per_k)
ax.set_ylabel('MAP')
ax.set_xlabel('TopK')
plt.show()