In [None]:
# Make the course-provided modules under ../GithubModules importable,
# then log the interpreter version for reproducibility.
import sys

modules_dir = '../GithubModules'
sys.path.append(modules_dir)
print(sys.version)

In [None]:
cp -r ../input/github-modules-mod/GITHUB_MODULES/* ./

In [None]:
import pandas as pd

# Users who will be present in the final submission.
target_users = pd.read_csv('../input/RecsysDataset/data_target_users_test.csv')

# Training interactions (one row per user/item pair).
URM_df = pd.read_csv('../input/RecsysDataset/data_train.csv')

# Item feature tables, one CSV per feature family.
ICM_channel_df = pd.read_csv('../input/RecsysDataset/data_ICM_channel.csv')
ICM_event_df = pd.read_csv('../input/RecsysDataset/data_ICM_event.csv')
ICM_genre_df = pd.read_csv('../input/RecsysDataset/data_ICM_genre.csv')
ICM_subgenre_df = pd.read_csv('../input/RecsysDataset/data_ICM_subgenre.csv')

In [None]:
# Give every table short, uniform column names and cast the value columns from float to int.
# The frames are modified in place, so later cells see the renamed/cast columns.

icm_frames = (ICM_channel_df, ICM_event_df, ICM_genre_df, ICM_subgenre_df)

# 1) Rename columns.
URM_df.columns = ['userID', 'itemID', 'interaction']
for icm_df in icm_frames:
    icm_df.columns = ['itemID', 'featureID', 'value']

# 2) Cast the numeric payload to int.
URM_df['interaction'] = URM_df['interaction'].astype(int)
for icm_df in icm_frames:
    icm_df['value'] = icm_df['value'].astype(int)

In [None]:
# Build the user-rating matrix (URM) as a scipy sparse matrix: rows are indexed by userID,
# columns by itemID, and the stored value is the interaction. The shape is inferred by scipy
# from the largest IDs present.
import scipy.sparse as sps
import numpy as np
from numpy import linalg as LA

interaction_values = URM_df['interaction'].values
row_user_ids = URM_df['userID'].values
col_item_ids = URM_df['itemID'].values

URM_sparse = sps.coo_matrix((interaction_values, (row_user_ids, col_item_ids)))
URM_csr = URM_sparse.tocsr()

In [None]:
import numpy as np

from Data_manager.split_functions.split_train_validation_random_holdout import split_train_in_two_percentage_global_sample
from Data_manager.split_functions.split_train_validation_random_holdout import split_train_in_two_percentage_user_wise
from Evaluation.Evaluator import EvaluatorHoldout

# Seed NumPy's global RNG before the random hold-out so that the split -- and therefore every
# validation MAP and every (norm, alpha) selected from it -- is the same after a kernel restart.
# 1234 is the same seed already passed as random_seed to the SLIM-BPR and PureSVD fits.
# NOTE(review): presumably the split helper samples through NumPy's global RNG -- confirm
# against its implementation; if it does not, this call is harmless but has no effect on it.
np.random.seed(1234)

# 80% of the interactions go to training, the rest to validation.
URM_train, URM_validation = split_train_in_two_percentage_global_sample(URM_csr , train_percentage = 0.80)
# Alternative kept for reference: split each user's interactions separately.
#URM_train, URM_validation = split_train_in_two_percentage_user_wise(URM_sparse.tocsr(), train_percentage = 0.8, verbose = False)

# Every model below is scored on the validation split at cutoff 10.
evaluator_validation = EvaluatorHoldout(URM_validation, cutoff_list=[10])

In [None]:
from Recommenders.SLIM.SLIMElasticNetRecommender_mod import SLIMElasticNetRecommender
import random 
from Recommenders.MatrixFactorization.IALSRecommender import IALSRecommender
from Recommenders.MatrixFactorization.PureSVDRecommender import PureSVDRecommender
from Recommenders.KNN.ItemKNNCFRecommender import ItemKNNCFRecommender
from Recommenders.SLIM.Cython.SLIM_BPR_Cython import SLIM_BPR_Cython
from Recommenders.BaseRecommender import BaseRecommender
from Recommenders.GraphBased.RP3betaRecommender import RP3betaRecommender
from Recommenders.GraphBased.P3alphaRecommender import P3alphaRecommender

In [None]:
# Register pyximport's import hook (part of Cython) for this kernel session.
import pyximport
pyximport.install()

In [None]:
# Prepare the environment to run Cython code: run run_compile_all_cython.py from the current
# working directory in a subprocess, using whichever `python` is first on PATH.
# NOTE(review): presumably this builds the recommenders' Cython extensions -- confirm against the script.
!python run_compile_all_cython.py

In [None]:
# Fit SLIM_BPR_Cython on the training split with fixed hyperparameters.
# NOTE(review): the values look like the output of an earlier tuning run -- confirm their origin.
slim_BPR = SLIM_BPR_Cython(URM_train)
slim_BPR.fit(
    epochs=338,
    topK=200,
    learning_rate=0.014618519344494077,
    lambda_i=0.0037050152320468486,
    lambda_j=5e-06,
    random_seed=1234,
)

In [None]:
# Fit SLIMElasticNetRecommender on the training split with fixed hyperparameters.
# NOTE(review): the values look like the output of an earlier tuning run -- confirm their origin.
SLIM = SLIMElasticNetRecommender(URM_train)
SLIM.fit(
    topK=2822,
    alpha=0.05475509828499467,
    l1_ratio=0.0006874637222349307,
    positive_only=True,
)

In [None]:
# Fit IALSRecommender on the training split with fixed hyperparameters.
# NOTE(review): the values look like the output of an earlier tuning run -- confirm their origin.
iALS = IALSRecommender(URM_train)
iALS.fit(
    num_factors=54,
    epochs=157,
    alpha=0.6754923563037951,
    reg=0.0020435763517982174,
)

In [None]:
# Fit ItemKNNCFRecommender on the training split with fixed hyperparameters.
itemKNN = ItemKNNCFRecommender(URM_train)
itemKNN.fit(
    shrink=10,
    topK=169,
)

In [None]:
# Fit RP3betaRecommender on the training split with fixed hyperparameters.
# NOTE(review): the values look like the output of an earlier tuning run -- confirm their origin.
RP3beta = RP3betaRecommender(URM_train)
RP3beta.fit(
    topK=200,
    alpha=1.2540157090612445,
    beta=0.4017561235699327,
)

In [None]:
# Fit PureSVDRecommender on the training split with a fixed seed.
SVD = PureSVDRecommender(URM_train)
SVD.fit(
    num_factors=28,
    random_seed=1234,
)

In [None]:
# Score RP3beta on the validation split and report the MAP stored in the cutoff-10 row.
result_df, _ = evaluator_validation.evaluateRecommender(RP3beta)
map_at_10 = result_df.loc[10]["MAP"]
print("Result of RP3beta: {} ".format(map_at_10))

In [None]:
# Score PureSVD on the validation split and report the MAP stored in the cutoff-10 row.
result_df, _ = evaluator_validation.evaluateRecommender(SVD)
map_at_10 = result_df.loc[10]["MAP"]
print("Result of SVD: {} ".format(map_at_10))

In [None]:
# Score iALS on the validation split and report the MAP stored in the cutoff-10 row.
result_df, _ = evaluator_validation.evaluateRecommender(iALS)
map_at_10 = result_df.loc[10]["MAP"]
print("Result of ALS: {} ".format(map_at_10))

In [None]:
# Score SLIM ElasticNet on the validation split and report the MAP stored in the cutoff-10 row.
result_df, _ = evaluator_validation.evaluateRecommender(SLIM)
map_at_10 = result_df.loc[10]["MAP"]
print("Result of SLIM: {} ".format(map_at_10))

In [None]:
# Score SLIM-BPR on the validation split and report the MAP stored in the cutoff-10 row.
result_df, _ = evaluator_validation.evaluateRecommender(slim_BPR)
map_at_10 = result_df.loc[10]["MAP"]
print("Result of BPR: {} ".format(map_at_10))

In [None]:
# Score ItemKNN on the validation split and report the MAP stored in the cutoff-10 row.
result_df, _ = evaluator_validation.evaluateRecommender(itemKNN)
map_at_10 = result_df.loc[10]["MAP"]
print("Result of KNN: {} ".format(map_at_10))

In [None]:
class DifferentLossScoresHybridRecommender(BaseRecommender):
    """Blend the norm-scaled scores of two recommenders.

    The hybrid score is

        R = alpha * R1 / ||R1|| + (1 - alpha) * R2 / ||R2||

    where R1 and R2 are the scores returned by ``recommender_1`` and ``recommender_2`` and
    ``||.||`` is ``numpy.linalg.norm`` with ``ord = norm``. Dividing by the norm is meant to
    put scores coming from algorithms trained on different loss functions on a comparable
    scale before mixing them.

    Neither sub-recommender is trained here: both are used exactly as passed in, and ``fit``
    only stores the blending parameters. Instances can be nested, i.e. a hybrid may itself be
    one of the two sub-recommenders.

    NOTE(review): the norm is taken over the whole array returned for ``user_id_array``.
    If that array is 2-D (users x items -- TODO confirm), numpy applies *matrix* norms for
    ord 1, 2, inf and -inf, so the scale factor depends on which users are scored together,
    and ord=2 requires an SVD of the score matrix.
    """

    RECOMMENDER_NAME = "DifferentLossScoresHybridRecommender"


    def __init__(self, URM_train, recommender_1, recommender_2):
        """Store the training matrix (as CSR) and the two recommenders to blend.

        :param URM_train: user-item training matrix; converted with ``scipy.sparse.csr_matrix``.
        :param recommender_1: recommender whose scores are weighted by ``alpha``.
        :param recommender_2: recommender whose scores are weighted by ``1 - alpha``.
        """
        super(DifferentLossScoresHybridRecommender, self).__init__(URM_train)

        self.URM_train = sps.csr_matrix(URM_train)
        self.recommender_1 = recommender_1
        self.recommender_2 = recommender_2


    def fit(self, norm, alpha = 0.5):
        """Store the blending parameters; no training is performed.

        :param norm: ``ord`` argument forwarded to ``numpy.linalg.norm`` (e.g. 1, 2, np.inf, -np.inf).
        :param alpha: weight of ``recommender_1``; ``recommender_2`` receives ``1 - alpha``.
        """
        self.alpha = alpha
        self.norm = norm


    def _scores_from(self, recommender, user_id_array, items_to_compute):
        """Return the raw scores of one sub-recommender for ``user_id_array``.

        ``items_to_compute`` is forwarded only to nested hybrids (recognised by their
        RECOMMENDER_NAME); every other recommender is called with the users alone, so the
        item restriction is not applied to its scores.
        """
        if recommender.RECOMMENDER_NAME == DifferentLossScoresHybridRecommender.RECOMMENDER_NAME:
            return recommender._compute_item_score(user_id_array, items_to_compute)

        return recommender._compute_item_score(user_id_array)


    def _compute_item_score(self, user_id_array, items_to_compute = None):
        """Compute the blended scores for the given users.

        :param user_id_array: users to score, passed unchanged to both sub-recommenders.
        :param items_to_compute: optional item restriction, forwarded to nested hybrids only.
            Defaults to None so this method can be called with the users alone, exactly as
            it calls its own sub-recommenders.
        :return: ``alpha * R1 / ||R1|| + (1 - alpha) * R2 / ||R2||``.
        :raises ValueError: if the norm of either score array is zero.
        """
        item_weights_1 = self._scores_from(self.recommender_1, user_id_array, items_to_compute)
        item_weights_2 = self._scores_from(self.recommender_2, user_id_array, items_to_compute)

        norm_item_weights_1 = LA.norm(item_weights_1, self.norm)
        norm_item_weights_2 = LA.norm(item_weights_2, self.norm)

        # A zero norm would turn the scaling below into a division by zero: fail loudly instead.
        if norm_item_weights_1 == 0:
            raise ValueError("Norm {} of item weights for recommender 1 is zero. Avoiding division by zero".format(self.norm))

        if norm_item_weights_2 == 0:
            raise ValueError("Norm {} of item weights for recommender 2 is zero. Avoiding division by zero".format(self.norm))

        item_weights = item_weights_1 / norm_item_weights_1 * self.alpha + item_weights_2 / norm_item_weights_2 * (1-self.alpha)

        return item_weights

In [None]:
from numpy import linalg as LA

# Grid search for the RP3beta + ItemKNN blend: try every (norm, alpha) pair and keep the one
# with the highest validation MAP in the cutoff-10 row.
# alpha sweeps np.arange(0.05, 0.85, 0.05) for each of the four norm orders.
best_result_1 = 0
best_dict_1 = {}
counter = 0

search_space = [
    (norm, alpha)
    for norm in [1, 2, np.inf, -np.inf]
    for alpha in list(np.arange(0.05,0.85,0.05))
]

for norm, alpha in search_space:
    print("==================================================================================================================")
    print('iteration {}, with norm = {}, alpha = {}' .format(counter,norm, alpha))

    difflosshybridrecommender = DifferentLossScoresHybridRecommender(URM_train,RP3beta,itemKNN)
    difflosshybridrecommender.fit(norm = norm, alpha = alpha)

    result_df, _ = evaluator_validation.evaluateRecommender(difflosshybridrecommender)
    current_map = result_df.loc[10]["MAP"]
    print("Result: {} ".format(current_map))

    # Strict '>' keeps the first configuration in case of ties.
    if current_map > best_result_1:
        best_result_1 = current_map
        best_dict_1.update(alpha=alpha, norm=norm)

    counter += 1

In [None]:
# First-level hybrid: RP3beta blended with ItemKNN, configured with the (norm, alpha)
# stored in best_dict_1.
hybrid1 = DifferentLossScoresHybridRecommender(
    URM_train,
    recommender_1=RP3beta,
    recommender_2=itemKNN,
)
hybrid1.fit(
    norm=best_dict_1['norm'],
    alpha=best_dict_1['alpha'],
)

In [None]:
# Grid search for the hybrid1 + iALS blend: try every (norm, alpha) pair and keep the one
# with the highest validation MAP in the cutoff-10 row.
# alpha sweeps np.arange(0.05, 0.85, 0.05) for each of the four norm orders.
best_result_2 = 0
best_dict_2 = {}
counter = 0

search_space = [
    (norm, alpha)
    for norm in [1, 2, np.inf, -np.inf]
    for alpha in list(np.arange(0.05,0.85,0.05))
]

for norm, alpha in search_space:
    print("==================================================================================================================")
    print('iteration {}, with norm = {}, alpha = {}' .format(counter,norm, alpha))

    difflosshybridrecommender = DifferentLossScoresHybridRecommender(URM_train,hybrid1,iALS)
    difflosshybridrecommender.fit(norm = norm, alpha = alpha)

    result_df, _ = evaluator_validation.evaluateRecommender(difflosshybridrecommender)
    current_map = result_df.loc[10]["MAP"]
    print("Result: {} ".format(current_map))

    # Strict '>' keeps the first configuration in case of ties.
    if current_map > best_result_2:
        best_result_2 = current_map
        best_dict_2.update(alpha=alpha, norm=norm)

    counter += 1

In [None]:
# Second-level hybrid: hybrid1 blended with iALS, configured with the (norm, alpha)
# stored in best_dict_2.
hybrid2 = DifferentLossScoresHybridRecommender(
    URM_train,
    recommender_1=hybrid1,
    recommender_2=iALS,
)
hybrid2.fit(
    norm=best_dict_2['norm'],
    alpha=best_dict_2['alpha'],
)

In [None]:
from numpy import linalg as LA

# Grid search for the hybrid2 + SLIM-BPR blend: try every (norm, alpha) pair and keep the one
# with the highest validation MAP in the cutoff-10 row.
# alpha sweeps np.arange(0.05, 0.85, 0.05) for each of the four norm orders.
best_result_3 = 0
best_dict_3 = {}
counter = 0

search_space = [
    (norm, alpha)
    for norm in [1, 2, np.inf, -np.inf]
    for alpha in list(np.arange(0.05,0.85,0.05))
]

for norm, alpha in search_space:
    print("==================================================================================================================")
    print('iteration {} with norm = {}, alpha = {}' .format(counter,norm, alpha))

    diffloss = DifferentLossScoresHybridRecommender(URM_train,hybrid2,slim_BPR)
    diffloss.fit(norm = norm, alpha = alpha)

    result_df, _ = evaluator_validation.evaluateRecommender(diffloss)
    current_map = result_df.loc[10]["MAP"]
    print("Result: {} ".format(current_map))

    # Strict '>' keeps the first configuration in case of ties.
    if current_map > best_result_3:
        best_result_3 = current_map
        best_dict_3.update(alpha=alpha, norm=norm)

    counter += 1

In [None]:
# Third-level hybrid: hybrid2 blended with SLIM-BPR, configured with the (norm, alpha)
# stored in best_dict_3.
hybrid3 = DifferentLossScoresHybridRecommender(
    URM_train,
    recommender_1=hybrid2,
    recommender_2=slim_BPR,
)
hybrid3.fit(
    norm=best_dict_3['norm'],
    alpha=best_dict_3['alpha'],
)

In [None]:
# Grid search for the hybrid3 + SLIM ElasticNet blend: try every (norm, alpha) pair and keep
# the one with the highest validation MAP in the cutoff-10 row.
# alpha sweeps np.arange(0.05, 0.85, 0.05) for each of the four norm orders.
best_result_4 = 0
best_dict_4 = {}
counter = 0

search_space = [
    (norm, alpha)
    for norm in [1, 2, np.inf, -np.inf]
    for alpha in list(np.arange(0.05,0.85,0.05))
]

for norm, alpha in search_space:
    print("==================================================================================================================")
    print('iteration {} with norm = {}, alpha = {}' .format(counter,norm, alpha))

    diffloss = DifferentLossScoresHybridRecommender(URM_train,hybrid3,SLIM)
    diffloss.fit(norm = norm, alpha = alpha)

    result_df, _ = evaluator_validation.evaluateRecommender(diffloss)
    current_map = result_df.loc[10]["MAP"]
    print("Result: {} ".format(current_map))

    # Strict '>' keeps the first configuration in case of ties.
    if current_map > best_result_4:
        best_result_4 = current_map
        best_dict_4.update(alpha=alpha, norm=norm)

    counter += 1

In [None]:
# Fourth-level hybrid: hybrid3 blended with SLIM ElasticNet, configured with the (norm, alpha)
# stored in best_dict_4.
hybrid4 = DifferentLossScoresHybridRecommender(
    URM_train,
    recommender_1=hybrid3,
    recommender_2=SLIM,
)
hybrid4.fit(
    norm=best_dict_4['norm'],
    alpha=best_dict_4['alpha'],
)

In [None]:
# Best validation MAP found by each of the four grid searches, in stacking order.
(best_result_1, best_result_2, best_result_3, best_result_4)

In [None]:
# The (alpha, norm) configuration that produced each of those best results.
(best_dict_1, best_dict_2, best_dict_3, best_dict_4)