In [None]:
# Make the course-provided modules (kept in ../GithubModules) importable
import sys

sys.path.append('../GithubModules')

# Show the interpreter version used by this kernel
print(sys.version)

In [None]:
import pandas as pd

# User-item interactions used for training
URM_df = pd.read_csv('../input/RecsysDataset/data_train.csv')

# Item feature files, one per feature family
ICM_genre_df = pd.read_csv('../input/RecsysDataset/data_ICM_genre.csv')
ICM_subgenre_df = pd.read_csv('../input/RecsysDataset/data_ICM_subgenre.csv')
ICM_channel_df = pd.read_csv('../input/RecsysDataset/data_ICM_channel.csv')
ICM_event_df = pd.read_csv('../input/RecsysDataset/data_ICM_event.csv')

# Users that must appear in the final submission
target_users = pd.read_csv('../input/RecsysDataset/data_target_users_test.csv')

In [None]:
# Give every frame readable column names and cast the value columns to int.

URM_df.columns = ['userID', 'itemID', 'interaction']
URM_df['interaction'] = URM_df['interaction'].astype(int)

# All four ICM frames share the same (item, feature, value) layout
for icm_df in (ICM_channel_df, ICM_event_df, ICM_genre_df, ICM_subgenre_df):
    icm_df.columns = ['itemID', 'featureID', 'value']
    icm_df['value'] = icm_df['value'].astype(int)

In [None]:
# Build the user-rating matrix as a sparse matrix: COO first, then CSR
import scipy.sparse as sps
import numpy as np

URM_sparse = sps.coo_matrix(
    (
        URM_df['interaction'].values,
        (URM_df['userID'].values, URM_df['itemID'].values),
    )
)
URM_csr = URM_sparse.tocsr()


In [None]:
# Install the pyximport import hook.
# NOTE(review): per the Cython documentation this lets .pyx modules be built on
# import; it is run before the compile script in the following cell.
import pyximport
pyximport.install()

In [None]:
# Prepare the environment to run Cython code by running the compile script in a shell.
# NOTE(review): the script path is relative, so this presumably requires
# run_compile_all_cython.py to be in the notebook's working directory — confirm.
!python run_compile_all_cython.py

In [None]:
from Data_manager.split_functions.split_train_validation_random_holdout import (
    split_train_in_two_percentage_global_sample,
    split_train_in_two_percentage_user_wise,
)
from Evaluation.Evaluator import EvaluatorHoldout

# Split the interactions into train and validation with a global sample (train_percentage 0.80).
# The user-wise splitter imported above is the alternative that was tried:
#   split_train_in_two_percentage_user_wise(URM_sparse.tocsr(), train_percentage = 0.8, verbose = False)
URM_train, URM_validation = split_train_in_two_percentage_global_sample(
    URM_csr,
    train_percentage=0.80,
)

# Evaluator built on the validation split, cutoff 10
evaluator_validation = EvaluatorHoldout(URM_validation, cutoff_list=[10])

In [None]:
#itemKNN - BPR
from Recommenders.BaseRecommender import BaseRecommender
class CoTrainingRecommender(BaseRecommender):
    """Weighted blend of the item scores produced by two recommenders.

    Both wrapped recommenders are (re)fitted by ``fit``. At scoring time their
    score matrices R1 and R2 are combined as

        R = w1 * R1 / ||R1|| + (1 - w1) * R2 / ||R2||

    where the norm is chosen through the ``norm`` hyperparameter. With
    ``norm=None`` the raw scores are blended: R = w1 * R1 + (1 - w1) * R2.
    """

    RECOMMENDER_NAME = "CoTrainingRecommender"

    # recommender_1 types whose _compute_item_score receives items_to_compute
    _HYBRID_RECOMMENDER_NAMES = ("DifferentLossScoresHybridRecommender", "ScoresHybridRecommender")

    # Maps the string value of ``norm`` to the ``ord`` argument of numpy.linalg.norm.
    # Any other non-None value falls back to -inf (the behaviour of the original else-branch).
    _NORM_ORDERS = {"1": 1, "2": 2, "inf": float("inf")}

    def __init__(self, URM_train, recommender_1, recommender_2):
        """Store the training matrix (as CSR) and the two recommenders to blend.

        :param URM_train: user-item interaction matrix, converted with sps.csr_matrix
        :param recommender_1: recommender fitted with ``topK`` and ``shrink``
        :param recommender_2: recommender fitted with ``topK``, ``lambda_i``, ``lambda_j``,
                              ``epochs``, ``learning_rate`` and a fixed random seed
        """
        super(CoTrainingRecommender, self).__init__(URM_train)

        self.URM_train = sps.csr_matrix(URM_train)
        self.recommender_1 = recommender_1
        self.recommender_2 = recommender_2


    def fit(self,norm,topKA,topKB,shrink,lambda_i,lambda_j,epochs,learning_rate,w1):
        """Fit both wrapped recommenders and store the blending parameters.

        :param norm: None to blend raw scores, otherwise "1", "2" or "inf";
                     any other value selects the -inf norm
        :param topKA: ``topK`` passed to recommender_1
        :param topKB: ``topK`` passed to recommender_2
        :param shrink: ``shrink`` passed to recommender_1
        :param lambda_i: ``lambda_i`` passed to recommender_2
        :param lambda_j: ``lambda_j`` passed to recommender_2
        :param epochs: ``epochs`` passed to recommender_2
        :param learning_rate: ``learning_rate`` passed to recommender_2
        :param w1: weight of recommender_1's scores; recommender_2 gets ``1 - w1``
        """
        self.norm = norm
        self.w1 = w1
        self.recommender_1.fit(topK=topKA, shrink = shrink)
        self.recommender_2.fit(topK=topKB,random_seed=1234,lambda_i=lambda_i,lambda_j=lambda_j,epochs = epochs,learning_rate = learning_rate)


    @staticmethod
    def _normalize_scores(item_weights, norm_order):
        """Divide the scores by their norm; leave them unchanged if the norm is unusable."""
        norm_value = np.linalg.norm(item_weights, norm_order)

        # Dividing by a zero or non-finite norm would fill the scores with NaN/inf
        if not np.isfinite(norm_value) or norm_value == 0:
            return item_weights

        return item_weights / norm_value


    def _compute_item_score(self, user_id_array, items_to_compute = None):
        """Return the blended item scores for the given users.

        :param user_id_array: users to score, forwarded to both recommenders
        :param items_to_compute: forwarded to recommender_1 only when it is one of the
                                 score-hybrid recommenders listed in _HYBRID_RECOMMENDER_NAMES
        :return: w1-weighted combination of the two score matrices
        """

        # In a simple extension this could be a loop over a list of pretrained recommender objects
        # Membership test: `name == ("A" or "B")` would only ever compare against "A"
        if self.recommender_1.RECOMMENDER_NAME in self._HYBRID_RECOMMENDER_NAMES:
            item_weights_1 = self.recommender_1._compute_item_score(user_id_array, items_to_compute)
        else:
            item_weights_1 = self.recommender_1._compute_item_score(user_id_array)

        item_weights_2 = self.recommender_2._compute_item_score(user_id_array)

        if self.norm is not None:
            norm_order = self._NORM_ORDERS.get(self.norm, -np.inf)
            item_weights_1 = self._normalize_scores(item_weights_1, norm_order)
            item_weights_2 = self._normalize_scores(item_weights_2, norm_order)

        return item_weights_1 * self.w1 + item_weights_2 * (1 - self.w1)

In [None]:
from skopt.space import Real, Integer, Categorical
import numpy as np

# Search space: one entry per keyword argument of CoTrainingRecommender.fit
hyperparameters_range_dictionary = dict(
    topKA=Integer(100, 250),
    shrink=Integer(12, 70),
    topKB=Integer(150, 350),
    lambda_i=Real(0.0010, 0.0055),
    lambda_j=Real(0.000005, 0.0055),
    epochs=Integer(200, 350),
    learning_rate=Real(0.00045, 0.04),
    w1=Real(0.05, 0.95),
    norm=Categorical(["1", "2", "inf", "-inf"]),
)


In [None]:
# Early-stopping options: validate every 5 epochs on MAP with the validation
# evaluator, allowing 5 lower validations
earlystopping_keywargs = dict(
    validation_every_n=5,
    stop_on_validation=True,
    evaluator_object=evaluator_validation,
    lower_validations_allowed=5,
    validation_metric="MAP",
)

In [None]:
from HyperparameterTuning.SearchBayesianSkopt import SearchBayesianSkopt
from Recommenders.KNN.ItemKNNCFRecommender import ItemKNNCFRecommender
from Recommenders.SLIM.Cython.SLIM_BPR_Cython import SLIM_BPR_Cython

# The two base models blended by the hybrid, both built on the training split
itemKNN = ItemKNNCFRecommender(URM_train)
slim_BPR = SLIM_BPR_Cython(URM_train)

# Class whose hyperparameters are tuned, and the Bayesian search driving it
recommender_class = CoTrainingRecommender
hyperparameterSearch = SearchBayesianSkopt(
    recommender_class,
    evaluator_validation=evaluator_validation,
)

In [None]:
from HyperparameterTuning.SearchAbstractClass import SearchInputRecommenderArgs
from Recommenders.KNN.ItemKNNCFRecommender import ItemKNNCFRecommender
from Recommenders.SLIM.Cython.SLIM_BPR_Cython import SLIM_BPR_Cython

# Arguments used to build and fit CoTrainingRecommender for each configuration tried
# by the search. The constructor takes (URM_train, recommender_1, recommender_2).
recommender_input_args = SearchInputRecommenderArgs(
    CONSTRUCTOR_POSITIONAL_ARGS = [URM_train, itemKNN, slim_BPR],     # For a CBF model simply put [URM_train, ICM_train]
    CONSTRUCTOR_KEYWORD_ARGS = {},
    FIT_POSITIONAL_ARGS = [],
    FIT_KEYWORD_ARGS = {}
)

# Arguments for the "last" model. CoTrainingRecommender needs all three positional
# constructor arguments, so the base recommenders are rebuilt on train + validation.
# NOTE(review): presumably the search refits the best configuration with these
# arguments once the search is over — confirm against SearchBayesianSkopt.
URM_train_validation = URM_train + URM_validation

recommender_input_args_last_test = SearchInputRecommenderArgs(
    CONSTRUCTOR_POSITIONAL_ARGS = [
        URM_train_validation,
        ItemKNNCFRecommender(URM_train_validation),
        SLIM_BPR_Cython(URM_train_validation),
    ],     # For a CBF model simply put [URM_train_validation, ICM_train]
    CONSTRUCTOR_KEYWORD_ARGS = {},
    FIT_POSITIONAL_ARGS = [],
    # CoTrainingRecommender.fit only accepts the tuned hyperparameters; passing the
    # early-stopping keywords here would raise a TypeError (unexpected keyword argument).
    FIT_KEYWORD_ARGS = {}
)

In [None]:
import os

# NOTE(review): this is an absolute path at the filesystem root, while every other
# path in the notebook is relative — confirm that '/Results/...' is intended.
output_folder_path = '/Results/cotrainedBPR_KNN/'

# Create the output directory together with any missing parents; no-op if it exists
os.makedirs(output_folder_path, exist_ok=True)

n_cases = 100  # passed to the search as n_cases
n_random_starts = int(n_cases*0.3)  # 30% of n_cases, passed as n_random_starts
metric_to_optimize = "MAP"
cutoff_to_optimize = 10

In [None]:
# Launch the hyperparameter search with the settings defined in the cells above
hyperparameterSearch.search(
    recommender_input_args,
    recommender_input_args_last_test=recommender_input_args_last_test,
    hyperparameter_search_space=hyperparameters_range_dictionary,
    n_cases=n_cases,
    n_random_starts=n_random_starts,
    save_model="last",
    # Where to save the results
    output_folder_path=output_folder_path,
    # How to call the files
    output_file_name_root=recommender_class.RECOMMENDER_NAME,
    metric_to_optimize=metric_to_optimize,
    cutoff_to_optimize=cutoff_to_optimize,
)