In [1]:
!cp -r ../input/recsys-repo/RecSys_Course_AT_PoliMi-master/* ./
%config Completer.use_jedi = False
%load_ext Cython
import pandas as pd
import numpy as np
import scipy.sparse as sps
import matplotlib.pyplot as pyplot
from Data_manager.split_functions.split_train_validation_random_holdout import split_train_in_two_percentage_global_sample
from Evaluation.Evaluator import EvaluatorHoldout


In [2]:
# Read the complete interaction list (columns UserID, ItemID, Data) and turn it
# into a sparse user x item matrix.
URM_all_dataframe = pd.read_csv('/kaggle/input/urm-true-binary/URM_True_Binary.csv')
URM_all_dataframe  # mid-cell expression: evaluated but not displayed
# Assemble in COO form from (value, (row, col)) triplets, then convert straight
# to CSR, which gives fast access to rows (users).
URM_all = sps.coo_matrix(
    (
        URM_all_dataframe["Data"].values,
        (
            URM_all_dataframe["UserID"].values,
            URM_all_dataframe["ItemID"].values,
        ),
    )
).tocsr()
URM_all

<41629x24507 sparse matrix of type '<class 'numpy.int64'>'
	with 1554640 stored elements in Compressed Sparse Row format>

In [3]:
# Training split of the interactions (columns UserID, ItemID, Data).
# The matrix is created with the shape of URM_all instead of letting scipy infer
# it from the largest ids found in the file: if the highest user or item id is
# missing from this split, an inferred shape would be smaller than URM_all and
# the matrices would no longer line up. An id outside URM_all's shape now
# raises an error instead of going unnoticed.
URM_train_dataframe = pd.read_csv('/kaggle/input/urm-split/Train_df.csv')
URM_train = sps.coo_matrix((URM_train_dataframe["Data"].values,
                            (URM_train_dataframe["UserID"].values, URM_train_dataframe["ItemID"].values)),
                           shape=URM_all.shape)
URM_train = URM_train.tocsr() # to obtain fast access to rows (users)
URM_train

<41629x24507 sparse matrix of type '<class 'numpy.int64'>'
	with 1243712 stored elements in Compressed Sparse Row format>

In [4]:
# Held-out split of the interactions (columns UserID, ItemID, Data), used in
# this notebook as the validation set.
# The matrix is created with the shape of URM_all instead of letting scipy infer
# it from the largest ids found in the file: if the highest user or item id is
# missing from this split, an inferred shape would be smaller than URM_all and
# the matrices would no longer line up. An id outside URM_all's shape now
# raises an error instead of going unnoticed.
URM_val_dataframe = pd.read_csv('/kaggle/input/urm-split/Test_df.csv')
URM_valid = sps.coo_matrix((URM_val_dataframe["Data"].values,
                            (URM_val_dataframe["UserID"].values, URM_val_dataframe["ItemID"].values)),
                           shape=URM_all.shape)
URM_valid = URM_valid.tocsr() # to obtain fast access to rows (users)
URM_valid

<41629x24507 sparse matrix of type '<class 'numpy.int64'>'
	with 310928 stored elements in Compressed Sparse Row format>

In [5]:
# Item-content data: one row per (item_id, feature_id, data) triplet.
ICM_type_df = pd.read_csv("/kaggle/input/competition-data/data_ICM_type.csv")
ICM_type_df  # mid-cell expression: evaluated but not displayed

# Pull out the three columns used as rows, columns and values of the matrix.
items = ICM_type_df["item_id"]
features = ICM_type_df["feature_id"]
data = ICM_type_df["data"]

# Sparse item x feature matrix in CSR format, stored as 32-bit integers.
ICM_type = sps.csr_matrix((data, (items, features))).astype(np.int32)
ICM_type.shape

(27968, 8)

In [6]:
from Evaluation.Evaluator import EvaluatorHoldout

# Holdout evaluator built on the validation interactions, with metrics computed
# at cutoff 10. It is the evaluator given to the hyperparameter search.
evaluator_valid = EvaluatorHoldout(
    URM_valid,
    cutoff_list=[10],
)

EvaluatorHoldout: Ignoring 323 ( 0.8%) Users that have less than 1 test interactions


In [7]:
from Recommenders.GraphBased.RP3betaRecommender import RP3betaRecommender
import os

# Model whose hyperparameters are tuned below: the RP3beta graph-based recommender.
recommender_class = RP3betaRecommender

# Folder that receives the files written by the search.
# exist_ok=True makes the cell safe to re-run and avoids the
# check-then-create race of a separate os.path.exists() test.
output_folder_path = "result_experiments/"
os.makedirs(output_folder_path, exist_ok=True)

# Search budget: total number of configurations to evaluate and how many of
# them (30%) are passed to the search as random starts.
n_cases = 1000
n_random_starts = int(n_cases*0.3)

# Objective of the search: MAP at cutoff 10.
metric_to_optimize = "MAP"
cutoff_to_optimize = 10

In [8]:
from skopt.space import Real, Integer, Categorical
from HyperparameterTuning.SearchBayesianSkopt import SearchBayesianSkopt
from HyperparameterTuning.SearchAbstractClass import SearchInputRecommenderArgs

# Search space for RP3beta: two continuous parameters sampled uniformly,
# an integer neighbourhood size and a boolean flag.
hyperparameters_range_dictionary = dict(
    alpha=Real(0.7, 1.2, prior='uniform'),
    beta=Real(0.1, 0.8, prior='uniform'),
    topK=Integer(low=22, high=70),
    implicit=Categorical(categories=[True, False]),
)

# Bayesian optimizer bound to the recommender class and the validation evaluator.
hyperparameterSearch = SearchBayesianSkopt(recommender_class, evaluator_validation=evaluator_valid)

# Arguments used to build and fit the model during the search: only the
# training URM is passed to the constructor, nothing extra to fit().
# (For a CBF model the positional args would be [URM_train, ICM_train].)
recommender_input_args = SearchInputRecommenderArgs(
    CONSTRUCTOR_POSITIONAL_ARGS=[URM_train],
    CONSTRUCTOR_KEYWORD_ARGS={},
    FIT_POSITIONAL_ARGS=[],
    FIT_KEYWORD_ARGS={},
)

# Same structure, but the constructor receives the full URM; passed to the
# search as recommender_input_args_last_test.
recommender_input_args_last_test = SearchInputRecommenderArgs(
    CONSTRUCTOR_POSITIONAL_ARGS=[URM_all],
    CONSTRUCTOR_KEYWORD_ARGS={},
    FIT_POSITIONAL_ARGS=[],
    FIT_KEYWORD_ARGS={},
)

In [9]:
# Launch the Bayesian search over the space defined above.
hyperparameterSearch.search(
    recommender_input_args,
    recommender_input_args_last_test=recommender_input_args_last_test,
    hyperparameter_search_space=hyperparameters_range_dictionary,
    # search budget
    n_cases=n_cases,
    n_random_starts=n_random_starts,
    # objective
    metric_to_optimize=metric_to_optimize,
    cutoff_to_optimize=cutoff_to_optimize,
    # persistence: where results are saved and how the files are named
    save_model="last",
    output_folder_path=output_folder_path,
    output_file_name_root=recommender_class.RECOMMENDER_NAME,
)

Iteration No: 1 started. Evaluating function at random point.
SearchBayesianSkopt: Testing config: {'alpha': 0.7045365389623022, 'beta': 0.14871375102050294, 'topK': 37, 'implicit': False}
RP3betaRecommender: Similarity column 24507 (100.0%), 2628.03 column/sec. Elapsed time 9.33 sec
EvaluatorHoldout: Processed 41306 (100.0%) in 30.42 sec. Users per second: 1358
SearchBayesianSkopt: New best config found. Config 0: {'alpha': 0.7045365389623022, 'beta': 0.14871375102050294, 'topK': 37, 'implicit': False} - results: PRECISION: 0.0583087, PRECISION_RECALL_MIN_DEN: 0.0780889, RECALL: 0.0671935, MAP: 0.0301491, MAP_MIN_DEN: 0.0401465, MRR: 0.1916122, NDCG: 0.0835551, F1: 0.0624366, HIT_RATE: 0.3632886, ARHR_ALL_HITS: 0.2382018, NOVELTY: 0.0048828, AVERAGE_POPULARITY: 0.1123830, DIVERSITY_MEAN_INTER_LIST: 0.9781054, DIVERSITY_HERFINDAHL: 0.9978082, COVERAGE_ITEM: 0.7946709, COVERAGE_ITEM_HIT: 0.0730812, ITEMS_IN_GT: 0.9983678, COVERAGE_USER: 0.9922410, COVERAGE_USER_HIT: 0.3604699, USERS_IN_

KeyboardInterrupt: 

In [None]:
from Recommenders.DataIO import DataIO

# Load the metadata archive from the output folder; its file name is the
# recommender name followed by "_metadata.zip".
data_loader = DataIO(folder_path=output_folder_path)
search_metadata = data_loader.load_data(f"{recommender_class.RECOMMENDER_NAME}_metadata.zip")

# Show which entries the metadata contains.
search_metadata.keys()

In [None]:
# Entry "hyperparameters_df" of the loaded search metadata
# (presumably one row per evaluated configuration - verify against the search output).
hyperparameters_df = search_metadata["hyperparameters_df"]
hyperparameters_df

In [None]:
# Entry "result_on_validation_df" of the loaded search metadata
# (presumably the validation metrics of each evaluated configuration - verify).
result_on_validation_df = search_metadata["result_on_validation_df"]
result_on_validation_df

In [None]:
# Entry "hyperparameters_best" of the loaded search metadata, displayed for inspection.
best_hyperparameters = search_metadata["hyperparameters_best"]
best_hyperparameters

In [None]:
# Fit the final RP3beta model on all available interactions (URM_all) with a
# fixed, hard-coded configuration. No evaluation happens in this cell.
# NOTE(review): the values are presumably copied from an earlier search run;
# confirm they match search_metadata["hyperparameters_best"].

recommender = RP3betaRecommender(URM_all)
# Hyperparameters must be passed as keyword arguments: the previous
# 'name': value form inside the call is dict-item syntax and a SyntaxError.
recommender.fit(alpha=0.7743102579193515, beta=0.37166682896668013, topK=38, implicit=False)

In [None]:
# Target users for the submission; the 'user_id' column is read when the
# recommendations are generated.
test_users = pd.read_csv('../input/competition-data/data_target_users_test.csv')
test_users

In [None]:
# Build the submission file: one row per target user with its top-10
# recommended items written as a space-separated string.
user_id = test_users['user_id']

# One recommend() call per target user; each result is kept as a numpy array.
recommendations = [np.array(recommender.recommend(user, cutoff = 10)) for user in user_id]

# Join the item ids explicitly instead of stripping brackets from str(array):
# numpy's array repr pads elements to a common width and depends on the print
# options (line wrapping, truncation), which gives irregular whitespace.
# The list is assigned directly (positionally) rather than through a
# temporary single-column DataFrame.
test_users['item_list'] = [' '.join(str(item) for item in items) for items in recommendations]
test_users.to_csv('submissionRp3.csv', index=False)