# MF BPR Cython

## Import

In [1]:
## Allow more than one output for a single code cell:
## with 'all', IPython displays the value of every expression statement in a
## cell instead of only the last one.
from IPython.core.interactiveshell import InteractiveShell
InteractiveShell.ast_node_interactivity = 'all'

In [2]:
import pandas as pd
import scipy.sparse as sps
import numpy as np
import os

from skopt.space import Real, Integer, Categorical

## Set the numpy random seed so that code drawing from NumPy's global random
## state is repeatable across kernel restarts
SEED = 42
np.random.seed(SEED)

## Display the current working directory; the relative paths used later in the
## notebook (e.g. the results folder) are resolved against it
os.getcwd()

'/home/jupyter/RecSysChallenge2021'

In [3]:
from Data_manager.split_functions.split_train_validation_random_holdout import split_train_in_two_percentage_global_sample

from Evaluation.Evaluator import EvaluatorHoldout

from Recommenders.Recommender_import_list import *

from Recommenders.DataIO import DataIO

In [4]:
## Utility Functions
from Dataset.load_data import load_data
from Dataset.write_submission import write_submission
from Dataset.load_test_user_array import load_test_user_array

## Data Loading and Split

In [5]:
## load_data() returns two values: URM_all, which is split into train and
## validation parts below, and ICM_dict, which is indexed by ICM name below
## (e.g. 'ICM_subgenre', 'ICM_channel')
URM_all, ICM_dict = load_data()

In [6]:
## Split the interactions into a training part (train_percentage = 0.80) and
## a validation part
URM_train, URM_validation = split_train_in_two_percentage_global_sample(
    URM_all,
    train_percentage=0.80,
)

## Augmented training matrix: the training interactions with the transposed
## ICMs stacked underneath as extra rows. Comment/uncomment a name to change
## which ICMs are stacked.
URM_aug_train = sps.vstack(
    [URM_train.copy().tocoo()]
    + [
        ICM_dict[icm_name].T.tocoo()
        for icm_name in (
            #'ICM_genre',
            'ICM_subgenre',
            #'ICM_event',
            'ICM_channel',
        )
    ],
    format='csr',
)

## Holdout evaluator built on the validation split, with a single cutoff of 10
evaluator_validation = EvaluatorHoldout(
    URM_validation,
    cutoff_list=[10],
    exclude_seen=True,
)

EvaluatorHoldout: Ignoring 13646 ( 0.0%) Users that have less than 1 test interactions


In [7]:
## Array returned by load_test_user_array() — presumably the IDs of the users
## the final submission must cover; confirm against the helper
test_UserID_array = load_test_user_array()

In [None]:
## Combined ICM: the subgenre and channel ICMs placed side by side,
## converted to CSR
ICM_train = sps.hstack(
    [ICM_dict[icm_name] for icm_name in ('ICM_subgenre', 'ICM_channel')]
).tocsr()

## Optimization

In [8]:
## Folder passed to the hyperparameter search as its output location
output_folder_path = "result_experiments/MF_BPR_AUG_subgenre_channel/"

## Create the folder (and any missing parents). exist_ok=True replaces the
## separate exists-check, so there is no window between checking and creating,
## and re-running the cell is harmless. If the path exists but is not a
## directory this now raises instead of being silently skipped.
os.makedirs(output_folder_path, exist_ok=True)

n_cases = 50  # 50 with 30% random is a good number
n_random_starts = int(n_cases*0.3)  # 30% of the cases are random starts
metric_to_optimize = "MAP"
cutoff_to_optimize = 10

In [None]:
from functools import partial
import os, multiprocessing

from HyperparameterTuning.run_hyperparameter_search import runHyperparameterSearch_Collaborative

## Run the hyperparameter search for MatrixFactorization_BPR_Cython on the
## augmented training matrix
runHyperparameterSearch_Collaborative(
    MatrixFactorization_BPR_Cython,
    ## Training data: the augmented matrix, no "last test" training matrix
    URM_train=URM_aug_train,
    URM_train_last_test=None,
    ## The same validation evaluator is used for early stopping and for
    ## scoring each configuration; no test evaluator is supplied
    evaluator_validation_earlystopping=evaluator_validation,
    evaluator_validation=evaluator_validation,
    evaluator_test=None,
    ## Objective and search budget
    metric_to_optimize=metric_to_optimize,
    cutoff_to_optimize=cutoff_to_optimize,
    n_cases=n_cases,
    n_random_starts=n_random_starts,
    ## Output location; try to resume a previously saved search
    output_folder_path=output_folder_path,
    resume_from_saved=True,
    ## Presumably only relevant to KNN recommenders — confirm in
    ## run_hyperparameter_search
    similarity_type_list=None,
    parallelizeKNN=True,
)

SearchBayesianSkopt: Resuming 'MatrixFactorization_BPR_Cython_Recommender' Failed, no such file exists.

Iteration No: 1 started. Evaluating function at random point.
SearchBayesianSkopt: Testing config: {'sgd_mode': 'adagrad', 'epochs': 1500, 'num_factors': 120, 'batch_size': 128, 'positive_reg': 0.0004178399945270588, 'negative_reg': 3.777142663380795e-05, 'learning_rate': 0.004902492102938085}
MF_BPR: Processed 14080 (100.0%) in 0.67 sec. MSE loss 2.45E-02. Sample per second: 21135
MF_BPR: Epoch 1 of 1500. Elapsed time 0.39 sec
MF_BPR: Processed 14080 (100.0%) in 0.99 sec. MSE loss 2.43E-02. Sample per second: 14274
MF_BPR: Epoch 2 of 1500. Elapsed time 0.71 sec
MF_BPR: Processed 14080 (100.0%) in 1.31 sec. MSE loss 2.41E-02. Sample per second: 10707
MF_BPR: Epoch 3 of 1500. Elapsed time 1.04 sec
MF_BPR: Processed 14080 (100.0%) in 0.64 sec. MSE loss 2.46E-02. Sample per second: 22147
MF_BPR: Epoch 4 of 1500. Elapsed time 1.36 sec
MF_BPR: Processed 14080 (100.0%) in 0.95 sec. MSE lo