# Hybrid

In [1]:
## Allow more than one output for a single code cell:
## with 'all', every bare expression statement in a cell is displayed,
## not only the last one.
from IPython.core.interactiveshell import InteractiveShell
InteractiveShell.ast_node_interactivity = 'all'

In [2]:
import pandas as pd
import scipy.sparse as sps
import numpy as np
import os

from skopt.space import Real, Integer, Categorical

## Fix numpy's global (legacy) random state so repeated runs draw the same numbers
SEED = 42
np.random.seed(seed=SEED)

In [3]:
from Data_manager.split_functions.split_train_validation_random_holdout import split_train_in_two_percentage_global_sample

from Evaluation.Evaluator import EvaluatorHoldout

from Recommenders.Recommender_import_list import *

from Recommenders.DataIO import DataIO

Tensorflow is not available


In [4]:
## Utility Functions
from Dataset.load_data import load_data
from Dataset.write_submission import write_submission
from Dataset.load_test_user_array import load_test_user_array

In [5]:
# Load the full interaction matrix (URM_all) and the item-feature matrices.
# ICM_dict is indexed by string keys further down (e.g. 'ICM_subgenre', 'ICM_channel').
URM_all, ICM_dict = load_data()

In [6]:
# Split the interactions into a training part (train_percentage = 0.80) and a validation part.
URM_train, URM_validation = split_train_in_two_percentage_global_sample(
    URM_all,
    train_percentage=0.80,
)

# Augmented training matrix: the training URM with the transposed 'ICM_subgenre'
# and 'ICM_channel' matrices stacked underneath it, stored as CSR.
# The 'genre' and 'event' ICMs are intentionally left out of the stack.
URM_aug_train = sps.vstack(
    [URM_train.copy().tocoo()]
    + [ICM_dict[icm_key].T.tocoo() for icm_key in ('ICM_subgenre', 'ICM_channel')],
    format='csr',
)




In [7]:
# Evaluator built on the validation split; metrics are computed at cutoff 10.
evaluator_validation = EvaluatorHoldout(
    URM_validation,
    cutoff_list=[10],
    exclude_seen=True,
)

EvaluatorHoldout: Ignoring 13646 ( 0.0%) Users that have less than 1 test interactions


In [8]:
# Load the array of test user IDs.
# NOTE(review): not used by any cell visible here — presumably needed when writing a submission; confirm.
test_UserID_array = load_test_user_array()

In [9]:
# Inspect the outcome of a previously saved hyperparameter search.
# NOTE(review): the folder name refers to "RP3beta_TopPop" while the metadata file
# refers to "RP3betaRecommender_UserKNNCFRecommender" — confirm they belong together.
output_folder_path = "result_experiments/ScoresHybrid_RP3beta_TopPop/"

data_loader = DataIO(folder_path = output_folder_path)
search_metadata = data_loader.load_data("RP3betaRecommender_UserKNNCFRecommender_metadata.zip")

#search_metadata.keys()

hyperparameters_df = search_metadata["hyperparameters_df"]
result_on_validation_df = search_metadata["result_on_validation_df"]

# Attach the validation MAP to each hyperparameter configuration (matched by position).
hyperparameters_df['MAP'] = result_on_validation_df['MAP'].values.tolist()

# Only configurations whose MAP exceeds this value are listed below.
threshold = 0.23

hyperparameters_df[hyperparameters_df['MAP'] > threshold]

# Series.max() ignores NaN entries, whereas the built-in max() gives order-dependent
# results when NaN is present. Computing it once also avoids scanning the column twice.
best_map = hyperparameters_df['MAP'].max()
print('Best MAP: ', best_map)
hyperparameters_df[hyperparameters_df['MAP'] == best_map]

Unnamed: 0,alpha,MAP
0,0.570595,0.233668
1,0.153305,0.231442
2,0.85582,0.23601
3,0.264555,0.232061
4,0.424738,0.232865
5,0.407697,0.232735
6,0.433571,0.232921
7,0.539257,0.233495
8,0.449138,0.232954
9,0.163723,0.231507


Best MAP:  0.23638558249524907


Unnamed: 0,alpha,MAP
21,0.899907,0.236386


In [10]:
# Folder handed to the hyperparameter search below as its output_folder_path.
output_folder_path = "result_experiments/ScoresHybrid_RP3beta_SLIMElasticNet/"

# Create the directory if it does not exist; exist_ok avoids the
# check-then-create race of a separate os.path.exists() test.
os.makedirs(output_folder_path, exist_ok=True)

n_cases = 50  # 50 with 30% random is a good number
n_random_starts = int(n_cases*0.3)  # 30% of the cases are random starts
metric_to_optimize = "MAP"
cutoff_to_optimize = 10

In [11]:
from functools import partial
import multiprocessing
import os

from HyperparameterTuning.run_hyperparameter_search_ScoresHybrid import runHyperparameterSearch_ScoresHybrid

print('Fitting ...')

# --- Base recommenders, fitted with fixed hyperparameters -------------------
# RP3beta and SLIMElasticNet are trained on the feature-augmented matrix,
# TopPop and UserKNNCF on the plain training URM.
RP3beta = RP3betaRecommender(URM_aug_train)
RP3beta.fit(
    topK=51,
    alpha=0.620106,
    beta=0.566617,
    normalize_similarity=True,
)

# NOTE(review): Top and UserKNNCF are fitted here but are not passed to the
# search call at the end of this cell.
Top = TopPop(URM_train)
Top.fit()

UserKNNCF = UserKNNCFRecommender(URM_train)
UserKNNCF.fit(
    topK=777,
    shrink=0,
    similarity='tversky',
    normalize=True,
    tversky_alpha=2.0,
    tversky_beta=1.2531451455933782,
)

SLIMElasticNet = MultiThreadSLIM_SLIMElasticNetRecommender(URM_aug_train)
SLIMElasticNet.fit(
    topK=673,
    l1_ratio=0.001155,
    alpha=0.073102,
)

print('Optimize parameters ...')

# --- Hyperparameter search for the RP3beta + SLIMElasticNet score hybrid ----
# The same validation evaluator is used both for early stopping and for validation.
search_settings = dict(
    URM_train=URM_aug_train,
    ICM_object=None,
    ICM_name='',
    URM_train_last_test=None,
    metric_to_optimize=metric_to_optimize,
    cutoff_to_optimize=cutoff_to_optimize,
    n_cases=n_cases,
    n_random_starts=n_random_starts,
    evaluator_validation_earlystopping=evaluator_validation,
    evaluator_validation=evaluator_validation,
    evaluator_test=None,
    output_folder_path=output_folder_path,
    resume_from_saved=False,
    similarity_type_list=None,
    parallelizeKNN=True,
)
runHyperparameterSearch_ScoresHybrid([RP3beta, SLIMElasticNet], **search_settings)

Fitting ...
Similarity column 13650 (100.0%), 398.73 column/sec. Elapsed time 34.23 sec


  1%|▏         | 232/18059 [00:59<1:10:48,  4.20it/s]

KeyboardInterrupt: 