# Pure SVD

## Import

In [1]:
## Allow more than one output for a single code cell.
## With 'all', every bare expression statement in a cell is displayed, not only the last one.
## The result-inspection cells further down rely on this: they show a filtered DataFrame
## in the middle of the cell and another one at the end.
from IPython.core.interactiveshell import InteractiveShell
InteractiveShell.ast_node_interactivity = 'all'

In [2]:
import pandas as pd
import scipy.sparse as sps
import numpy as np
import os

from skopt.space import Real, Integer, Categorical

## Set the numpy random seed (this seeds NumPy's legacy global random state).
## NOTE(review): presumably this is what makes the random train/validation split
## below repeatable -- confirm that the split helper draws from np.random.
SEED = 42
np.random.seed(SEED)

## Display the current working directory. The relative "result_experiments/..."
## folders created later in this notebook are resolved against it.
os.getcwd()

'/home/jupyter/RecSysChallenge2021'

In [3]:
from Data_manager.split_functions.split_train_validation_random_holdout import split_train_in_two_percentage_global_sample

from Evaluation.Evaluator import EvaluatorHoldout

from Recommenders.Recommender_import_list import *

from Recommenders.DataIO import DataIO

In [4]:
## Utility Functions
from Dataset.load_data import load_data
from Dataset.write_submission import write_submission
from Dataset.load_test_user_array import load_test_user_array

## Data Loading and Split

In [5]:
# Load the dataset through the project helper. The result unpacks into two objects:
# URM_all, which is split into train/validation below, and ICM_dict, which is indexed
# by name below (e.g. 'ICM_subgenre', 'ICM_channel').
# NOTE(review): presumably URM_all is the user-item interaction matrix and each ICM_dict
# entry an item-feature sparse matrix -- confirm against Dataset.load_data.
URM_all, ICM_dict = load_data()

In [6]:
# Random hold-out split of URM_all with train_percentage = 0.80.
URM_train, URM_validation = split_train_in_two_percentage_global_sample(
    URM_all,
    train_percentage = 0.80,
)

# Augmented training matrix: the transposed subgenre and channel ICMs are stacked
# row-wise below the training URM, so all blocks must share the same column count.
# The 'ICM_genre' and 'ICM_event' blocks are currently disabled (not stacked).
URM_aug_train = sps.vstack(
    [
        URM_train.copy().tocoo(),
        ICM_dict['ICM_subgenre'].transpose().tocoo(),
        ICM_dict['ICM_channel'].transpose().tocoo(),
    ],
    format='csr',
)

# Validation evaluator at cutoff 10; exclude_seen = True is passed to the evaluator.
evaluator_validation = EvaluatorHoldout(
    URM_validation,
    cutoff_list=[10],
    exclude_seen = True,
)

EvaluatorHoldout: Ignoring 13646 ( 0.0%) Users that have less than 1 test interactions


In [7]:
# Load the test user array through the project helper
# (presumably the user IDs a submission must cover -- confirm against the helper).
# NOTE(review): this variable is not referenced by any later cell visible in this notebook.
test_UserID_array = load_test_user_array()

## Optimization

In [10]:
# Inspect the saved hyperparameter-search metadata for PureSVD on the augmented URM:
#   1. display every configuration whose validation MAP exceeds `threshold`,
#   2. print the span of `num_factors` among those configurations,
#   3. print the best MAP and display the configuration(s) that reach it.
output_folder_path = "result_experiments/PureSVD_AUG_subgenre_channel/"

data_loader = DataIO(folder_path = output_folder_path)
search_metadata = data_loader.load_data(PureSVDRecommender.RECOMMENDER_NAME + "_metadata.zip")

hyperparameters_df = search_metadata["hyperparameters_df"]
result_on_validation_df = search_metadata["result_on_validation_df"]

# Attach the validation MAP positionally (via a plain list), ignoring the result index.
hyperparameters_df['MAP'] = result_on_validation_df['MAP'].values.tolist()

threshold = 0.23

# Filter once and reuse the result instead of recomputing the mask for every statistic.
above_threshold_df = hyperparameters_df[hyperparameters_df['MAP'] > threshold]

# Series.max()/.min() skip NaN, whereas the builtin max()/min() give an order-dependent
# answer when NaN is present (e.g. rows left empty by failed configurations -- presumably
# possible, confirm against the search metadata format).
best_map = hyperparameters_df['MAP'].max()

above_threshold_df
if above_threshold_df.empty:
    # The builtin max()/min() would raise ValueError on an empty selection.
    print('No configuration has MAP above the threshold: ', threshold)
else:
    print('Max value for the range: ', above_threshold_df['num_factors'].max())
    print('Min value for the range: ', above_threshold_df['num_factors'].min())
print('Best MAP: ', best_map)
hyperparameters_df[hyperparameters_df['MAP'] == best_map]

Unnamed: 0,num_factors,MAP
15,34,0.231957
18,25,0.232404
22,21,0.231231
26,29,0.23236
28,27,0.231184
29,31,0.231435
30,38,0.230926
32,23,0.231512
34,36,0.230165
40,33,0.231404


Max value for the range:  38
Min value for the range:  21
Best MAP:  0.23260277155466252


Unnamed: 0,num_factors,MAP
43,24,0.232603
44,24,0.232603
45,24,0.232603
47,24,0.232603
48,24,0.232603
49,24,0.232603


In [10]:
# Inspect the saved hyperparameter-search metadata for the PureSVD run stored in the
# "improved_range" folder:
#   1. display every configuration whose validation MAP exceeds `threshold`,
#   2. print the span of `num_factors` among those configurations,
#   3. print the best MAP and display the configuration(s) that reach it.
output_folder_path = "result_experiments/PureSVD_AUG_subgenre_channel_improved_range/"

data_loader = DataIO(folder_path = output_folder_path)
search_metadata = data_loader.load_data(PureSVDRecommender.RECOMMENDER_NAME + "_metadata.zip")

hyperparameters_df = search_metadata["hyperparameters_df"]
result_on_validation_df = search_metadata["result_on_validation_df"]

# Attach the validation MAP positionally (via a plain list), ignoring the result index.
hyperparameters_df['MAP'] = result_on_validation_df['MAP'].values.tolist()

threshold = 0.23

# Filter once and reuse the result instead of recomputing the mask for every statistic.
above_threshold_df = hyperparameters_df[hyperparameters_df['MAP'] > threshold]

# Series.max()/.min() skip NaN, whereas the builtin max()/min() give an order-dependent
# answer when NaN is present (e.g. rows left empty by failed configurations -- presumably
# possible, confirm against the search metadata format).
best_map = hyperparameters_df['MAP'].max()

above_threshold_df
if above_threshold_df.empty:
    # The builtin max()/min() would raise ValueError on an empty selection.
    print('No configuration has MAP above the threshold: ', threshold)
else:
    print('Max value for the range: ', above_threshold_df['num_factors'].max())
    print('Min value for the range: ', above_threshold_df['num_factors'].min())
print('Best MAP: ', best_map)
hyperparameters_df[hyperparameters_df['MAP'] == best_map]

Unnamed: 0,num_factors,MAP
1,28,0.232788
2,26,0.232011
4,27,0.232152
5,37,0.230875
6,39,0.231174
8,35,0.231914
10,34,0.231389
11,24,0.232529
12,29,0.232367
14,21,0.231277


Max value for the range:  39
Min value for the range:  20
Best MAP:  0.23278767951071105


Unnamed: 0,num_factors,MAP
1,28,0.232788
22,28,0.232788
23,28,0.232788
30,28,0.232788
37,28,0.232788
38,28,0.232788
45,28,0.232788
49,28,0.232788


In [8]:
# Folder handed to the hyperparameter search below as its output location
# (the search is started with resume_from_saved = True, so it also resumes from here).
output_folder_path = "result_experiments/PureSVD_AUG_subgenre_channel_improved_range/"

# Create the folder and any missing parents. exist_ok=True replaces the separate
# exists-check, which could race with another process creating the same folder.
os.makedirs(output_folder_path, exist_ok=True)

# Search budget and objective.
n_cases = 50  # 50 with 30% random is a good number
n_random_starts = int(n_cases*0.3)
metric_to_optimize = "MAP"
cutoff_to_optimize = 10

In [None]:
from functools import partial
import os, multiprocessing

from HyperparameterTuning.run_hyperparameter_search import runHyperparameterSearch_Collaborative

# Hyperparameter search for PureSVDRecommender, trained on the augmented URM and
# scored with the hold-out validation evaluator. No test evaluator and no
# "last test" training matrix are supplied.
runHyperparameterSearch_Collaborative(
    PureSVDRecommender,
    # --- training data ---
    URM_train=URM_aug_train,
    URM_train_last_test=None,
    # --- evaluation (the same evaluator is used for early stopping and validation) ---
    evaluator_validation=evaluator_validation,
    evaluator_validation_earlystopping=evaluator_validation,
    evaluator_test=None,
    # --- objective and budget ---
    metric_to_optimize=metric_to_optimize,
    cutoff_to_optimize=cutoff_to_optimize,
    n_cases=n_cases,
    n_random_starts=n_random_starts,
    # --- persistence: results go to output_folder_path; resume if a saved run exists ---
    output_folder_path=output_folder_path,
    resume_from_saved=True,
    # --- presumably only relevant to KNN recommenders (TODO confirm) ---
    similarity_type_list=None,
    parallelizeKNN=True,
)

In [8]:
# Folder handed to the PureSVDItem hyperparameter search below as its output location
# (the search is started with resume_from_saved = True, so it also resumes from here).
output_folder_path = "result_experiments/PureSVD_Item_AUG_subgenre_channel/"

# Create the folder and any missing parents. exist_ok=True replaces the separate
# exists-check, which could race with another process creating the same folder.
os.makedirs(output_folder_path, exist_ok=True)

# Search budget and objective.
n_cases = 50  # 50 with 30% random is a good number
n_random_starts = int(n_cases*0.3)
metric_to_optimize = "MAP"
cutoff_to_optimize = 10

In [9]:
from functools import partial
import os, multiprocessing

from HyperparameterTuning.run_hyperparameter_search import runHyperparameterSearch_Collaborative
from Recommenders.MatrixFactorization.PureSVDRecommender import PureSVDRecommender, PureSVDItemRecommender

# Hyperparameter search for PureSVDItemRecommender, trained on the augmented URM and
# scored with the hold-out validation evaluator. No test evaluator and no
# "last test" training matrix are supplied.
runHyperparameterSearch_Collaborative(
    PureSVDItemRecommender,
    # --- training data ---
    URM_train=URM_aug_train,
    URM_train_last_test=None,
    # --- evaluation (the same evaluator is used for early stopping and validation) ---
    evaluator_validation=evaluator_validation,
    evaluator_validation_earlystopping=evaluator_validation,
    evaluator_test=None,
    # --- objective and budget ---
    metric_to_optimize=metric_to_optimize,
    cutoff_to_optimize=cutoff_to_optimize,
    n_cases=n_cases,
    n_random_starts=n_random_starts,
    # --- persistence: results go to output_folder_path; resume if a saved run exists ---
    output_folder_path=output_folder_path,
    resume_from_saved=True,
    # --- presumably only relevant to KNN recommenders (TODO confirm) ---
    similarity_type_list=None,
    parallelizeKNN=True,
)

SearchBayesianSkopt: Resuming 'PureSVDItemRecommender' Failed, no such file exists.

Iteration No: 1 started. Evaluating function at random point.
SearchBayesianSkopt: Testing config: {'num_factors': 215, 'topK': 87}
PureSVDItemRecommender: Computing SVD decomposition...
PureSVDItemRecommender: Computing SVD decomposition... Done!
EvaluatorHoldout: Processed 13646 (100.0%) in 28.17 sec. Users per second: 484
SearchBayesianSkopt: New best config found. Config 0: {'num_factors': 215, 'topK': 87} - results: PRECISION: 0.2727906, PRECISION_RECALL_MIN_DEN: 0.2736618, RECALL: 0.0469139, MAP: 0.1525421, MAP_MIN_DEN: 0.1529007, MRR: 0.5432545, NDCG: 0.2911290, F1: 0.0800593, HIT_RATE: 0.9150667, ARHR_ALL_HITS: 0.9000972, NOVELTY: 0.0058064, AVERAGE_POPULARITY: 0.3487272, DIVERSITY_MEAN_INTER_LIST: 0.9481612, DIVERSITY_HERFINDAHL: 0.9948092, COVERAGE_ITEM: 0.0566477, COVERAGE_ITEM_CORRECT: 0.0402569, COVERAGE_USER: 0.9997070, COVERAGE_USER_CORRECT: 0.9147985, DIVERSITY_GINI: 0.0114977, SHANNON_

In [15]:
# Folder handed to the NMF hyperparameter search below as its output location
# (the search is started with resume_from_saved = True, so it also resumes from here).
output_folder_path = "result_experiments/NMF_AUG_subgenre_channel/"

# Create the folder and any missing parents. exist_ok=True replaces the separate
# exists-check, which could race with another process creating the same folder.
os.makedirs(output_folder_path, exist_ok=True)

# Search budget and objective.
n_cases = 50  # 50 with 30% random is a good number
n_random_starts = int(n_cases*0.3)
metric_to_optimize = "MAP"
cutoff_to_optimize = 10

In [16]:
from functools import partial
import os, multiprocessing

from HyperparameterTuning.run_hyperparameter_search import runHyperparameterSearch_Collaborative

# Hyperparameter search for NMFRecommender, trained on the augmented URM and
# scored with the hold-out validation evaluator. No test evaluator and no
# "last test" training matrix are supplied.
# NOTE(review): in the recorded run, the config with solver 'coordinate_descent' and
# beta_loss 'kullback-leibler' raised inside the sklearn NMF fit; presumably sklearn only
# accepts a non-Frobenius beta_loss with the multiplicative-update solver -- confirm and
# consider constraining the search space accordingly.
runHyperparameterSearch_Collaborative(
    NMFRecommender,
    # --- training data ---
    URM_train=URM_aug_train,
    URM_train_last_test=None,
    # --- evaluation (the same evaluator is used for early stopping and validation) ---
    evaluator_validation=evaluator_validation,
    evaluator_validation_earlystopping=evaluator_validation,
    evaluator_test=None,
    # --- objective and budget ---
    metric_to_optimize=metric_to_optimize,
    cutoff_to_optimize=cutoff_to_optimize,
    n_cases=n_cases,
    n_random_starts=n_random_starts,
    # --- persistence: results go to output_folder_path; resume if a saved run exists ---
    output_folder_path=output_folder_path,
    resume_from_saved=True,
    # --- presumably only relevant to KNN recommenders (TODO confirm) ---
    similarity_type_list=None,
    parallelizeKNN=True,
)

SearchBayesianSkopt: Resuming 'NMFRecommender' Failed, no such file exists.

Iteration No: 1 started. Evaluating function at random point.
SearchBayesianSkopt: Testing config: {'num_factors': 205, 'solver': 'coordinate_descent', 'init_type': 'nndsvda', 'beta_loss': 'kullback-leibler'}
NMFRecommender: Computing NMF decomposition...
SearchBayesianSkopt: Config 0 Exception. Config: {'num_factors': 205, 'solver': 'coordinate_descent', 'init_type': 'nndsvda', 'beta_loss': 'kullback-leibler'} - Exception: Traceback (most recent call last):
  File "/home/jupyter/RecSysChallenge2021/HyperparameterTuning/SearchAbstractClass.py", line 464, in _objective_function
    result_df, recommender_instance = self._evaluate_on_validation(current_fit_hyperparameters_dict, was_already_evaluated_flag, was_already_evaluated_index)
  File "/home/jupyter/RecSysChallenge2021/HyperparameterTuning/SearchAbstractClass.py", line 332, in _evaluate_on_validation
    recommender_instance, train_time = self._fit_model(c

Traceback (most recent call last):
  File "/home/jupyter/RecSysChallenge2021/HyperparameterTuning/SearchAbstractClass.py", line 464, in _objective_function
    result_df, recommender_instance = self._evaluate_on_validation(current_fit_hyperparameters_dict, was_already_evaluated_flag, was_already_evaluated_index)
  File "/home/jupyter/RecSysChallenge2021/HyperparameterTuning/SearchAbstractClass.py", line 332, in _evaluate_on_validation
    recommender_instance, train_time = self._fit_model(current_fit_hyperparameters)
  File "/home/jupyter/RecSysChallenge2021/HyperparameterTuning/SearchAbstractClass.py", line 304, in _fit_model
    recommender_instance.fit(*self.recommender_input_args.FIT_POSITIONAL_ARGS,
  File "/home/jupyter/RecSysChallenge2021/Recommenders/MatrixFactorization/NMFRecommender.py", line 70, in fit
    nmf_solver.fit(self.URM_train)
  File "/opt/conda/envs/RecSysFramework/lib/python3.8/site-packages/sklearn/decomposition/_nmf.py", line 1342, in fit
    self.fit_transform

Iteration No: 1 ended. Evaluation done at random point.
Time taken: 0.1894
Function value obtained: 65504.0000
Current minimum: 65504.0000
Iteration No: 2 started. Evaluating function at random point.
SearchBayesianSkopt: Testing config: {'num_factors': 135, 'solver': 'multiplicative_update', 'init_type': 'nndsvda', 'beta_loss': 'frobenius'}
NMFRecommender: Computing NMF decomposition...
NMFRecommender: Computing NMF decomposition... done in 7.97 min
EvaluatorHoldout: Processed 13646 (100.0%) in 36.81 sec. Users per second: 371
SearchBayesianSkopt: New best config found. Config 1: {'num_factors': 135, 'solver': 'multiplicative_update', 'init_type': 'nndsvda', 'beta_loss': 'frobenius'} - results: PRECISION: 0.3111241, PRECISION_RECALL_MIN_DEN: 0.3119430, RECALL: 0.0513443, MAP: 0.1794743, MAP_MIN_DEN: 0.1798385, MRR: 0.5628120, NDCG: 0.3256668, F1: 0.0881425, HIT_RATE: 0.9284772, ARHR_ALL_HITS: 0.9902430, NOVELTY: 0.0058836, AVERAGE_POPULARITY: 0.3435591, DIVERSITY_MEAN_INTER_LIST: 0.96

Traceback (most recent call last):
  File "/home/jupyter/RecSysChallenge2021/HyperparameterTuning/SearchAbstractClass.py", line 464, in _objective_function
    result_df, recommender_instance = self._evaluate_on_validation(current_fit_hyperparameters_dict, was_already_evaluated_flag, was_already_evaluated_index)
  File "/home/jupyter/RecSysChallenge2021/HyperparameterTuning/SearchAbstractClass.py", line 332, in _evaluate_on_validation
    recommender_instance, train_time = self._fit_model(current_fit_hyperparameters)
  File "/home/jupyter/RecSysChallenge2021/HyperparameterTuning/SearchAbstractClass.py", line 304, in _fit_model
    recommender_instance.fit(*self.recommender_input_args.FIT_POSITIONAL_ARGS,
  File "/home/jupyter/RecSysChallenge2021/Recommenders/MatrixFactorization/NMFRecommender.py", line 70, in fit
    nmf_solver.fit(self.URM_train)
  File "/opt/conda/envs/RecSysFramework/lib/python3.8/site-packages/sklearn/decomposition/_nmf.py", line 1342, in fit
    self.fit_transform

NMFRecommender: Computing NMF decomposition...


KeyboardInterrupt: 