# ItemKNNCF hyperparameter tuning

### Import libraries

In [12]:
import pandas as pd
import numpy as np
from scipy import sparse
import os
import csv

from utilities import load_sparse_matrix

### Load dataset

In [13]:
# Load the six .npz matrices used by this notebook.
# Tuple unpacking consumes the generator left to right, so the files are read
# in exactly the order in which their target names are listed.
(
    ICM,
    URM,
    URM_test,
    URM_train,
    URM_validation,
    URM_train_validation,
) = (
    load_sparse_matrix(npz_file)
    for npz_file in (
        "icm.npz",
        "urm.npz",
        "urm_test.npz",
        "urm_train.npz",
        "urm_val.npz",
        "urm_train_val.npz",
    )
)

Loaded icm.npz
Loaded urm.npz
Loaded urm_test.npz
Loaded urm_train.npz
Loaded urm_val.npz
Loaded urm_train_val.npz


In [22]:
from src.Evaluation.Evaluator import EvaluatorHoldout

# One hold-out evaluator per split, each reporting metrics at cutoff 10.
# Construction order is unchanged: validation first, then test.
evaluator_val, evaluator_test = (
    EvaluatorHoldout(urm_split, cutoff_list=[10])
    for urm_split in (URM_validation, URM_test)
)

EvaluatorHoldout: Ignoring 759 ( 1.8%) Users that have less than 1 test interactions
EvaluatorHoldout: Ignoring 322 ( 0.8%) Users that have less than 1 test interactions


In [23]:
from skopt.space import Categorical, Integer, Real

# Search space explored by the Bayesian search: two integer ranges plus two
# categorical choices ("similarity" is pinned to a single value, "cosine").
hyperparameters_range_dictionary = dict(
    topK=Integer(5, 1000),
    shrink=Integer(0, 1000),
    similarity=Categorical(["cosine"]),
    normalize=Categorical([True, False]),
)

In [24]:
import os

# Folder handed to the search as `output_folder_path` (results and saved models).
output_folder_path = "result_experiments/"

# exist_ok=True makes the cell idempotent and removes the check-then-create
# race of a separate os.path.exists() test.
os.makedirs(output_folder_path, exist_ok=True)

n_cases = 100  # passed to the search as `n_cases`
n_random_starts = int(n_cases * 0.3)  # 30% of n_cases, passed as `n_random_starts`
metric_to_optimize = "MAP"
cutoff_to_optimize = 10  # same cutoff the evaluators use (cutoff_list=[10])

In [25]:
from src.Evaluation.Evaluator import EvaluatorHoldout

# Build the evaluators at the cutoff that the search optimises instead of
# repeating the literal 10: `cutoff_to_optimize` is set in the configuration
# cell above, so the evaluated cutoff and the optimised cutoff cannot drift.
evaluator_validation = EvaluatorHoldout(URM_validation, cutoff_list=[cutoff_to_optimize])
evaluator_test = EvaluatorHoldout(URM_test, cutoff_list=[cutoff_to_optimize])

EvaluatorHoldout: Ignoring 759 ( 1.8%) Users that have less than 1 test interactions
EvaluatorHoldout: Ignoring 322 ( 0.8%) Users that have less than 1 test interactions


In [26]:
from src.HyperparameterTuning.SearchBayesianSkopt import SearchBayesianSkopt
from src.Recommenders.KNN.ItemKNNCFRecommender import ItemKNNCFRecommender

# Model whose hyperparameters are tuned; also reused later to name output files.
recommender_class = ItemKNNCFRecommender

# The search object receives both hold-out evaluators built earlier.
hyperparameterSearch = SearchBayesianSkopt(
    recommender_class,
    evaluator_validation=evaluator_validation,
    evaluator_test=evaluator_test,
)

In [27]:
from src.HyperparameterTuning.SearchAbstractClass import SearchInputRecommenderArgs


def _build_search_args(urm):
    """Return a SearchInputRecommenderArgs whose only constructor argument is `urm`.

    Every other argument group (constructor keywords, fit positional/keyword
    arguments, early-stopping keywords) is left empty. Fresh containers are
    created on each call, so the returned objects share no mutable state.
    For a CBF model the positional list would be [urm, ICM_train] instead.
    """
    return SearchInputRecommenderArgs(
        CONSTRUCTOR_POSITIONAL_ARGS=[urm],
        CONSTRUCTOR_KEYWORD_ARGS={},
        FIT_POSITIONAL_ARGS=[],
        FIT_KEYWORD_ARGS={},
        EARLYSTOPPING_KEYWORD_ARGS={},
    )


# Arguments built on the training split.
recommender_input_args = _build_search_args(URM_train)

# Arguments built on train + validation, passed to the search as
# `recommender_input_args_last_test`.
recommender_input_args_last_test = _build_search_args(URM_train_validation)

In [33]:
import os

# Folder handed to the search as `output_folder_path` (results and saved models).
output_folder_path = "result_experiments/"

# exist_ok=True makes the cell idempotent and removes the check-then-create
# race of a separate os.path.exists() test.
os.makedirs(output_folder_path, exist_ok=True)

n_cases = 100  # passed to the search as `n_cases`
n_random_starts = int(n_cases * 0.3)  # 30% of n_cases, passed as `n_random_starts`
metric_to_optimize = "MAP"

# Both evaluators are created with cutoff_list=[10], so 10 is the only cutoff
# for which metrics are available. The previous value (100) named a cutoff the
# evaluators never compute, which would break a top-to-bottom re-run.
cutoff_to_optimize = 10

In [31]:
# Launch the hyperparameter search with the configuration defined above.
hyperparameterSearch.search(
    recommender_input_args,
    recommender_input_args_last_test=recommender_input_args_last_test,
    # search space and budget
    hyperparameter_search_space=hyperparameters_range_dictionary,
    n_cases=n_cases,
    n_random_starts=n_random_starts,
    # objective
    metric_to_optimize=metric_to_optimize,
    cutoff_to_optimize=cutoff_to_optimize,
    # persistence: where the results go and how the files are named
    save_model="last",
    output_folder_path=output_folder_path,
    output_file_name_root=recommender_class.RECOMMENDER_NAME,
)

Iteration No: 1 started. Evaluating function at random point.
SearchBayesianSkopt: Testing config: {'topK': 793, 'shrink': 762, 'similarity': 'cosine', 'normalize': True}
ItemKNNCFRecommender: URM Detected 3461 (12.4%) items with no interactions.
Unable to load Cython Compute_Similarity, reverting to Python
Similarity column 27968 (100.0%), 225.18 column/sec. Elapsed time 2.07 min
EvaluatorHoldout: Processed 40870 (100.0%) in 1.34 min. Users per second: 507
SearchBayesianSkopt: New best config found. Config 0: {'topK': 793, 'shrink': 762, 'similarity': 'cosine', 'normalize': True} - results: PRECISION: 0.0379912, PRECISION_RECALL_MIN_DEN: 0.0617753, RECALL: 0.0574406, MAP: 0.0169276, MAP_MIN_DEN: 0.0272486, MRR: 0.1261585, NDCG: 0.0584841, F1: 0.0457340, HIT_RATE: 0.2813066, ARHR_ALL_HITS: 0.1455158, NOVELTY: 0.0033503, AVERAGE_POPULARITY: 0.3440567, DIVERSITY_MEAN_INTER_LIST: 0.7557499, DIVERSITY_HERFINDAHL: 0.9755731, COVERAGE_ITEM: 0.1612557, COVERAGE_ITEM_HIT: 0.0165546, ITEMS_IN_G

### Save recommendations

In [32]:
from utilities import save_recommendations

# Instantiate the recommender on the full URM, then restore the stored model
# from the results folder (presumably the file produced by the search run with
# save_model="last" - confirm) before exporting the recommendations.
rec = ItemKNNCFRecommender(URM)
rec.load_model(
    output_folder_path,
    file_name=f"{rec.RECOMMENDER_NAME}_best_model_last.zip",
)
save_recommendations(rec)

ItemKNNCFRecommender: URM Detected 3461 (12.4%) items with no interactions.
ItemKNNCFRecommender: Loading model from file 'result_experiments/ItemKNNCFRecommender_best_model_last.zip'
ItemKNNCFRecommender: Loading complete
Saving recommendations finished!                                                                    
Saving recommendations    41600 of    41116