In [None]:
!pip install scikit-optimize



In [None]:
# Mount Google Drive into the Colab filesystem at /content/gdrive.
# Later cells read the dataset archive and the RecSysRep code from paths under gdrive/MyDrive.
from google.colab import drive
drive.mount('/content/gdrive')


Drive already mounted at /content/gdrive; to attempt to forcibly remount, call drive.mount("/content/gdrive", force_remount=True).


In [None]:
!pip install -r /content/gdrive/MyDrive/RecSysRep/requirements.txt



In [None]:
!python /content/gdrive/MyDrive/RecSysRep/contentrun_compile_all_cython.py

python3: can't open file '/content/gdrive/MyDrive/RecSysRep/contentrun_compile_all_cython.py': [Errno 2] No such file or directory


In [None]:
import zipfile
# Unpack the challenge archive from Drive into the local 'data' folder.
# The context manager closes the archive handle as soon as extraction finishes,
# instead of leaving it open for the lifetime of the kernel.
with zipfile.ZipFile("gdrive/MyDrive/recommender-system-2021-challenge-polimi.zip") as dataFile:
    dataFile.extractall('data')

In [None]:
import sys
# Add the RecSysRep folder on Drive to the import search path.
# Presumably this is where the Evaluation, Data_manager, Recommenders and
# HyperparameterTuning packages imported by later cells live -- verify.
sys.path.append('gdrive/MyDrive/RecSysRep/')

In [None]:
import pandas as pd
import numpy as np
import scipy.sparse as sps

def _read_triplet_csv(csv_path, column_names):
    """Read a three-column CSV (int, int, float) and rename its columns to ``column_names``."""
    frame = pd.read_csv(filepath_or_buffer=csv_path,
                        sep=",",
                        dtype={0: int, 1: int, 2: float})
    frame.columns = column_names
    return frame


def _triplets_to_coo(frame, row_column, col_column, value_column):
    """Build a COO matrix from the row-id, column-id and value columns of ``frame``."""
    return sps.coo_matrix((frame[value_column].values,
                           (frame[row_column].values, frame[col_column].values)))


def getCOOs(path='/content/data/'):
    """Load the interaction matrix and the four item-feature matrices as COO matrices.

    Each CSV is read with pandas (first line treated as the header row), its three
    columns are cast to int, int and float, and a ``scipy.sparse.coo_matrix`` is built
    from them. No explicit shape is passed, so each matrix is sized by the largest
    row/column index found in its own file.

    Parameters
    ----------
    path : str, optional
        Directory that contains ``data_train.csv``, ``data_ICM_genre.csv``,
        ``data_ICM_subgenre.csv``, ``data_ICM_channel.csv`` and ``data_ICM_event.csv``.
        Defaults to ``'/content/data/'``; a trailing separator is optional.

    Returns
    -------
    tuple of scipy.sparse.coo_matrix
        ``(URM_all, ICM_genre_all, ICM_subgenre_all, ICM_channel_all, ICM_event_all)``.
    """
    import os

    # (file name, (row-id column, column-id column, value column)) in return order.
    sources = (
        ('data_train.csv', ("UserID", "ItemID", "Interaction")),
        ('data_ICM_genre.csv', ("ItemID", "GenreID", "Match")),
        ('data_ICM_subgenre.csv', ("ItemID", "SubgenreID", "Match")),
        ('data_ICM_channel.csv', ("ItemID", "ChannelID", "Match")),
        ('data_ICM_event.csv', ("ItemID", "EpisodeID", "Match")),
    )

    matrices = []
    for file_name, (row_column, col_column, value_column) in sources:
        # os.path.join tolerates a directory given with or without a trailing slash.
        frame = _read_triplet_csv(os.path.join(path, file_name),
                                  [row_column, col_column, value_column])
        matrices.append(_triplets_to_coo(frame, row_column, col_column, value_column))

    return tuple(matrices)

def getSplit(URM_all, seed = 1234, split = 0.8):
    """Randomly split the stored interactions of ``URM_all`` into train and validation.

    Every stored entry is assigned independently to the train matrix with
    probability ``split`` and to the validation matrix otherwise.

    Both outputs are built with ``shape=URM_all.shape``. Without it, scipy sizes each
    matrix from the largest index present in that subset, so train and validation
    could end up with different shapes from each other and from ``URM_all``.

    Parameters
    ----------
    URM_all : scipy.sparse matrix
        Interaction matrix; converted to COO format internally so any sparse
        format is accepted.
    seed : int, optional
        Seed passed to ``np.random.seed``. Note that this reseeds NumPy's global
        random generator.
    split : float, optional
        Probability that an interaction is assigned to the train matrix.

    Returns
    -------
    (URM_train, URM_val) : tuple of scipy.sparse.csr_matrix
        Two matrices with the same shape as ``URM_all``.
    """
    # The .row/.col attributes used below only exist in COO format.
    URM_all = sps.coo_matrix(URM_all)

    np.random.seed(seed)

    n_interactions = URM_all.nnz
    train_mask = np.random.choice([True, False], n_interactions, p=[split, 1 - split])
    val_mask = np.logical_not(train_mask)

    def _select(mask):
        # Keep the original dimensions even if the highest user/item id is not in this subset.
        return sps.csr_matrix((URM_all.data[mask],
                               (URM_all.row[mask], URM_all.col[mask])),
                              shape=URM_all.shape)

    return _select(train_mask), _select(val_mask)

# Load the user-item interaction matrix and the four item-feature matrices returned by getCOOs().
URM_all, ICM_genre_all, ICM_subgenre_all, ICM_channel_all, ICM_event_all = getCOOs()
# Disabled alternative split through getSplit.
# NOTE(review): it references `ld` and `URM_train_val`, neither of which is defined in the visible cells.
# URM_train, URM_val = ld.getSplit(URM_train_val, 5678, 0.8)

In [None]:
import os

# Folder where experiment results are written.
# Note: output_folder_path and n_cases are assigned again in a later cell before the search runs.
output_folder_path = "gdrive/MyDrive/result_experiments/"

# Create the folder if it is missing. exist_ok=True replaces the separate
# existence check and cannot fail if the folder appears between check and creation.
os.makedirs(output_folder_path, exist_ok=True)

# Search budget: total number of configurations, of which 30% are random starts.
n_cases = 10  # using 10 as an example
n_random_starts = int(n_cases*0.3)

# Metric and recommendation-list cutoff the search optimizes.
metric_to_optimize = "MAP"
cutoff_to_optimize = 10

# Matrix Factorization (FunkSVD) Model — the cells below tune `MatrixFactorization_FunkSVD_Cython`, not SLIM

In [None]:
from Evaluation.Evaluator import EvaluatorHoldout
from Data_manager.split_functions.split_train_validation_random_holdout import split_train_in_two_percentage_global_sample

# First hold-out: separate the test interactions from everything else (train_percentage = 0.8).
URM_train_validation, URM_test = split_train_in_two_percentage_global_sample(
    URM_all,
    train_percentage=0.8,
)

# Second hold-out: split the remaining interactions again into train and validation.
URM_train, URM_validation = split_train_in_two_percentage_global_sample(
    URM_train_validation,
    train_percentage=0.8,
)



In [None]:
# Evaluate at the cutoff the search optimizes. Reusing cutoff_to_optimize (set in the
# configuration cell) instead of a second literal keeps the evaluators and the
# search in agreement if the cutoff is ever changed.
evaluator_validation = EvaluatorHoldout(URM_validation, cutoff_list=[cutoff_to_optimize])
evaluator_test = EvaluatorHoldout(URM_test, cutoff_list=[cutoff_to_optimize])

In [None]:
from skopt.space import Real, Integer, Categorical

# Search space: one skopt dimension per hyperparameter name.
hyperparameters_range_dictionary = dict(
    epochs=Categorical([500]),
    num_factors=Integer(1, 200),
    sgd_mode=Categorical(["sgd", "adagrad", "adam"]),
    # Powers of two from 1 up to 1024.
    batch_size=Categorical([2 ** exponent for exponent in range(11)]),
    item_reg=Real(1e-5, 1e-2, prior='log-uniform'),
    user_reg=Real(1e-5, 1e-2, prior='log-uniform'),
    learning_rate=Real(1e-4, 1e-1, prior='log-uniform'),
)

# Early-stopping settings; these are passed as keyword arguments of fit() in later cells.
earlystopping_keywargs = dict(
    validation_every_n=5,
    stop_on_validation=True,
    evaluator_object=evaluator_validation,
    lower_validations_allowed=5,
    validation_metric=metric_to_optimize,
)

In [None]:
from Recommenders.MatrixFactorization.Cython.MatrixFactorization_Cython import MatrixFactorization_FunkSVD_Cython
from HyperparameterTuning.SearchBayesianSkopt import SearchBayesianSkopt

# Model whose hyperparameters are tuned.
recommender_class = MatrixFactorization_FunkSVD_Cython

# Bayesian search object, given one evaluator for validation and one for test.
hyperparameterSearch = SearchBayesianSkopt(
    recommender_class,
    evaluator_validation=evaluator_validation,
    evaluator_test=evaluator_test,
)

In [None]:
from HyperparameterTuning.SearchAbstractClass import SearchInputRecommenderArgs
  
# Constructor/fit arguments used while searching: the model is built on URM_train.
recommender_input_args = SearchInputRecommenderArgs(
    # For a CBF model simply put [URM_train, ICM_train]
    CONSTRUCTOR_POSITIONAL_ARGS=[URM_train],
    CONSTRUCTOR_KEYWORD_ARGS={},
    FIT_POSITIONAL_ARGS=[],
    # Additional hyperparameters for the fit function
    FIT_KEYWORD_ARGS=earlystopping_keywargs,
)

In [None]:
# Constructor/fit arguments for the final run: the model is built on URM_train_validation.
# NOTE(review): the fit arguments reuse earlystopping_keywargs, whose evaluator is
# evaluator_validation, although URM_train_validation is the training matrix here --
# confirm this is intended.
recommender_input_args_last_test = SearchInputRecommenderArgs(
    # For a CBF model simply put [URM_train_validation, ICM_train]
    CONSTRUCTOR_POSITIONAL_ARGS=[URM_train_validation],
    CONSTRUCTOR_KEYWORD_ARGS={},
    FIT_POSITIONAL_ARGS=[],
    # Additional hyperparameters for the fit function
    FIT_KEYWORD_ARGS=earlystopping_keywargs,
)

In [None]:
import os

# Folder that receives the files written by the search below.
# NOTE(review): the folder is named "2_SLIM" although recommender_class is
# MatrixFactorization_FunkSVD_Cython -- confirm the name is intended.
output_folder_path = "result_experiments/2_SLIM"

# Create the folder if it is missing. exist_ok=True replaces the separate
# existence check and cannot fail if the folder appears between check and creation.
os.makedirs(output_folder_path, exist_ok=True)

# Search budget: 50 configurations in total, of which 30% are random starts.
n_cases = 50
n_random_starts = int(n_cases*0.3)

# Metric and recommendation-list cutoff the search optimizes.
metric_to_optimize = "MAP"
cutoff_to_optimize = 10

In [None]:
# Run the hyperparameter search with the budget, metric and output location configured above.
hyperparameterSearch.search(
    recommender_input_args,
    recommender_input_args_last_test=recommender_input_args_last_test,
    hyperparameter_search_space=hyperparameters_range_dictionary,
    n_cases=n_cases,
    n_random_starts=n_random_starts,
    save_model="last",
    # Where to save the results
    output_folder_path=output_folder_path,
    # How to call the files
    output_file_name_root=recommender_class.RECOMMENDER_NAME,
    metric_to_optimize=metric_to_optimize,
    cutoff_to_optimize=cutoff_to_optimize,
)

In [None]:
from Recommenders.DataIO import DataIO

# Reload the "<RECOMMENDER_NAME>_metadata.zip" file from the search output folder.
data_loader = DataIO(folder_path=output_folder_path)
search_metadata = data_loader.load_data(
    f"{recommender_class.RECOMMENDER_NAME}_metadata.zip"
)

# Show which entries the metadata contains.
search_metadata.keys()

dict_keys(['exception_list', 'result_on_test_df', 'time_on_train_total', 'hyperparameters_df', 'time_df', 'algorithm_name_recommender', 'metric_to_optimize', 'algorithm_name_search', 'result_on_last', 'hyperparameters_best_index', 'cutoff_to_optimize', 'time_on_validation_total', 'result_on_validation_best', 'hyperparameters_best', 'time_on_last_df', 'time_on_validation_avg', 'time_on_test_avg', 'time_on_test_total', 'result_on_test_best', 'result_on_validation_df', 'time_on_train_avg'])

In [None]:
# Best hyperparameter set stored in the search metadata.
# Despite the `_df` suffix, the recorded output below is a plain dict.
# NOTE(review): the recorded keys (topK, l1_ratio, alpha) are not in the search space
# defined in this notebook, so the saved output looks stale -- re-run to refresh.
hyperparameters_df = search_metadata["hyperparameters_best"]
hyperparameters_df

{'topK': 394, 'l1_ratio': 0.00010521339601066162, 'alpha': 0.6724364066046767}

In [None]:
# Validation results stored in the search metadata.
# In the recorded output this is a table with one row per index 0-9, all at cutoff 10.
result_on_validation_df = search_metadata["result_on_validation_df"]
result_on_validation_df

Unnamed: 0_level_0,Unnamed: 1_level_0,PRECISION,PRECISION_RECALL_MIN_DEN,RECALL,MAP,MAP_MIN_DEN,MRR,NDCG,F1,HIT_RATE,ARHR_ALL_HITS,...,COVERAGE_ITEM_CORRECT,COVERAGE_USER,COVERAGE_USER_CORRECT,DIVERSITY_GINI,SHANNON_ENTROPY,RATIO_DIVERSITY_HERFINDAHL,RATIO_DIVERSITY_GINI,RATIO_SHANNON_ENTROPY,RATIO_AVERAGE_POPULARITY,RATIO_NOVELTY
Unnamed: 0_level_1,cutoff,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1,Unnamed: 22_level_1
0,10,0.200455,0.201778,0.042897,0.098151,0.098769,0.433959,0.212349,0.070671,0.856064,0.649271,...,0.005094,0.999121,0.855311,0.001719,5.324624,0.968452,0.006937,0.429783,3.630297,0.0305
1,10,0.235841,0.237913,0.053577,0.115812,0.116775,0.463988,0.24417,0.087317,0.910691,0.730142,...,0.021485,0.999121,0.90989,0.003725,6.370463,0.979809,0.015036,0.514199,3.293444,0.031096
2,10,0.229344,0.231287,0.051908,0.111981,0.112858,0.459398,0.238085,0.084656,0.904018,0.714103,...,0.017609,0.999121,0.903223,0.004215,6.604593,0.98336,0.017014,0.533097,3.230412,0.031145
3,10,0.002904,0.002904,0.000505,0.000846,0.000846,0.008154,0.002876,0.00086,0.028303,0.008306,...,0.00443,0.999121,0.028278,0.004786,6.804512,0.989728,0.019318,0.549233,0.111747,0.053102
4,10,0.188158,0.189203,0.03894,0.093363,0.093919,0.427938,0.202569,0.064526,0.830254,0.62847,...,0.002492,0.999121,0.829524,0.001331,4.856,0.958854,0.005372,0.391957,3.685853,0.030687
5,10,0.002904,0.002904,0.000505,0.000846,0.000846,0.008154,0.002876,0.00086,0.028303,0.008306,...,0.00443,0.999121,0.028278,0.004786,6.804512,0.989728,0.019318,0.549233,0.111747,0.053102
6,10,0.218214,0.219981,0.048546,0.10672,0.107494,0.451496,0.228488,0.079423,0.886934,0.690827,...,0.011739,0.999121,0.886154,0.003171,6.227659,0.980072,0.012798,0.502672,3.375821,0.030861
7,10,0.002904,0.002904,0.000505,0.000846,0.000846,0.008154,0.002876,0.00086,0.028303,0.008306,...,0.00443,0.999121,0.028278,0.004786,6.804512,0.989728,0.019318,0.549233,0.111747,0.053102
8,10,0.002904,0.002904,0.000505,0.000846,0.000846,0.008154,0.002876,0.00086,0.028303,0.008306,...,0.00443,0.999121,0.028278,0.004786,6.804512,0.989728,0.019318,0.549233,0.111747,0.053102
9,10,0.009173,0.009236,0.00196,0.007152,0.007215,0.070408,0.016736,0.00323,0.089236,0.070953,...,0.004541,0.999121,0.089158,0.0048,6.809827,0.989731,0.019373,0.549662,0.217751,0.052566
