In [1]:
import os
os.chdir( "../")

In [2]:
import pandas as pd
import numpy as np

path = "Dataset/data_train.csv"
df = pd.read_csv(filepath_or_buffer=path,
                               sep=",",
                               header=1,
                               engine='python',
                               names=['UserID', 'ItemID', 'Interaction'])


df

Unnamed: 0,UserID,ItemID,Interaction
0,1,15,1.0
1,1,16,1.0
2,1,133,1.0
3,1,161,1.0
4,1,187,1.0
...,...,...,...
478724,13024,13605,1.0
478725,13024,13823,1.0
478726,13024,15122,1.0
478727,13024,18185,1.0


In [3]:
df.Interaction.value_counts()

1.0    478729
Name: Interaction, dtype: int64

In [4]:
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 478729 entries, 0 to 478728
Data columns (total 3 columns):
 #   Column       Non-Null Count   Dtype  
---  ------       --------------   -----  
 0   UserID       478729 non-null  int64  
 1   ItemID       478729 non-null  int64  
 2   Interaction  478729 non-null  float64
dtypes: float64(1), int64(2)
memory usage: 11.0 MB


In [5]:
user_ids = df["UserID"].unique().tolist()
user2user_encoded = {x: i for i, x in enumerate(user_ids)}
userencoded2user = {i: x for i, x in enumerate(user_ids)}
item_ids = df["ItemID"].unique().tolist()
item2item_encoded = {x: i for i, x in enumerate(item_ids)}
item_encoded2item = {i: x for i, x in enumerate(item_ids)}
df["User"] = df["UserID"].map(user2user_encoded)
df["Item"] = df["ItemID"].map(item2item_encoded)

num_users = len(user2user_encoded)
num_items = len(item_encoded2item)
df["Interaction"] = df["Interaction"].values.astype(np.float32)

# min and max ratings will be used to normalize the ratings later
min_rating = 0.0
max_rating = max(df["Interaction"])

print(
    "Number of users: {}, Number of Items: {}, Min rating: {}, Max rating: {}".format(
        num_users, num_items, min_rating, max_rating
    )
)

Number of users: 12638, Number of Items: 22222, Min rating: 0.0, Max rating: 1.0


In [6]:
df.UserID = df.User
df.ItemID = df.Item

In [7]:
df = df.drop(["User","Item"], axis=1)

In [8]:
df.head()

Unnamed: 0,UserID,ItemID,Interaction
0,0,0,1.0
1,0,1,1.0
2,0,2,1.0
3,0,3,1.0
4,0,4,1.0


In [9]:
userId_unique = df["UserID"].unique()
itemId_unique = df["ItemID"].unique()

In [11]:
num_users = len(userId_unique)
num_items = len(itemId_unique)

In [12]:
import scipy.sparse as sps

urm_all = sps.csr_matrix((df.Interaction, (df.UserID, df.ItemID)),
                         shape = (num_users, num_items))

urm_train = sps.csr_matrix((ratings_training, (user_ids_training, item_ids_training)),
                           shape = (num_users, num_items))

urm_validation = sps.csr_matrix((ratings_validation, (user_ids_validation, item_ids_validation)),
                                shape = (num_users, num_items))

urm_train_validation = sps.csr_matrix((ratings_training_validation, (user_ids_training_validation, item_ids_training_validation)),
                                      shape = (num_users, num_items))

urm_test = sps.csr_matrix((ratings_test, (user_ids_test, item_ids_test)),
                          shape = (num_users, num_items))

urm_all, urm_train, urm_test, urm_validation, urm_train_validation

(<12638x22222 sparse matrix of type '<class 'numpy.float32'>'
 	with 478729 stored elements in Compressed Sparse Row format>,
 <12638x22222 sparse matrix of type '<class 'numpy.float32'>'
 	with 345881 stored elements in Compressed Sparse Row format>,
 <12638x22222 sparse matrix of type '<class 'numpy.float32'>'
 	with 71810 stored elements in Compressed Sparse Row format>,
 <12638x22222 sparse matrix of type '<class 'numpy.float32'>'
 	with 61038 stored elements in Compressed Sparse Row format>,
 <12638x22222 sparse matrix of type '<class 'numpy.float32'>'
 	with 406919 stored elements in Compressed Sparse Row format>)

In [13]:
from Recommenders.Recommender_import_list import *
from Evaluation.Evaluator import EvaluatorHoldout
evaluator_validation = EvaluatorHoldout(urm_validation, cutoff_list=[10], ignore_users=[])
evaluator_test = EvaluatorHoldout(urm_test, cutoff_list=[10], ignore_users=[])

2023-12-08 11:50:47.098653: W tensorflow/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libcudart.so.11.0'; dlerror: libcudart.so.11.0: cannot open shared object file: No such file or directory
2023-12-08 11:50:47.098678: I tensorflow/stream_executor/cuda/cudart_stub.cc:29] Ignore above cudart dlerror if you do not have a GPU set up on your machine.


EvaluatorHoldout: Ignoring 3038 (24.0%) Users that have less than 1 test interactions
EvaluatorHoldout: Ignoring 0 Users
EvaluatorHoldout: Ignoring 2769 (21.9%) Users that have less than 1 test interactions
EvaluatorHoldout: Ignoring 0 Users


## Insert model here

In [14]:
from HyperparameterTuning.run_hyperparameter_search import runHyperparameterSearch_Collaborative
from functools import partial

output_folder_path = "result_experiments/SKOPT_test/"

n_cases = 300
n_random_starts = int(n_cases * 0.3)

runHyperparameterSearch_Collaborative_partial = partial(
                    runHyperparameterSearch_Collaborative,
                    URM_train = urm_train,
                    URM_train_last_test = urm_train_validation,
                    metric_to_optimize = "MAP",
                    cutoff_to_optimize = 10,
                    n_cases = n_cases,
                    n_random_starts = n_random_starts,
                    evaluator_validation_earlystopping = evaluator_validation,
                    evaluator_validation = evaluator_validation,
                    evaluator_test = evaluator_test,
                    output_folder_path = output_folder_path,
                    resume_from_saved = False
)

runHyperparameterSearch_Collaborative_partial(RP3betaRecommender)

Iteration No: 1 started. Evaluating function at random point.
SearchBayesianSkopt: Testing config: {'topK': 223, 'alpha': 1.9542268376679761, 'beta': 0.1919448971073961, 'normalize_similarity': True}
RP3betaRecommender: URM Detected 343 ( 2.7%) users with no interactions.
RP3betaRecommender: URM Detected 195 ( 0.9%) items with no interactions.
RP3betaRecommender: Similarity column 22222 (100.0%), 5142.49 column/sec. Elapsed time 4.32 sec
EvaluatorHoldout: Processed 9600 (100.0%) in 2.99 sec. Users per second: 3214
SearchBayesianSkopt: New best config found. Config 0: {'topK': 223, 'alpha': 1.9542268376679761, 'beta': 0.1919448971073961, 'normalize_similarity': True} - results: PRECISION: 0.0206458, PRECISION_RECALL_MIN_DEN: 0.0523429, RECALL: 0.0495381, MAP: 0.0067518, MAP_MIN_DEN: 0.0181069, MRR: 0.0544204, NDCG: 0.0360621, F1: 0.0291450, HIT_RATE: 0.1702083, ARHR_ALL_HITS: 0.0604366, NOVELTY: 0.0060177, AVERAGE_POPULARITY: 0.0894528, DIVERSITY_MEAN_INTER_LIST: 0.9943814, DIVERSITY_HE

In [15]:
recommender = RP3betaRecommender(urm_train_validation)
recommender.load_model(output_folder_path, 
                file_name = recommender.RECOMMENDER_NAME + "_best_model.zip")

RP3betaRecommender: URM Detected 154 ( 1.2%) users with no interactions.
RP3betaRecommender: URM Detected 86 ( 0.4%) items with no interactions.
RP3betaRecommender: Loading model from file 'result_experiments/SKOPT_test/RP3betaRecommender_best_model.zip'
RP3betaRecommender: Loading complete


In [16]:
evaluator_test.evaluateRecommender(recommender)

EvaluatorHoldout: Processed 9869 (100.0%) in 2.45 sec. Users per second: 4030


(       PRECISION PRECISION_RECALL_MIN_DEN    RECALL       MAP MAP_MIN_DEN  \
 cutoff                                                                      
 10      0.076431                 0.155019  0.139553  0.037291      0.0753   
 
              MRR      NDCG        F1  HIT_RATE ARHR_ALL_HITS  ...  \
 cutoff                                                        ...   
 10      0.232035  0.133691  0.098768  0.468133      0.291401  ...   
 
        COVERAGE_USER COVERAGE_USER_HIT USERS_IN_GT DIVERSITY_GINI  \
 cutoff                                                              
 10          0.780899          0.365564    0.780899       0.087325   
 
        SHANNON_ENTROPY RATIO_DIVERSITY_HERFINDAHL RATIO_DIVERSITY_GINI  \
 cutoff                                                                   
 10           10.691689                   0.998314             0.249742   
 
        RATIO_SHANNON_ENTROPY RATIO_AVERAGE_POPULARITY RATIO_NOVELTY  
 cutoff                                   