# RP3

In [1]:
import os
# Move the working directory one level up so that the relative paths used by
# the following cells (e.g. "Dataset/data_train.csv", "logs/RP3_first.csv")
# are resolved from there.
# NOTE: the path is relative, so re-running this cell moves up one more level
# each time it is executed.
os.chdir("../")

In [None]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt

In [None]:
# Interactions file, given relative to the current working directory.
data_train_path = "Dataset/data_train.csv"

# Load the interactions and show the first ten rows as a quick sanity check.
URM_all_dataframe = pd.read_csv(
    data_train_path,
)
print(
    URM_all_dataframe.head(n=10)
)

   user_id  item_id  data
0        0        0   1.0
1        0        2   1.0
2        0      120   1.0
3        0      128   1.0
4        0      211   1.0
5        0      232   1.0
6        0      282   1.0
7        0      453   1.0
8        0      458   1.0
9        0      491   1.0


## Splitting dataset

In [None]:
import scipy.sparse as sps

# Assemble the interactions as a sparse COO matrix: the 'data' column supplies
# the stored values, 'user_id' the row indices and 'item_id' the column indices.
URM_all = sps.coo_matrix(
    (
        URM_all_dataframe['data'],
        (
            URM_all_dataframe['user_id'],
            URM_all_dataframe['item_id'],
        ),
    )
)


## K-fold Cross-validation
To tune the hyperparameters more reliably, each candidate configuration is evaluated on several different random splits of the data and the resulting scores are averaged.

In [None]:
# One fixed random seed per split, so that every run reproduces the same splits.
seeds = [
    192760,
    1393659,
    1269293,
    138973,
    931308,
    1088652,
    1361151,
    1456105,
    820535,
    1240839,
]

# Number of splits; it must match the number of seeds listed above.
k_fold = 10
assert k_fold == len(seeds)

In [None]:
import numpy as np
import scipy.sparse as sps
from Data_manager.split_functions.split_train_validation_random_holdout import split_train_in_two_percentage_global_sample
from Evaluation.Evaluator import EvaluatorHoldout


# Build the full interaction matrix and convert it to CSR in one go.
URM_all = sps.coo_matrix(
    (
        URM_all_dataframe['data'],
        (URM_all_dataframe['user_id'], URM_all_dataframe['item_id']),
    )
).tocsr()

# Per-split containers; position i always refers to the split made with seeds[i].
urm_trains, urm_validations = [], []
urm_train_validations, urm_test = [], []
evaluator_validation, evaluator_test = [], []

for i in range(k_fold):
    # Outer split: 80% train+validation vs 20% test.
    URM_train_validation, URM_test = split_train_in_two_percentage_global_sample(
        URM_all,
        train_percentage=0.8,
        seed=seeds[i],
    )
    # Inner split of the train+validation part (80/20 again), which gives
    # 64% train and 16% validation of the whole dataset. The same seed is reused.
    URM_train, URM_validation = split_train_in_two_percentage_global_sample(
        URM_train_validation,
        train_percentage=0.8,
        seed=seeds[i],
    )

    # Keep every matrix of this split for later use.
    urm_test.append(URM_test)
    urm_train_validations.append(URM_train_validation)
    urm_validations.append(URM_validation)
    urm_trains.append(URM_train)

    # One evaluator per held-out matrix, both with a cutoff of 10.
    evaluator_validation.append(
        EvaluatorHoldout(URM_validation, cutoff_list=[10])
    )
    evaluator_test.append(
        EvaluatorHoldout(URM_test, cutoff_list=[10])
    )

## Finding the optimal parameters with Optuna

In [None]:
from Recommenders.GraphBased.RP3betaRecommender import RP3betaRecommender
from Recommenders.KNN.ItemKNNCFRecommender import ItemKNNCFRecommender
import optuna

def objective_function(trial):
    """Optuna objective: average validation MAP of RP3beta over all splits.

    Samples ``topK``, ``alpha`` and ``beta`` from the trial, fits one
    ``RP3betaRecommender`` per training matrix in ``urm_trains`` and scores it
    with the matching evaluator in ``evaluator_validation``.

    Args:
        trial: the Optuna trial used to sample the hyperparameters.

    Returns:
        The "MAP" value averaged over the ``k_fold`` splits.
    """
    # Sampling order is kept as topK, alpha, beta.
    top_k = trial.suggest_int('topK', 5, 1000)
    alpha = trial.suggest_float('alpha', 0, 2)
    beta = trial.suggest_float('beta', 0, 2)

    fold_scores = []
    for fold in range(k_fold):
        model = RP3betaRecommender(urm_trains[fold])
        model.fit(topK=top_k, alpha=alpha, beta=beta)

        # Only the first element of the returned pair is used here.
        evaluation = evaluator_validation[fold].evaluateRecommender(model)
        metrics = evaluation[0]
        # .item() extracts the single "MAP" value as a plain scalar
        # (presumably one value because the evaluator has one cutoff - confirm).
        fold_scores.append(metrics["MAP"].item())

    return sum(fold_scores) / k_fold

In [None]:
class SaveResults(object):
    """Optuna callback that logs every finished trial to a CSV file.

    After each trial, the sampled hyperparameters and the first objective
    value are appended to ``results_df`` and the whole table is rewritten to
    ``output_path``.

    Args:
        output_path: destination CSV file. Defaults to "logs/RP3_first.csv".
            Its parent directory is created on demand.
    """

    def __init__(self, output_path="logs/RP3_first.csv"):
        self.output_path = output_path
        # "result" is declared up front so it is always the first column.
        self.results_df = pd.DataFrame(columns = ["result"])

    def __call__(self, optuna_study, optuna_trial):
        """Record ``optuna_trial`` and rewrite the CSV file.

        Args:
            optuna_study: the running study (unused, required by the callback
                signature).
            optuna_trial: the finished trial; its ``params`` and ``values[0]``
                are stored.
        """
        hyperparam_dict = optuna_trial.params.copy()
        hyperparam_dict["result"] = optuna_trial.values[0]
        new_row = pd.DataFrame([hyperparam_dict])

        if self.results_df.empty:
            # Avoid concatenating with an empty frame (deprecated in recent
            # pandas and a source of version-dependent dtypes): start the
            # table from the first row, keeping the declared columns first.
            leading = [c for c in self.results_df.columns if c in new_row.columns]
            trailing = [c for c in new_row.columns if c not in leading]
            self.results_df = new_row[leading + trailing]
        else:
            self.results_df = pd.concat([self.results_df, new_row], ignore_index=True)

        # Create the destination folder if needed, so that a missing "logs/"
        # directory does not abort the study after the first trial.
        parent_dir = os.path.dirname(self.output_path)
        if parent_dir:
            os.makedirs(parent_dir, exist_ok=True)
        self.results_df.to_csv(self.output_path, index = False)

In [None]:
# Callback that stores the outcome of every trial.
save_results = SaveResults()

# The objective returns a MAP value, so the study searches for its maximum.
optuna_study = optuna.create_study(direction="maximize")

# Run 200 trials, invoking the callback after each one.
optuna_study.optimize(
    objective_function,
    n_trials=200,
    callbacks=[save_results],
)