In [1]:
import numpy as np
import matplotlib.pyplot as pyplot
import pandas as pd
import scipy.sparse as sps
%matplotlib inline  
%load_ext Cython

from Evaluation.Evaluator import EvaluatorHoldout
from Data_manager.split_functions.split_train_validation_random_holdout import split_train_in_two_percentage_global_sample
from Recommenders.MatrixFactorization.IALSRecommender import IALSRecommender
import optuna

  from .autonotebook import tqdm as notebook_tqdm


In [2]:
# data_train_path="/kaggle/input/recommender-system-2023-challenge-polimi/data_train.csv"
# data_target_user_path="/kaggle/input/recommender-system-2023-challenge-polimi/data_target_users_test.csv"
data_train_path="data_train.csv"
data_target_user_path="data_target_users_test.csv"
data_train = pd.read_csv(data_train_path)
data_target = pd.read_csv(data_target_user_path)

In [3]:
URM_all =  data_train.pivot(index='row', columns='col', values='data').fillna(0)
item_map = {i : item for i, item in enumerate(URM_all.columns)}
user_map = {i : user for i, user in enumerate(data_target["user_id"])}
item_map_inv = {item : i for i, item in item_map.items()}
user_map_inv = {user : i for i, user in user_map.items()}
missing_index = [x for x in range(1,13025) if x not in URM_all.index.tolist()]
add_urm = pd.DataFrame(index = missing_index, columns = URM_all.columns).fillna(0)
URM_all = pd.concat([URM_all, add_urm]).sort_index()
del add_urm
del missing_index
#data_target["user_id"] = data_target["user_id"]
URM_all = URM_all.to_numpy()
URM_all = sps.csr_matrix(URM_all)
URM_all

<13024x22222 sparse matrix of type '<class 'numpy.float64'>'
	with 478730 stored elements in Compressed Sparse Row format>

In [4]:
test_folds = 3

URM_trains = [None] * test_folds
evaluator_tests = [None] * test_folds
for i in range(0, test_folds):
    URM_trains[i], URM_test = split_train_in_two_percentage_global_sample(URM_all, train_percentage = 0.80)
    evaluator_tests[i] = EvaluatorHoldout(URM_test, cutoff_list=[10])

EvaluatorHoldout: Ignoring 2527 (19.4%) Users that have less than 1 test interactions
EvaluatorHoldout: Ignoring 2548 (19.6%) Users that have less than 1 test interactions
EvaluatorHoldout: Ignoring 2545 (19.5%) Users that have less than 1 test interactions


In [11]:
def train_evaluate(optuna_trial):
    factors = optuna_trial.suggest_int("topK", 10, 1000)
    alpha = optuna_trial.suggest_float("alpha", 0.0, 2.0)
    epsilon = optuna_trial.suggest_float("epsilon", 0.0, 2.0)
    reg = optuna_trial.suggest_float("reg", 0.0, 1.0)

    mAP = 0.0
    for i in range(0, 1):
        IALS = IALSRecommender(URM_trains[i], verbose=False)
        IALS.fit(epochs = 20)#num_factors=factors, alpha=alpha, epsilon=epsilon, reg=reg)
        result_df, _ = evaluator_tests[i].evaluateRecommender(IALS)
        mAP += result_df["MAP"].values[0]
    mAP /= 1
    return mAP

In [12]:
study = optuna.create_study(direction="maximize")
study.optimize(train_evaluate, n_trials=1)

[I 2023-12-11 12:01:03,574] A new study created in memory with name: no-name-4ee223c0-1d60-41c4-aec0-87736b9d564d


IALSRecommender: Epoch 1 of 20. Elapsed time 13.18 sec
IALSRecommender: Epoch 2 of 20. Elapsed time 26.16 sec
IALSRecommender: Epoch 3 of 20. Elapsed time 39.23 sec
IALSRecommender: Epoch 4 of 20. Elapsed time 52.03 sec
IALSRecommender: Epoch 5 of 20. Elapsed time 1.08 min
IALSRecommender: Epoch 6 of 20. Elapsed time 1.29 min
IALSRecommender: Epoch 7 of 20. Elapsed time 1.51 min
IALSRecommender: Epoch 8 of 20. Elapsed time 1.73 min
IALSRecommender: Epoch 9 of 20. Elapsed time 1.94 min
IALSRecommender: Epoch 10 of 20. Elapsed time 2.15 min
IALSRecommender: Epoch 11 of 20. Elapsed time 2.36 min
IALSRecommender: Epoch 12 of 20. Elapsed time 2.57 min
IALSRecommender: Epoch 13 of 20. Elapsed time 2.78 min
IALSRecommender: Epoch 14 of 20. Elapsed time 2.98 min
IALSRecommender: Epoch 15 of 20. Elapsed time 3.20 min
IALSRecommender: Epoch 16 of 20. Elapsed time 3.41 min
IALSRecommender: Epoch 17 of 20. Elapsed time 3.62 min
IALSRecommender: Epoch 18 of 20. Elapsed time 3.83 min
IALSRecommender

[I 2023-12-11 12:05:29,525] Trial 0 finished with value: 0.02782008011359268 and parameters: {'topK': 98, 'alpha': 0.8394767706601036, 'epsilon': 1.6822773623605254, 'reg': 0.1330123457937753}. Best is trial 0 with value: 0.02782008011359268.
