In [5]:
import numpy as np
import matplotlib.pyplot as pyplot
import pandas as pd
import scipy.sparse as sps
%matplotlib inline
%load_ext Cython

from Data_manager.split_functions.split_train_validation_random_holdout import split_train_in_two_percentage_global_sample
from Evaluation.Evaluator import EvaluatorHoldout
from Recommenders.SLIM.SLIMElasticNetRecommender import MultiThreadSLIM_SLIMElasticNetRecommender, SLIMElasticNetRecommender
import optuna

The Cython extension is already loaded. To reload it, use:
  %reload_ext Cython


In [6]:
# Input CSV locations, relative to the notebook's working directory.
# When running on Kaggle, point these at the copies under
# /kaggle/input/recommender-system-2023-challenge-polimi/ instead.
data_train_path = "data_train.csv"
data_target_user_path = "data_target_users_test.csv"

# Read the interactions first, then the list of target users.
data_train, data_target = (
    pd.read_csv(csv_path)
    for csv_path in (data_train_path, data_target_user_path)
)

In [7]:
# Build the interaction matrix (URM) as a SciPy CSR matrix.
#
# Row labels 1..N_USERS must all be present in the matrix, even when a label
# never appears in `data_train`; such rows are added filled with zeros.
N_USERS = 13024

# One row per distinct 'row' value, one column per distinct 'col' value;
# (row, col) pairs absent from the data become 0.
URM_all = data_train.pivot(index='row', columns='col', values='data').fillna(0)

# Column position in the matrix -> original 'col' label, and the inverse.
item_map = dict(enumerate(URM_all.columns))
item_map_inv = {item: position for position, item in item_map.items()}

# Position in the target-user list -> 'user_id', and the inverse.
# NOTE(review): these positions come from `data_target`, not from the matrix
# rows — confirm that callers do not use them as URM row indices.
user_map = dict(enumerate(data_target["user_id"]))
user_map_inv = {user: position for position, user in user_map.items()}

# Add an all-zero row for every label in 1..N_USERS that the pivot did not
# produce. The union keeps every label the pivot already had and yields them
# in sorted order; reindexing does this in a single pass instead of testing
# each candidate label against a freshly built list of the index.
all_row_labels = URM_all.index.union(pd.RangeIndex(1, N_USERS + 1))
URM_all = URM_all.reindex(all_row_labels, fill_value=0)
del all_row_labels

URM_all = sps.csr_matrix(URM_all.to_numpy())
URM_all

<13024x22222 sparse matrix of type '<class 'numpy.float64'>'
	with 478730 stored elements in Compressed Sparse Row format>

In [8]:
# Number of independent train/test holdout splits used to score each trial.
test_folds = 3

# For every fold keep the training matrix and an evaluator built on the
# matching test matrix; position k in both lists refers to the same split.
URM_trains = []
evaluator_tests = []
for fold in range(test_folds):
    # 80% of the interactions are requested for the training side.
    URM_train_fold, URM_test = split_train_in_two_percentage_global_sample(
        URM_all, train_percentage = 0.80
    )
    URM_trains.append(URM_train_fold)
    # Metrics are computed at a single cutoff of 10.
    evaluator_tests.append(EvaluatorHoldout(URM_test, cutoff_list=[10]))

EvaluatorHoldout: Ignoring 2507 (19.2%) Users that have less than 1 test interactions
EvaluatorHoldout: Ignoring 2559 (19.6%) Users that have less than 1 test interactions
EvaluatorHoldout: Ignoring 2542 (19.5%) Users that have less than 1 test interactions


In [16]:
def train_evaluate(optuna_trial):
    """Optuna objective: average MAP of SLIM ElasticNet across the folds.

    Asks the trial for ``topK`` (int in [1, 1000]), ``alpha`` (float in
    [0.0, 2.0]) and ``l1_ratio`` (float in [0.0, 1.0]). For each of the
    ``test_folds`` splits it then fits a ``SLIMElasticNetRecommender`` on
    ``URM_trains[fold]`` and scores it with ``evaluator_tests[fold]``.

    Parameters
    ----------
    optuna_trial
        Object exposing ``suggest_int`` / ``suggest_float``.

    Returns
    -------
    The "MAP" value of the first row of each fold's result table, summed
    over the folds and divided by ``test_folds``.

    NOTE(review): ``alpha`` is sampled uniformly on a linear scale; the
    recorded trial scored a MAP of about 4e-05, so the range may be worth
    revisiting — confirm against the recommender's documentation.
    """
    # Sampled in this order so the trial records topK, alpha, l1_ratio.
    hyperparams = {
        "topK": optuna_trial.suggest_int("topK", 1, 1000),
        "alpha": optuna_trial.suggest_float("alpha", 0.0, 2.0),
        "l1_ratio": optuna_trial.suggest_float("l1_ratio", 0.0, 1.0),
    }

    def _fold_map(fold):
        # Train on one fold's training matrix and score on its own evaluator.
        model = SLIMElasticNetRecommender(URM_trains[fold], verbose=True)
        model.fit(**hyperparams)
        metrics, _ = evaluator_tests[fold].evaluateRecommender(model)
        return metrics["MAP"].values[0]

    map_total = 0.0
    for fold in range(test_folds):
        map_total += _fold_map(fold)
    return map_total / test_folds

In [17]:
# Seed for the Optuna sampler, so the sequence of suggested hyper-parameters
# is the same on every run. This fixes the sampler only; the train/test folds
# are created elsewhere.
OPTUNA_SEED = 42

# Number of hyper-parameter configurations to evaluate. One trial fits the
# recommender once per fold and took roughly 12 minutes in the recorded run,
# so raise this deliberately.
N_TRIALS = 1

# TPE is Optuna's default sampler; it is passed explicitly only to set the seed.
study = optuna.create_study(
    direction="maximize",
    sampler=optuna.samplers.TPESampler(seed=OPTUNA_SEED),
)
study.optimize(train_evaluate, n_trials=N_TRIALS)

[I 2023-12-11 21:35:06,011] A new study created in memory with name: no-name-6bc42197-8e52-45ba-9013-f59b47b25252


SLIMElasticNetRecommender: URM Detected 629 ( 4.8%) users with no interactions.
SLIMElasticNetRecommender: URM Detected 140 ( 0.6%) items with no interactions.
SLIMElasticNetRecommender: Processed 22222 (100.0%) in 4.00 min. Items per second: 92.61
EvaluatorHoldout: Processed 10517 (100.0%) in 6.26 sec. Users per second: 1680
SLIMElasticNetRecommender: URM Detected 625 ( 4.8%) users with no interactions.
SLIMElasticNetRecommender: URM Detected 121 ( 0.5%) items with no interactions.
SLIMElasticNetRecommender: Processed 22222 (100.0%) in 4.04 min. Items per second: 91.75
EvaluatorHoldout: Processed 10465 (100.0%) in 6.17 sec. Users per second: 1696
SLIMElasticNetRecommender: URM Detected 601 ( 4.6%) users with no interactions.
SLIMElasticNetRecommender: URM Detected 106 ( 0.5%) items with no interactions.
SLIMElasticNetRecommender: Processed 22222 (100.0%) in 3.91 min. Items per second: 94.73
EvaluatorHoldout: Processed 10482 (100.0%) in 6.16 sec. Users per second: 1700


[I 2023-12-11 21:47:21,379] Trial 0 finished with value: 4.210886735370108e-05 and parameters: {'topK': 83, 'alpha': 0.6079997075338561, 'l1_ratio': 0.6832414827168689}. Best is trial 0 with value: 4.210886735370108e-05.
