In [1]:
import numpy as np
import matplotlib.pyplot as pyplot
import pandas as pd
import scipy.sparse as sps
%matplotlib inline
%load_ext Cython

from Data_manager.split_functions.split_train_validation_random_holdout import split_train_in_two_percentage_global_sample
from Evaluation.Evaluator import EvaluatorHoldout
from Recommenders.MatrixFactorization.PureSVDRecommender import PureSVDRecommender
import optuna

  from .autonotebook import tqdm as notebook_tqdm


In [2]:
# data_train_path="/kaggle/input/recommender-system-2023-challenge-polimi/data_train.csv"
# data_target_user_path="/kaggle/input/recommender-system-2023-challenge-polimi/data_target_users_test.csv"
data_train_path="data_train.csv"
data_target_user_path="data_target_users_test.csv"
data_train = pd.read_csv(data_train_path)
data_target = pd.read_csv(data_target_user_path)

In [3]:
URM_all =  data_train.pivot(index='row', columns='col', values='data').fillna(0)
item_map = {i : item for i, item in enumerate(URM_all.columns)}
user_map = {i : user for i, user in enumerate(data_target["user_id"])}
item_map_inv = {item : i for i, item in item_map.items()}
user_map_inv = {user : i for i, user in user_map.items()}
missing_index = [x for x in range(1,13025) if x not in URM_all.index.tolist()]
add_urm = pd.DataFrame(index = missing_index, columns = URM_all.columns).fillna(0)
URM_all = pd.concat([URM_all, add_urm]).sort_index()
del add_urm
del missing_index
#data_target["user_id"] = data_target["user_id"]
URM_all = URM_all.to_numpy()
URM_all = sps.csr_matrix(URM_all)
URM_all

<13024x22222 sparse matrix of type '<class 'numpy.float64'>'
	with 478730 stored elements in Compressed Sparse Row format>

In [4]:
test_folds = 3

URM_trains = [None] * test_folds
evaluator_tests = [None] * test_folds
for i in range(0, test_folds):
    URM_trains[i], URM_test = split_train_in_two_percentage_global_sample(URM_all, train_percentage = 0.80)
    evaluator_tests[i] = EvaluatorHoldout(URM_test, cutoff_list=[10])

EvaluatorHoldout: Ignoring 2494 (19.1%) Users that have less than 1 test interactions
EvaluatorHoldout: Ignoring 2564 (19.7%) Users that have less than 1 test interactions
EvaluatorHoldout: Ignoring 2552 (19.6%) Users that have less than 1 test interactions


In [9]:
def train_evaluate(optuna_trial):
    factors = optuna_trial.suggest_int("factors", 1, 1000)
    
    mAP = 0.0
    for i in range(0, test_folds):
        SLIM_recommender = PureSVDRecommender(URM_trains[i], verbose=False)
        SLIM_recommender.fit(num_factors=factors)
        result_df, _ = evaluator_tests[i].evaluateRecommender(SLIM_recommender)
        mAP += result_df["MAP"].values[0]
    mAP /= test_folds
    return mAP

In [10]:
study = optuna.create_study(direction="maximize")
study.optimize(train_evaluate, n_trials=500)

[I 2023-12-11 21:55:11,738] A new study created in memory with name: no-name-b8603afe-ab9d-4356-8a8f-17e6c034849e


EvaluatorHoldout: Processed 10530 (100.0%) in 9.50 sec. Users per second: 1108
EvaluatorHoldout: Processed 10460 (100.0%) in 9.48 sec. Users per second: 1103
EvaluatorHoldout: Processed 10472 (100.0%) in 9.58 sec. Users per second: 1093


[I 2023-12-11 21:55:59,113] Trial 0 finished with value: 0.02956656536732009 and parameters: {'factors': 235}. Best is trial 0 with value: 0.02956656536732009.


EvaluatorHoldout: Processed 10530 (100.0%) in 10.56 sec. Users per second: 997
EvaluatorHoldout: Processed 10460 (100.0%) in 10.39 sec. Users per second: 1007
EvaluatorHoldout: Processed 10472 (100.0%) in 9.96 sec. Users per second: 1051


[I 2023-12-11 21:56:43,625] Trial 1 finished with value: 0.03069235079246697 and parameters: {'factors': 150}. Best is trial 1 with value: 0.03069235079246697.


EvaluatorHoldout: Processed 10530 (100.0%) in 13.87 sec. Users per second: 759
EvaluatorHoldout: Processed 10460 (100.0%) in 15.16 sec. Users per second: 690
EvaluatorHoldout: Processed 10472 (100.0%) in 14.03 sec. Users per second: 746


[I 2023-12-11 21:59:29,404] Trial 2 finished with value: 0.027443541012505984 and parameters: {'factors': 645}. Best is trial 1 with value: 0.03069235079246697.


EvaluatorHoldout: Processed 10530 (100.0%) in 15.82 sec. Users per second: 665
EvaluatorHoldout: Processed 10460 (100.0%) in 16.16 sec. Users per second: 647
EvaluatorHoldout: Processed 10472 (100.0%) in 15.28 sec. Users per second: 685


[I 2023-12-11 22:02:07,980] Trial 3 finished with value: 0.02648563963101915 and parameters: {'factors': 948}. Best is trial 1 with value: 0.03069235079246697.


EvaluatorHoldout: Processed 10530 (100.0%) in 11.47 sec. Users per second: 918
EvaluatorHoldout: Processed 10460 (100.0%) in 11.76 sec. Users per second: 889
EvaluatorHoldout: Processed 10472 (100.0%) in 12.13 sec. Users per second: 863


[I 2023-12-11 22:03:23,434] Trial 4 finished with value: 0.028665898143992907 and parameters: {'factors': 363}. Best is trial 1 with value: 0.03069235079246697.


EvaluatorHoldout: Processed 10530 (100.0%) in 13.38 sec. Users per second: 787
EvaluatorHoldout: Processed 10460 (100.0%) in 13.62 sec. Users per second: 768
EvaluatorHoldout: Processed 10472 (100.0%) in 13.34 sec. Users per second: 785


[I 2023-12-11 22:05:06,557] Trial 5 finished with value: 0.027297525994229455 and parameters: {'factors': 762}. Best is trial 1 with value: 0.03069235079246697.


EvaluatorHoldout: Processed 10530 (100.0%) in 9.94 sec. Users per second: 1059
EvaluatorHoldout: Processed 10460 (100.0%) in 10.07 sec. Users per second: 1039
EvaluatorHoldout: Processed 10472 (100.0%) in 10.45 sec. Users per second: 1003


[I 2023-12-11 22:05:39,764] Trial 6 finished with value: 0.030253242568520352 and parameters: {'factors': 56}. Best is trial 1 with value: 0.03069235079246697.


EvaluatorHoldout: Processed 10530 (100.0%) in 12.39 sec. Users per second: 850
EvaluatorHoldout: Processed 10460 (100.0%) in 12.63 sec. Users per second: 828
EvaluatorHoldout: Processed 10472 (100.0%) in 12.78 sec. Users per second: 819


[I 2023-12-11 22:07:16,935] Trial 7 finished with value: 0.02845893447192471 and parameters: {'factors': 401}. Best is trial 1 with value: 0.03069235079246697.


EvaluatorHoldout: Processed 10530 (100.0%) in 15.70 sec. Users per second: 671
EvaluatorHoldout: Processed 10460 (100.0%) in 16.15 sec. Users per second: 648
EvaluatorHoldout: Processed 10472 (100.0%) in 17.23 sec. Users per second: 608


[I 2023-12-11 22:08:26,615] Trial 8 finished with value: 0.030897424947406372 and parameters: {'factors': 118}. Best is trial 8 with value: 0.030897424947406372.


EvaluatorHoldout: Processed 10530 (100.0%) in 16.67 sec. Users per second: 632
EvaluatorHoldout: Processed 10460 (100.0%) in 16.34 sec. Users per second: 640
EvaluatorHoldout: Processed 10472 (100.0%) in 16.85 sec. Users per second: 621


[I 2023-12-11 22:09:18,567] Trial 9 finished with value: 0.025759893925419608 and parameters: {'factors': 18}. Best is trial 8 with value: 0.030897424947406372.


EvaluatorHoldout: Processed 10530 (100.0%) in 21.85 sec. Users per second: 482
EvaluatorHoldout: Processed 10460 (100.0%) in 18.82 sec. Users per second: 556
EvaluatorHoldout: Processed 10472 (100.0%) in 12.12 sec. Users per second: 864


[I 2023-12-11 22:11:36,131] Trial 10 finished with value: 0.027987965406327652 and parameters: {'factors': 560}. Best is trial 8 with value: 0.030897424947406372.


EvaluatorHoldout: Processed 10530 (100.0%) in 10.42 sec. Users per second: 1010
EvaluatorHoldout: Processed 10460 (100.0%) in 10.53 sec. Users per second: 993
EvaluatorHoldout: Processed 10472 (100.0%) in 10.62 sec. Users per second: 986


[I 2023-12-11 22:12:24,047] Trial 11 finished with value: 0.030118618838149733 and parameters: {'factors': 184}. Best is trial 8 with value: 0.030897424947406372.


EvaluatorHoldout: Processed 10530 (100.0%) in 10.41 sec. Users per second: 1011
EvaluatorHoldout: Processed 10460 (100.0%) in 10.44 sec. Users per second: 1002
EvaluatorHoldout: Processed 10472 (100.0%) in 10.57 sec. Users per second: 991


[I 2023-12-11 22:13:13,890] Trial 12 finished with value: 0.029983785824484677 and parameters: {'factors': 203}. Best is trial 8 with value: 0.030897424947406372.


EvaluatorHoldout: Processed 10530 (100.0%) in 11.13 sec. Users per second: 946
EvaluatorHoldout: Processed 10460 (100.0%) in 11.22 sec. Users per second: 932
EvaluatorHoldout: Processed 10472 (100.0%) in 11.26 sec. Users per second: 930


[I 2023-12-11 22:14:17,162] Trial 13 finished with value: 0.0286905070791898 and parameters: {'factors': 352}. Best is trial 8 with value: 0.030897424947406372.


EvaluatorHoldout: Processed 10530 (100.0%) in 10.17 sec. Users per second: 1036
EvaluatorHoldout: Processed 10460 (100.0%) in 10.35 sec. Users per second: 1011
EvaluatorHoldout: Processed 10472 (100.0%) in 10.46 sec. Users per second: 1001


[I 2023-12-11 22:15:01,068] Trial 14 finished with value: 0.030641533271362566 and parameters: {'factors': 143}. Best is trial 8 with value: 0.030897424947406372.


EvaluatorHoldout: Processed 10530 (100.0%) in 9.50 sec. Users per second: 1108
EvaluatorHoldout: Processed 10460 (100.0%) in 9.21 sec. Users per second: 1136
EvaluatorHoldout: Processed 10472 (100.0%) in 9.31 sec. Users per second: 1125


[I 2023-12-11 22:15:29,525] Trial 15 finished with value: 0.021346381418768944 and parameters: {'factors': 6}. Best is trial 8 with value: 0.030897424947406372.


EvaluatorHoldout: Processed 10530 (100.0%) in 10.79 sec. Users per second: 976
EvaluatorHoldout: Processed 10460 (100.0%) in 11.05 sec. Users per second: 947
EvaluatorHoldout: Processed 10472 (100.0%) in 11.40 sec. Users per second: 919


[I 2023-12-11 22:16:28,005] Trial 16 finished with value: 0.028964126845343552 and parameters: {'factors': 281}. Best is trial 8 with value: 0.030897424947406372.


EvaluatorHoldout: Processed 10530 (100.0%) in 12.23 sec. Users per second: 861
EvaluatorHoldout: Processed 10460 (100.0%) in 12.40 sec. Users per second: 844
EvaluatorHoldout: Processed 10472 (100.0%) in 12.59 sec. Users per second: 832


[I 2023-12-11 22:17:54,507] Trial 17 finished with value: 0.028363730935174414 and parameters: {'factors': 469}. Best is trial 8 with value: 0.030897424947406372.


EvaluatorHoldout: Processed 10530 (100.0%) in 11.29 sec. Users per second: 933
EvaluatorHoldout: Processed 10460 (100.0%) in 11.82 sec. Users per second: 885
EvaluatorHoldout: Processed 10472 (100.0%) in 10.62 sec. Users per second: 986


[I 2023-12-11 22:18:42,126] Trial 18 finished with value: 0.03092019486806366 and parameters: {'factors': 112}. Best is trial 18 with value: 0.03092019486806366.


EvaluatorHoldout: Processed 10530 (100.0%) in 10.78 sec. Users per second: 976
EvaluatorHoldout: Processed 10460 (100.0%) in 10.24 sec. Users per second: 1022
EvaluatorHoldout: Processed 10472 (100.0%) in 10.12 sec. Users per second: 1035


[I 2023-12-11 22:19:24,333] Trial 19 finished with value: 0.03097119496956199 and parameters: {'factors': 107}. Best is trial 19 with value: 0.03097119496956199.


EvaluatorHoldout: Processed 10530 (100.0%) in 11.14 sec. Users per second: 945
EvaluatorHoldout: Processed 10460 (100.0%) in 11.39 sec. Users per second: 919
EvaluatorHoldout: Processed 10472 (100.0%) in 11.35 sec. Users per second: 922


[I 2023-12-11 22:20:30,455] Trial 20 finished with value: 0.02888404011183035 and parameters: {'factors': 293}. Best is trial 19 with value: 0.03097119496956199.


EvaluatorHoldout: Processed 10530 (100.0%) in 10.11 sec. Users per second: 1042
EvaluatorHoldout: Processed 10460 (100.0%) in 10.95 sec. Users per second: 955
EvaluatorHoldout: Processed 10472 (100.0%) in 10.83 sec. Users per second: 967


[I 2023-12-11 22:21:10,942] Trial 21 finished with value: 0.030692888001137373 and parameters: {'factors': 95}. Best is trial 19 with value: 0.03097119496956199.


EvaluatorHoldout: Processed 10530 (100.0%) in 10.43 sec. Users per second: 1009
EvaluatorHoldout: Processed 10460 (100.0%) in 10.38 sec. Users per second: 1008
EvaluatorHoldout: Processed 10472 (100.0%) in 10.28 sec. Users per second: 1019


[I 2023-12-11 22:21:48,029] Trial 22 finished with value: 0.030619109167577097 and parameters: {'factors': 81}. Best is trial 19 with value: 0.03097119496956199.


EvaluatorHoldout: Processed 10530 (100.0%) in 10.89 sec. Users per second: 967
EvaluatorHoldout: Processed 10460 (100.0%) in 11.04 sec. Users per second: 947
EvaluatorHoldout: Processed 10472 (100.0%) in 13.30 sec. Users per second: 787


[I 2023-12-11 22:23:03,899] Trial 23 finished with value: 0.02947742299930763 and parameters: {'factors': 249}. Best is trial 19 with value: 0.03097119496956199.


EvaluatorHoldout: Processed 10530 (100.0%) in 12.48 sec. Users per second: 844
EvaluatorHoldout: Processed 10460 (100.0%) in 12.16 sec. Users per second: 860
EvaluatorHoldout: Processed 10472 (100.0%) in 12.57 sec. Users per second: 833


[I 2023-12-11 22:24:03,701] Trial 24 finished with value: 0.03083589721005395 and parameters: {'factors': 137}. Best is trial 19 with value: 0.03097119496956199.


EvaluatorHoldout: Processed 10530 (100.0%) in 13.72 sec. Users per second: 767
EvaluatorHoldout: Processed 10460 (100.0%) in 16.41 sec. Users per second: 637
EvaluatorHoldout: Processed 10472 (100.0%) in 15.38 sec. Users per second: 681


[I 2023-12-11 22:26:01,874] Trial 25 finished with value: 0.028387569549059716 and parameters: {'factors': 439}. Best is trial 19 with value: 0.03097119496956199.


EvaluatorHoldout: Processed 10530 (100.0%) in 12.86 sec. Users per second: 819
EvaluatorHoldout: Processed 10460 (100.0%) in 12.82 sec. Users per second: 816
EvaluatorHoldout: Processed 10472 (100.0%) in 13.07 sec. Users per second: 801


[I 2023-12-11 22:28:00,744] Trial 26 finished with value: 0.027906250922092646 and parameters: {'factors': 576}. Best is trial 19 with value: 0.03097119496956199.


EvaluatorHoldout: Processed 10530 (100.0%) in 11.84 sec. Users per second: 889
EvaluatorHoldout: Processed 10460 (100.0%) in 11.66 sec. Users per second: 897
EvaluatorHoldout: Processed 10472 (100.0%) in 11.38 sec. Users per second: 920


[I 2023-12-11 22:29:13,837] Trial 27 finished with value: 0.028772308367093467 and parameters: {'factors': 312}. Best is trial 19 with value: 0.03097119496956199.


EvaluatorHoldout: Processed 10530 (100.0%) in 14.93 sec. Users per second: 705
EvaluatorHoldout: Processed 10460 (100.0%) in 14.43 sec. Users per second: 725
EvaluatorHoldout: Processed 10472 (100.0%) in 14.59 sec. Users per second: 718


[I 2023-12-11 22:31:25,078] Trial 28 finished with value: 0.02701763087269662 and parameters: {'factors': 788}. Best is trial 19 with value: 0.03097119496956199.


EvaluatorHoldout: Processed 10530 (100.0%) in 11.13 sec. Users per second: 946
EvaluatorHoldout: Processed 10460 (100.0%) in 11.28 sec. Users per second: 928
EvaluatorHoldout: Processed 10472 (100.0%) in 11.04 sec. Users per second: 949


[I 2023-12-11 22:32:28,931] Trial 29 finished with value: 0.029730205310431912 and parameters: {'factors': 225}. Best is trial 19 with value: 0.03097119496956199.


EvaluatorHoldout: Processed 10530 (100.0%) in 10.51 sec. Users per second: 1002


[W 2023-12-11 22:32:49,487] Trial 30 failed with parameters: {'factors': 94} because of the following error: KeyboardInterrupt().
Traceback (most recent call last):
  File "C:\Users\feder\AppData\Local\Packages\PythonSoftwareFoundation.Python.3.11_qbz5n2kfra8p0\LocalCache\local-packages\Python311\site-packages\optuna\study\_optimize.py", line 200, in _run_trial
    value_or_values = func(trial)
                      ^^^^^^^^^^^
  File "C:\Users\feder\AppData\Local\Temp\ipykernel_14484\358869383.py", line 8, in train_evaluate
    result_df, _ = evaluator_tests[i].evaluateRecommender(SLIM_recommender)
                   ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  File "c:\Users\feder\Documents\DEV\my_RecSyS_Course_AT_PoliMi\Evaluation\Evaluator.py", line 276, in evaluateRecommender
    results_dict = self._run_evaluation_on_selected_users(recommender_object, self.users_to_evaluate)
                   ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^

KeyboardInterrupt: 