In [1]:
from Data_manager.UserUtils import *
from Data_manager.split_functions.split_train_validation_random_holdout import \
    split_train_in_two_percentage_global_sample

# Load the full user-rating matrix through the helper made available by the
# star import from Data_manager.UserUtils.
URM_all = getURM_all()
# Outer split: called with train_percentage=0.80; the second returned matrix
# is kept aside as the final test set.
# NOTE(review): no random seed is set in this cell; if the split helper is
# random (as its module name suggests), the splits change on every run — confirm.
URM_train_validation, URM_test = split_train_in_two_percentage_global_sample(URM_all, train_percentage=0.80)
# Inner split: the same call applied to the train+validation part only, so
# URM_test is never involved in producing URM_train / URM_validation.
URM_train, URM_validation = split_train_in_two_percentage_global_sample(URM_train_validation, train_percentage=0.80)



In [6]:
import numpy as np
import scipy.sparse as sps

def k_fold_cross_validation(URM_all, n_folds=5, seed=None):
    """Yield ``(URM_train, URM_validation)`` pairs from a row-wise k-fold split.

    The row indices of ``URM_all`` are shuffled and partitioned into
    ``n_folds`` groups whose sizes differ by at most one (the first
    ``num_rows % n_folds`` groups get the extra row). For every group, the
    rows in that group form ``URM_validation`` (in shuffled order) and all
    remaining rows form ``URM_train`` (in their original order).

    NOTE(review): both outputs are row slices of ``URM_all``. Row ``i`` of
    either output therefore no longer corresponds to row ``i`` of
    ``URM_all``, and the two outputs contain disjoint sets of rows. Code that
    matches rows of the two matrices by position will pair up different
    original rows — confirm this is what the caller expects.

    Parameters
    ----------
    URM_all : 2-D matrix supporting ``.shape`` and row indexing with a
        boolean mask and with an integer index array (e.g. a SciPy CSR matrix).
    n_folds : int, default 5
        Number of folds; must satisfy ``1 <= n_folds <= URM_all.shape[0]``.
    seed : int or None, default None
        When ``None`` the shuffle uses NumPy's global random state, exactly
        as before. When given, a dedicated generator seeded with this value
        is used so the folds are reproducible across calls.

    Yields
    ------
    (URM_train, URM_validation) : one pair per fold.

    Raises
    ------
    ValueError
        If ``n_folds`` is smaller than 1 or larger than the number of rows.
        Because this is a generator, the error is raised on first iteration.
    """
    num_users = URM_all.shape[0]

    if n_folds < 1 or n_folds > num_users:
        raise ValueError(
            "n_folds must be between 1 and the number of rows ({}), got {}".format(num_users, n_folds)
        )

    # Shuffle the row indices; only create a private generator when a seed is
    # requested so that unseeded calls keep using the global NumPy state.
    user_indices = np.arange(num_users)
    if seed is None:
        np.random.shuffle(user_indices)
    else:
        np.random.default_rng(seed).shuffle(user_indices)

    # array_split gives the first (num_users % n_folds) chunks one extra
    # element, which is the same fold-size rule as the manual computation.
    for validation_indices in np.array_split(user_indices, n_folds):
        # Boolean mask selecting every row that is NOT in the current fold.
        train_mask = np.ones(num_users, dtype=bool)
        train_mask[validation_indices] = False

        URM_train = URM_all[train_mask]
        URM_validation = URM_all[validation_indices]

        yield URM_train, URM_validation


In [3]:
# Imports needed by this cell, grouped ahead of the statements that use them.
from Evaluation.Evaluator import EvaluatorHoldout
from Recommenders.SLIM.SLIMElasticNetRecommender import (
    MultiThreadSLIM_SLIMElasticNetRecommender,
    SLIMElasticNetRecommender,
)

# `model` holds the recommender CLASS (not an instance); later cells
# instantiate it with a training matrix.
model = MultiThreadSLIM_SLIMElasticNetRecommender

# Hold-out evaluator over the validation matrix, scored at cutoff 10 with no
# users explicitly ignored.
evaluator_validation = EvaluatorHoldout(
    URM_validation,
    cutoff_list=[10],
    ignore_users=[],
)

EvaluatorHoldout: Ignoring 2577 (20.4%) Users that have less than 1 test interactions
EvaluatorHoldout: Ignoring 0 Users


In [7]:
import optuna as op
import numpy as np


def objective(trial):
    """Optuna objective: mean ``MAP_MIN_DEN`` at cutoff 10 over 5 CV folds.

    Samples ``topK``, ``l1_ratio`` and ``alpha`` from ``trial``. For every
    fold produced by ``k_fold_cross_validation`` over the notebook-level
    ``URM_all``, a fresh instance of the notebook-level ``model`` class is
    fitted on the training part and evaluated on the validation part. The
    running mean is reported after each fold so the study's pruner can stop
    an unpromising trial early.

    Parameters
    ----------
    trial : optuna trial object supplied by ``study.optimize``.

    Returns
    -------
    float
        Mean of the per-fold ``MAP_MIN_DEN`` values.

    Raises
    ------
    optuna.TrialPruned
        When ``trial.should_prune()`` is true after a fold.
    """
    # Hyperparameters to optimize. The bounds are exact literals with the
    # same values as before: an int upper bound for suggest_int, and 1e-7
    # for the log-scaled lower bound. The `log` settings are kept as they
    # are so the distributions stay compatible with trials already stored
    # in a persisted study.
    topK = trial.suggest_int("topK", 3, 10_000)
    l1 = trial.suggest_float("l1_ratio", 1e-7, 1, log=True)
    alpha = trial.suggest_float("alpha", 5e-4, 1)

    MAP_scores = []

    for fold, (URM_train, URM_validation) in enumerate(k_fold_cross_validation(URM_all, n_folds=5)):
        # Model initialization and fitting
        recommender = model(URM_train)
        recommender.fit(topK=topK, l1_ratio=l1, alpha=alpha, workers=7)

        # Evaluation on this fold only; the names are local and do not touch
        # the notebook-level evaluator_validation / URM_validation.
        evaluator_validation = EvaluatorHoldout(URM_validation, cutoff_list=[10], ignore_users=[])
        result, _ = evaluator_validation.evaluateRecommender(recommender)
        MAP_result = result["MAP_MIN_DEN"].item()
        MAP_scores.append(MAP_result)

        # Report the running mean with the fold index as the step
        trial.report(np.mean(MAP_scores), fold)

        # Handle pruning based on the intermediate result
        if trial.should_prune():
            raise op.TrialPruned()

    # Return the average MAP across all folds
    return np.mean(MAP_scores)


In [8]:
# Configuration to evaluate first: it is enqueued into the study so that the
# next trial uses exactly these values instead of sampled ones.
best_params = {
    "topK": 8894,
    "l1_ratio": 0.05565733019999427,
    "alpha": 0.0012979360257937668
}
study_name = "slim-elastic-crossvalidation-study"  # Unique identifier of the study.
storage_name = "sqlite:///db.db"
pruner = op.pruners.MedianPruner()
# load_if_exists=True: an already stored study with this name is reused, so
# trials from earlier runs of this cell are still part of `study`.
study = op.create_study(study_name=study_name, storage=storage_name, direction="maximize", load_if_exists=True,
                        pruner=pruner)
# Because the study is reloaded from storage, enqueueing unconditionally
# would add the same configuration again on every re-run of this cell.
# skip_if_exists=True keeps a single copy; when the configuration is already
# stored, the trial below is drawn from the sampler instead.
study.enqueue_trial(best_params, skip_if_exists=True)
study.optimize(objective, n_trials=1)

[I 2023-12-02 02:17:40,852] Using an existing study with name 'slim-elastic-crossvalidation-study' instead of creating a new one.


SLIMElasticNetRecommender: URM Detected 104 ( 0.5%) items with no interactions.


100%|█████████▉| 22200/22222 [01:52<00:00, 115.07it/s]

EvaluatorHoldout: Ignoring 0 Users
EvaluatorHoldout: Processed 2528 (100.0%) in 1.85 sec. Users per second: 1368
SLIMElasticNetRecommender: URM Detected 104 ( 0.5%) items with no interactions.



  0%|          | 0/22222 [00:00<?, ?it/s][A
  0%|          | 8/22222 [00:01<54:20,  6.81it/s][A
  0%|          | 72/22222 [00:01<04:54, 75.21it/s][A
  1%|          | 136/22222 [00:01<02:39, 138.48it/s][A
  1%|          | 176/22222 [00:01<02:10, 168.91it/s][A
  1%|          | 232/22222 [00:02<03:32, 103.32it/s][A
  1%|▏         | 296/22222 [00:02<02:33, 142.67it/s][A
  1%|▏         | 328/22222 [00:02<02:15, 162.07it/s][A
  2%|▏         | 424/22222 [00:02<01:24, 257.80it/s][A
  2%|▏         | 472/22222 [00:03<02:34, 140.81it/s][A
  2%|▏         | 504/22222 [00:03<02:45, 131.61it/s][A
  2%|▏         | 552/22222 [00:04<02:13, 162.37it/s][A
  3%|▎         | 584/22222 [00:04<02:08, 167.77it/s][A
  3%|▎         | 648/22222 [00:04<01:48, 198.10it/s][A
  3%|▎         | 680/22222 [00:05<03:46, 94.91it/s] [A
  3%|▎         | 744/22222 [00:05<03:00, 119.28it/s][A
  4%|▍         | 840/22222 [00:05<01:54, 186.24it/s][A
  4%|▍         | 872/22222 [00:06<02:55, 121.42it/s][A
  4%|▍ 

EvaluatorHoldout: Ignoring 0 Users
EvaluatorHoldout: Processed 2528 (100.0%) in 1.72 sec. Users per second: 1470
SLIMElasticNetRecommender: URM Detected 117 ( 0.5%) items with no interactions.




  0%|          | 0/22222 [00:00<?, ?it/s][A[A

  0%|          | 8/22222 [00:01<49:42,  7.45it/s][A[A

  0%|          | 40/22222 [00:01<08:30, 43.45it/s][A[A

  0%|          | 72/22222 [00:01<04:30, 81.77it/s][A[A

  1%|          | 136/22222 [00:01<02:13, 165.20it/s][A[A

  1%|          | 232/22222 [00:02<02:36, 140.13it/s][A[A

  1%|          | 264/22222 [00:02<02:40, 137.13it/s][A[A

  1%|▏         | 296/22222 [00:02<02:29, 146.30it/s][A[A

  2%|▏         | 392/22222 [00:02<01:27, 248.41it/s][A[A

  2%|▏         | 456/22222 [00:03<02:20, 154.62it/s][A[A

  2%|▏         | 520/22222 [00:03<01:47, 202.76it/s][A[A

  3%|▎         | 568/22222 [00:03<01:50, 195.64it/s][A[A

  3%|▎         | 648/22222 [00:03<01:19, 272.98it/s][A[A

  3%|▎         | 696/22222 [00:04<02:22, 150.61it/s][A[A

  3%|▎         | 744/22222 [00:04<01:59, 180.00it/s][A[A

  4%|▎         | 808/22222 [00:05<01:42, 208.56it/s][A[A

  4%|▍         | 872/22222 [00:05<01:25, 249.23it/s][A

EvaluatorHoldout: Ignoring 0 Users
EvaluatorHoldout: Processed 2528 (100.0%) in 1.86 sec. Users per second: 1356
SLIMElasticNetRecommender: URM Detected 128 ( 0.6%) items with no interactions.


  6%|▌         | 1224/22222 [00:07<01:43, 202.94it/s]

100%|█████████▉| 22192/22222 [01:50<00:00, 126.94it/s][A[A

EvaluatorHoldout: Ignoring 0 Users
EvaluatorHoldout: Processed 2527 (100.0%) in 1.68 sec. Users per second: 1504
SLIMElasticNetRecommender: URM Detected 130 ( 0.6%) items with no interactions.



  0%|          | 0/22222 [00:00<?, ?it/s][A
  0%|          | 8/22222 [00:00<40:54,  9.05it/s][A
  0%|          | 40/22222 [00:01<09:18, 39.69it/s][A
  0%|          | 104/22222 [00:01<03:34, 102.92it/s][A
  1%|          | 168/22222 [00:01<02:04, 177.09it/s][A
  1%|          | 232/22222 [00:02<02:21, 155.09it/s][A
  1%|          | 264/22222 [00:02<03:09, 115.79it/s][A
  1%|▏         | 296/22222 [00:02<02:48, 129.93it/s][A
  2%|▏         | 424/22222 [00:02<01:22, 264.42it/s][A
  2%|▏         | 472/22222 [00:03<01:33, 231.65it/s][A
  2%|▏         | 512/22222 [00:03<02:17, 158.23it/s][A
  2%|▏         | 544/22222 [00:03<02:04, 174.26it/s][A
  3%|▎         | 576/22222 [00:03<02:02, 176.37it/s][A
  3%|▎         | 608/22222 [00:04<01:50, 195.61it/s][A
  3%|▎         | 680/22222 [00:04<01:38, 218.32it/s][A
  3%|▎         | 712/22222 [00:04<02:06, 170.25it/s][A
  3%|▎         | 744/22222 [00:04<02:12, 162.52it/s][A
  3%|▎         | 776/22222 [00:04<01:59, 179.80it/s][A
  4%|▎ 

EvaluatorHoldout: Ignoring 0 Users
EvaluatorHoldout: Processed 2527 (100.0%) in 1.65 sec. Users per second: 1532


[I 2023-12-02 02:27:07,273] Trial 1 finished with value: 0.012095898853387872 and parameters: {'topK': 8894, 'l1_ratio': 0.05565733019999427, 'alpha': 0.0012979360257937668}. Best is trial 1 with value: 0.012095898853387872.


In [8]:
# Best parameters over every trial stored in the study. The study is loaded
# from persistent storage, so this can differ from the configuration that
# was enqueued and evaluated in the cell above.
study.best_params

{'topK': 8433, 'l1_ratio': 6.8480816822573514e-06, 'alpha': 0.9879215157806619}

In [9]:
# Refit on the combined train+validation matrix using the best parameters
# recorded in the study; `model` is the recommender class chosen earlier and
# workers=7 is forwarded to fit() alongside the tuned values.
final = model(URM_train_validation)
final.fit(**study.best_params, workers=7)

SLIMElasticNetRecommender: URM Detected 211 ( 1.7%) users with no interactions.
SLIMElasticNetRecommender: URM Detected 101 ( 0.5%) items with no interactions.



  0%|          | 0/22222 [00:00<?, ?it/s][A
  0%|          | 8/22222 [00:01<1:19:04,  4.68it/s][A
  0%|          | 40/22222 [00:02<14:42, 25.13it/s] [A
  0%|          | 72/22222 [00:02<07:32, 48.96it/s][A
  0%|          | 104/22222 [00:02<07:38, 48.22it/s][A
  1%|          | 128/22222 [00:02<05:56, 61.97it/s][A
  1%|          | 168/22222 [00:03<04:05, 89.98it/s][A
  1%|          | 200/22222 [00:03<03:22, 108.64it/s][A
  1%|          | 232/22222 [00:03<04:07, 88.99it/s] [A
  1%|          | 248/22222 [00:03<03:51, 95.09it/s][A
  1%|▏         | 296/22222 [00:04<03:35, 101.83it/s][A
  2%|▏         | 360/22222 [00:04<02:20, 155.53it/s][A
  2%|▏         | 392/22222 [00:04<02:40, 136.00it/s][A
  2%|▏         | 416/22222 [00:05<02:42, 134.50it/s][A
  2%|▏         | 488/22222 [00:05<03:21, 107.63it/s][A
  2%|▏         | 520/22222 [00:05<02:53, 124.93it/s][A
  2%|▏         | 552/22222 [00:06<02:40, 134.79it/s][A
  3%|▎         | 584/22222 [00:06<02:35, 139.00it/s][A
  3%|▎    

In [None]:
from Evaluation.Evaluator import EvaluatorHoldout

# Evaluator over the held-out test matrix at cutoff 10, with no users
# explicitly ignored. `evaluator_test` is reused by the following cell.
evaluator_test = EvaluatorHoldout(URM_test, cutoff_list=[10], ignore_users=[])
# Last expression of the cell: the evaluation result of `final` is displayed.
evaluator_test.evaluateRecommender(final)

In [12]:
# Second recommender fitted on train+validation with explicitly written
# hyperparameters (the same values as the `best_params` dict enqueued into
# the study), then scored on the test evaluator for comparison.
pippo = model(URM_train_validation)
pippo.fit(
    topK=8894,
    l1_ratio=0.05565733019999427,
    alpha=0.0012979360257937668,
    workers=7,
)
# Keep the evaluation call as the last expression so the cell displays it.
evaluator_test.evaluateRecommender(pippo)

SLIMElasticNetRecommender: URM Detected 211 ( 1.7%) users with no interactions.
SLIMElasticNetRecommender: URM Detected 101 ( 0.5%) items with no interactions.




  0%|          | 0/22222 [00:00<?, ?it/s][A[A

  0%|          | 8/22222 [00:01<55:21,  6.69it/s][A[A

  0%|          | 40/22222 [00:01<09:22, 39.43it/s][A[A

  0%|          | 72/22222 [00:01<04:57, 74.41it/s][A[A

  0%|          | 104/22222 [00:01<03:20, 110.24it/s][A[A

  1%|          | 200/22222 [00:01<01:32, 238.92it/s][A[A

  1%|          | 240/22222 [00:02<02:37, 139.88it/s][A[A

  1%|          | 272/22222 [00:02<02:33, 142.92it/s][A[A

  1%|▏         | 296/22222 [00:02<02:47, 131.06it/s][A[A

  2%|▏         | 360/22222 [00:02<01:50, 197.11it/s][A[A

  2%|▏         | 456/22222 [00:03<02:04, 174.51it/s][A[A

100%|█████████▉| 22216/22222 [08:04<00:00, 45.88it/s] A[A
100%|█████████▉| 22216/22222 [03:02<00:00, 121.50it/s]


  3%|▎         | 584/22222 [00:04<01:51, 193.47it/s][A[A

  3%|▎         | 680/22222 [00:04<02:04, 173.59it/s][A[A

  3%|▎         | 744/22222 [00:04<01:49, 196.57it/s][A[A

  4%|▍         | 840/22222 [00:05<01:36, 222.13it/s][A[A



EvaluatorHoldout: Processed 10468 (100.0%) in 6.45 sec. Users per second: 1624


(       PRECISION PRECISION_RECALL_MIN_DEN    RECALL       MAP MAP_MIN_DEN  \
 cutoff                                                                      
 10      0.096322                 0.165919  0.140001  0.050049     0.08563   
 
              MRR      NDCG        F1  HIT_RATE ARHR_ALL_HITS  ...  \
 cutoff                                                        ...   
 10      0.280652  0.153761  0.114125  0.525888      0.370758  ...   
 
        COVERAGE_USER COVERAGE_USER_HIT USERS_IN_GT DIVERSITY_GINI  \
 cutoff                                                              
 10          0.828296          0.435591    0.828296       0.055078   
 
        SHANNON_ENTROPY RATIO_DIVERSITY_HERFINDAHL RATIO_DIVERSITY_GINI  \
 cutoff                                                                   
 10           10.191239                   0.997688             0.158082   
 
        RATIO_SHANNON_ENTROPY RATIO_AVERAGE_POPULARITY RATIO_NOVELTY  
 cutoff                                   

In [None]:
##  MAP: 0.0225956, MAP_MIN_DEN: 0.0446655

Original scores

In [None]:
## MAP_MIN_DEN = .0822900 ## MAP = 0.0483066 the best 0.139

In [None]:
## MAP_MIN_DEN = .0845288 ## MAP = 0.0495724 second 0.135

In [None]:
## MAP_MIN_DEN = .0846506 ## MAP = 0.0496060 third 