<h1>Loading libraries</h1>

In [1]:
from kaggle_secrets import UserSecretsClient
user_secrets = UserSecretsClient()
token = user_secrets.get_secret("token")

In [2]:
!git clone https://FrancescoZanella:{token}@github.com/FrancescoZanella/RecSys.git
%cd RecSys
!python run_compile_all_cython.py

Cloning into 'RecSys'...
remote: Enumerating objects: 1495, done.[K
remote: Counting objects: 100% (632/632), done.[K
remote: Compressing objects: 100% (270/270), done.[K
remote: Total 1495 (delta 364), reused 605 (delta 346), pack-reused 863[K
Receiving objects: 100% (1495/1495), 23.11 MiB | 9.28 MiB/s, done.
Resolving deltas: 100% (763/763), done.
/kaggle/working/RecSys
run_compile_all_cython: Found 10 Cython files in 4 folders...
run_compile_all_cython: All files will be compiled using your current python environment: '/opt/conda/bin/python'
Compiling [1/10]: MatrixFactorizationImpressions_Cython_Epoch.pyx... 
In file included from [01m[K/opt/conda/lib/python3.10/site-packages/numpy/core/include/numpy/ndarraytypes.h:1940[m[K,
                 from [01m[K/opt/conda/lib/python3.10/site-packages/numpy/core/include/numpy/ndarrayobject.h:12[m[K,
                 from [01m[K/opt/conda/lib/python3.10/site-packages/numpy/core/include/numpy/arrayobject.h:5[m[K,
 

In [3]:
import scipy.sparse as sps
import pandas as pd
import numpy as np
import os
import matplotlib.pyplot as pyplot
import csv
from datetime import datetime
import time
from tqdm import tqdm

In [4]:
import sys
sys.path.append("/kaggle/working/RecSys")

In [5]:
 # from Recommenders.Recommender_import_list import *

In [6]:
from Utils.seconds_to_biggest_unit import *
from Evaluation.Evaluator import EvaluatorHoldout
from Data_manager.split_functions.split_train_validation_random_holdout import split_train_in_two_percentage_global_sample
from Recommenders.SLIM.SLIMElasticNetRecommender import SLIMElasticNetRecommender

<h1> Loading the dataset and the target users </h1>

In [7]:
# open the dataset
path = "/kaggle/working/RecSys/recsys1/data_train.csv"
path_target = "/kaggle/working/RecSys/recsys1/data_target_users_test.csv"

file = open(path, 'r')



file1 = open(path_target, 'r')

# load the dataset removing the header
df = pd.read_csv(filepath_or_buffer=path,
                 header=0,
                 dtype={0:int, 1:int, 2:float},     
                 sep=",",
                 engine='python')

df_users = pd.read_csv(
    filepath_or_buffer = path_target,
    header=0,
    dtype={0: int},
    sep=",",
    engine="python"   , 
)


df_users.columns = ["UserID"]



# rename the columns
df.columns = ["UserID", "ItemID", "Interaction"]

<h1> Preprocessing </h1>

In [8]:
userID_unique = df["UserID"].unique()
itemID_unique = df["ItemID"].unique()
n_interactions = len(df)
print ("Number of items\t {}, Number of users\t {}".format(len(itemID_unique),len(userID_unique)))
print ("Max ID items\t {}, Max Id users\t {}\n".format(max(itemID_unique), max(userID_unique)))

Number of items	 22222, Number of users	 12638
Max ID items	 22347, Max Id users	 13024



<h1> Creating the URM </h1>

In [9]:
mapped_id, original_id = pd.factorize(itemID_unique)
item_original_ID_to_index = pd.Series(mapped_id, index=original_id)

In [10]:
mapped_id, original_id = pd.factorize(userID_unique)
user_original_ID_to_index = pd.Series(mapped_id, index=original_id)

In [11]:
df["UserID"] = df["UserID"].map(user_original_ID_to_index)
df["ItemID"] = df["ItemID"].map(item_original_ID_to_index)

In [12]:
URM_all = sps.coo_matrix((df["Interaction"].values, 
                          (df["UserID"].values, df["ItemID"].values)))

URM_all=URM_all.tocsr()

<h1> Create the split train,validation </h1>

In [13]:
URM_train, URM_validation = split_train_in_two_percentage_global_sample(URM_all, train_percentage = 0.85)



<h1> Evaluation metrics </h1>

In [14]:
def AP(recommended_items, relevant_items):
   
    is_relevant = np.in1d(recommended_items, relevant_items, assume_unique=True)
    
    # Cumulative sum: precision at 1, at 2, at 3 ...
    p_at_k = is_relevant * np.cumsum(is_relevant, dtype=np.float32) / (1 + np.arange(is_relevant.shape[0]))
    
    ap_score = np.sum(p_at_k) / np.min([relevant_items.shape[0], is_relevant.shape[0]])

    return ap_score

In [15]:
def recall(recommended_items, relevant_items):

    is_relevant = np.in1d(recommended_items, relevant_items, assume_unique=True)

    recall_score = np.sum(is_relevant, dtype=np.float32) / relevant_items.shape[0]

    return recall_score

In [16]:
def evaluate_algorithm(URM_test, recommender_object, at=10):

    cumulative_recall = 0.0

    num_eval = 0


    for user_id in range(URM_test.shape[0]):

        relevant_items = URM_test.indices[URM_test.indptr[user_id]:URM_test.indptr[user_id+1]]

        if len(relevant_items)>0:

            recommended_items = recommender_object.recommend(user_id,cutoff=at)
            num_eval+=1


            cumulative_recall += recall(recommended_items, relevant_items)



    cumulative_recall /= num_eval


    return cumulative_recall 

<h1> Smarter hyperparameter search </h1>

In [17]:
cutoff=120

In [18]:
import optuna as op

In [19]:
model= SLIMElasticNetRecommender

In [20]:
def objective(trial):
    topK = trial.suggest_int("topK", 1, 12000)
    l1 = trial.suggest_float("l1_ratio", 1e-5, 1)
    alpha = trial.suggest_float("alpha", 1e-4, 1)

    
    recommender = model(URM_train)
    recommender.fit(**trial.params)
    
       
    return evaluate_algorithm(URM_validation,recommender, at=cutoff)

In [21]:
class SaveResults(object):
    
    def __init__(self):
        self.results_df = pd.DataFrame()
    
    def __call__(self, optuna_study, optuna_trial):
        hyperparam_dict = optuna_trial.params.copy()
        hyperparam_dict["result"] = optuna_trial.values[0]
        
        #self.results_df = pd.concat([self.results_df, pd.DataFrame(hyperparam_dict).transpose()], ignore_index=True)
        self.results_df = self.results_df._append(hyperparam_dict, ignore_index=True)

In [22]:
study = op.create_study(direction="maximize")
save_results= SaveResults()
study.optimize(objective, callbacks=[save_results], n_trials=60)
#study.enqueue_trial(best_params)

[I 2024-01-04 18:04:10,213] A new study created in memory with name: no-name-7cbc7954-fbd1-464c-9dbe-0dcef0c6fa67


SLIMElasticNetRecommender: URM Detected 144 ( 1.1%) users with no interactions.
SLIMElasticNetRecommender: URM Detected 68 ( 0.3%) items with no interactions.
SLIMElasticNetRecommender: Processed 22222 (100.0%) in 3.03 min. Items per second: 122.25


[I 2024-01-04 18:07:19,082] Trial 0 finished with value: 0.0020562867841200047 and parameters: {'topK': 6276, 'l1_ratio': 0.19565168142819334, 'alpha': 0.5356610662967912}. Best is trial 0 with value: 0.0020562867841200047.


SLIMElasticNetRecommender: URM Detected 144 ( 1.1%) users with no interactions.
SLIMElasticNetRecommender: URM Detected 68 ( 0.3%) items with no interactions.
SLIMElasticNetRecommender: Processed 22222 (100.0%) in 3.00 min. Items per second: 123.57


[I 2024-01-04 18:10:26,434] Trial 1 finished with value: 0.0020562867841200047 and parameters: {'topK': 1606, 'l1_ratio': 0.8154650150733714, 'alpha': 0.13768195605826597}. Best is trial 0 with value: 0.0020562867841200047.


SLIMElasticNetRecommender: URM Detected 144 ( 1.1%) users with no interactions.
SLIMElasticNetRecommender: URM Detected 68 ( 0.3%) items with no interactions.
SLIMElasticNetRecommender: Processed 22222 (100.0%) in 3.06 min. Items per second: 120.99


[I 2024-01-04 18:13:37,419] Trial 2 finished with value: 0.0020562867841200047 and parameters: {'topK': 6879, 'l1_ratio': 0.7019089822278197, 'alpha': 0.9197494782990425}. Best is trial 0 with value: 0.0020562867841200047.


SLIMElasticNetRecommender: URM Detected 144 ( 1.1%) users with no interactions.
SLIMElasticNetRecommender: URM Detected 68 ( 0.3%) items with no interactions.
SLIMElasticNetRecommender: Processed 22222 (100.0%) in 3.05 min. Items per second: 121.25


[I 2024-01-04 18:16:47,856] Trial 3 finished with value: 0.0020562867841200047 and parameters: {'topK': 5614, 'l1_ratio': 0.27595374315576326, 'alpha': 0.8605513020988884}. Best is trial 0 with value: 0.0020562867841200047.


SLIMElasticNetRecommender: URM Detected 144 ( 1.1%) users with no interactions.
SLIMElasticNetRecommender: URM Detected 68 ( 0.3%) items with no interactions.
SLIMElasticNetRecommender: Processed 22222 (100.0%) in 3.01 min. Items per second: 122.97


[I 2024-01-04 18:19:55,733] Trial 4 finished with value: 0.05917881503790961 and parameters: {'topK': 1439, 'l1_ratio': 0.06423723091655871, 'alpha': 0.21094745677879245}. Best is trial 4 with value: 0.05917881503790961.


SLIMElasticNetRecommender: URM Detected 144 ( 1.1%) users with no interactions.
SLIMElasticNetRecommender: URM Detected 68 ( 0.3%) items with no interactions.
SLIMElasticNetRecommender: Processed 22222 (100.0%) in 3.02 min. Items per second: 122.53


[I 2024-01-04 18:23:04,136] Trial 5 finished with value: 0.0020562867841200047 and parameters: {'topK': 9090, 'l1_ratio': 0.5793443881865546, 'alpha': 0.08821936102552076}. Best is trial 4 with value: 0.05917881503790961.


SLIMElasticNetRecommender: URM Detected 144 ( 1.1%) users with no interactions.
SLIMElasticNetRecommender: URM Detected 68 ( 0.3%) items with no interactions.
SLIMElasticNetRecommender: Processed 22222 (100.0%) in 2.97 min. Items per second: 124.78


[I 2024-01-04 18:26:09,482] Trial 6 finished with value: 0.0020562867841200047 and parameters: {'topK': 2424, 'l1_ratio': 0.4122197984407487, 'alpha': 0.8312702066237669}. Best is trial 4 with value: 0.05917881503790961.


SLIMElasticNetRecommender: URM Detected 144 ( 1.1%) users with no interactions.
SLIMElasticNetRecommender: URM Detected 68 ( 0.3%) items with no interactions.
SLIMElasticNetRecommender: Processed 22222 (100.0%) in 3.00 min. Items per second: 123.51


[I 2024-01-04 18:29:16,512] Trial 7 finished with value: 0.03252593828900755 and parameters: {'topK': 5210, 'l1_ratio': 0.13480014995601072, 'alpha': 0.12598833561616202}. Best is trial 4 with value: 0.05917881503790961.


SLIMElasticNetRecommender: URM Detected 144 ( 1.1%) users with no interactions.
SLIMElasticNetRecommender: URM Detected 68 ( 0.3%) items with no interactions.
SLIMElasticNetRecommender: Processed 22222 (100.0%) in 3.12 min. Items per second: 118.76


[I 2024-01-04 18:32:30,818] Trial 8 finished with value: 0.0020562867841200047 and parameters: {'topK': 9576, 'l1_ratio': 0.7244832728574899, 'alpha': 0.46560147356944326}. Best is trial 4 with value: 0.05917881503790961.


SLIMElasticNetRecommender: URM Detected 144 ( 1.1%) users with no interactions.
SLIMElasticNetRecommender: URM Detected 68 ( 0.3%) items with no interactions.
SLIMElasticNetRecommender: Processed 22222 (100.0%) in 3.04 min. Items per second: 121.95


[I 2024-01-04 18:35:40,251] Trial 9 finished with value: 0.0020562867841200047 and parameters: {'topK': 3692, 'l1_ratio': 0.48218864983942517, 'alpha': 0.6295892713476274}. Best is trial 4 with value: 0.05917881503790961.


SLIMElasticNetRecommender: URM Detected 144 ( 1.1%) users with no interactions.
SLIMElasticNetRecommender: URM Detected 68 ( 0.3%) items with no interactions.
SLIMElasticNetRecommender: Processed 22222 (100.0%) in 3.05 min. Items per second: 121.31


[I 2024-01-04 18:38:50,677] Trial 10 finished with value: 0.0020562867841200047 and parameters: {'topK': 347, 'l1_ratio': 0.9642401091860777, 'alpha': 0.3038628675185887}. Best is trial 4 with value: 0.05917881503790961.


SLIMElasticNetRecommender: URM Detected 144 ( 1.1%) users with no interactions.
SLIMElasticNetRecommender: URM Detected 68 ( 0.3%) items with no interactions.
SLIMElasticNetRecommender: Processed 22222 (100.0%) in 3.15 min. Items per second: 117.68


[I 2024-01-04 18:42:07,139] Trial 11 finished with value: 0.23633261409229622 and parameters: {'topK': 4176, 'l1_ratio': 0.008582391346341478, 'alpha': 0.2731306740117975}. Best is trial 11 with value: 0.23633261409229622.


SLIMElasticNetRecommender: URM Detected 144 ( 1.1%) users with no interactions.
SLIMElasticNetRecommender: URM Detected 68 ( 0.3%) items with no interactions.
SLIMElasticNetRecommender: Processed 22222 (100.0%) in 3.12 min. Items per second: 118.79


[I 2024-01-04 18:45:21,807] Trial 12 finished with value: 0.23582831795443573 and parameters: {'topK': 3577, 'l1_ratio': 0.008564909200103, 'alpha': 0.27702464719384245}. Best is trial 11 with value: 0.23633261409229622.


SLIMElasticNetRecommender: URM Detected 144 ( 1.1%) users with no interactions.
SLIMElasticNetRecommender: URM Detected 68 ( 0.3%) items with no interactions.
SLIMElasticNetRecommender: Processed 22222 (100.0%) in 3.25 min. Items per second: 113.97


[I 2024-01-04 18:48:44,511] Trial 13 finished with value: 0.2551910760595064 and parameters: {'topK': 3898, 'l1_ratio': 0.004781446147704468, 'alpha': 0.3419675816024622}. Best is trial 13 with value: 0.2551910760595064.


SLIMElasticNetRecommender: URM Detected 144 ( 1.1%) users with no interactions.
SLIMElasticNetRecommender: URM Detected 68 ( 0.3%) items with no interactions.
SLIMElasticNetRecommender: Processed 22222 (100.0%) in 2.93 min. Items per second: 126.59


[I 2024-01-04 18:51:47,184] Trial 14 finished with value: 0.0020562867841200047 and parameters: {'topK': 11801, 'l1_ratio': 0.3035759573134549, 'alpha': 0.39507482473717986}. Best is trial 13 with value: 0.2551910760595064.


SLIMElasticNetRecommender: URM Detected 144 ( 1.1%) users with no interactions.
SLIMElasticNetRecommender: URM Detected 68 ( 0.3%) items with no interactions.
SLIMElasticNetRecommender: Processed 22222 (100.0%) in 3.05 min. Items per second: 121.34


[I 2024-01-04 18:54:57,671] Trial 15 finished with value: 0.2050575719878147 and parameters: {'topK': 4210, 'l1_ratio': 0.005691220721207195, 'alpha': 0.6465665116193009}. Best is trial 13 with value: 0.2551910760595064.


SLIMElasticNetRecommender: URM Detected 144 ( 1.1%) users with no interactions.
SLIMElasticNetRecommender: URM Detected 68 ( 0.3%) items with no interactions.
SLIMElasticNetRecommender: Processed 22222 (100.0%) in 2.95 min. Items per second: 125.75


[I 2024-01-04 18:58:01,485] Trial 16 finished with value: 0.0020562867841200047 and parameters: {'topK': 8092, 'l1_ratio': 0.195786584902429, 'alpha': 0.3629829192168215}. Best is trial 13 with value: 0.2551910760595064.


SLIMElasticNetRecommender: URM Detected 144 ( 1.1%) users with no interactions.
SLIMElasticNetRecommender: URM Detected 68 ( 0.3%) items with no interactions.
SLIMElasticNetRecommender: Processed 22222 (100.0%) in 2.99 min. Items per second: 123.98


[I 2024-01-04 19:01:08,227] Trial 17 finished with value: 0.06644028386118438 and parameters: {'topK': 2684, 'l1_ratio': 0.356491215024908, 'alpha': 0.03390663410487904}. Best is trial 13 with value: 0.2551910760595064.


SLIMElasticNetRecommender: URM Detected 144 ( 1.1%) users with no interactions.
SLIMElasticNetRecommender: URM Detected 68 ( 0.3%) items with no interactions.
SLIMElasticNetRecommender: Processed 22222 (100.0%) in 3.13 min. Items per second: 118.51


[I 2024-01-04 19:04:22,915] Trial 18 finished with value: 0.0020562867841200047 and parameters: {'topK': 4527, 'l1_ratio': 0.13866453374737212, 'alpha': 0.5100345214926013}. Best is trial 13 with value: 0.2551910760595064.


SLIMElasticNetRecommender: URM Detected 144 ( 1.1%) users with no interactions.
SLIMElasticNetRecommender: URM Detected 68 ( 0.3%) items with no interactions.
SLIMElasticNetRecommender: Processed 22222 (100.0%) in 3.10 min. Items per second: 119.30


[I 2024-01-04 19:07:36,383] Trial 19 finished with value: 0.012301378885774499 and parameters: {'topK': 7235, 'l1_ratio': 0.10538971356996425, 'alpha': 0.23650794667573985}. Best is trial 13 with value: 0.2551910760595064.


SLIMElasticNetRecommender: URM Detected 144 ( 1.1%) users with no interactions.
SLIMElasticNetRecommender: URM Detected 68 ( 0.3%) items with no interactions.
SLIMElasticNetRecommender: Processed 22222 (100.0%) in 3.05 min. Items per second: 121.59


[I 2024-01-04 19:10:46,466] Trial 20 finished with value: 0.0020562867841200047 and parameters: {'topK': 411, 'l1_ratio': 0.22391317911365005, 'alpha': 0.6087172186538885}. Best is trial 13 with value: 0.2551910760595064.


SLIMElasticNetRecommender: URM Detected 144 ( 1.1%) users with no interactions.
SLIMElasticNetRecommender: URM Detected 68 ( 0.3%) items with no interactions.
SLIMElasticNetRecommender: Processed 22222 (100.0%) in 3.04 min. Items per second: 121.99


[I 2024-01-04 19:13:55,939] Trial 21 finished with value: 0.07342130786469828 and parameters: {'topK': 3320, 'l1_ratio': 0.043728546639850756, 'alpha': 0.26434532073922895}. Best is trial 13 with value: 0.2551910760595064.


SLIMElasticNetRecommender: URM Detected 144 ( 1.1%) users with no interactions.
SLIMElasticNetRecommender: URM Detected 68 ( 0.3%) items with no interactions.
SLIMElasticNetRecommender: Processed 22222 (100.0%) in 3.06 min. Items per second: 120.99


[I 2024-01-04 19:17:06,943] Trial 22 finished with value: 0.11987660140105917 and parameters: {'topK': 4769, 'l1_ratio': 0.020017265231555278, 'alpha': 0.38397654022965266}. Best is trial 13 with value: 0.2551910760595064.


SLIMElasticNetRecommender: URM Detected 144 ( 1.1%) users with no interactions.
SLIMElasticNetRecommender: URM Detected 68 ( 0.3%) items with no interactions.
SLIMElasticNetRecommender: Processed 16770 (75.5%) in 5.00 min. Items per second: 55.89
SLIMElasticNetRecommender: Processed 22222 (100.0%) in 6.04 min. Items per second: 61.29


[I 2024-01-04 19:23:20,644] Trial 23 finished with value: 0.3222365434565734 and parameters: {'topK': 3226, 'l1_ratio': 0.0013657328880480601, 'alpha': 0.17877620797662885}. Best is trial 23 with value: 0.3222365434565734.


SLIMElasticNetRecommender: URM Detected 144 ( 1.1%) users with no interactions.
SLIMElasticNetRecommender: URM Detected 68 ( 0.3%) items with no interactions.
SLIMElasticNetRecommender: Processed 12192 (54.9%) in 5.00 min. Items per second: 40.64
SLIMElasticNetRecommender: Processed 22222 (100.0%) in 7.85 min. Items per second: 47.20


[I 2024-01-04 19:31:21,591] Trial 24 finished with value: 0.3688799075293374 and parameters: {'topK': 2152, 'l1_ratio': 0.14551643960722144, 'alpha': 0.0015749065577089216}. Best is trial 24 with value: 0.3688799075293374.


SLIMElasticNetRecommender: URM Detected 144 ( 1.1%) users with no interactions.
SLIMElasticNetRecommender: URM Detected 68 ( 0.3%) items with no interactions.
SLIMElasticNetRecommender: Processed 22222 (100.0%) in 3.21 min. Items per second: 115.42


[I 2024-01-04 19:34:41,586] Trial 25 finished with value: 0.23740059651017661 and parameters: {'topK': 1813, 'l1_ratio': 0.12353608118866799, 'alpha': 0.018957491276621774}. Best is trial 24 with value: 0.3688799075293374.


SLIMElasticNetRecommender: URM Detected 144 ( 1.1%) users with no interactions.
SLIMElasticNetRecommender: URM Detected 68 ( 0.3%) items with no interactions.
SLIMElasticNetRecommender: Processed 22222 (100.0%) in 2.99 min. Items per second: 123.90


[I 2024-01-04 19:37:48,147] Trial 26 finished with value: 0.0020562867841200047 and parameters: {'topK': 2872, 'l1_ratio': 0.25564167012506545, 'alpha': 0.15396013706444583}. Best is trial 24 with value: 0.3688799075293374.


SLIMElasticNetRecommender: URM Detected 144 ( 1.1%) users with no interactions.
SLIMElasticNetRecommender: URM Detected 68 ( 0.3%) items with no interactions.
SLIMElasticNetRecommender: Processed 22222 (100.0%) in 2.99 min. Items per second: 123.81


[I 2024-01-04 19:40:55,013] Trial 27 finished with value: 0.01221143029997725 and parameters: {'topK': 1050, 'l1_ratio': 0.378769763026554, 'alpha': 0.06555433610951024}. Best is trial 24 with value: 0.3688799075293374.


SLIMElasticNetRecommender: URM Detected 144 ( 1.1%) users with no interactions.
SLIMElasticNetRecommender: URM Detected 68 ( 0.3%) items with no interactions.
SLIMElasticNetRecommender: Processed 22222 (100.0%) in 2.99 min. Items per second: 123.93


[I 2024-01-04 19:44:01,476] Trial 28 finished with value: 0.004330430868549124 and parameters: {'topK': 2128, 'l1_ratio': 0.1670254417330556, 'alpha': 0.1639992474043575}. Best is trial 24 with value: 0.3688799075293374.


SLIMElasticNetRecommender: URM Detected 144 ( 1.1%) users with no interactions.
SLIMElasticNetRecommender: URM Detected 68 ( 0.3%) items with no interactions.
SLIMElasticNetRecommender: Processed 22222 (100.0%) in 2.99 min. Items per second: 124.04


[I 2024-01-04 19:47:07,777] Trial 29 finished with value: 0.0020562867841200047 and parameters: {'topK': 6422, 'l1_ratio': 0.08037118930614226, 'alpha': 0.7289990238617147}. Best is trial 24 with value: 0.3688799075293374.


SLIMElasticNetRecommender: URM Detected 144 ( 1.1%) users with no interactions.
SLIMElasticNetRecommender: URM Detected 68 ( 0.3%) items with no interactions.
SLIMElasticNetRecommender: Processed 22222 (100.0%) in 2.95 min. Items per second: 125.35


[I 2024-01-04 19:50:12,152] Trial 30 finished with value: 0.0020562867841200047 and parameters: {'topK': 5823, 'l1_ratio': 0.19582434779630759, 'alpha': 0.42964280518794606}. Best is trial 24 with value: 0.3688799075293374.


SLIMElasticNetRecommender: URM Detected 144 ( 1.1%) users with no interactions.
SLIMElasticNetRecommender: URM Detected 68 ( 0.3%) items with no interactions.
SLIMElasticNetRecommender: Processed 22222 (100.0%) in 3.10 min. Items per second: 119.56


[I 2024-01-04 19:53:25,635] Trial 31 finished with value: 0.20413595411917387 and parameters: {'topK': 2025, 'l1_ratio': 0.13172902391438338, 'alpha': 0.02584622121865468}. Best is trial 24 with value: 0.3688799075293374.


SLIMElasticNetRecommender: URM Detected 144 ( 1.1%) users with no interactions.
SLIMElasticNetRecommender: URM Detected 68 ( 0.3%) items with no interactions.
SLIMElasticNetRecommender: Processed 22222 (100.0%) in 3.25 min. Items per second: 114.09


[I 2024-01-04 19:56:47,914] Trial 32 finished with value: 0.24129594424914133 and parameters: {'topK': 1088, 'l1_ratio': 0.08230635640037078, 'alpha': 0.027516918858890106}. Best is trial 24 with value: 0.3688799075293374.


SLIMElasticNetRecommender: URM Detected 144 ( 1.1%) users with no interactions.
SLIMElasticNetRecommender: URM Detected 68 ( 0.3%) items with no interactions.
SLIMElasticNetRecommender: Processed 22222 (100.0%) in 2.99 min. Items per second: 124.04


[I 2024-01-04 19:59:54,287] Trial 33 finished with value: 0.11749614584084699 and parameters: {'topK': 1256, 'l1_ratio': 0.07526584637735495, 'alpha': 0.0989065882001678}. Best is trial 24 with value: 0.3688799075293374.


SLIMElasticNetRecommender: URM Detected 144 ( 1.1%) users with no interactions.
SLIMElasticNetRecommender: URM Detected 68 ( 0.3%) items with no interactions.
SLIMElasticNetRecommender: Processed 22222 (100.0%) in 2.96 min. Items per second: 125.12


[I 2024-01-04 20:02:59,073] Trial 34 finished with value: 0.0020562867841200047 and parameters: {'topK': 130, 'l1_ratio': 0.283185211539419, 'alpha': 0.18878808805642588}. Best is trial 24 with value: 0.3688799075293374.


SLIMElasticNetRecommender: URM Detected 144 ( 1.1%) users with no interactions.
SLIMElasticNetRecommender: URM Detected 68 ( 0.3%) items with no interactions.
SLIMElasticNetRecommender: Processed 22222 (100.0%) in 3.14 min. Items per second: 118.02


[I 2024-01-04 20:06:14,787] Trial 35 finished with value: 0.2084933555489227 and parameters: {'topK': 3061, 'l1_ratio': 0.560923518753924, 'alpha': 0.005600303081241485}. Best is trial 24 with value: 0.3688799075293374.


SLIMElasticNetRecommender: URM Detected 144 ( 1.1%) users with no interactions.
SLIMElasticNetRecommender: URM Detected 68 ( 0.3%) items with no interactions.
SLIMElasticNetRecommender: Processed 22222 (100.0%) in 2.97 min. Items per second: 124.69


[I 2024-01-04 20:09:20,203] Trial 36 finished with value: 0.013296168753416547 and parameters: {'topK': 973, 'l1_ratio': 0.07479354966049256, 'alpha': 0.32342884210282347}. Best is trial 24 with value: 0.3688799075293374.


SLIMElasticNetRecommender: URM Detected 144 ( 1.1%) users with no interactions.
SLIMElasticNetRecommender: URM Detected 68 ( 0.3%) items with no interactions.
SLIMElasticNetRecommender: Processed 22222 (100.0%) in 2.98 min. Items per second: 124.48


[I 2024-01-04 20:12:25,923] Trial 37 finished with value: 0.016495462938173763 and parameters: {'topK': 2516, 'l1_ratio': 0.21364487041261881, 'alpha': 0.10232268105412963}. Best is trial 24 with value: 0.3688799075293374.


SLIMElasticNetRecommender: URM Detected 144 ( 1.1%) users with no interactions.
SLIMElasticNetRecommender: URM Detected 68 ( 0.3%) items with no interactions.
SLIMElasticNetRecommender: Processed 22222 (100.0%) in 2.96 min. Items per second: 125.21


[I 2024-01-04 20:15:30,551] Trial 38 finished with value: 0.0020562867841200047 and parameters: {'topK': 5065, 'l1_ratio': 0.33811326581932843, 'alpha': 0.19880913997053412}. Best is trial 24 with value: 0.3688799075293374.


SLIMElasticNetRecommender: URM Detected 144 ( 1.1%) users with no interactions.
SLIMElasticNetRecommender: URM Detected 68 ( 0.3%) items with no interactions.
SLIMElasticNetRecommender: Processed 22222 (100.0%) in 3.00 min. Items per second: 123.51


[I 2024-01-04 20:18:37,807] Trial 39 finished with value: 0.1816420961129951 and parameters: {'topK': 3705, 'l1_ratio': 0.06388574795344505, 'alpha': 0.07044357925123206}. Best is trial 24 with value: 0.3688799075293374.


SLIMElasticNetRecommender: URM Detected 144 ( 1.1%) users with no interactions.
SLIMElasticNetRecommender: URM Detected 68 ( 0.3%) items with no interactions.
SLIMElasticNetRecommender: Processed 22222 (100.0%) in 2.98 min. Items per second: 124.29


[I 2024-01-04 20:21:43,813] Trial 40 finished with value: 0.0020562867841200047 and parameters: {'topK': 1675, 'l1_ratio': 0.4172810409125013, 'alpha': 0.14755028700293815}. Best is trial 24 with value: 0.3688799075293374.


SLIMElasticNetRecommender: URM Detected 144 ( 1.1%) users with no interactions.
SLIMElasticNetRecommender: URM Detected 68 ( 0.3%) items with no interactions.
SLIMElasticNetRecommender: Processed 22222 (100.0%) in 3.00 min. Items per second: 123.59


[I 2024-01-04 20:24:50,967] Trial 41 finished with value: 0.13545908715350688 and parameters: {'topK': 1813, 'l1_ratio': 0.13803414379195778, 'alpha': 0.04705887939838782}. Best is trial 24 with value: 0.3688799075293374.


SLIMElasticNetRecommender: URM Detected 144 ( 1.1%) users with no interactions.
SLIMElasticNetRecommender: URM Detected 68 ( 0.3%) items with no interactions.
SLIMElasticNetRecommender: Processed 22222 (100.0%) in 2.96 min. Items per second: 124.96


[I 2024-01-04 20:27:56,052] Trial 42 finished with value: 0.08246653446620807 and parameters: {'topK': 660, 'l1_ratio': 0.10227462147985804, 'alpha': 0.09879550445790128}. Best is trial 24 with value: 0.3688799075293374.


SLIMElasticNetRecommender: URM Detected 144 ( 1.1%) users with no interactions.
SLIMElasticNetRecommender: URM Detected 68 ( 0.3%) items with no interactions.
SLIMElasticNetRecommender: Processed 22222 (100.0%) in 3.11 min. Items per second: 118.96


[I 2024-01-04 20:31:10,432] Trial 43 finished with value: 0.20874489982543126 and parameters: {'topK': 1489, 'l1_ratio': 0.15623746338886463, 'alpha': 0.020617641917186857}. Best is trial 24 with value: 0.3688799075293374.


SLIMElasticNetRecommender: URM Detected 144 ( 1.1%) users with no interactions.
SLIMElasticNetRecommender: URM Detected 68 ( 0.3%) items with no interactions.
SLIMElasticNetRecommender: Processed 22222 (100.0%) in 2.99 min. Items per second: 124.01


[I 2024-01-04 20:34:16,857] Trial 44 finished with value: 0.0020562867841200047 and parameters: {'topK': 2505, 'l1_ratio': 0.730340090930909, 'alpha': 0.21954812906048501}. Best is trial 24 with value: 0.3688799075293374.


SLIMElasticNetRecommender: URM Detected 144 ( 1.1%) users with no interactions.
SLIMElasticNetRecommender: URM Detected 68 ( 0.3%) items with no interactions.
SLIMElasticNetRecommender: Processed 22222 (100.0%) in 2.99 min. Items per second: 123.99


[I 2024-01-04 20:37:23,357] Trial 45 finished with value: 0.0020562867841200047 and parameters: {'topK': 23, 'l1_ratio': 0.9986027568063471, 'alpha': 0.1369147476026866}. Best is trial 24 with value: 0.3688799075293374.


SLIMElasticNetRecommender: URM Detected 144 ( 1.1%) users with no interactions.
SLIMElasticNetRecommender: URM Detected 68 ( 0.3%) items with no interactions.
SLIMElasticNetRecommender: Processed 22222 (100.0%) in 3.00 min. Items per second: 123.63


[I 2024-01-04 20:40:30,320] Trial 46 finished with value: 0.029401576340961372 and parameters: {'topK': 3813, 'l1_ratio': 0.052497387467990264, 'alpha': 0.33964124660200606}. Best is trial 24 with value: 0.3688799075293374.


SLIMElasticNetRecommender: URM Detected 144 ( 1.1%) users with no interactions.
SLIMElasticNetRecommender: URM Detected 68 ( 0.3%) items with no interactions.
SLIMElasticNetRecommender: Processed 22222 (100.0%) in 3.13 min. Items per second: 118.37


[I 2024-01-04 20:43:45,554] Trial 47 finished with value: 0.21119106445978017 and parameters: {'topK': 3129, 'l1_ratio': 0.2578448658375409, 'alpha': 0.012018735420884706}. Best is trial 24 with value: 0.3688799075293374.


SLIMElasticNetRecommender: URM Detected 144 ( 1.1%) users with no interactions.
SLIMElasticNetRecommender: URM Detected 68 ( 0.3%) items with no interactions.
SLIMElasticNetRecommender: Processed 22222 (100.0%) in 3.17 min. Items per second: 116.92


[I 2024-01-04 20:47:03,113] Trial 48 finished with value: 0.2308944452798391 and parameters: {'topK': 2059, 'l1_ratio': 0.03605927273519659, 'alpha': 0.07204661029168316}. Best is trial 24 with value: 0.3688799075293374.


SLIMElasticNetRecommender: URM Detected 144 ( 1.1%) users with no interactions.
SLIMElasticNetRecommender: URM Detected 68 ( 0.3%) items with no interactions.
SLIMElasticNetRecommender: Processed 22222 (100.0%) in 2.98 min. Items per second: 124.48


[I 2024-01-04 20:50:08,950] Trial 49 finished with value: 0.0020562867841200047 and parameters: {'topK': 905, 'l1_ratio': 0.10935891833615624, 'alpha': 0.9544867528309888}. Best is trial 24 with value: 0.3688799075293374.


SLIMElasticNetRecommender: URM Detected 144 ( 1.1%) users with no interactions.
SLIMElasticNetRecommender: URM Detected 68 ( 0.3%) items with no interactions.
SLIMElasticNetRecommender: Processed 22222 (100.0%) in 2.96 min. Items per second: 125.12


[I 2024-01-04 20:53:13,667] Trial 50 finished with value: 0.0020562867841200047 and parameters: {'topK': 4355, 'l1_ratio': 0.17638589237904023, 'alpha': 0.17899790337784022}. Best is trial 24 with value: 0.3688799075293374.


SLIMElasticNetRecommender: URM Detected 144 ( 1.1%) users with no interactions.
SLIMElasticNetRecommender: URM Detected 68 ( 0.3%) items with no interactions.
SLIMElasticNetRecommender: Processed 22222 (100.0%) in 3.02 min. Items per second: 122.52


[I 2024-01-04 20:56:22,548] Trial 51 finished with value: 0.1951244070077654 and parameters: {'topK': 5249, 'l1_ratio': 0.014998645340730248, 'alpha': 0.2759094365555624}. Best is trial 24 with value: 0.3688799075293374.


SLIMElasticNetRecommender: URM Detected 144 ( 1.1%) users with no interactions.
SLIMElasticNetRecommender: URM Detected 68 ( 0.3%) items with no interactions.
SLIMElasticNetRecommender: Processed 22222 (100.0%) in 3.66 min. Items per second: 101.27


[I 2024-01-04 21:00:10,716] Trial 52 finished with value: 0.27760832333410906 and parameters: {'topK': 4351, 'l1_ratio': 0.001903005185729667, 'alpha': 0.4432303210455722}. Best is trial 24 with value: 0.3688799075293374.


SLIMElasticNetRecommender: URM Detected 144 ( 1.1%) users with no interactions.
SLIMElasticNetRecommender: URM Detected 68 ( 0.3%) items with no interactions.
SLIMElasticNetRecommender: Processed 22222 (100.0%) in 3.13 min. Items per second: 118.46


[I 2024-01-04 21:03:25,898] Trial 53 finished with value: 0.0020562867841200047 and parameters: {'topK': 4114, 'l1_ratio': 0.8765532425228134, 'alpha': 0.46686586657074236}. Best is trial 24 with value: 0.3688799075293374.


SLIMElasticNetRecommender: URM Detected 144 ( 1.1%) users with no interactions.
SLIMElasticNetRecommender: URM Detected 68 ( 0.3%) items with no interactions.
SLIMElasticNetRecommender: Processed 22222 (100.0%) in 3.50 min. Items per second: 105.85


[I 2024-01-04 21:07:03,872] Trial 54 finished with value: 0.2612891313516362 and parameters: {'topK': 3246, 'l1_ratio': 0.0023983905322507992, 'alpha': 0.5248156806921214}. Best is trial 24 with value: 0.3688799075293374.


SLIMElasticNetRecommender: URM Detected 144 ( 1.1%) users with no interactions.
SLIMElasticNetRecommender: URM Detected 68 ( 0.3%) items with no interactions.
SLIMElasticNetRecommender: Processed 22222 (100.0%) in 3.29 min. Items per second: 112.61


[I 2024-01-04 21:10:29,004] Trial 55 finished with value: 0.25371316244595493 and parameters: {'topK': 11782, 'l1_ratio': 0.0027583695310723225, 'alpha': 0.5532680632015529}. Best is trial 24 with value: 0.3688799075293374.


SLIMElasticNetRecommender: URM Detected 144 ( 1.1%) users with no interactions.
SLIMElasticNetRecommender: URM Detected 68 ( 0.3%) items with no interactions.
SLIMElasticNetRecommender: Processed 22222 (100.0%) in 3.08 min. Items per second: 120.42


[I 2024-01-04 21:13:40,961] Trial 56 finished with value: 0.1997481830584322 and parameters: {'topK': 7830, 'l1_ratio': 0.006960480633412009, 'alpha': 0.5671128432871126}. Best is trial 24 with value: 0.3688799075293374.


SLIMElasticNetRecommender: URM Detected 144 ( 1.1%) users with no interactions.
SLIMElasticNetRecommender: URM Detected 68 ( 0.3%) items with no interactions.
SLIMElasticNetRecommender: Processed 22222 (100.0%) in 3.01 min. Items per second: 123.10


[I 2024-01-04 21:16:48,644] Trial 57 finished with value: 0.013296168753416547 and parameters: {'topK': 10952, 'l1_ratio': 0.043989775768755277, 'alpha': 0.5465235035284519}. Best is trial 24 with value: 0.3688799075293374.


SLIMElasticNetRecommender: URM Detected 144 ( 1.1%) users with no interactions.
SLIMElasticNetRecommender: URM Detected 68 ( 0.3%) items with no interactions.
SLIMElasticNetRecommender: Processed 22222 (100.0%) in 3.01 min. Items per second: 122.85


[I 2024-01-04 21:19:56,819] Trial 58 finished with value: 0.12083410452381939 and parameters: {'topK': 8994, 'l1_ratio': 0.01145278224170778, 'alpha': 0.674724840447355}. Best is trial 24 with value: 0.3688799075293374.


SLIMElasticNetRecommender: URM Detected 144 ( 1.1%) users with no interactions.
SLIMElasticNetRecommender: URM Detected 68 ( 0.3%) items with no interactions.
SLIMElasticNetRecommender: Processed 22222 (100.0%) in 2.96 min. Items per second: 125.29


[I 2024-01-04 21:23:01,407] Trial 59 finished with value: 0.023252950405018263 and parameters: {'topK': 5483, 'l1_ratio': 0.04526721243429033, 'alpha': 0.4323376826515556}. Best is trial 24 with value: 0.3688799075293374.


In [23]:
study.best_trial

FrozenTrial(number=24, state=TrialState.COMPLETE, values=[0.3688799075293374], datetime_start=datetime.datetime(2024, 1, 4, 19, 23, 20, 648197), datetime_complete=datetime.datetime(2024, 1, 4, 19, 31, 21, 591513), params={'topK': 2152, 'l1_ratio': 0.14551643960722144, 'alpha': 0.0015749065577089216}, user_attrs={}, system_attrs={}, intermediate_values={}, distributions={'topK': IntDistribution(high=12000, log=False, low=1, step=1), 'l1_ratio': FloatDistribution(high=1.0, log=False, low=1e-05, step=None), 'alpha': FloatDistribution(high=1.0, log=False, low=0.0001, step=None)}, trial_id=24, value=None)

In [24]:
study.best_params

{'topK': 2152, 'l1_ratio': 0.14551643960722144, 'alpha': 0.0015749065577089216}

In [25]:
save_results.results_df

Unnamed: 0,topK,l1_ratio,alpha,result
0,6276.0,0.195652,0.535661,0.002056
1,1606.0,0.815465,0.137682,0.002056
2,6879.0,0.701909,0.919749,0.002056
3,5614.0,0.275954,0.860551,0.002056
4,1439.0,0.064237,0.210947,0.059179
5,9090.0,0.579344,0.088219,0.002056
6,2424.0,0.41222,0.83127,0.002056
7,5210.0,0.1348,0.125988,0.032526
8,9576.0,0.724483,0.465601,0.002056
9,3692.0,0.482189,0.629589,0.002056


In [26]:
output_path = '/kaggle/working/output.csv'
save_results.results_df.to_csv(output_path, sep=',', index=False)


<h1> Create a SLIM recommender with the best parameters found (with step 1) </h1>

In [27]:
#recommender_SLIM = SLIMElasticNetRecommender(URM_all)
#recommender_SLIM.fit(**study.best_params)

<h1> Create a TopPop for cold start problem </h1>

In [28]:
class TopPopRecommenderUnseen(object):

    def fit(self, URM_train):
        
        self.URM_train = URM_train

        item_popularity = np.ediff1d(URM_train.tocsc().indptr)

        # We are not interested in sorting the popularity value,
        # but to order the items according to it
        self.popular_items = np.argsort(item_popularity)
        self.popular_items = np.flip(self.popular_items, axis = 0)
    
    
    def recommend(self, user_id, at=10, remove_seen=True):

        if remove_seen:
            # una lista con gli di di tutti gli item che ha gia visto
            seen_items = self.URM_train.indices[self.URM_train.indptr[user_id]:self.URM_train.indptr[user_id+1]]
            
            # remove from the popular items all the items already seen
            unseen_items_mask = np.in1d(self.popular_items, seen_items, assume_unique=True, invert = True)
            
            #apply the mask
            unseen_items = self.popular_items[unseen_items_mask]

            recommended_items = unseen_items[0:at]

        else:
            recommended_items = self.popular_items[0:at]
            

        return recommended_items

In [29]:
topPopRecommenderUnseen = TopPopRecommenderUnseen()
topPopRecommenderUnseen.fit(URM_all)

<h1> Generate submission </h1>

In [30]:
def generate_submission(recommender):
        
        hour = str(int(datetime.now().strftime("%H"))+1)

        current_datetime = datetime.now().strftime("%M:%S")
    
        output_file = f'submission_{hour + ":" + current_datetime}.csv'
    
    


        with open(output_file, 'w', newline='') as csvfile:
            writer = csv.writer(csvfile)

            # Write the header
            writer.writerow(['user_id', 'item_list'])
        
        
        
        
    
            # Generate and write recommendations for each user
            for user_id in tqdm(df_users["UserID"]):
                if user_id in original_id:
                    if ((URM_all.indptr[user_original_ID_to_index[user_id]+1]-URM_all.indptr[user_original_ID_to_index[user_id]])>5): #user_id ha piu di 5 interazioni, usa effettivamente SLIM
                        writer.writerow([user_id, ' '.join(map(str,[item_original_ID_to_index[item_original_ID_to_index== val].index[0] for val in recommender.recommend(user_original_ID_to_index[user_id])[:10]] ))])
                    else: #user_id ha meno di 5 interazioni, usa TopPop
                        writer.writerow([user_id, ' '.join(map(str,[item_original_ID_to_index[item_original_ID_to_index == val].index[0] for val in topPopRecommenderUnseen.recommend(user_original_ID_to_index[user_id],remove_seen=True)[:10]] ))])
                else:
                    writer.writerow([user_id, ' '.join(map(str,[item_original_ID_to_index[item_original_ID_to_index == val].index[0] for val in topPopRecommenderUnseen.recommend(user_original_ID_to_index[3],remove_seen=False)[:10]] ))])
        
        return output_file

In [31]:
#output_file = generate_submission(recommender_SLIM)