In [None]:
!sed -i 's/np\.int/int/g' /opt/conda/lib/python3.10/site-packages/skopt/space/transformers.py

In [None]:
import os

# Folder that holds the project sources; the project-local imports in later cells
# are resolved relative to the working directory set here.
input_folder_path = '/kaggle/input/finalrepo/ProgettoReccomender'

if not os.path.exists(input_folder_path):
    # Nothing to switch to: report it and leave the working directory untouched.
    print(f"La cartella di input '{input_folder_path}' non esiste.")
else:
    # Move into the project folder and show the resulting working directory.
    os.chdir(input_folder_path)
    print("Nuovo percorso di lavoro:", os.getcwd())

In [None]:
# ** SUMMARY PIPELINE **
# Load the interaction data and the list of target users, then build the full
# user-rating matrix (URM_all) as a sparse COO matrix.

from Data_manager.split_functions.split_train_validation_random_holdout import split_train_in_two_percentage_global_sample
import pandas as pd
import scipy.sparse as sp


# File paths
train_data = "/kaggle/input/recommender-system-2023-challenge-polimi/data_train.csv"
users_file = "/kaggle/input/recommender-system-2023-challenge-polimi/data_target_users_test.csv"

column_names = ['user_id','item_id','data']

# Open both files inside a context manager so the handles are closed once parsing
# is done (previously they were left open for the lifetime of the kernel).
with open(train_data, 'r') as URM_file, open(users_file, 'r') as users_to_recommend:
    # Users to recommend for; header=0 discards the file's own header row in favour of `names`.
    users = pd.read_csv(users_to_recommend, names=['user_id'], header=0, dtype={0:int})

    # Interactions: one (user_id, item_id, data) triple per row.
    dataframe = pd.read_csv(URM_file, names=column_names, header=0, dtype={0:int, 1:int, 2:float})

print(dataframe.head(20))

# Rows are user ids, columns are item ids, values come from the "data" column.
URM_all = sp.coo_matrix((dataframe["data"].values,
                          (dataframe["user_id"].values, dataframe["item_id"].values)))

In [None]:
from Evaluation.Evaluator import EvaluatorHoldout
from skopt.space import Real, Integer, Categorical
from HyperparameterTuning.SearchBayesianSkopt import SearchBayesianSkopt
from Recommenders.SLIM.SLIMElasticNetRecommender import SLIMElasticNetRecommender
from Recommenders.GraphBased.RP3betaRecommender import RP3betaRecommender
from Recommenders.KNN.ItemKNNCustomSimilarityRecommender import ItemKNNCustomSimilarityRecommender

# Two nested hold-outs, each called with train_percentage=0.8:
# URM_all -> (train+validation, test), then train+validation -> (train, validation).
URM_train_validation, URM_test = split_train_in_two_percentage_global_sample(URM_all, train_percentage=0.8)
URM_train, URM_validation = split_train_in_two_percentage_global_sample(URM_train_validation, train_percentage=0.8)

# One evaluator per held-out matrix, both with a single cutoff of 10.
evaluator_validation = EvaluatorHoldout(URM_validation, cutoff_list=[10])
evaluator_test = EvaluatorHoldout(URM_test, cutoff_list=[10])

# Positions of the configurations currently in use inside the two lists below.
rp3_index = 0
slim_index = 2

# Candidate RP3beta hyperparameter sets.
params_list = [
    {"alpha": 0.3365280890390189, "beta": 0.14559541984971103, "topK": 32},   # best config based on 30 tests
    {"alpha": 0.33543320721534375, "beta": 0.13643832833854405, "topK": 31},  # previous best config
    {"alpha": 0.3227577390094315, "beta": 0.12631978308173344, "topK": 32},
    {"alpha": 0.2617995480887691, "beta": 0.22418804987168886, "topK": 30},
    {"alpha": 0.28585975670634217, "beta": 0.13388489746818844, "topK": 33},
    {"alpha": 0.3361086178381283, "beta": 0.13949133462799973, "topK": 29},
]

# Candidate SLIM ElasticNet hyperparameter sets.
slim_params_list = [
    {"topK": 2305, "l1_ratio": 0.15984659917724292, "alpha": 0.0006895792558081994},  # best
    {"topK": 2339, "l1_ratio": 0.15486907556362542, "alpha": 0.0006851706335261893},  # previous best config
    {"topK": 2327, "l1_ratio": 0.15346747937279875, "alpha": 0.000677913689441996},   # new best
    {"topK": 2427, "l1_ratio": 0.14931044947790595, "alpha": 0.0007442377587336158},
    {"topK": 2310, "l1_ratio": 0.1519150334556062, "alpha": 0.0006862030334431442},
]

# Each dict holds exactly the keyword arguments its fit() call needs, so unpack it directly.
recommender_object_RP3 = RP3betaRecommender(URM_train)
recommender_object_RP3.fit(implicit=True, **params_list[rp3_index])

recommender_object_SLIM = SLIMElasticNetRecommender(URM_train)
recommender_object_SLIM.fit(**slim_params_list[slim_index])

# Report both models on the test split first, then on the validation split.
for evaluator, header in ((evaluator_test, None), (evaluator_validation, "Now in Validation : ")):
    if header is not None:
        print(header)
    for label, recommender in (("RP3 : ", recommender_object_RP3), ("SLIM : ", recommender_object_SLIM)):
        results_df, results_run_string = evaluator.evaluateRecommender(recommender)
        print(label, results_run_string)


In [None]:
# Compare every RP3beta configuration in params_list over several fresh re-splits and
# count how many rounds each configuration wins (highest MAP on the test split).
# WARNING: this cell reassigns the global train/validation/test matrices, both evaluators
# and recommender_object_RP3, so later cells see whatever the last round left behind.
from Evaluation.Evaluator import EvaluatorHoldout

n_rounds = 5

# One counter per configuration, sized from the list so adding/removing a config cannot desync it.
winning_counts = [0] * len(params_list)

for j in range(n_rounds):
    print("ROUND : ", j)
    URM_train_validation, URM_test = split_train_in_two_percentage_global_sample(URM_all, train_percentage=0.8)
    URM_train, URM_validation = split_train_in_two_percentage_global_sample(URM_train_validation, train_percentage=0.8)

    evaluator_validation = EvaluatorHoldout(URM_validation, cutoff_list=[10])
    evaluator_test = EvaluatorHoldout(URM_test, cutoff_list=[10])

    round_winner_index = None
    max_map = 0.0

    for i, params in enumerate(params_list):
        recommender_object_RP3 = RP3betaRecommender(URM_train_validation)
        recommender_object_RP3.fit(alpha=params["alpha"],
                                   beta=params["beta"],
                                   topK=params["topK"],
                                   implicit=True)

        results_df, results_run_string = evaluator_test.evaluateRecommender(recommender_object_RP3)
        current_map = results_df["MAP"].item()

        print(i, ") RP3 MAP: ", current_map)

        if current_map > max_map:
            max_map = current_map
            round_winner_index = i

    # If no configuration scored above 0.0 there is no winner for this round;
    # indexing the counters with None would raise TypeError, so skip the increment.
    if round_winner_index is not None:
        winning_counts[round_winner_index] += 1

# Configuration with the most round wins (ties resolve to the lowest index).
winner_index = winning_counts.index(max(winning_counts))

# Print the overall winner
print("\nWinner Configuration:")
print(f"Index: {winner_index}, Wins: {winning_counts[winner_index]} rounds")

# NOTE(review): rp3_index is pinned to 0 rather than taken from winner_index;
# presumably deliberate (entry 0 is annotated as the best config over 30 tests) -- confirm.
rp3_index = 0


In [None]:
# Compare every SLIM ElasticNet configuration in slim_params_list over several fresh
# re-splits and count how many rounds each configuration wins (highest MAP on the test split).
# WARNING: this cell reassigns the global train/validation/test matrices, both evaluators
# and recommender_object_SLIM (left holding the LAST configuration tried, not the winner).
from Evaluation.Evaluator import EvaluatorHoldout

n_rounds = 6

# One counter per configuration, sized from the list so adding/removing a config cannot desync it.
winning_counts = [0] * len(slim_params_list)

for j in range(n_rounds):
    print("ROUND : ", j)
    URM_train_validation, URM_test = split_train_in_two_percentage_global_sample(URM_all, train_percentage=0.8)
    URM_train, URM_validation = split_train_in_two_percentage_global_sample(URM_train_validation, train_percentage=0.8)

    evaluator_validation = EvaluatorHoldout(URM_validation, cutoff_list=[10])
    evaluator_test = EvaluatorHoldout(URM_test, cutoff_list=[10])

    round_winner_index = None
    max_map = 0.0

    for i, params in enumerate(slim_params_list):
        # NOTE(review): fitted on URM_train, whereas the RP3 sweep fits on URM_train_validation -- confirm intent.
        recommender_object_SLIM = SLIMElasticNetRecommender(URM_train)
        recommender_object_SLIM.fit(topK=params["topK"],
                                    l1_ratio=params["l1_ratio"],
                                    alpha=params["alpha"])

        results_df, results_run_string = evaluator_test.evaluateRecommender(recommender_object_SLIM)
        current_map = results_df["MAP"].item()
        print(i,") SLIM MAP: ", current_map)

        if current_map > max_map:
            max_map = current_map
            round_winner_index = i

    # If no configuration scored above 0.0 there is no winner for this round;
    # indexing the counters with None would raise TypeError, so skip the increment.
    if round_winner_index is not None:
        winning_counts[round_winner_index] += 1

# Configuration with the most round wins (ties resolve to the lowest index).
winner_index = winning_counts.index(max(winning_counts))

# Print the overall winner
print("\nWinner Configuration:")
print(f"Index: {winner_index}, Wins: {winning_counts[winner_index]} rounds")

# The winning configuration is the one used by the following cells.
slim_index = winner_index

In [None]:
#Cross Validation
# NOTE: everything between the triple quotes below is a plain string literal, so none of it
# executes; it is a parked experiment kept for reference (as the cell's only expression,
# the string itself is what the cell evaluates to).
# NOTE(review): before re-enabling, be aware that the inner loop uses URM_train_validation
# and evaluator_test, neither of which is returned or rebuilt by split_data(), and that the
# KFold indices (train_index, val_index) are never used -- each iteration just calls
# split_data() again instead of using the fold.
'''
from Evaluation.Evaluator import EvaluatorHoldout
from Recommenders.GraphBased.RP3betaRecommender import RP3betaRecommender
from sklearn.model_selection import KFold

# Le tue funzioni di splitting
def split_data(URM_all, train_percentage):
    URM_train_validation, URM_test = split_train_in_two_percentage_global_sample(URM_all, train_percentage=train_percentage)
    URM_train, URM_validation = split_train_in_two_percentage_global_sample(URM_train_validation, train_percentage=train_percentage)
    return URM_train, URM_validation, URM_test

# Numero di fold per la cross-validation
num_folds = 30
kf = KFold(n_splits=num_folds)


best_map = 0.0
best_params = None

for i, params in enumerate(params_list):
    map_sum = 0.0

    for train_index, val_index in kf.split(URM_all):
        URM_train, URM_validation, URM_test = split_data(URM_all, train_percentage=0.8)

        recommender_object_RP3 = RP3betaRecommender(URM_train_validation)

        recommender_object_RP3.fit(alpha=params["alpha"], beta=params["beta"], topK=params["topK"], implicit=True)

        results_df, _ = evaluator_test.evaluateRecommender(recommender_object_RP3)
        map_value = results_df["MAP"].item()
        map_sum += map_value

    avg_map = map_sum / num_folds

    print(f"{i + 1}) RP3 with params {params}: Avg MAP = {avg_map}")

    if avg_map > best_map:
        best_map = avg_map
        best_params = params

print(f"Best parameters: {best_params}, Best Avg MAP: {best_map}")
'''

In [None]:
# Coarse sweep to understand the useful range of the hybrid mixing weight x:
#     similarity = (1 - x) * W_RP3 + x * W_SLIM
# evaluated on the validation split.

alpha_values = [0, 0.12, 0.22, 0.32, 0.42, 0.52, 0.62, 0.72, 0.82, 0.92, 1]

# Refit both base models on the CURRENT URM_train with the selected configurations.
# The parameter-sweep cells reuse the names recommender_object_RP3 / recommender_object_SLIM
# and re-split the data: RP3 is left fitted on a URM_train_validation from an older split and
# SLIM on whichever configuration was tried last. Blending those leftovers would leak
# validation interactions into training and tune alpha for the wrong SLIM model.
recommender_object_RP3 = RP3betaRecommender(URM_train)
recommender_object_RP3.fit(alpha=params_list[rp3_index]["alpha"],
                           beta=params_list[rp3_index]["beta"],
                           topK=params_list[rp3_index]["topK"],
                           implicit=True)

recommender_object_SLIM = SLIMElasticNetRecommender(URM_train)
recommender_object_SLIM.fit(topK=slim_params_list[slim_index]["topK"],
                            l1_ratio=slim_params_list[slim_index]["l1_ratio"],
                            alpha=slim_params_list[slim_index]["alpha"])

# The two similarity matrices do not change during the sweep; look them up once.
W_rp3 = recommender_object_RP3.W_sparse
W_slim = recommender_object_SLIM.W_sparse

best_alpha = None
best_map = 0.0

for x in alpha_values:
    new_similarity = (1 - x) * W_rp3 + x * W_slim

    hybridrecommender_object = ItemKNNCustomSimilarityRecommender(URM_train)
    hybridrecommender_object.fit(new_similarity)

    result_df, _ = evaluator_validation.evaluateRecommender(hybridrecommender_object)
    map_value = result_df["MAP"].values[0]
    print("MAP : ", map_value)

    # Strict '>' keeps the first (smallest) alpha among equal scores; best_alpha stays None
    # only if no alpha scores above 0.0.
    if map_value > best_map:
        best_map = map_value
        best_alpha = x

print(f"Miglior valore di alpha in validation: {best_alpha}")
print(f"Miglior valore di MAP: {best_map}")

#TEST BEST ALPHA on URM_TRAIN
# Keep this import: the following retraining cell calls np.arange and has no numpy import of its own.
import numpy as np
# NOTE: everything between the triple quotes below is a plain string literal, so none of it
# executes. It is a disabled fine-grained alpha sweep (0.50-0.63, step 0.001) on the test
# split using models built on URM_train; the retraining cell that follows runs the same
# procedure on URM_train_validation instead.
'''
def run_experiment(alpha):
    new_similarity = (1 - alpha) * recommender_object_RP3.W_sparse + alpha * recommender_object_SLIM.W_sparse

    hybridrecommender_object = ItemKNNCustomSimilarityRecommender(URM_train)
    hybridrecommender_object.fit(new_similarity)

    results_df_test, results_run_string_test = evaluator_test.evaluateRecommender(hybridrecommender_object)
    print("alpha : ", alpha, results_run_string_test)

    return results_df_test, alpha


# Loop over alpha values
alpha_values = [round(x, 4) for x in list(np.arange(0.50, 0.63, 0.001))]  # Generates a list of alpha values

# Initialize a list to store the top 5 alphas and their corresponding MAP scores
top_alphas = []

for alpha in alpha_values:
    results_test, current_alpha = run_experiment(alpha)

    # Update the list of top alphas
    top_alphas.append((current_alpha, results_test["MAP"].values[0]))

    # Sort the list based on MAP scores in descending order
    top_alphas = sorted(top_alphas, key=lambda x: x[1], reverse=True)[:5]

# Print the top 5 alphas and their corresponding MAP scores
print("Top 5 alphas and their MAP scores:")
for i, (alpha, map_score) in enumerate(top_alphas, 1):
    print(f"Rank {i}: Alpha = {alpha}, MAP = {map_score}")

# Extract the best alpha for the test set
best_alpha_test = top_alphas[0][0]

# Print the results
print(f"Best alpha for test set: {best_alpha_test}, Best MAP: {top_alphas[0][1]}")

alpha = best_alpha_test
'''



In [None]:
# Retrain both base models on URM_train_validation (train + validation, i.e. more data)
# using the configurations selected by rp3_index / slim_index.

rp3_selected_params = params_list[rp3_index]
slim_selected_params = slim_params_list[slim_index]

recommender_object_RP3 = RP3betaRecommender(URM_train_validation)
recommender_object_RP3.fit(
    alpha=rp3_selected_params["alpha"],
    beta=rp3_selected_params["beta"],
    topK=rp3_selected_params["topK"],
    implicit=True,
)

recommender_object_SLIM = SLIMElasticNetRecommender(URM_train_validation)
recommender_object_SLIM.fit(
    topK=slim_selected_params["topK"],
    l1_ratio=slim_selected_params["l1_ratio"],
    alpha=slim_selected_params["alpha"],
)

def run_experiment2(alpha):
    """Evaluate the RP3/SLIM hybrid for one mixing weight on the test split.

    The hybrid similarity is (1 - alpha) * RP3.W_sparse + alpha * SLIM.W_sparse, read from
    the module-level recommender_object_RP3 / recommender_object_SLIM. It is loaded into an
    ItemKNNCustomSimilarityRecommender built on URM_train_validation and scored with
    evaluator_test. Prints the weight and the resulting MAP.

    Args:
        alpha: weight given to the SLIM similarity (RP3 receives 1 - alpha).

    Returns:
        (results, alpha): the results object returned by the evaluator and the weight
        that was passed in.
    """
    rp3_term = (1 - alpha) * recommender_object_RP3.W_sparse
    slim_term = alpha * recommender_object_SLIM.W_sparse

    hybrid = ItemKNNCustomSimilarityRecommender(URM_train_validation)
    hybrid.fit(rp3_term + slim_term)

    # The evaluator returns a (results, summary string) pair; only the results are needed here.
    test_results, _summary = evaluator_test.evaluateRecommender(hybrid)
    print("alpha : ", alpha, "MAP : ", test_results["MAP"].values[0])

    return test_results, alpha


# Fine-grained grid of mixing weights, step 0.001 starting at 0.45 (np.arange excludes the stop value);
# rounding to 4 decimals strips floating-point noise from the generated values.
alpha_values = [round(step, 4) for step in np.arange(0.45, 0.65, 0.001)]

# Running leaderboard of (alpha, MAP) pairs, trimmed to the five best after every evaluation.
top_alphas = []

for alpha in alpha_values:
    results_test, current_alpha = run_experiment2(alpha)
    top_alphas.append((current_alpha, results_test["MAP"].values[0]))

    # Stable in-place sort by MAP, best first, then drop everything past rank 5.
    top_alphas.sort(key=lambda pair: pair[1], reverse=True)
    del top_alphas[5:]

# Report the leaderboard.
print("Top 5 alphas and their MAP scores:")
for i, (alpha, map_score) in enumerate(top_alphas, 1):
    print(f"Rank {i}: Alpha = {alpha}, MAP = {map_score}")

# The first entry is the best-scoring weight on the test split; it is the one used for the final model.
best_entry = top_alphas[0]
best_alpha_test = best_entry[0]

print(f"Best alpha for test set: {best_alpha_test}, Best MAP: {best_entry[1]}")


In [None]:
# Final retrain on URM_all (every available interaction) to produce the submission model.

final_rp3_params = params_list[rp3_index]
final_slim_params = slim_params_list[slim_index]

# Log the configuration that goes into the final model.
print ("Alpha: ", best_alpha_test)
print("RP3 params: ", final_rp3_params)
print("SLIM params : ", final_slim_params)

recommender_object_RP3 = RP3betaRecommender(URM_all)
recommender_object_RP3.fit(
    alpha=final_rp3_params["alpha"],
    beta=final_rp3_params["beta"],
    topK=final_rp3_params["topK"],
    implicit=True,
)

recommender_object_SLIM = SLIMElasticNetRecommender(URM_all)
recommender_object_SLIM.fit(
    topK=final_slim_params["topK"],
    l1_ratio=final_slim_params["l1_ratio"],
    alpha=final_slim_params["alpha"],
)

# Blend the two item-item similarity matrices with the selected weight:
# RP3 gets (1 - best_alpha_test), SLIM gets best_alpha_test.
rp3_term = (1 - best_alpha_test) * recommender_object_RP3.W_sparse
slim_term = best_alpha_test * recommender_object_SLIM.W_sparse
new_similarity = rp3_term + slim_term

hybridrecommender_object = ItemKNNCustomSimilarityRecommender(URM_all)
hybridrecommender_object.fit(new_similarity)


In [None]:
# Smoke test: ask the final hybrid for 10 recommendations for a single user
# and let the notebook display the result.
sample_user_id = 1
hybridrecommender_object.recommend(sample_user_id, cutoff=10)

In [None]:
import os
import csv

# Write the submission file: one row per target user with a space-separated list of 10 item ids.
folder_path = "/kaggle/working/result_experiments/"

os.makedirs(folder_path, exist_ok=True)

csv_filename = os.path.join(folder_path, "sample_submission.csv")

# Mode 'w' creates the file when missing and truncates it otherwise, so no separate
# "create if absent" step is needed before opening it.
with open(csv_filename, 'w', newline='') as csv_file:
    csv_writer = csv.writer(csv_file)

    csv_writer.writerow(['user_id', 'item_list'])
    for user_id in users['user_id']:
        item_list_str = ' '.join(map(str, hybridrecommender_object.recommend(user_id, cutoff=10)))
        # Write the original user id next to its recommendations
        csv_writer.writerow([user_id, item_list_str])

print(f"CSV file '{csv_filename}' has been created.")