In [1]:
import os

# Move the working directory one level up so the relative paths used in the
# following cells (e.g. "Dataset/data_train.csv") resolve.
# NOTE(review): presumably the notebook lives in a subfolder of the project root — confirm.
# NOTE(review): a relative chdir is not idempotent; re-running this cell moves up one more level.
os.chdir('../')

In [2]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt

# Training interactions file, relative to the working directory set above.
data_train_path = "Dataset/data_train.csv"
URM_all_dataframe = pd.read_csv(filepath_or_buffer=data_train_path)

# Show the first ten rows as a quick sanity check of the parsed columns.
print(URM_all_dataframe.head(n=10))

   user_id  item_id  data
0        0        0   1.0
1        0        2   1.0
2        0      120   1.0
3        0      128   1.0
4        0      211   1.0
5        0      232   1.0
6        0      282   1.0
7        0      453   1.0
8        0      458   1.0
9        0      491   1.0


In [3]:
import numpy as np
import scipy.sparse as sps
from Data_manager.split_functions.split_train_validation_random_holdout import split_train_in_two_percentage_global_sample
from Evaluation.Evaluator import EvaluatorHoldout



# Assemble the interaction matrix from (value, (row, col)) triplets and convert it to CSR.
# NOTE(review): no explicit shape is passed, so the matrix size is inferred from the
# largest user_id / item_id present in the file.
interaction_values = URM_all_dataframe['data']
row_col_indices = (URM_all_dataframe['user_id'], URM_all_dataframe['item_id'])
URM_all = sps.coo_matrix((interaction_values, row_col_indices)).tocsr()

# Split the interactions with train_percentage=0.8 and build a holdout evaluator
# on the second returned matrix, reporting metrics at cutoff 10.
# NOTE(review): no random seed is set here, so the split presumably changes between runs — confirm.
URM_train_validation, URM_test = split_train_in_two_percentage_global_sample(
    URM_all,
    train_percentage=0.8,
)
evaluator_test = EvaluatorHoldout(URM_test, cutoff_list=[10])

EvaluatorHoldout: Ignoring 141 ( 0.4%) Users that have less than 1 test interactions


# Optuna

In [4]:
# Weight given to the SLIM similarity when it is blended with EASE_R further below.
alpha_slim_EASER = 0.622977719931239

from Recommenders.SLIM.SLIMElasticNetRecommender_Peppe import MultiThreadSLIM_SLIMElasticNetRecommender
from Recommenders.EASE_R.EASE_R_Recommender import EASE_R_Recommender

# Fixed hyperparameters for the two base models (presumably from earlier tuning runs — confirm).
slim_fit_params = dict(topK=142, alpha=9.521340863590419e-05, l1_ratio=0.4033590645217344)
easeR_fit_params = dict(l2_norm=33.20)

# SLIM ElasticNet on the train+validation interactions.
recommender_slimNet = MultiThreadSLIM_SLIMElasticNetRecommender(URM_train_validation)
recommender_slimNet.fit(**slim_fit_params)

# EASE_R on the same interactions.
recommender_easeR = EASE_R_Recommender(URM_train_validation)
recommender_easeR.fit(**easeR_fit_params)

Progress: 100%|██████████| 38121/38121 [09:02<00:00, 70.24items/s]


EASE_R_Recommender: Fitting model... 
EASE_R_Recommender: Fitting model... done in 14.56 min


In [5]:
# Keep handles on the learned weight matrices of both fitted models.
slim_similarity = recommender_slimNet.W_sparse
# Wrap the EASE_R weights in a CSR matrix so they can be stored with save_npz
# (presumably EASE_R keeps them in a different format — confirm).
easeR_similarity = sps.csr_matrix(recommender_easeR.W_sparse)

# Saving to disk
for npz_path, weights in (
    ("slim_best_W_TRAINVAL.npz", slim_similarity),
    ("easeR_best_W_TRAINVAL.npz", easeR_similarity),
):
    sps.save_npz(npz_path, weights)

In [6]:
# Convex combination of the two similarities: SLIM weighted by alpha_slim_EASER,
# EASE_R by its complement.
slim_weight = alpha_slim_EASER
easeR_weight = 1 - alpha_slim_EASER
similarity_SLIM_EASER = slim_weight * slim_similarity + easeR_weight * easeR_similarity

In [7]:
from Recommenders.GraphBased.RP3betaRecommender import RP3betaRecommender
# The bare string below is a no-op expression kept as a record of a result/hyperparameter set.
'''{'result': 0.05517828622068708, 'topK': 14.0, 'alpha': 0.3683550822991944, 'beta': 0.19877125816137325}
'''

# RP3beta on the train+validation interactions, using the values recorded above.
rp3_fit_params = dict(topK=14, alpha=0.3683550822991944, beta=0.19877125816137325)
rp3_recommender = RP3betaRecommender(URM_train_validation)
rp3_recommender.fit(**rp3_fit_params)

RP3betaRecommender: Similarity column 38121 (100.0%), 3701.77 column/sec. Elapsed time 10.30 sec


In [8]:
from Recommenders.KNN.ItemKNNCustomSimilarityRecommender import ItemKNNCustomSimilarityRecommender

def objective_function(trial):
    """Optuna objective: MAP at cutoff 10 of a blended-similarity recommender.

    Samples ``alpha`` in [0.0, 1.0] and evaluates an
    ``ItemKNNCustomSimilarityRecommender`` fitted on
    ``alpha * similarity_SLIM_EASER + (1 - alpha) * rp3_recommender.W_sparse``.

    Parameters
    ----------
    trial : optuna trial object
        Used only to sample the ``'alpha'`` hyperparameter.

    Returns
    -------
    The ``"MAP"`` value at cutoff 10 from ``evaluator_test.evaluateRecommender``.

    Notes
    -----
    Reads the notebook-level names ``URM_train_validation``,
    ``similarity_SLIM_EASER``, ``rp3_recommender`` and ``evaluator_test``.

    NOTE(review): the score comes from ``evaluator_test``, so ``alpha`` is tuned
    on the same split that is used to report performance; the reported MAP is
    likely optimistic.
    """

    # `suggest_uniform` is deprecated since Optuna 3.0; `suggest_float` samples
    # the same [0.0, 1.0] range under the same parameter name.
    alpha = trial.suggest_float('alpha', 0.0, 1.0)

    recommender = ItemKNNCustomSimilarityRecommender(URM_train_validation)

    # Convex combination of the SLIM+EASE_R blend and the RP3beta similarity.
    new_similarity = similarity_SLIM_EASER.multiply(alpha) + rp3_recommender.W_sparse.multiply(1-alpha)

    recommender.fit(new_similarity)

    # The second returned element is not used here.
    result_dict, _ = evaluator_test.evaluateRecommender(recommender)

    MAP = result_dict.loc[10]["MAP"]

    return MAP

In [9]:
class SaveResults(object):
    """Optuna callback that records each trial's result and hyperparameters.

    After every trial the accumulated records are exposed as ``results_df``
    (one row per trial, a ``"result"`` column plus one column per
    hyperparameter) and written to ``logs/Hybrid/hybrid_slim_EASER_RP3.csv``.
    """

    def __init__(self):
        # Per-trial records are kept in a plain list and the DataFrame is rebuilt
        # from it, which avoids concatenating onto an empty frame.
        self._records = []
        # Available (empty) before the first trial so callers can always read it.
        self.results_df = pd.DataFrame(columns = ["result"])

    def __call__(self, optuna_study, optuna_trial):
        """Record one trial and refresh the CSV log.

        Parameters
        ----------
        optuna_study : the study being optimized (unused).
        optuna_trial : the trial just processed; its ``params`` and
            ``values[0]`` are recorded.
        """
        # A trial without objective values has nothing to record.
        if optuna_trial.values is None:
            return

        # "result" goes first so it stays the leading column; a hyperparameter
        # named "result" would be overridden by the objective value.
        record = {"result": optuna_trial.values[0]}
        record.update(
            (name, value)
            for name, value in optuna_trial.params.items()
            if name != "result"
        )
        self._records.append(record)

        self.results_df = pd.DataFrame(self._records)

        # Create the log directory on demand so the write does not fail on a fresh checkout.
        log_path = "logs/Hybrid/hybrid_slim_EASER_RP3.csv"
        os.makedirs(os.path.dirname(log_path), exist_ok=True)
        self.results_df.to_csv(log_path, index = False)

In [None]:
import optuna

# Callback that logs every trial to a DataFrame and a CSV file.
save_results = SaveResults()

# Maximize the value returned by objective_function over up to 100 trials.
optuna_study = optuna.create_study(
    study_name="hybrid_slim_rp3_EASER",
    direction="maximize",
)
optuna_study.optimize(objective_function, n_trials=100, callbacks=[save_results])

# Stopped the kernel early because the results were no longer improving

In [11]:
# Pick the logged trial with the highest "result" and show it as a plain dict.
trial_log = save_results.results_df
best_index = trial_log["result"].idxmax()
best_hyperparams = trial_log.loc[best_index].to_dict()
print(best_hyperparams)

{'result': 0.06119306037721351, 'alpha': 0.8407690672301905}


In [4]:
from Recommenders.KNN.ItemKNNCustomSimilarityRecommender import ItemKNNCustomSimilarityRecommender
from Recommenders.GraphBased.RP3betaRecommender import RP3betaRecommender
from Recommenders.EASE_R.EASE_R_Recommender import EASE_R_Recommender
from Recommenders.SLIM.SLIMElasticNetRecommender_Peppe import MultiThreadSLIM_SLIMElasticNetRecommender

# Load the stored SLIM and EASE_R weight matrices.
# NOTE(review): these file names differ from the "*_TRAINVAL.npz" files written
# earlier in this notebook — presumably they were fitted on the full URM elsewhere; confirm.
slim_W = sps.load_npz("slim_best_W.npz")
easeR_W = sps.load_npz("easeR_best_W.npz")

# Same SLIM/EASE_R blend weight as used during tuning.
alpha_slim_EASER = 0.622977719931239
slim_part = slim_W.multiply(alpha_slim_EASER)
easeR_part = easeR_W.multiply(1-alpha_slim_EASER)
similarity_SLIM_EASER = slim_part + easeR_part

# RP3
rp3_fit_params = dict(topK=14, alpha=0.3683550822991944, beta=0.19877125816137325)
rp3_recommender = RP3betaRecommender(URM_all)
rp3_recommender.fit(**rp3_fit_params)

# Hybrid
final_recommender = ItemKNNCustomSimilarityRecommender(URM_all)
# Best alpha reported by the Optuna run above.
alpha = 0.8407690672301905
new_similarity = similarity_SLIM_EASER.multiply(alpha) + rp3_recommender.W_sparse.multiply(1-alpha)
final_recommender.fit(new_similarity)

RP3betaRecommender: Similarity column 38121 (100.0%), 3404.04 column/sec. Elapsed time 11.20 sec


In [5]:
target_user = pd.read_csv("Dataset/data_target_users_test.csv")
print(target_user.head(10))

user_id = target_user['user_id']
# One list of 10 recommended items per target user, in file order.
recommendations = [final_recommender.recommend(user, cutoff=10) for user in user_id]

# Saving in a submission file for the competition
# Each user's recommendations are stored as a space-separated string.
submission = pd.DataFrame({
    'user_id': user_id,
    'item_list': [' '.join(str(item) for item in recs) for recs in recommendations],
})

# Save the submission file
# NOTE(review): the ':' in the file name is not a valid character on Windows filesystems.
submission_path = f"submissions/hybrid_slim_rp3_EASER_alpha:{alpha}.csv"
submission.to_csv(submission_path, index=False)

   user_id
0        0
1        1
2        2
3        3
4        4
5        5
6        6
7        7
8        8
9        9
