<h2> Loading libraries </h2>

In [None]:
# Switch for the Cython build step: when True, a later cell runs run_compile_all_cython.py.
CYTHON = True

In [None]:
from kaggle_secrets import UserSecretsClient
# Fetch the secret stored under the name "token"; a later cell interpolates it into the
# git clone URL.
user_secrets = UserSecretsClient()
token = user_secrets.get_secret("token")

In [None]:
# Clone the project repository using the secret token for authentication, then make the
# clone the current working directory.
# NOTE(review): the token is part of the remote URL, so git presumably stores it in the
# clone's .git/config — confirm this is acceptable for this environment.
!git clone https://FrancescoZanella:{token}@github.com/FrancescoZanella/RecSys.git
%cd RecSys

In [None]:
pip install Cython==0.29.23

In [None]:
pip install nltk==3.6.1

In [None]:
pip install nose==1.3.7

In [None]:
pip install numpy>=1.19

In [None]:
# Compile the Cython extensions only when the CYTHON flag is enabled.
if CYTHON:
   ! python run_compile_all_cython.py

In [None]:
import scipy.sparse as sps
import pandas as pd
import numpy as np
import os
import matplotlib.pyplot as pyplot
import csv
from datetime import datetime
import time
from tqdm import tqdm

In [None]:
import sys
sys.path.append("/kaggle/working/RecSys")

In [None]:
from Utils.seconds_to_biggest_unit import *
from Evaluation.Evaluator import EvaluatorHoldout
from Data_manager.split_functions.split_train_validation_random_holdout import split_train_in_two_percentage_global_sample

from Recommenders.SLIM.SLIMElasticNetRecommender import SLIMElasticNetRecommender
from Recommenders.GraphBased.RP3betaRecommender import *
from Recommenders.GraphBased.P3alphaRecommender import *
from Recommenders.KNN.ItemKNNCFRecommender import *

from Recommenders.KNN.ItemKNNCustomSimilarityRecommender import ItemKNNCustomSimilarityRecommender


from MyTuning.Recall.best_pars_recall import *

<h2> Loading the dataset and the target users </h2>

In [None]:
# Locations of the training interactions and of the target-user list.
path = "/kaggle/working/RecSys/recsys1/data_train.csv"
path_target = "/kaggle/working/RecSys/recsys1/data_target_users_test.csv"

# pandas opens and closes the files itself, so no explicit open() handles are created
# here (unused handles would stay open for the lifetime of the kernel).

# Load the interactions; header=0 makes the first row the header, which is replaced below.
df = pd.read_csv(filepath_or_buffer=path,
                 header=0,
                 dtype={0: int, 1: int, 2: float},
                 sep=",",
                 engine='python')

# Load the target users (a single column, renamed below).
df_users = pd.read_csv(
    filepath_or_buffer=path_target,
    header=0,
    dtype={0: int},
    sep=",",
    engine="python",
)

# Rename the columns to the names used by the rest of the notebook.
df_users.columns = ["UserID"]
df.columns = ["UserID", "ItemID", "Interaction"]

<h2> Preprocessing </h2>

In [None]:
# Distinct user and item identifiers found in the interactions, plus the interaction count.
itemID_unique = df["ItemID"].unique()
userID_unique = df["UserID"].unique()
n_interactions = df.shape[0]

# Report how many distinct IDs exist and the largest ID of each kind.
print("Number of items\t {}, Number of users\t {}".format(itemID_unique.size, userID_unique.size))
print("Max ID items\t {}, Max Id users\t {}\n".format(itemID_unique.max(), userID_unique.max()))

<h2> Mapping and URM creation</h2>

In [None]:
# Encode the distinct item IDs as integer codes with pd.factorize and keep a lookup
# Series indexed by original item ID whose values are the codes.
mapped_id, original_id = pd.factorize(itemID_unique)
item_original_ID_to_index = pd.Series(mapped_id, index=original_id)

In [None]:
# Same encoding for users: a lookup Series indexed by original user ID whose values are
# the integer codes (mapped_id / original_id are overwritten here).
mapped_id, original_id = pd.factorize(userID_unique)
user_original_ID_to_index = pd.Series(mapped_id, index=original_id)

In [None]:
# Replace the original IDs in the interaction table, in place, with their integer codes.
df["UserID"] = df["UserID"].map(user_original_ID_to_index)
df["ItemID"] = df["ItemID"].map(item_original_ID_to_index)

In [None]:
# Build the user-rating matrix in COO format: rows are mapped user indices, columns are
# mapped item indices, values are the interaction values. No shape is passed, so scipy
# derives it from the indices.
URM_all = sps.coo_matrix((df["Interaction"].values, 
                          (df["UserID"].values, df["ItemID"].values)))

# Last expression of the cell: displays the matrix summary in the notebook.
URM_all

<h2> K fold </h2>

In [None]:
from tqdm import tqdm
import numpy as np
import gc
from Data_manager.split_functions.split_train_validation_random_holdout import split_train_in_two_percentage_global_sample

def create_folds(URM_all, k):
    """Build ``k`` train/validation pairs from ``URM_all``.

    On each iteration the current "leftover" matrix is split with
    ``split_train_in_two_percentage_global_sample``. The ``train_percentage``
    requested for fold ``i`` (0-based) is
    ``(n_el - (i + 1) * n_el / k) / leftover.nnz``, where ``n_el`` is the number
    of stored entries of ``URM_all``. The first matrix returned by the split
    becomes the new leftover, the second is that fold's validation matrix.

    Args:
        URM_all: sparse matrix exposing ``nnz`` and supporting subtraction.
        k: number of folds to create.

    Returns:
        ``(URM_train_list, URM_validation_list)``: two lists of length ``k``,
        where train matrix ``i`` is ``URM_all`` minus validation matrix ``i``.
    """
    total_interactions = URM_all.nnz
    train_folds, validation_folds = [], []
    leftover = URM_all

    for fold in range(1, k + 1):
        # Share of the current leftover that is requested as the "train" side.
        keep_fraction = (total_interactions - fold * total_interactions / k) / leftover.nnz
        leftover, held_out = split_train_in_two_percentage_global_sample(
            leftover, train_percentage=keep_fraction
        )
        train_folds.append(URM_all - held_out)
        validation_folds.append(held_out)

    return train_folds, validation_folds


def recall(recommended_items, relevant_items):
    """Return the fraction of ``relevant_items`` found in ``recommended_items``.

    Args:
        recommended_items: array-like of recommended item indices, assumed to
            contain no duplicates.
        relevant_items: array-like of relevant item indices, assumed to contain
            no duplicates.

    Returns:
        Number of recommended items that are relevant divided by the number of
        relevant items; ``0.0`` when there are no relevant items (instead of a
        0/0 division producing NaN).
    """
    relevant_items = np.asarray(relevant_items)
    n_relevant = relevant_items.shape[0]

    # Recall is undefined without relevant items; report 0 rather than NaN.
    if n_relevant == 0:
        return np.float32(0.0)

    # np.isin replaces the deprecated np.in1d; for 1-D inputs the result is the same.
    is_relevant = np.isin(recommended_items, relevant_items, assume_unique=True)

    return np.sum(is_relevant, dtype=np.float32) / n_relevant

def evaluate_algorithm(URM_test, recommender_object, at=10):
    """Compute the mean recall of a recommender over the users present in ``URM_test``.

    Args:
        URM_test: scipy sparse matrix whose rows are users and whose stored
            column indices are the relevant items of each user.
        recommender_object: object exposing ``recommend(user_id, cutoff=...)``
            and returning the recommended item indices.
        at: cutoff forwarded to ``recommend``.

    Returns:
        Sum of the per-user recall divided by the number of users that have at
        least one stored entry in ``URM_test``; ``0.0`` when no user has any
        (instead of raising ``ZeroDivisionError``).
    """
    # Row slicing through indices/indptr is only meaningful on CSR; for a matrix that is
    # already CSR this conversion returns it unchanged.
    URM_test = URM_test.tocsr()

    cumulative_recall = 0.0
    num_eval = 0

    for user_id in range(URM_test.shape[0]):
        row_start = URM_test.indptr[user_id]
        row_end = URM_test.indptr[user_id + 1]
        relevant_items = URM_test.indices[row_start:row_end]

        # Users without test interactions do not contribute to the average.
        if len(relevant_items) == 0:
            continue

        recommended_items = recommender_object.recommend(user_id, cutoff=at)
        num_eval += 1
        cumulative_recall += recall(recommended_items, relevant_items)

    # Nothing to average when no user could be evaluated.
    if num_eval == 0:
        return 0.0

    return cumulative_recall / num_eval

def kFold_evaluation_my_hybrid(URM_all, k=10, cutoff=10, garbage_collection=False):
    """Evaluate the weighted-similarity hybrid with k-fold validation.

    For every fold four item-similarity models are trained on the fold's train
    matrix (SLIM ElasticNet, RP3beta, ItemKNN CF, P3alpha), their ``W_sparse``
    matrices are combined with fixed weights, and the combined similarity is
    scored with ``evaluate_algorithm`` on the fold's validation matrix.

    Args:
        URM_all: full user-rating matrix, split by ``create_folds``.
        k: number of folds.
        cutoff: recommendation list length passed to ``evaluate_algorithm``.
        garbage_collection: when True, drop the fold's models and run
            ``gc.collect()`` before the next fold starts.

    Returns:
        The recall averaged over the ``k`` folds (also printed).
    """
    URM_train_list, URM_validation_list = create_folds(URM_all, k)
    acc = 0
    for i in range(k):
        URM_train = URM_train_list[i]

        recommender_SLIM = SLIMElasticNetRecommender(URM_train)
        recommender_SLIM.fit(**best_pars_recall_dict['SLIMElasticNetRecommender'])

        recommender_rp3 = RP3betaRecommender(URM_train)
        recommender_rp3.fit(**best_pars_recall_dict['RP3betaRecommender'])

        # Train the KNN and P3alpha models whose similarities are used in the blend below
        # (previously referenced without ever being created, which raised NameError).
        # NOTE(review): the two dictionary keys are assumed to follow the class-name
        # convention of the entries above — confirm against best_pars_recall_dict.
        item_knn = ItemKNNCFRecommender(URM_train)
        item_knn.fit(**best_pars_recall_dict['ItemKNNCFRecommender'])

        P3a = P3alphaRecommender(URM_train)
        P3a.fit(**best_pars_recall_dict['P3alphaRecommender'])

        # Weighted sum of the four item-item similarity matrices.
        new_similarity = (
            0.3648920858420368 * recommender_SLIM.W_sparse
            + 0.30413965737024856 * recommender_rp3.W_sparse
            + 0.053940767085383985 * item_knn.W_sparse
            + 0.27702748970233065 * P3a.W_sparse
        )

        recommender_object = ItemKNNCustomSimilarityRecommender(URM_train)
        recommender_object.fit(new_similarity)

        res = evaluate_algorithm(URM_validation_list[i], recommender_object, at=cutoff)
        acc = acc + res
        print("Fold" + str(i)+" evaluation ended with value " + str(res))

        if garbage_collection:
            # Release this fold's models before the next fold allocates its own.
            del recommender_SLIM, recommender_rp3, item_knn, P3a
            del new_similarity, recommender_object
            gc.collect()

    average_recall = acc / k
    print("Evaluation on all folded ended. Average recall is: "+ str(average_recall))
    return average_recall



In [None]:
# Run the hybrid evaluation on 20 folds, requesting 249 recommendations per user.
kFold_evaluation_my_hybrid(URM_all,k=20,cutoff=249)