In [1]:
import os 
import sys
while os.path.split(os.getcwd())[1] != 'RecSysChallenge2023-Team':
    os.chdir('..')
sys.path.insert(1, os.getcwd())

import numpy as np 

In [2]:
path_save= "Daniele/Recommenders/FM/saved_models"
if not os.path.exists(path_save):
    os.makedirs(path_save)

In [3]:
from Data_manager.split_functions.split_train_validation_random_holdout import split_train_in_two_percentage_global_sample
from Evaluation.Evaluator import EvaluatorHoldout

import Daniele.Utils.MyDataManager as dm 
import Daniele.Utils.MatrixManipulation as mm
import Daniele.Utils.SaveSparceMatrix as ssm
import scipy.sparse as sps

URMv = dm.getURMviews()
URMo = dm.getURMopen()
ICMt=dm.getICMt()
ICMl=dm.getICMl()

name="train.csv"
dir = os.path.join(path_save,name)
if not os.path.exists(dir):
    URMv_train, URMv_test = split_train_in_two_percentage_global_sample(URMv, train_percentage = 0.80)

    ssm.saveMatrix(dir,URMv_train)

    name="test.csv"
    dir = os.path.join(path_save,name)
    ssm.saveMatrix(dir,URMv_test)

    urm_def = mm.defaultExplicitURM(urmv=URMv_train,urmo=URMo,icml=ICMl,icmt=ICMt, normalize=True, add_aug=True,appendICM=False)
    name="urm_def.csv"
    dir = os.path.join(path_save,name)
    ssm.saveMatrix(dir,urm_def)

    urm_bin = mm.defaultExplicitURM(urmv=URMv_train,urmo=URMo, normalize=False, add_aug=False)
    urm_bin.data = np.ones(len(urm_bin.data))
    name="urm_bin.csv"
    dir = os.path.join(path_save,name)
    ssm.saveMatrix(dir,urm_bin)
    
else:
    URMv_train=ssm.readMatrix(dir)

    name="test.csv"
    dir = os.path.join(path_save,name)
    URMv_test=ssm.readMatrix(dir)

    name="urm_def.csv"
    dir = os.path.join(path_save,name)
    urm_def = ssm.readMatrix(dir)

    name="urm_bin.csv"
    dir = os.path.join(path_save,name)
    urm_bin = ssm.readMatrix(dir)


/Users/daniele/Desktop/RecSys/RecSysChallenge2023-Team


In [4]:
from Evaluation.Evaluator import EvaluatorHoldout

evaluator_test = EvaluatorHoldout(URMv_test, cutoff_list=[10])

EvaluatorHoldout: Ignoring 1559 ( 3.7%) Users that have less than 1 test interactions


### Recommenders

In [5]:
from Recommenders.SLIM.SLIMElasticNetRecommender import MultiThreadSLIM_SLIMElasticNetRecommender
name="slim_elastic_high"
dir = os.path.join(path_save,name)

slim_elastic_high = MultiThreadSLIM_SLIMElasticNetRecommender(URM_train=urm_bin)
if not os.path.exists(dir+".zip"):
    
    # {'alpha': 0.002930092866966509, 'l1_ratio': 0.006239337272696024, 'topK': 882} -> MAP 0.0422894
    slim_elastic_high.fit(alpha=0.002930092866966509, l1_ratio=0.006239337272696024, topK=882)
    slim_elastic_high.save_model(path_save,name)
else:
    slim_elastic_high.load_model(path_save,name)

r_slim = slim_elastic_high._compute_item_score(range(dm.n_users))
r_slim = sps.coo_matrix(r_slim)

SLIMElasticNetRecommender: URM Detected 3461 (12.4%) items with no interactions.
SLIMElasticNetRecommender: Loading model from file 'Daniele/Recommenders/FM/saved_modelsslim_elastic_high'
SLIMElasticNetRecommender: Loading complete


In [7]:
from Recommenders.GraphBased.RP3betaRecommender import RP3betaRecommender

rp3beta_high = RP3betaRecommender(urm_bin)
# {'topK': 91, 'alpha': 0.7758215673815734, 'beta': 0.2719143753442684, 'normalize_similarity': True} -> MAP 0.0.0273508
rp3beta_high.fit( topK=91, alpha=0.7758215673815734, beta=0.2719143753442684, normalize_similarity=True )

r_rp3beta = rp3beta_high._compute_item_score(range(dm.n_users))
r_rp3beta = sps.coo_matrix(r_rp3beta)

RP3betaRecommender: URM Detected 3461 (12.4%) items with no interactions.
RP3betaRecommender: Similarity column 27968 (100.0%), 4135.00 column/sec. Elapsed time 6.76 sec


### CLASSE

In [6]:
from Recommenders.BaseRecommender import BaseRecommender
from lightfm import LightFM
import numpy as np
from  tqdm import tqdm

class LightFMCFRecommender(BaseRecommender):
    """LightFMCFRecommender"""

    RECOMMENDER_NAME = "LightFMCFRecommender"

    def __init__(self, URM_train,user_features = None):
        self.user_features = user_features
        super(LightFMCFRecommender, self).__init__(URM_train)


    def fit(self, epochs = 300, alpha = 1e-6, n_factors = 10, n_threads = 4):
        
        # Let's fit a WARP model
        self.lightFM_model = LightFM(loss='bpr',    # warp
                                     item_alpha=alpha,
                                     no_components=n_factors)
        batch_size = 2
        best_map=-1
        best_epoch = 0 
        for i in tqdm(range (1,int(epochs/batch_size)+1)):
            print("Epochs->",batch_size*i)
            self.lightFM_model = self.lightFM_model.fit_partial(self.URM_train, 
                                        epochs=i*batch_size,
                                        user_features = self.user_features,
                                        num_threads=n_threads)
            result_df, _ = evaluator_test.evaluateRecommender(self)
            print("Iter ",i,": Epochs->",batch_size*i,"\tMAP ->",result_df["MAP"].values[0])
            if result_df["MAP"].values[0] > best_map : 
                best_map = result_df["MAP"].values[0]
                best_epoch = i * batch_size
        print("Best MAP -> ",best_map,"\t Best epoch -> ",best_epoch)
        

                                       
    def _compute_item_score(self, user_id_array, items_to_compute = None):
        
        # Create a single (n_items, ) array with the item score, then copy it for every user
        items_to_compute = np.arange(self.n_items)
        
        item_scores = - np.ones((len(user_id_array), self.n_items)) * np.inf

        for user_index, user_id in enumerate(user_id_array):
            item_scores[user_index] = self.lightFM_model.predict(int(user_id), 
                                                                 items_to_compute)

        return item_scores




In [49]:
# Prova con BINARIA

recommender = LightFMCFRecommender(urm_bin)
recommender.fit(epochs = 10)

result_df, _ = evaluator_test.evaluateRecommender(recommender)
result_df

LightFMCFRecommender: URM Detected 3461 (12.4%) items with no interactions.


  0%|          | 0/2 [00:00<?, ?it/s]

Epochs-> 5
EvaluatorHoldout: Processed 40070 (100.0%) in 51.89 sec. Users per second: 772


 50%|█████     | 1/2 [01:02<01:02, 62.67s/it]

Iter  1 : Epochs-> 5 	MAP -> cutoff
10    0.010107
Name: MAP, dtype: object
Epochs-> 10


  return np.sum(np.divide(np.power(2, scores) - 1, np.log2(np.arange(scores.shape[0], dtype=np.float64) + 2)),


EvaluatorHoldout: Processed 40070 (100.0%) in 51.39 sec. Users per second: 780


100%|██████████| 2/2 [02:14<00:00, 67.27s/it]

Iter  2 : Epochs-> 10 	MAP -> cutoff
10    0.009821
Name: MAP, dtype: object



  return np.sum(np.divide(np.power(2, scores) - 1, np.log2(np.arange(scores.shape[0], dtype=np.float64) + 2)),


EvaluatorHoldout: Processed 40070 (100.0%) in 51.41 sec. Users per second: 779


Unnamed: 0_level_0,PRECISION,PRECISION_RECALL_MIN_DEN,RECALL,MAP,MAP_MIN_DEN,MRR,NDCG,F1,HIT_RATE,ARHR_ALL_HITS,...,COVERAGE_USER,COVERAGE_USER_HIT,USERS_IN_GT,DIVERSITY_GINI,SHANNON_ENTROPY,RATIO_DIVERSITY_HERFINDAHL,RATIO_DIVERSITY_GINI,RATIO_SHANNON_ENTROPY,RATIO_AVERAGE_POPULARITY,RATIO_NOVELTY
cutoff,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
10,0.02157,0.034411,0.031829,0.009821,0.015437,0.071608,0.027609,0.025714,0.158048,0.083126,...,0.96255,0.15213,0.96255,0.020775,9.567265,0.99818,0.047508,0.711418,2.063198,0.293197


In [61]:
result_df["MAP"].values[0]

0.00982103045860244

In [None]:
# Prova con ESPLICITA

recommender = LightFMCFRecommender(urm_def)
recommender.fit(epochs = 10)

result_df, _ = evaluator_test.evaluateRecommender(recommender)
result_df

LightFMCFRecommender: URM Detected 3461 (12.4%) items with no interactions.


  0%|          | 0/2 [00:00<?, ?it/s]

Epochs-> 5


  0%|          | 0/2 [00:06<?, ?it/s]


KeyboardInterrupt: 

In [63]:
recommender = LightFMCFRecommender(urm_def,r_slim)
recommender.fit(epochs = 10)

result_df, _ = evaluator_test.evaluateRecommender(recommender)
result_df

LightFMCFRecommender: URM Detected 3461 (12.4%) items with no interactions.


  0%|          | 0/2 [00:00<?, ?it/s]

Epochs-> 5


In [33]:
user_popularity = np.ediff1d(sps.csr_matrix(urm_def).indptr)


u1 = sps.csr_matrix(users[sort[:int(dm.n_users/2)]],shape=(1,dm.n_users))
u2 = sps.csr_matrix(users[sort[int(dm.n_users/2)]:],shape=(1,dm.n_users))
u2.tocoo().col.max()

TypeError: only integer scalar arrays can be converted to a scalar index

In [34]:
user_popularity = np.ediff1d(sps.csr_matrix(urm_def).indptr)
sort = np.argsort(user_popularity)
u = sps.coo_matrix(user_popularity)

recommender = LightFMCFRecommender(urm_def,sps.hstack([r_slim,r_rp3beta,u.T]))

LightFMCFRecommender: URM Detected 3461 (12.4%) items with no interactions.


In [None]:
user_popularity = np.ediff1d(sps.csr_matrix(urm_bin).indptr)
sort = np.argsort(user_popularity)

u1 = sps.csr_matrix(user_popularity[:int(dm.n_users/2)])
u2 = sps.csr_matrix(user_popularity[int(dm.n_users/2):])

recommender = LightFMCFRecommender(urm_def,sps.hstack(r_slim,r_rp3beta,u1,u2))
recommender.fit()

result_df, _ = evaluator_test.evaluateRecommender(recommender)
result_df 