# Recommender Systems 2020/21

### Practice - Hybrid model with LightFM on MovieLens

In [1]:
from Data_manager.split_functions.split_train_validation_random_holdout import split_train_in_two_percentage_global_sample
from Data_manager.Movielens.Movielens10MReader import Movielens10MReader

# Load the MovieLens 10M dataset through its dedicated reader.
data_reader = Movielens10MReader()
data_loaded = data_reader.load_data()

# URM_all: all the user-item interactions of the dataset.
# ICM_genres: the item feature matrix registered under the name "ICM_genres",
# used later as side information for the hybrid model.
URM_all = data_loaded.get_URM_all()
ICM_genres = data_loaded.get_ICM_from_name("ICM_genres")

Movielens10M: Verifying data consistency...
Movielens10M: Verifying data consistency... Passed!
DataReader: current dataset is: <class 'Data_manager.Dataset.Dataset'>
	Number of items: 10681
	Number of users: 69878
	Number of interactions in URM_all: 10000054
	Value range in URM_all: 0.50-5.00
	Interaction density: 1.34E-02
	Interactions per user:
		 Min: 2.00E+01
		 Avg: 1.43E+02
		 Max: 7.36E+03
	Interactions per item:
		 Min: 0.00E+00
		 Avg: 9.36E+02
		 Max: 3.49E+04
	Gini Index: 0.57

	ICM name: ICM_genres, Value range: 1.00 / 1.00, Num features: 20, feature occurrences: 21564, density 1.01E-01
	ICM name: ICM_tags, Value range: 1.00 / 69.00, Num features: 10217, feature occurrences: 108563, density 9.95E-04
	ICM name: ICM_all, Value range: 1.00 / 69.00, Num features: 10237, feature occurrences: 130127, density 1.19E-03




In [2]:
from Base.Evaluation.Evaluator import EvaluatorHoldout

# Two nested holdout splits with train_percentage = 0.8: first separate the test
# interactions from URM_all, then separate the validation interactions from
# what is left, so that URM_train is what the models are fitted on.
# NOTE(review): no random seed is set in this notebook; if the split samples
# interactions at random (as its name suggests) the partitions, and therefore
# the reported metrics, will change at every run - confirm and seed if needed.
URM_train_validation, URM_test = split_train_in_two_percentage_global_sample(URM_all, train_percentage = 0.8)
URM_train, URM_validation = split_train_in_two_percentage_global_sample(URM_train_validation, train_percentage = 0.8)

# One evaluator per held-out matrix, both reporting metrics at cutoff 10.
evaluator_validation = EvaluatorHoldout(URM_validation, cutoff_list=[10])
evaluator_test = EvaluatorHoldout(URM_test, cutoff_list=[10])


## A pure collaborative filtering model


In [3]:
## Wrap LightFM in a BaseRecommender subclass so that it can be evaluated with EvaluatorHoldout
from Base.BaseRecommender import BaseRecommender
from lightfm import LightFM
import numpy as np

class LightFMWrapper(BaseRecommender):
    """Pure collaborative-filtering recommender backed by a LightFM WARP model.

    The class exposes a LightFM model through the BaseRecommender interface
    (fit + _compute_item_score) so that it can be handed to the evaluator.
    """

    RECOMMENDER_NAME = "LightFMWrapper"

    def __init__(self, URM_train):
        """
        :param URM_train: user-item interaction matrix the model is trained on.
        """
        super(LightFMWrapper, self).__init__(URM_train)


    def fit(self, ITEM_ALPHA, NUM_COMPONENTS, NUM_EPOCHS, NUM_THREADS):
        """Train a LightFM model with WARP loss on the training interactions.

        :param ITEM_ALPHA: forwarded to LightFM as item_alpha.
        :param NUM_COMPONENTS: forwarded to LightFM as no_components.
        :param NUM_EPOCHS: number of training epochs.
        :param NUM_THREADS: number of threads used by LightFM while fitting.
        """

        # Let's fit a WARP model
        self.lightFM_model = LightFM(loss='warp',
                                     item_alpha=ITEM_ALPHA,
                                     no_components=NUM_COMPONENTS)

        # Train on the matrix this instance was built with rather than on
        # whatever a notebook-level variable called URM_train currently holds.
        # self.URM_train is expected to be set by BaseRecommender.__init__.
        self.lightFM_model = self.lightFM_model.fit(self.URM_train,
                                       epochs=NUM_EPOCHS,
                                       num_threads=NUM_THREADS)


    def _compute_item_score(self, user_id_array, items_to_compute = None):
        """Compute the predicted score of the items for each requested user.

        :param user_id_array: iterable of user ids to score.
        :param items_to_compute: optional item ids to restrict the computation
            to; when None all the items are scored.
        :return: array of shape (len(user_id_array), n_items). Items that are
            not in items_to_compute keep a score of -inf.
        """

        if items_to_compute is None:
            items_to_compute = np.arange(self.n_items, dtype=np.int32)
        else:
            items_to_compute = np.asarray(items_to_compute, dtype=np.int32)

        # Start from -inf everywhere so that items that are not scored can
        # never outrank a scored one.
        item_scores = np.full((len(user_id_array), self.n_items), -np.inf)

        for user_index, user_id in enumerate(user_id_array):
            # predict returns one score per requested item: write it into the
            # matching columns only. Assigning to the whole row would fail with
            # a shape mismatch whenever a subset of the items is requested.
            item_scores[user_index, items_to_compute] = self.lightFM_model.predict(int(user_id),
                                                                                   items_to_compute)

        return item_scores


  "LightFM was compiled without OpenMP support. "


In [4]:
# Hyper-parameters of the LightFM model.
ITEM_ALPHA = 1e-6       # forwarded to LightFM as item_alpha
NUM_COMPONENTS = 10     # forwarded to LightFM as no_components
NUM_EPOCHS = 3
# Threads used while fitting; raise it when more physical cores are available.
NUM_THREADS = 4

# Build the wrapper on the training interactions and train it.
recommender = LightFMWrapper(URM_train=URM_train)
recommender.fit(ITEM_ALPHA=ITEM_ALPHA,
                NUM_COMPONENTS=NUM_COMPONENTS,
                NUM_EPOCHS=NUM_EPOCHS,
                NUM_THREADS=NUM_THREADS)

# Evaluate on the validation interactions; the second returned value is not needed.
result_dict, _ = evaluator_validation.evaluateRecommender(recommender)
result_dict

LightFMWrapper: URM Detected 67 (0.63 %) cold items.
EvaluatorHoldout: Processed 23000 ( 33.03% ) in 30.86 sec. Users per second: 745
EvaluatorHoldout: Processed 45000 ( 64.62% ) in 1.02 min. Users per second: 736
EvaluatorHoldout: Processed 67000 ( 96.22% ) in 1.52 min. Users per second: 733
EvaluatorHoldout: Processed 69633 ( 100.00% ) in 1.59 min. Users per second: 732


{10: {'ROC_AUC': 0.393381014156076,
  'PRECISION': 0.1663880631310307,
  'PRECISION_RECALL_MIN_DEN': 0.20443265588134776,
  'RECALL': 0.12839842022035525,
  'MAP': 0.10275651354390812,
  'MRR': 0.3611644007049251,
  'NDCG': 0.13380897842861825,
  'F1': 0.14494534624970035,
  'HIT_RATE': 1.6638806313098675,
  'ARHR': 0.5478780928308715,
  'NOVELTY': 0.00834751945953207,
  'AVERAGE_POPULARITY': 0.6355808532649352,
  'DIVERSITY_MEAN_INTER_LIST': 0.8406645637575517,
  'DIVERSITY_HERFINDAHL': 0.9840652490967872,
  'COVERAGE_ITEM': 0.044939612395843084,
  'COVERAGE_ITEM_CORRECT': 0.03960303342383672,
  'COVERAGE_USER': 0.99649388935001,
  'COVERAGE_USER_CORRECT': 0.6945390537794441,
  'DIVERSITY_GINI': 0.006947374690823262,
  'SHANNON_ENTROPY': 6.605857170229383}}

## A hybrid model


In [5]:
## Wrap the hybrid LightFM model in a BaseRecommender subclass so that it can be evaluated with EvaluatorHoldout
from Base.BaseRecommender import BaseRecommender
from lightfm import LightFM


class LightFMWrapper(BaseRecommender):
    """Hybrid recommender backed by a LightFM WARP model with item features.

    The class exposes a LightFM model, trained on both the interactions and an
    item feature matrix, through the BaseRecommender interface
    (fit + _compute_item_score) so that it can be handed to the evaluator.

    NOTE(review): when item_features is supplied LightFM does not add per-item
    identity features on its own, so each item is represented only through the
    features in ICM_train. With a handful of genre features this is likely to
    hurt accuracy; consider horizontally stacking an identity matrix onto the
    ICM before passing it here - verify against the LightFM documentation.
    """

    RECOMMENDER_NAME = "LightFMWrapper"

    def __init__(self, URM_train, ICM_train):
        """
        :param URM_train: user-item interaction matrix the model is trained on.
        :param ICM_train: item feature matrix, one row per item. A private copy
            is stored so that later changes made by the caller have no effect.
        """
        super(LightFMWrapper, self).__init__(URM_train)

        self.ICM_train = ICM_train.copy()


    def fit(self, ITEM_ALPHA, NUM_COMPONENTS, NUM_EPOCHS, NUM_THREADS):
        """Train a LightFM model with WARP loss on interactions and item features.

        :param ITEM_ALPHA: forwarded to LightFM as item_alpha.
        :param NUM_COMPONENTS: forwarded to LightFM as no_components.
        :param NUM_EPOCHS: number of training epochs.
        :param NUM_THREADS: number of threads used by LightFM while fitting.
        """

        # Let's fit a WARP model
        self.lightFM_model = LightFM(loss='warp',
                                     item_alpha=ITEM_ALPHA,
                                     no_components=NUM_COMPONENTS)

        # Train on the matrix this instance was built with rather than on
        # whatever a notebook-level variable called URM_train currently holds.
        # self.URM_train is expected to be set by BaseRecommender.__init__.
        self.lightFM_model = self.lightFM_model.fit(self.URM_train,
                                       item_features=self.ICM_train,
                                       epochs=NUM_EPOCHS,
                                       num_threads=NUM_THREADS)


    def _compute_item_score(self, user_id_array, items_to_compute = None):
        """Compute the predicted score of the items for each requested user.

        :param user_id_array: iterable of user ids to score.
        :param items_to_compute: optional item ids to restrict the computation
            to; when None all the items are scored.
        :return: array of shape (len(user_id_array), n_items). Items that are
            not in items_to_compute keep a score of -inf.
        """

        if items_to_compute is None:
            items_to_compute = np.arange(self.n_items, dtype=np.int32)
        else:
            items_to_compute = np.asarray(items_to_compute, dtype=np.int32)

        # Start from -inf everywhere so that items that are not scored can
        # never outrank a scored one.
        item_scores = np.full((len(user_id_array), self.n_items), -np.inf)

        for user_index, user_id in enumerate(user_id_array):
            # The item ids given to predict are used as row indices into
            # item_features, therefore the full feature matrix must be passed
            # even when only a subset of the items is scored: slicing its rows
            # would make the original ids point to the wrong (or missing) rows.
            # The returned scores are written into the matching columns only.
            item_scores[user_index, items_to_compute] = self.lightFM_model.predict(int(user_id),
                                                                                   items_to_compute,
                                                                                   item_features = self.ICM_train)

        return item_scores


In [6]:
# Same hyper-parameters as the collaborative model, now with the genre
# features supplied as item side information.
recommender = LightFMWrapper(URM_train=URM_train, ICM_train=ICM_genres)
recommender.fit(ITEM_ALPHA=ITEM_ALPHA,
                NUM_COMPONENTS=NUM_COMPONENTS,
                NUM_EPOCHS=NUM_EPOCHS,
                NUM_THREADS=NUM_THREADS)

# Evaluate on the validation interactions; the second returned value is not needed.
result_dict, _ = evaluator_validation.evaluateRecommender(recommender)
result_dict

LightFMWrapper: URM Detected 67 (0.63 %) cold items.
EvaluatorHoldout: Processed 21000 ( 30.16% ) in 31.04 sec. Users per second: 677
EvaluatorHoldout: Processed 42000 ( 60.32% ) in 1.04 min. Users per second: 673
EvaluatorHoldout: Processed 62000 ( 89.04% ) in 1.55 min. Users per second: 668
EvaluatorHoldout: Processed 69633 ( 100.00% ) in 1.75 min. Users per second: 663


{10: {'ROC_AUC': 0.03868070329726443,
  'PRECISION': 0.009738198842503827,
  'PRECISION_RECALL_MIN_DEN': 0.010646902957661701,
  'RECALL': 0.0044830359369395344,
  'MAP': 0.0027995338146300694,
  'MRR': 0.02238778981594856,
  'NDCG': 0.0035187683171075046,
  'F1': 0.006139649059885138,
  'HIT_RATE': 0.09738198842502836,
  'ARHR': 0.024103212101359135,
  'NOVELTY': 0.01317335684681066,
  'AVERAGE_POPULARITY': 0.06398865119726148,
  'DIVERSITY_MEAN_INTER_LIST': 0.8191545897758553,
  'DIVERSITY_HERFINDAHL': 0.9819142825891063,
  'COVERAGE_ITEM': 0.0607621009268795,
  'COVERAGE_ITEM_CORRECT': 0.025653028742627097,
  'COVERAGE_USER': 0.99649388935001,
  'COVERAGE_USER_CORRECT': 0.08579238100689773,
  'DIVERSITY_GINI': 0.00706576386401895,
  'SHANNON_ENTROPY': 6.5441175554756885}}