# Import Libraries and Dataset

In [1]:
import numpy as np
import scipy as sp
import matplotlib.pyplot as plt
import random
import pandas as pd
from scipy import stats
from scipy.optimize import fmin
import scipy.sparse as sps
from Evaluation.Evaluator import EvaluatorHoldout
from Data_manager.split_functions.split_train_validation_random_holdout import split_train_in_two_percentage_global_sample
# Numpy cries because it is an old code so we monkey patch it
np.int = int
np.bool = bool
np.float = float


In [2]:
URM_all_dataframe = pd.read_csv('data_train.csv', 
                                sep=",", 
                                header= 0, 
                                dtype={0:int, 1:int, 2:float},
                                engine='python')

URM_all_dataframe.columns = ["UserID", "ItemID", "Interaction"]


# Build the COO sparse matrix associated with the URM
URM_all = sps.coo_matrix((URM_all_dataframe["Interaction"].values, 
                          (URM_all_dataframe["UserID"].values, URM_all_dataframe["ItemID"].values))) ## .values --> numpy array, df[..] --> pd series



### Split the dataset 80/20

In [3]:
URM_train, URM_test = split_train_in_two_percentage_global_sample(URM_all, train_percentage = 0.8)
URM_train, URM_validation = split_train_in_two_percentage_global_sample(URM_train, train_percentage = 0.80)
evaluator_validation = EvaluatorHoldout(URM_validation, cutoff_list=[10])
evaluator_test = EvaluatorHoldout(URM_test, cutoff_list=[10])

EvaluatorHoldout: Ignoring 2971 (22.8%) Users that have less than 1 test interactions
EvaluatorHoldout: Ignoring 2548 (19.6%) Users that have less than 1 test interactions


## Build SLIMElasticNetRecommender

In [4]:
from Recommenders.SLIM.SLIMElasticNetRecommender import SLIMElasticNetRecommender
recommender = SLIMElasticNetRecommender(URM_train, verbose = True)

SLIMElasticNetRecommender: URM Detected 837 ( 6.4%) users with no interactions.
SLIMElasticNetRecommender: URM Detected 458 ( 2.0%) items with no interactions.


In [28]:
%load_ext Cython

In [29]:
%%cython

import numpy as np
import time
import scipy.sparse as sps

from libc.stdlib cimport rand, srand, RAND_MAX

def train_multiple_epochs(URM_train, learning_rate_input, regularization_2_input, n_epochs,evaluator_validation, recommender_object,SM):

    URM_train_coo = URM_train.tocoo()
    cdef int n_items = URM_train.shape[1]
    cdef int n_interactions = URM_train.nnz
    cdef int[:] URM_train_coo_row = URM_train_coo.row
    cdef int[:] URM_train_coo_col = URM_train_coo.col
    cdef double[:] URM_train_coo_data = URM_train_coo.data
    cdef int[:] URM_train_indices = URM_train.indices
    cdef int[:] URM_train_indptr = URM_train.indptr
    cdef double[:] URM_train_data = URM_train.data

    cdef double[:,:] item_item_S = SM
    cdef double learning_rate = learning_rate_input
    cdef double regularization_2 = regularization_2_input
    cdef double loss = 0.0
    cdef long start_time
    cdef double true_rating, predicted_rating, prediction_error, profile_rating
    cdef int start_profile, end_profile
    cdef int index, sample_num, user_id, item_id, profile_item_id
    
    for n_epoch in range(n_epochs):
        
        loss = 0.0
        start_time = time.time()
        
        for sample_num in range(n_interactions):

            # Randomly pick sample
            index = rand() % n_interactions

            user_id = URM_train_coo_row[index]
            item_id = URM_train_coo_col[index]
            true_rating = URM_train_coo_data[index]

            # Compute prediction
            start_profile = URM_train_indptr[user_id]
            end_profile = URM_train_indptr[user_id+1]
            predicted_rating = 0.0

            for index in range(start_profile, end_profile):
                profile_item_id = URM_train_indices[index]
                profile_rating = URM_train_data[index]
                predicted_rating += item_item_S[profile_item_id,item_id] * profile_rating

            # Compute prediction error, or gradient
            prediction_error = true_rating - predicted_rating
            loss += prediction_error**2

            # Update model, in this case the similarity
            for index in range(start_profile, end_profile):
                profile_item_id = URM_train_indices[index]
                profile_rating = URM_train_data[index]
                item_item_S[profile_item_id,item_id] += learning_rate * (prediction_error * profile_rating - 
                                                                         regularization_2 * item_item_S[profile_item_id,item_id])

            # Ensure diagonal is always zero
            item_item_S[item_id,item_id] = 0.0
        
#             if sample_num % 1000000 == 0:
#                 print("Epoch {}: {:.2f}%".format(n_epoch+1, sample_num/n_interactions*100))
            
        #result_df, _ = evaluator_validation.evaluateRecommender(recommender_object)
        #val_MAP = result_df.loc[10]["MAP"]
        elapsed_time = time.time() - start_time
        samples_per_second = (sample_num+1)/elapsed_time

        if(n_epoch < 1000 and n_epoch != 1):
            print("Epoch {} complete in in {:.2f} seconds, loss is {:.3E}. Samples per second {:.2f}.".format(n_epoch+1, time.time() - start_time, loss/(sample_num+1), samples_per_second))
        else:
            sparse_similarity = sps.csr_matrix(item_item_S)
            recommender_object.set_similarity_matrix(sparse_similarity)
            result_df, _ = evaluator_validation.evaluateRecommender(recommender_object)
            val_MAP = result_df.loc[10]["MAP"]
            print("Epoch {} complete in in {:.2f} seconds, loss is {:.3E}. Samples per second {:.2f}. Validation MAP {:.4f}".format(n_epoch+1, time.time() - start_time, loss/(sample_num+1), samples_per_second,val_MAP))

    return np.array(item_item_S), loss/(sample_num+1), samples_per_second

Content of stdout:
_cython_magic_535074cf1db2642c58f5a7890d9c50c622e19663.c
   Creazione della libreria C:\Users\Tommaso\.ipython\cython\Users\Tommaso\.ipython\cython\_cython_magic_535074cf1db2642c58f5a7890d9c50c622e19663.cp311-win_amd64.lib e dell'oggetto C:\Users\Tommaso\.ipython\cython\Users\Tommaso\.ipython\cython\_cython_magic_535074cf1db2642c58f5a7890d9c50c622e19663.cp311-win_amd64.exp
Generazione codice in corso...
Generazione codice terminata

### Initialize the similarity matrix

In [None]:
n_items = URM_train.shape[1]
item_item_S = np.zeros((n_items, n_items), dtype = float)

### Train the model

In [30]:
learning_rate = 1e-4
regularization_2 = 1e-3
    
item_item_S, loss, samples_per_second = train_multiple_epochs(URM_train, learning_rate, regularization_2,1010, evaluator_validation, recommender,item_item_S)

Epoch 1 complete in in 1.51 seconds, loss is 3.828E-02. Samples per second 202676.59.
EvaluatorHoldout: Processed 10046 (100.0%) in 9.57 sec. Users per second: 1050
Epoch 2 complete in in 15.20 seconds, loss is 3.773E-02. Samples per second 227907.96. Validation MAP 0.0150
Epoch 3 complete in in 1.03 seconds, loss is 3.783E-02. Samples per second 297436.61.
Epoch 4 complete in in 0.83 seconds, loss is 3.730E-02. Samples per second 370820.48.
Epoch 5 complete in in 1.61 seconds, loss is 3.685E-02. Samples per second 190033.94.
Epoch 6 complete in in 1.41 seconds, loss is 3.623E-02. Samples per second 217440.23.
Epoch 7 complete in in 1.21 seconds, loss is 3.580E-02. Samples per second 253565.62.
Epoch 8 complete in in 1.01 seconds, loss is 3.515E-02. Samples per second 301994.86.
Epoch 9 complete in in 0.81 seconds, loss is 3.517E-02. Samples per second 378693.76.
Epoch 10 complete in in 1.61 seconds, loss is 3.492E-02. Samples per second 190481.47.
Epoch 11 complete in in 1.40 seconds,

### Setting the Similarity matrix in a recommender_object instance

In [5]:
item_item_S = np.load('SimMatrix(1e-3).npz')

In [6]:
sparse_similarity = sps.csr_matrix(item_item_S['arr_0'])
recommender.set_similarity_matrix(sparse_similarity)

### Evaluation

In [7]:
result_df, _ = evaluator_validation.evaluateRecommender(recommender)
result_df.loc[10]

EvaluatorHoldout: Processed 10054 (100.0%) in 9.68 sec. Users per second: 1039


PRECISION                     0.042719
PRECISION_RECALL_MIN_DEN      0.096075
RECALL                        0.089292
MAP                            0.01927
MAP_MIN_DEN                   0.047394
MRR                           0.125588
NDCG                          0.079127
F1                             0.05779
HIT_RATE                      0.285757
ARHR_ALL_HITS                 0.153407
NOVELTY                       0.004864
AVERAGE_POPULARITY            0.310783
DIVERSITY_MEAN_INTER_LIST     0.934356
DIVERSITY_HERFINDAHL          0.993426
COVERAGE_ITEM                 0.206103
COVERAGE_ITEM_HIT             0.057544
ITEMS_IN_GT                    0.73756
COVERAGE_USER                   0.7719
COVERAGE_USER_HIT             0.220576
USERS_IN_GT                     0.7719
DIVERSITY_GINI                 0.02608
SHANNON_ENTROPY               8.872478
RATIO_DIVERSITY_HERFINDAHL    0.993786
RATIO_DIVERSITY_GINI          0.076065
RATIO_SHANNON_ENTROPY         0.684002
RATIO_AVERAGE_POPULARITY 

In [8]:
result_df

Unnamed: 0_level_0,PRECISION,PRECISION_RECALL_MIN_DEN,RECALL,MAP,MAP_MIN_DEN,MRR,NDCG,F1,HIT_RATE,ARHR_ALL_HITS,...,COVERAGE_USER,COVERAGE_USER_HIT,USERS_IN_GT,DIVERSITY_GINI,SHANNON_ENTROPY,RATIO_DIVERSITY_HERFINDAHL,RATIO_DIVERSITY_GINI,RATIO_SHANNON_ENTROPY,RATIO_AVERAGE_POPULARITY,RATIO_NOVELTY
cutoff,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
10,0.042719,0.096075,0.089292,0.01927,0.047394,0.125588,0.079127,0.05779,0.285757,0.153407,...,0.7719,0.220576,0.7719,0.02608,8.872478,0.993786,0.076065,0.684002,2.620479,0.356233


In [22]:
recommender.recommend(41)

TopPopRecommender: URM Detected 864 ( 6.6%) users with no interactions.
TopPopRecommender: URM Detected 475 ( 2.1%) items with no interactions.


[2,
 4,
 1,
 3,
 6,
 8,
 9,
 15,
 14,
 19,
 20,
 5,
 10,
 11,
 25,
 22,
 16,
 26,
 31,
 17,
 36,
 40,
 32,
 34,
 44,
 41,
 48,
 33,
 45,
 24,
 30,
 29,
 35,
 28,
 51,
 38,
 66,
 21,
 27,
 18,
 12,
 58,
 56,
 60,
 37,
 84,
 13,
 46,
 53,
 55,
 72,
 74,
 65,
 59,
 49,
 77,
 81,
 42,
 67,
 88,
 87,
 94,
 50,
 61,
 83,
 99,
 95,
 62,
 80,
 75,
 85,
 78,
 47,
 68,
 63,
 70,
 115,
 79,
 108,
 54,
 23,
 96,
 73,
 106,
 101,
 90,
 110,
 107,
 89,
 43,
 112,
 104,
 134,
 131,
 105,
 114,
 132,
 69,
 118,
 127,
 139,
 135,
 133,
 164,
 117,
 146,
 103,
 177,
 102,
 130,
 151,
 157,
 93,
 138,
 149,
 155,
 165,
 175,
 154,
 121,
 170,
 128,
 141,
 179,
 52,
 119,
 202,
 144,
 145,
 125,
 191,
 183,
 64,
 186,
 129,
 82,
 159,
 148,
 124,
 156,
 171,
 147,
 174,
 199,
 182,
 97,
 180,
 193,
 158,
 168,
 172,
 228,
 109,
 178,
 162,
 71,
 212,
 120,
 122,
 225,
 208,
 176,
 184,
 143,
 98,
 137,
 187,
 194,
 223,
 163,
 126,
 250,
 237,
 279,
 213,
 271,
 216,
 136,
 257,
 224,
 215,
 190,
 207,
 2

### Write the predictions on submission.csv


In [42]:
def write_predictions(recommender_object, at=10):
    prediction_df = pd.read_csv('data_target_users_test.csv', sep= ",",
                                header=0, 
                                dtype={0:int},
                                engine='python')
    #prediction_df['mapped user'] = prediction_df['user_id'].map(user_original_ID_to_index).fillna(-1).astype(int)
    #prediction_df['item_list'] = prediction_df['mapped user'].apply(recommender_object.recommend)
    prediction_df['item_list'] = prediction_df['user_id'].apply(lambda user_id: recommender_object.recommend(user_id)[:at])
    #def map_items(item_list):
    #    return [index_to_item_original.get(item, item) for item in item_list]

    #prediction_df['item_list'] = prediction_df['item_list'].apply(map_items)
    def transform_items_to_string(item_list):
        return ' '.join(map(str, item_list))

    prediction_df['item_list'] = prediction_df['item_list'].apply(transform_items_to_string)
    print(prediction_df.head(10))
    #del prediction_df['mapped user']
    prediction_df.to_csv('submission.csv',index=False)

write_predictions(recommender)

   user_id                                          item_list
0        1                     3 196 146 6 128 9 137 5 101 21
1        2                     3 29 12 46 277 137 50 4 196 48
2        3         1104 803 450 435 867 3472 2952 609 499 398
3        4                      50 4 9 42 136 139 27 175 5 70
4        5               4 339 1176 688 1056 362 68 115 77 14
5        6                   24 3 29 32 46 287 48 4238 38 196
6        8                 4238 22 1094 100 46 38 5683 24 4 2
7        9  11231 10818 6721 6723 6724 21265 6747 6748 112...
8       10                   8 196 25 173 342 123 7 6 1433 89
9       11                           7 10 2 6 4 44 40 1 79 24


In [43]:
np.savez_compressed('SimMatrix(1e-3)', item_item_S)