# Import the Libraries

In [1]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as pyplot
from Evaluation.Evaluator import EvaluatorHoldout
from Data_manager.split_functions.split_train_validation_random_holdout import split_train_in_two_percentage_global_sample
# Numpy cries because it is an old code so we monkey patch it
np.int = int
np.bool = bool
np.float = float

# Import the Dataset

In [2]:
URM_all_dataframe = pd.read_csv('data_train.csv', 
                                sep=",", 
                                header= 0, 
                                dtype={0:int, 1:int, 2:float},
                                engine='python')

URM_all_dataframe.columns = ["UserID", "ItemID", "Interaction"]

### Building the sparse COO matrix using URM

In [3]:
import scipy.sparse as sps
# Build the COO sparse matrix associated with the URM
URM_all = sps.coo_matrix((URM_all_dataframe["Interaction"].values, 
                          (URM_all_dataframe["UserID"].values, URM_all_dataframe["ItemID"].values))) ## .values --> numpy array, df[..] --> pd series

### Split the data in train and validation set

In [4]:
URM_train, URM_test = split_train_in_two_percentage_global_sample(URM_all, train_percentage = 0.80)
evaluator_test = EvaluatorHoldout(URM_test, cutoff_list=[10])

EvaluatorHoldout: Ignoring 2525 (19.4%) Users that have less than 1 test interactions


# Implement the recommender


In [24]:
from Recommenders.GraphBased.RP3betaRecommender import RP3betaRecommender

In [25]:
recommender = RP3betaRecommender(URM_train)
{'alpha': 0.23983143272375212, 'beta': 0.21842056439651045, 'topK': 46, 'implicit': True, 'min_rating': 1}


RP3betaRecommender: URM Detected 634 ( 4.9%) users with no interactions.
RP3betaRecommender: URM Detected 238 ( 1.1%) items with no interactions.


{'alpha': 0.23983143272375212,
 'beta': 0.21842056439651045,
 'topK': 46,
 'implicit': True,
 'min_rating': 1}

### The coefficient gamma represents how much in proportion we want to use TopPop()

In [26]:
recommender.fit(alpha = 0.23983143272375212, beta = 0.21842056439651045, topK = 46, implicit = True, min_rating = 1)

RP3betaRecommender: Similarity column 22348 (100.0%), 1983.08 column/sec. Elapsed time 11.27 sec


In [27]:
W_sparse = recommender.get_similarity_matrix()

In [28]:
recommender = RP3betaRecommender(URM_all)

RP3betaRecommender: URM Detected 387 ( 3.0%) users with no interactions.
RP3betaRecommender: URM Detected 126 ( 0.6%) items with no interactions.


In [29]:
recommender.set_similarity_matrix(W_sparse)

In [30]:
result_df, _ = evaluator_test.evaluateRecommender(recommender)

EvaluatorHoldout: Processed 10500 (100.0%) in 14.25 sec. Users per second: 737


In [42]:
result_df

Unnamed: 0_level_0,PRECISION,PRECISION_RECALL_MIN_DEN,RECALL,MAP,MAP_MIN_DEN,MRR,NDCG,F1,HIT_RATE,ARHR_ALL_HITS,...,COVERAGE_USER,COVERAGE_USER_HIT,USERS_IN_GT,DIVERSITY_GINI,SHANNON_ENTROPY,RATIO_DIVERSITY_HERFINDAHL,RATIO_DIVERSITY_GINI,RATIO_SHANNON_ENTROPY,RATIO_AVERAGE_POPULARITY,RATIO_NOVELTY
cutoff,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
10,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.806142,0.0,0.806142,0.089427,10.738346,0.998297,0.255296,0.826625,1.566578,0.245129


In [43]:
def write_predictions(recommender_object, at=10):
    prediction_df = pd.read_csv('data_target_users_test.csv', sep= ",",
                                header=0, 
                                dtype={0:int},
                                engine='python')
    users = np.array(prediction_df['user_id'])
    recommendations = recommender_object.recommend(users)
    truncated_recommendations = [inner_list[:10] for inner_list in recommendations]
    prediction_df['item_list'] = truncated_recommendations
    def transform_items_to_string(item_list):
        return ' '.join(map(str, item_list))

    prediction_df['item_list'] = prediction_df['item_list'].apply(transform_items_to_string)
    print(prediction_df.head(10))
    prediction_df.to_csv('submission.csv',index=False)

write_predictions(recommender)

   user_id                                          item_list
0        1            101 36 403 506 515 1546 123 637 977 318
1        2                 1095 47 3176 1522 11 50 9 28 4 196
2        3           59 239 1281 857 536 259 375 414 2748 584
3        4                    28 50 249 7 1 2 136 639 145 171
4        5        1570 4 7033 10150 5138 9582 1511 77 471 766
5        6                     886 6 9 3 2 395 14 546 874 319
6        8          210 451 443 760 600 366 722 3916 480 1155
7        9  9018 3646 10012 12741 7395 21052 7055 6748 598...
8       10   1816 561 1668 2617 1446 1767 3721 2565 2423 3905
9       11                   40 58 31 4 188 955 203 34 520 44
