# Import the Libraries

In [12]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as pyplot
from Evaluation.Evaluator import EvaluatorHoldout
from Data_manager.split_functions.split_train_validation_random_holdout import split_train_in_two_percentage_global_sample
# The legacy recommender code still refers to the scalar aliases `np.int`,
# `np.bool` and `np.float`, which recent NumPy releases removed. Restore only
# the aliases that are actually missing: NumPy >= 2.0 defines `np.bool` again
# as its own boolean scalar type, and overwriting it with the builtin `bool`
# would clobber a real NumPy type for every other library in the kernel.
for _alias, _builtin in (("int", int), ("bool", bool), ("float", float)):
    if _alias not in vars(np):
        setattr(np, _alias, _builtin)
del _alias, _builtin

# Import the Dataset

In [13]:
# Load the interaction triplets from disk. The first CSV row is treated as a
# header and the three columns are typed positionally (int, int, float).
URM_all_dataframe = pd.read_csv(
    'data_train.csv',
    sep=",",
    header=0,
    dtype={0: int, 1: int, 2: float},
    engine='python',
)

# Replace whatever header the file carries with the names used below.
URM_all_dataframe.columns = ["UserID", "ItemID", "Interaction"]

### Building the URM as a sparse COO matrix

In [14]:
import scipy.sparse as sps

# Assemble the User Rating Matrix in COO format from the dataframe columns:
# interaction values as data, UserID as row index, ItemID as column index.
# The matrix shape is left for SciPy to infer from the largest indices.
URM_all = sps.coo_matrix(
    (
        URM_all_dataframe["Interaction"].to_numpy(),
        (
            URM_all_dataframe["UserID"].to_numpy(),
            URM_all_dataframe["ItemID"].to_numpy(),
        ),
    )
)

### Split the data into train and validation sets

In [15]:
# Seed NumPy's global RNG before the random hold-out split so that the split,
# and every metric computed on it, is the same after a kernel restart.
# NOTE(review): presumably the split helper samples through `np.random` —
# confirm against its implementation.
RANDOM_SEED = 42
np.random.seed(RANDOM_SEED)

# Keep 80% of the interactions for training; the rest is the hold-out set.
URM_train, URM_test = split_train_in_two_percentage_global_sample(URM_all, train_percentage = 0.80)

# Evaluator bound to the hold-out interactions, reporting metrics at cutoff 10.
evaluator_test = EvaluatorHoldout(URM_test, cutoff_list=[10])

EvaluatorHoldout: Ignoring 2571 (19.7%) Users that have less than 1 test interactions


# Implement the recommender


In [16]:
from Recommenders.GraphBased.RP3betaRecommender import RP3betaRecommender

In [17]:
# Instantiate the RP3beta model on the training interactions.
recommender = RP3betaRecommender(URM_train)

# Hyperparameter set shown as the cell output for reference.
dict(
    alpha=0.23983143272375212,
    beta=0.21842056439651045,
    topK=46,
    implicit=True,
    min_rating=1,
)


RP3betaRecommender: URM Detected 597 ( 4.6%) users with no interactions.
RP3betaRecommender: URM Detected 234 ( 1.0%) items with no interactions.


{'alpha': 0.23983143272375212,
 'beta': 0.21842056439651045,
 'topK': 46,
 'implicit': True,
 'min_rating': 1}

### The coefficient gamma controls the proportion in which TopPop() is used

In [8]:
# Fit the model with the chosen hyperparameters.
recommender.fit(
    alpha=0.23983143272375212,
    beta=0.21842056439651045,
    topK=46,
    implicit=True,
    min_rating=1,
)

RP3betaRecommender: Similarity column 22348 (100.0%), 2621.45 column/sec. Elapsed time 8.53 sec


In [9]:
# Evaluate the recommender on the hold-out split. The call returns two
# values; only the first (the metrics table) is kept, the second is discarded.
result_df, _ = evaluator_test.evaluateRecommender(recommender)

EvaluatorHoldout: Processed 10465 (100.0%) in 12.36 sec. Users per second: 847


In [10]:
# Display the evaluation metrics table (one row per cutoff).
result_df

Unnamed: 0_level_0,PRECISION,PRECISION_RECALL_MIN_DEN,RECALL,MAP,MAP_MIN_DEN,MRR,NDCG,F1,HIT_RATE,ARHR_ALL_HITS,...,COVERAGE_USER,COVERAGE_USER_HIT,USERS_IN_GT,DIVERSITY_GINI,SHANNON_ENTROPY,RATIO_DIVERSITY_HERFINDAHL,RATIO_DIVERSITY_GINI,RATIO_SHANNON_ENTROPY,RATIO_AVERAGE_POPULARITY,RATIO_NOVELTY
cutoff,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
10,0.093951,0.163515,0.138367,0.048316,0.083509,0.26681,0.149209,0.111913,0.513521,0.35421,...,0.803455,0.412591,0.803455,0.086039,10.684712,0.998317,0.248477,0.82306,1.578508,0.308876


array([[0.        , 0.        , 0.        , ..., 0.        , 0.        ,
        0.02575491]], dtype=float32)

In [46]:
def write_predictions(recommender_object, at=10,
                      target_users_path='data_target_users_test.csv',
                      output_path='submission.csv',
                      batch_size=1000):
    """Write the top-`at` item ids for every target user to a CSV file.

    The target users are read from the `user_id` column of
    `target_users_path`. For each user the items are ranked by descending
    score and the first `at` item indices are stored, space-separated, in an
    `item_list` column. The first ten rows are printed and the full table is
    written to `output_path` without the index.

    Users are scored in chunks of `batch_size` so that only one
    `batch_size x n_items` score matrix is held in memory at a time instead
    of one matrix covering every target user.

    NOTE(review): ranking relies on the private `_compute_item_score`
    method; presumably it returns raw scores without masking items a user
    has already interacted with — confirm, and consider the recommender's
    public recommendation API if seen items must be excluded.

    Parameters
    ----------
    recommender_object : object
        Fitted recommender exposing `_compute_item_score(user_id_array)`,
        returning one row of item scores per requested user.
    at : int, default 10
        Number of items to keep per user.
    target_users_path : str, default 'data_target_users_test.csv'
        CSV file with a header row and a `user_id` column.
    output_path : str, default 'submission.csv'
        Destination CSV file.
    batch_size : int, default 1000
        Number of users scored per call to the recommender.

    Raises
    ------
    ValueError
        If `batch_size` is smaller than 1.
    """
    if batch_size < 1:
        raise ValueError("batch_size must be a positive integer")

    prediction_df = pd.read_csv(target_users_path, sep=",",
                                header=0,
                                dtype={0: int},
                                engine='python')
    users = np.array(prediction_df['user_id'])

    recommendations = []
    for start in range(0, len(users), batch_size):
        batch_users = users[start:start + batch_size]
        scores = recommender_object._compute_item_score(batch_users)
        # Negate the scores so that argsort yields a descending ranking.
        recommendations.extend(
            ' '.join(map(str, np.argsort(-scores[row])[:at]))
            for row in range(len(batch_users))
        )

    prediction_df['item_list'] = recommendations
    print(prediction_df.head(10))
    prediction_df.to_csv(output_path, index=False)

# Produce the submission file from `recommender` with the default cutoff.
write_predictions(recommender)

   user_id                                         item_list
0        1           36 161 694 592 699 354 618 342 1481 403
1        2             47 1095 17 27 42 28 1227 1522 1372 50
2        3       59 857 3152 648 750 956 2925 1895 2172 1097
3        4                 17 249 27 42 119 28 47 50 252 299
4        5             77 131 170 66 60 1570 3063 5138 270 4
5        6                886 351 9 35 168 692 330 546 14 16
6        8        443 722 769 210 451 1155 480 121 1749 1996
7        9              2821 12741 10108 8133 6415 2 4 1 3 7
8       10  1446 1316 1816 1668 1674 1411 1382 809 2565 2145
9       11                 955 346 627 681 67 41 99 25 31 40
