In [339]:
# Copy everything under the RecSys_Course_AT_PoliMi-master input directory into the working directory.
# NOTE(review): presumably this is what makes the Data_manager / Evaluation / Recommenders imports below resolvable - confirm.
!cp -r ../input/recsys-repo/RecSys_Course_AT_PoliMi-master/* ./

In [340]:
%config Completer.use_jedi = False
import pandas as pd
import numpy as np
import scipy.sparse as sps
import matplotlib.pyplot as pyplot
from Data_manager.split_functions.split_train_validation_random_holdout import split_train_in_two_percentage_global_sample
from bayes_opt import BayesianOptimization


In [341]:
def get_URM():
    """Read URM_True_Binary.csv from the Kaggle input directory into a DataFrame."""
    urm_frame = pd.read_csv('/kaggle/input/urm-true-binary/URM_True_Binary.csv')
    return urm_frame


# Loaded once, when this cell is executed.
URM_all_dataframe = get_URM()

In [342]:
# Item-content data: one row per (item_id, feature_id, data) triple.
ICM_type_all = pd.read_csv("/kaggle/input/competition-data/data_ICM_type.csv")
items = ICM_type_all["item_id"]
features = ICM_type_all["feature_id"]
data = ICM_type_all["data"]

# Rows are indexed by item_id and columns by feature_id. No shape is passed,
# so scipy derives it from the largest row / column index present.
ICM_type = sps.csr_matrix((data, (items, features))).astype(np.int32)

n_itemsFromICM = ICM_type.shape[0]
# Hard-coded user count; it equals the row count URM_train reports further down.
n_users = 41629

In [343]:
ICM_type

<27968x8 sparse matrix of type '<class 'numpy.int32'>'
	with 23091 stored elements in Compressed Sparse Row format>

In [344]:
# Pre-computed train / hold-out interaction tables. The next cell reads their
# "UserID", "ItemID" and "Data" columns to build the sparse URMs.
train_df = pd.read_csv('/kaggle/input/urm-split/Train_df.csv')
test_df = pd.read_csv('/kaggle/input/urm-split/Test_df.csv')

In [345]:
# Build both interaction matrices with one explicit, shared shape.
# Without `shape=`, scipy infers the dimensions from the largest user / item
# index found in each split separately, so the two matrices would silently
# differ in shape whenever the last user or item appears in only one split.
URM_shape = (
    int(max(train_df["UserID"].max(), test_df["UserID"].max())) + 1,
    int(max(train_df["ItemID"].max(), test_df["ItemID"].max())) + 1,
)

URM_train = sps.coo_matrix((train_df["Data"].values,
                            (train_df["UserID"].values, train_df["ItemID"].values)),
                           shape=URM_shape)
URM_train = URM_train.tocsr()  # CSR gives fast access to rows (users)

URM_valid = sps.coo_matrix((test_df["Data"].values,
                            (test_df["UserID"].values, test_df["ItemID"].values)),
                           shape=URM_shape)
URM_valid = URM_valid.tocsr()  # CSR gives fast access to rows (users)

In [346]:
URM_train

<41629x24507 sparse matrix of type '<class 'numpy.int64'>'
	with 1243712 stored elements in Compressed Sparse Row format>

In [347]:
URM_valid

<41629x24507 sparse matrix of type '<class 'numpy.int64'>'
	with 310928 stored elements in Compressed Sparse Row format>

In [348]:
from Evaluation.Evaluator import EvaluatorHoldout

# Hold-out evaluator built on URM_valid; only cutoff 10 is requested, which is
# the cutoff BO_func later reads from the results.
evaluator_valid = EvaluatorHoldout(URM_valid, cutoff_list=[10])

EvaluatorHoldout: Ignoring 323 ( 0.8%) Users that have less than 1 test interactions


In [349]:
import implicit
from Recommenders.BaseMatrixFactorizationRecommender import BaseMatrixFactorizationRecommender
import scipy.sparse as sps
class ImplicitALSRecommender(BaseMatrixFactorizationRecommender):
    """Matrix-factorization recommender backed by ``implicit.als.AlternatingLeastSquares``.

    After ``fit`` the learned factors are exposed as ``USER_factors`` and
    ``ITEM_factors``, and the underlying implicit model as ``rec``.
    """

    RECOMMENDER_NAME = "ImplicitALSRecommender"

    def fit(self,
            factors=100,
            regularization=0.01,
            use_native=True, use_cg=True, use_gpu=False,
            iterations=15,
            calculate_training_loss=False, num_threads=0,
            confidence_scaling=None,
            icm_coeff = 1,
            **confidence_args
            ):
        """Train the ALS model on a confidence matrix derived from ``self.URM_train``.

        Parameters
        ----------
        factors, regularization, use_native, use_cg, use_gpu, iterations,
        calculate_training_loss, num_threads
            Forwarded unchanged to ``implicit.als.AlternatingLeastSquares``.
        confidence_scaling : callable
            Required, despite the ``None`` default kept for signature
            compatibility. It is called as
            ``confidence_scaling(self.URM_train, **confidence_args)`` and its
            return value is the matrix the ALS model is fitted on.
        icm_coeff
            Accepted but not used by this method.
        **confidence_args
            Extra keyword arguments handed to ``confidence_scaling``.

        Raises
        ------
        TypeError
            If ``confidence_scaling`` is not callable (including the default
            ``None``).
        """
        # Fail before building the model, with a message that names the actual
        # problem; previously the None default surfaced as an opaque
        # "'NoneType' object is not callable" after the model was constructed.
        if not callable(confidence_scaling):
            raise TypeError(
                "confidence_scaling must be a callable taking (URM_train, **confidence_args) "
                "and returning the confidence matrix, got {!r}".format(confidence_scaling)
            )

        # The random state is fixed so repeated fits with equal arguments are comparable.
        self.rec = implicit.als.AlternatingLeastSquares(factors=factors, regularization=regularization,
                                                        use_native=use_native, use_cg=use_cg, use_gpu=use_gpu,
                                                        iterations=iterations,
                                                        calculate_training_loss=calculate_training_loss,
                                                        num_threads=num_threads,
                                                        random_state=5)
        self.rec.fit(confidence_scaling(self.URM_train, **confidence_args), show_progress=self.verbose)

        self.USER_factors = self.rec.user_factors
        self.ITEM_factors = self.rec.item_factors

In [350]:
# Search space handed to BayesianOptimization as `pbounds`:
# parameter name -> (lower bound, upper bound). The keys match the parameter
# names of BO_func.
tuning_params = dict(
    alpha=(3, 40),
    factors=(40, 250),
    epochs=(50, 200),
    regularization=(0.0001, 0.01),
    icm_coeff=(0.1, 1.9),
)

In [351]:
from Recommenders.BaseMatrixFactorizationRecommender import BaseMatrixFactorizationRecommender
from Recommenders.Incremental_Training_Early_Stopping import Incremental_Training_Early_Stopping
from Recommenders.Recommender_utils import check_matrix

def linear_scaling_confidence(URM_train, alpha):
    """Return a linearly scaled confidence matrix built from ``URM_train``.

    The transpose of ``URM_train`` is passed through ``check_matrix`` (CSR
    format and float32 dtype requested), then every stored value ``v`` is
    replaced by ``1 + alpha * v``.
    """
    confidence = check_matrix(URM_train.T, format="csr", dtype=np.float32)
    # Only the explicitly stored entries are rescaled.
    confidence.data = alpha * confidence.data + 1.0
    return confidence

In [352]:
def BO_func( factors,
             epochs,
             alpha,
             regularization,
             icm_coeff
             ):
    """Objective for the Bayesian optimiser: train one ALS model and score it.

    ``factors`` and ``epochs`` are truncated to integers before use.
    ``icm_coeff`` is accepted but not forwarded to the recommender. Returns
    the "MAP" entry at cutoff 10 from ``evaluator_valid``.
    """
    als_model = ImplicitALSRecommender(URM_train)
    als_model.fit(
        factors=int(factors),
        regularization=regularization,
        use_gpu=True,
        iterations=int(epochs),
        num_threads=2,
        confidence_scaling=linear_scaling_confidence,
        alpha=alpha,  # ends up in fit's **confidence_args
    )

    metrics, _ = evaluator_valid.evaluateRecommender(als_model)
    map_at_10 = metrics["MAP"][10]
    return map_at_10

In [353]:
# Optimiser over the tuning_params search space with BO_func as the objective;
# the later optimizer.maximize call drives it.
optimizer = BayesianOptimization(
    f=BO_func,
    pbounds=tuning_params,
    verbose=5,
    random_state=5,  # fixed seed for the optimiser
)

In [354]:
URM_train

<41629x24507 sparse matrix of type '<class 'numpy.int64'>'
	with 1243712 stored elements in Compressed Sparse Row format>

In [355]:
URM_valid

<41629x24507 sparse matrix of type '<class 'numpy.int64'>'
	with 310928 stored elements in Compressed Sparse Row format>

In [None]:
from bayes_opt.logger import JSONLogger
from bayes_opt.event import Events

# Run the search with init_points=50 and n_iter=200. Every evaluation calls
# BO_func, i.e. trains and evaluates one ALS model, so this cell is long-running.
optimizer.maximize(
    init_points=50,
    n_iter=200
)

|   iter    |  target   |   alpha   |  epochs   |  factors  | icm_coeff | regula... |
-------------------------------------------------------------------------------------


  0%|          | 0/180 [00:00<?, ?it/s]

EvaluatorHoldout: Processed 41306 (100.0%) in 34.63 sec. Users per second: 1193
| [0m1        [0m | [0m0.02559  [0m | [0m11.21    [0m | [0m180.6    [0m | [0m83.41    [0m | [0m1.753    [0m | [0m0.004935 [0m |


  0%|          | 0/164 [00:00<?, ?it/s]

EvaluatorHoldout: Processed 41306 (100.0%) in 35.50 sec. Users per second: 1163
| [0m2        [0m | [0m0.02271  [0m | [0m25.63    [0m | [0m164.9    [0m | [0m148.9    [0m | [0m0.6342   [0m | [0m0.001958 [0m |


  0%|          | 0/160 [00:00<?, ?it/s]

EvaluatorHoldout: Processed 41306 (100.0%) in 35.81 sec. Users per second: 1153
| [95m3        [0m | [95m0.02577  [0m | [95m5.987    [0m | [95m160.8    [0m | [95m132.7    [0m | [95m0.385    [0m | [95m0.008811 [0m |


  0%|          | 0/112 [00:00<?, ?it/s]

EvaluatorHoldout: Processed 41306 (100.0%) in 35.30 sec. Users per second: 1170
| [0m4        [0m | [0m0.02511  [0m | [0m13.14    [0m | [0m112.1    [0m | [0m102.2    [0m | [0m1.232    [0m | [0m0.00584  [0m |


  0%|          | 0/89 [00:00<?, ?it/s]