In [1]:
import pandas as pd
import numpy as np
from tqdm import tqdm

from sklearn.preprocessing import LabelEncoder
from scipy.sparse import csr_matrix
import plotly.express as px
import time
import os
import implicit
from mab2rec import BanditRecommender, LearningPolicy

  from .autonotebook import tqdm as notebook_tqdm


NameError: name 'BaseMAasB' is not defined

In [None]:
FACTORS = 10

In [None]:
def load_data():
    df = pd.read_csv("/workspace/gregorio/reinforcement-learning-recsys/1-datasets/bestbuy/interactions.csv", sep=';')
    df = df.rename(columns={
        'id_user': 'user_id',
        'id_item': 'item_id',
    })
    df['response'] = 1
    df = df.sort_values(by='timestamp')
    df = df[['user_id', 'item_id', 'response']]
    df = df.iloc[:int(len(df) * 0.5)]
    df = df.reset_index(drop=True)
    return df

In [None]:
def get_concat_context(interactions, context_cols):
    # Concat multiple array columns into a single array column
    return np.array(interactions[context_cols].apply(lambda x: np.concatenate((*x, [1])), axis=1).tolist())  # MUDANÇA: adiciona 1 ao final de cada vetor (bias)

In [None]:

def train_mab(mab_algo, df_train_with_contexts, contexts_col):
    contexts = get_concat_context(df_train_with_contexts, contexts_col)
    mab_algo.fit(
        decisions=df_train_with_contexts['item_id'],
        rewards=df_train_with_contexts['response'],
        contexts=contexts
    )

In [None]:
def create_contexts_list_items_mean(interactions_df, items_embeddings):
    users_current_info = {}
    contexts = []

    for _, row in tqdm(interactions_df.iterrows(), total=len(interactions_df)):
        user_id = row["user_id"]
        item_id = row["item_id"]

        if user_id not in users_current_info:
            users_current_info[user_id] = {
                'acum_emb': np.zeros((FACTORS, )),
                'count': 0
            }
        
        contexts.append(users_current_info[user_id]['acum_emb'] / max(1, users_current_info[user_id]['count']))

        users_current_info[user_id]['acum_emb'] += items_embeddings[item_id][:FACTORS]
        users_current_info[user_id]['count'] += 1

    return contexts

In [None]:

def test_non_incremental(mab_algo, contexts_col, df_test, interactions_by_user):
    start_time = time.time()
    hits = 0

    # contexts = df_test.merge(user_features, how='left', on='user_id').drop(columns=['user_id', 'item_id', 'response']).values
    # contexts = np.array(df_test[contexts_col].tolist())
    print('entrou')
    contexts = get_concat_context(df_test, contexts_col)
    filters = df_test.merge(interactions_by_user, how='left', on='user_id')[['interactions']].values.squeeze(axis=1) 
    print('saiu')

    recomendations = mab_algo.recommend(contexts, filters, apply_sigmoid=False)

    df_test = df_test.reset_index(drop=True)

    hits = 0
    for i, interaction in tqdm(df_test.iterrows(), total=len(df_test)):
        if interaction['item_id'] in recomendations[i]:
            hits += 1
    

    recs_df = pd.DataFrame({
        'interaction_number': [i for i in range(len(df_test))],
        'user_id': df_test['user_id'],
        'item_id': df_test['item_id'],
        'recommendations': recomendations
    })

    return hits, hits/len(df_test), time.time() - start_time, recs_df

In [None]:

def train_embeddings_model(Model, df, num_users, num_items, generate_embeddings=False):
    sparse_matrix = csr_matrix((df['response'], (df['user_id'], df['item_id'])), shape=(num_users, num_items))

    model = Model(factors=FACTORS, random_state=1)
    model.fit(sparse_matrix)

    if not generate_embeddings:
        return model, sparse_matrix
    
    # # Não precisamos mais do código abaixo, ele funcina para embeddings de usuário, não de itens
    # user_features_list = []

    # for user_id in df['user_id'].unique():
    #    user_factors = model.user_factors[user_id][:FACTORS]  # O BPR coloca 1 no final dos vetores latentes ?
    #    user_features_list.append([user_id] + list(user_factors))

    # df_user_features = pd.DataFrame(user_features_list, columns=['user_id'] + [f'u{i}' for i in range(FACTORS)])

    # model = model.to_cpu()
    return model, sparse_matrix, model.item_factors, model.user_factors

In [None]:
def group_interactions_by_user(interactions_df):
    interactions_by_user = interactions_df\
                        .groupby('user_id')[['item_id']]\
                        .apply(lambda df_user: df_user['item_id'].tolist())\
                        .reset_index(name='interactions')
    interactions_by_user = interactions_by_user.reset_index(drop=True)
    return interactions_by_user

In [None]:
df_full = load_data()

# Troquei o labelEnconder pois ele não fazia na ordem (item_id primeiro a parecer precisa ser o 0, dps o 1, etc)
df_full['user_id'] = pd.factorize(df_full['user_id'])[0]
df_full['item_id'] = pd.factorize(df_full['item_id'])[0]
df_full

Unnamed: 0,user_id,item_id,response
0,0,0,1
1,1,1,1
2,2,2,1
3,3,3,1
4,4,4,1
...,...,...,...
932629,647092,40832,1
932630,647093,41655,1
932631,647094,790,1
932632,647095,1803,1


In [None]:
num_users = df_full['user_id'].nunique()
num_items = df_full['item_id'].nunique()

split_index = int(len(df_full) * (1 - 0.1))
df_train_full = df_full[:split_index]
df_test = df_full[split_index:]

initial_df_train = df_train_full[:int(len(df_train_full) * 0.5)]
extra_df_train = df_train_full[int(len(df_train_full) * 0.5):]
extra_df_train = extra_df_train[(extra_df_train['user_id'].isin(initial_df_train['user_id'])) & (extra_df_train['item_id'].isin(initial_df_train['item_id']))]
extra_df_train = extra_df_train.reset_index(drop=True)

df_test = df_test[(df_test['user_id'].isin(initial_df_train['user_id'])) & (df_test['item_id'].isin(initial_df_train['item_id']))]
df_test = df_test.reset_index(drop=True)
df_test_for_evaluation = df_test[df_test['response'] == 1]
df_test_for_evaluation = df_test_for_evaluation.reset_index(drop=True)

In [None]:
df_full_new = pd.concat([initial_df_train, extra_df_train, df_test_for_evaluation])
ALS_model, _, ALS_item_embeddings, ALS_user_embeddings = train_embeddings_model(implicit.als.AlternatingLeastSquares, initial_df_train, num_users, num_items, generate_embeddings=True)
als_contexts = create_contexts_list_items_mean(df_full_new, ALS_item_embeddings)

initial_df_train['items_mean'] = als_contexts[:len(initial_df_train)]

extra_df_train['items_mean'] = als_contexts[len(initial_df_train):len(initial_df_train) + len(extra_df_train)]

df_test_for_evaluation['items_mean'] = als_contexts[len(initial_df_train) + len(extra_df_train):]

100%|██████████| 15/15 [00:36<00:00,  2.42s/it]
100%|██████████| 461136/461136 [00:31<00:00, 14769.70it/s]
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  initial_df_train['items_mean'] = als_contexts[:len(initial_df_train)]


In [None]:
current_df_train = initial_df_train
interactions_by_user = group_interactions_by_user(current_df_train)

In [None]:
print(f'Training LinGreedy - ALS embeddings')
linGreedy_model = BanditRecommender(learning_policy=LearningPolicy.LinGreedy(epsilon=0.01), top_k=10)
start_time = time.time()
train_mab(linGreedy_model, current_df_train, ['items_mean'])
print(f'Treino demorou {time.time() - start_time}')

print(f'Testing LinGreedy - ALS embeddings')
hits, hr, spent_time, df_recs_linGreedy = test_non_incremental(linGreedy_model, ['items_mean'], df_test_for_evaluation, interactions_by_user)
hits

Training LinGreedy - ALS embeddings
arm_to_model demorou -0.38626527786254883
reset_arm_to_status demorou -0.01086878776550293
paralel fit demorou -20.893952131271362
_set_arms_as_trained acabou em -8.390342473983765 segundos
Treino demorou 33.76591157913208
Testing LinGreedy - ALS embeddings
entrou
saiu
oi1
Gerando as predições
Gerar as predições demorou 3.563124179840088 segundos
Formatando as predições


100%|██████████| 3799/3799 [00:14<00:00, 255.07it/s]


Formatação demorou 14.898889064788818 segundos
predict_expectations demorou 18.509413957595825 segundos
oi2
transformação das expectativas demorou 38.621105670928955 segundos
oi3
criando matriz de exclusão de arms


100%|██████████| 3799/3799 [00:00<00:00, 238510.35it/s]

fazendo o restante





O restante demorou 8.507043838500977 segundos


100%|██████████| 3799/3799 [00:00<00:00, 15915.19it/s]


114

In [None]:
from mabwiser.linear import _Linear
from mabwiser.utils import Num, _BaseRNG
from typing import List, Optional

class _LinearArmEncoded(_Linear):

    def __init__(self, rng: _BaseRNG, num_arms: int, n_jobs: int, backend: Optional[str],
                 alpha: Num, epsilon: Num, l2_lambda: Num, regression: str, scale: bool):
        super().__init__(rng, np.arange(num_arms).tolist(), n_jobs, backend, alpha, epsilon, l2_lambda, regression, scale)
        self.num_arms = num_arms
    
    def _vectorized_predict_context(self, contexts: np.ndarray, is_predict: bool) -> List:

        arms = np.arange(self.num_arms)

        # Initializing array with expectations for each arm
        num_contexts = contexts.shape[0]
        arm_expectations = np.empty((num_contexts, self.num_arms), dtype=float)

        # With epsilon probability, assign random flag to context
        random_values = self.rng.rand(num_contexts)
        random_mask = np.array(random_values < self.epsilon)
        random_indices = random_mask.nonzero()[0]

        # For random indices, generate random expectations
        arm_expectations[random_indices] = self.rng.rand((random_indices.shape[0], self.num_arms))

        # For non-random indices, get expectations for each arm
        nonrandom_indices = np.where(~random_mask)[0]
        nonrandom_context = contexts[nonrandom_indices]
        print('Gerando as predições')
        start_time = time.time()
        arm_expectations[nonrandom_indices] = np.array([self.arm_to_model[arm].predict(nonrandom_context)
                                                        for arm in arms]).T
        print(f'Gerar as predições demorou {time.time() - start_time} segundos')

        return arm_expectations if len(arm_expectations) > 1 else arm_expectations[0]

In [None]:
from mabwiser.mab import MAB, LearningPolicyType, NeighborhoodPolicyType, NeighborhoodPolicy
from typing import List

from mabwiser._version import __author__, __copyright__, __email__, __version__
from mabwiser.approximate import _LSHNearest
from mabwiser.clusters import _Clusters
from mabwiser.greedy import _EpsilonGreedy
from mabwiser.linear import _Linear
from mabwiser.neighbors import _KNearest, _Radius
from mabwiser.popularity import _Popularity
from mabwiser.rand import _Random
from mabwiser.softmax import _Softmax
from mabwiser.thompson import _ThompsonSampling
from mabwiser.treebandit import _TreeBandit
from mabwiser.ucb import _UCB1
from mabwiser.utils import Arm, Constants, check_true, create_rng

class MABArmEncoded(MAB):
    def __init__(self,
                 num_arms: int,  # The list of arms
                 learning_policy: LearningPolicyType,  # The learning policy
                 neighborhood_policy: NeighborhoodPolicyType = None,  # The context policy, optional
                 seed: int = Constants.default_seed,  # The random seed
                 n_jobs: int = 1,  # Number of parallel jobs
                 backend: str = None  # Parallel backend implementation
                 ):
        """Initializes a multi-armed bandit (MAB) with the given arguments.

        Validates the arguments and raises exception in case there are violations.

        Parameters
        ----------
        arms : List[Union[int, float, str]]
            The list of all the arms available for decisions.
            Arms can be integers, strings, etc.
        learning_policy : LearningPolicyType
            The learning policy.
        neighborhood_policy : NeighborhoodPolicyType, optional
            The context policy. Default value is None.
        seed : numbers.Rational, optional
            The random seed to initialize the random number generator.
            Default value is set to Constants.default_seed.value
        n_jobs: int, optional
            This is used to specify how many concurrent processes/threads should be used for parallelized routines.
            Default value is set to 1.
            If set to -1, all CPUs are used.
            If set to -2, all CPUs but one are used, and so on.
        backend: str, optional
            Specify a parallelization backend implementation supported in the joblib library. Supported options are:
            - “loky” used by default, can induce some communication and memory overhead when exchanging input and
              output data with the worker Python processes.
            - “multiprocessing” previous process-based backend based on multiprocessing.Pool. Less robust than loky.
            - “threading” is a very low-overhead backend but it suffers from the Python Global Interpreter Lock if the
              called function relies a lot on Python objects.
            Default value is None. In this case the default backend selected by joblib will be used.

        Raises
        ------
        TypeError:  Arms were not provided in a list.
        TypeError:  Learning policy type mismatch.
        TypeError:  Context policy type mismatch.
        TypeError:  Seed is not an integer.
        TypeError:  Number of parallel jobs is not an integer.
        TypeError:  Parallel backend is not a string.
        TypeError:  For EpsilonGreedy, epsilon must be integer or float.
        TypeError:  For LinGreedy, epsilon must be an integer or float.
        TypeError:  For LinGreedy, l2_lambda must be an integer or float.
        TypeError:  For LinTS, alpha must be an integer or float.
        TypeError:  For LinTS, l2_lambda must be an integer or float.
        TypeError:  For LinUCB, alpha must be an integer or float.
        TypeError:  For LinUCB, l2_lambda must be an integer or float.
        TypeError:  For Softmax, tau must be an integer or float.
        TypeError:  For ThompsonSampling, binarizer must be a callable function.
        TypeError:  For UCB, alpha must be an integer or float.
        TypeError:  For LSHNearest, n_dimensions must be an integer or float.
        TypeError:  For LSHNearest, n_tables must be an integer or float.
        TypeError:  For LSHNearest, no_nhood_prob_of_arm must be None or List that sums up to 1.0.
        TypeError:  For Clusters, n_clusters must be an integer.
        TypeError:  For Clusters, is_minibatch must be a boolean.
        TypeError:  For Radius, radius must be an integer or float.
        TypeError:  For Radius, no_nhood_prob_of_arm must be None or List that sums up to 1.0.
        TypeError:  For KNearest, k must be an integer or float.

        ValueError: Invalid number of arms.
        ValueError: Invalid values (None, NaN, Inf) in arms.
        ValueError: Duplicate values in arms.
        ValueError: Number of parallel jobs is 0.
        ValueError: For EpsilonGreedy, epsilon must be between 0 and 1.
        ValueError: For LinGreedy, epsilon must be between 0 and 1.
        ValueError: For LinGreedy, l2_lambda cannot be negative.
        ValueError: For LinTS, alpha must be greater than zero.
        ValueError: For LinTS, l2_lambda must be greater than zero.
        ValueError: For LinUCB, alpha cannot be negative.
        ValueError: For LinUCB, l2_lambda cannot be negative.
        ValueError: For Softmax, tau must be greater than zero.
        ValueError: For UCB, alpha must be greater than zero.
        ValueError: For LSHNearest, n_dimensions must be gerater than zero.
        ValueError: For LSHNearest, n_tables must be gerater than zero.
        ValueError: For LSHNearest, if given, no_nhood_prob_of_arm list should sum up to 1.0.
        ValueError: For Clusters, n_clusters cannot be less than 2.
        ValueError: For Radius and KNearest, metric is not supported by scipy.spatial.distance.cdist.
        ValueError: For Radius, radius must be greater than zero.
        ValueError: For Radius, if given, no_nhood_prob_of_arm list should sum up to 1.0.
        ValueError: For KNearest, k must be greater than zero.
        """

        # Validate arguments
        # MAB._validate_mab_args(arms, learning_policy, neighborhood_policy, seed, n_jobs, backend)

        # Save the arguments
        self.arms = np.arange(num_arms)
        self.num_arms = num_arms
        self.seed = seed
        self.n_jobs = n_jobs
        self.backend = backend

        # Create the random number generator
        self._rng = create_rng(self.seed)
        self._is_initial_fit = False

        # Create the learning policy implementor
        lp = None
        if isinstance(learning_policy, LearningPolicy.EpsilonGreedy):
            lp = _EpsilonGreedy(self._rng, self.arms, self.n_jobs, self.backend, learning_policy.epsilon)
        elif isinstance(learning_policy, LearningPolicy.Popularity):
            lp = _Popularity(self._rng, self.arms, self.n_jobs, self.backend)
        elif isinstance(learning_policy, LearningPolicy.Random):
            lp = _Random(self._rng, self.arms, self.n_jobs, self.backend)
        elif isinstance(learning_policy, LearningPolicy.Softmax):
            lp = _Softmax(self._rng, self.arms, self.n_jobs, self.backend, learning_policy.tau)
        elif isinstance(learning_policy, LearningPolicy.ThompsonSampling):
            lp = _ThompsonSampling(self._rng, self.arms, self.n_jobs, self.backend, learning_policy.binarizer)
        elif isinstance(learning_policy, LearningPolicy.UCB1):
            lp = _UCB1(self._rng, self.arms, self.n_jobs, self.backend, learning_policy.alpha)
        elif isinstance(learning_policy, LearningPolicy.LinGreedy):
            lp = _LinearArmEncoded(self._rng, num_arms, self.n_jobs, self.backend, 0, learning_policy.epsilon,
                         learning_policy.l2_lambda, "ridge", learning_policy.scale)
        elif isinstance(learning_policy, LearningPolicy.LinTS):
            lp = _LinearArmEncoded(self._rng, num_arms, self.n_jobs, self.backend, learning_policy.alpha, 0,
                         learning_policy.l2_lambda, "ts", learning_policy.scale)
        elif isinstance(learning_policy, LearningPolicy.LinUCB):
            lp = _LinearArmEncoded(self._rng, num_arms, self.n_jobs, self.backend, learning_policy.alpha, 0,
                         learning_policy.l2_lambda, "ucb", learning_policy.scale)
        else:
            check_true(False, ValueError("Undefined learning policy " + str(learning_policy)))

        # Create the mab implementor
        if neighborhood_policy:
            self.is_contextual = True

            # Do not use parallel fit or predict for Learning Policy when contextual
            lp.n_jobs = 1

            if isinstance(neighborhood_policy, NeighborhoodPolicy.Clusters):
                self._imp = _Clusters(self._rng, self.arms, self.n_jobs, self.backend, lp,
                                      neighborhood_policy.n_clusters, neighborhood_policy.is_minibatch)
            elif isinstance(neighborhood_policy, NeighborhoodPolicy.LSHNearest):
                self._imp = _LSHNearest(self._rng, self.arms, self.n_jobs, self.backend, lp,
                                        neighborhood_policy.n_dimensions, neighborhood_policy.n_tables,
                                        neighborhood_policy.no_nhood_prob_of_arm)
            elif isinstance(neighborhood_policy, NeighborhoodPolicy.KNearest):
                self._imp = _KNearest(self._rng, self.arms, self.n_jobs, self.backend, lp,
                                      neighborhood_policy.k, neighborhood_policy.metric)
            elif isinstance(neighborhood_policy, NeighborhoodPolicy.Radius):
                self._imp = _Radius(self._rng, self.arms, self.n_jobs, self.backend, lp,
                                    neighborhood_policy.radius, neighborhood_policy.metric,
                                    neighborhood_policy.no_nhood_prob_of_arm)
            elif isinstance(neighborhood_policy, NeighborhoodPolicy.TreeBandit):
                self._imp = _TreeBandit(self._rng, self.arms, self.n_jobs, self.backend, lp,
                                        neighborhood_policy.tree_parameters)
            else:
                check_true(False, ValueError("Undefined context policy " + str(neighborhood_policy)))
        else:
            self.is_contextual = isinstance(learning_policy, (LearningPolicy.LinGreedy, LearningPolicy.LinTS,
                                                              LearningPolicy.LinUCB))
            self._imp = lp

In [None]:
mab_arm_encoded = MABArmEncoded(initial_df_train['item_id'].nunique(), LearningPolicy.LinGreedy(epsilon=0.01))
contexts = get_concat_context(initial_df_train, ['items_mean'])
mab_arm_encoded.fit(
    decisions=initial_df_train['item_id'],
    rewards=initial_df_train['response'],
    contexts=contexts
)

arm_to_model demorou -0.3874397277832031
reset_arm_to_status demorou -0.01585555076599121
paralel fit demorou -20.164703130722046
_set_arms_as_trained acabou em -19.889243602752686 segundos


In [None]:
new_expecs = mab_arm_encoded.predict_expectations([contexts[0]])
new_expecs

Gerando as predições
Gerar as predições demorou 0.12679529190063477 segundos


array([0.99635754, 0.99859976, 0.86409422, ..., 0.5       , 0.5       ,
       0.49999844])

In [None]:
old_expecs = linGreedy_model.mab.predict_expectations([contexts[0]])
old_expecs

Gerando as predições
Gerar as predições demorou 0.13171768188476562 segundos
Formatando as predições


100%|██████████| 1/1 [00:00<00:00, 286.99it/s]

Formatação demorou 0.007084369659423828 segundos





{0: 0.9963575424544597,
 1: 0.9985997585810332,
 2: 0.8640942205254151,
 3: 0.799999165890977,
 4: 0.9822321485685649,
 5: 0.5,
 6: 0.9996184040604488,
 7: 0.998860461502671,
 8: 0.9795916261831876,
 9: 0.9898917955916612,
 10: 0.9993227683651622,
 11: 0.9913797366180096,
 12: 0.5,
 13: 0.49999547283463697,
 14: 0.9891206264013634,
 15: 0.499998866739919,
 16: 0.9751891830633864,
 17: 0.9991877428935969,
 18: 0.7999823412917986,
 19: 0.9814812692926982,
 20: 0.9861528263620142,
 21: 0.6666664425251037,
 22: 0.9910382915044242,
 23: 0.9729729242689892,
 24: 0.8337360802917398,
 25: 0.9992895408863376,
 26: 0.9946803367288888,
 27: 0.9609673766575105,
 28: 0.9285714284576941,
 29: 0.9926189596240598,
 30: 0.5,
 31: 0.9950856534883689,
 32: 0.9166656025199647,
 33: 0.499997804507997,
 34: 0.909083384234908,
 35: 0.9375229072465072,
 36: 0.9967911500906883,
 37: 0.9976596026275447,
 38: 0.999618902392124,
 39: 0.8333333333333333,
 40: 0.7499999864352241,
 41: 0.9993090162684857,
 42: 0.799

In [None]:
new_expecs.tolist() == list(old_expecs.values())

True

In [None]:
contexts = get_concat_context(df_test_for_evaluation, ['items_mean'])

In [None]:
new_expecs = mab_arm_encoded.predict_expectations(contexts)

Gerando as predições
Gerar as predições demorou 4.0329015254974365 segundos


In [None]:
old_expecs = linGreedy_model.mab.predict_expectations(contexts)

Gerando as predições
Gerar as predições demorou 2.9999358654022217 segundos
Formatando as predições


100%|██████████| 3799/3799 [00:14<00:00, 266.39it/s]

Formatação demorou 14.264642000198364 segundos





In [None]:
new_expecs.tolist() == [list(expec.values()) for expec in old_expecs]  # Talvez tenha alguma pequena mudança numérica na hora de conversão ?

False

In [None]:
from typing import Dict, List, Tuple, Union
from mabwiser.utils import Arm, Num, _BaseRNG

class BanditRecommenderArmEncoded(BanditRecommender):
    def _init(self, num_arms: int) -> None:
        """Initializes recommender with given list of arms.

        Parameters
        ----------
        arms : List[Union[Arm]]
            The list of all of the arms available for decisions.
            Arms can be integers, strings, etc.

        Returns
        -------
        Returns nothing
        """
        self.mab = MABArmEncoded(num_arms, self.learning_policy, self.neighborhood_policy, self.seed, self.n_jobs, self.backend)
    
    def fit(self, decisions: Union[List[Arm], np.ndarray, pd.Series],
            rewards: Union[List[Num], np.ndarray, pd.Series],
            contexts: Union[None, List[List[Num]], np.ndarray, pd.Series, pd.DataFrame] = None) -> None:
        """Fits the recommender the given *decisions*, their corresponding *rewards* and *contexts*, if any.
        If the recommender arms has not been initialized using the `set_arms`, the recommender arms will be set
        to the list of arms in *decisions*.

        Validates arguments and raises exceptions in case there are violations.

        This function makes the following assumptions:
            - each decision corresponds to an arm of the bandit.
            - there are no ``None``, ``Nan``, or ``Infinity`` values in the contexts.

        Parameters
        ----------
         decisions : Union[List[Arm], np.ndarray, pd.Series]
            The decisions that are made.
         rewards : Union[List[Num], np.ndarray, pd.Series]
            The rewards that are received corresponding to the decisions.
         contexts : Union[None, List[List[Num]], np.ndarray, pd.Series, pd.DataFrame], default=None
            The context under which each decision is made.

        Returns
        -------
        Returns nothing.
        """
        if self.mab is None:
            self._init(np.unique(decisions).shape[0])
        self.mab.fit(decisions, rewards, contexts)
    
    def recommend(self, contexts: Union[None, List[List[Num]], np.ndarray, pd.Series, pd.DataFrame] = None,
                  excluded_arms: List[List[Arm]] = None, return_scores: bool = False, apply_sigmoid: bool = True) \
            -> Union[Union[List[Arm], Tuple[List[Arm], List[Num]],
                     Union[List[List[Arm]], Tuple[List[List[Arm]], List[List[Num]]]]]]:
        self._validate_mab(is_fit=True)
        self._validate_get_rec(contexts, excluded_arms)

        print('oi1')
        start_time = time.time()
        # Get predicted expectations
        num_contexts = len(contexts) if contexts is not None else 1
        if num_contexts == 1:
            expectations = [self.mab.predict_expectations(contexts)]
        else:
            expectations = self.mab.predict_expectations(contexts)
        print(f'predict_expectations demorou {time.time() - start_time} segundos')
        print('oi2')

        # Create an exclusion mask, where exclusion_mask[context_ind][arm_ind] denotes if the arm with the
        # index arm_ind was excluded for context with the index context_ind.
        # The value will be True if it is excluded and those arms will not be returned as part of the results.
        print('criando matriz de exclusão de arms')
        arm_to_index = {arm: arm_ind for arm_ind, arm in enumerate(self.mab.arms)}
        exclude_mask = np.zeros((num_contexts, len(self.mab.arms)), dtype=bool)
        if excluded_arms is not None:
            for context_ind, excluded in tqdm(enumerate(excluded_arms), total=len(excluded_arms)):
                exclude_mask[context_ind][[arm_to_index[arm] for arm in excluded if arm in arm_to_index]] = True

        print('fazendo o restante')
        start_time = time.time()
        # Set excluded item scores to -1, so they automatically get placed lower in best results
        expectations[exclude_mask] = -1.

        # Get best `top_k` results by sorting the expectations
        arm_inds = np.flip(np.argsort(expectations)[:, -self.top_k:], axis=1)

        # Get the list of top_k recommended items and corresponding expectations for each context
        recommendations = [[]] * num_contexts
        scores = [[]] * num_contexts
        for context_ind in range(num_contexts):
            recommendations[context_ind] = [self.mab.arms[arm_ind] for arm_ind in arm_inds[context_ind]
                                            if not exclude_mask[context_ind, arm_ind]]
            if return_scores:
                scores[context_ind] = [expectations[context_ind, arm_ind] for arm_ind in arm_inds[context_ind]
                                       if not exclude_mask[context_ind, arm_ind]]

        print(f'O restante demorou {time.time() - start_time} segundos')
        # Return recommendations and scores
        if return_scores:
            if num_contexts > 1:
                return recommendations, scores
            else:
                return recommendations[0], scores[0]
        else:
            if num_contexts > 1:
                return recommendations
            else:
                return recommendations[0]

In [None]:
print(f'Training LinGreedy - ALS embeddings')
linGreedy_model = BanditRecommenderArmEncoded(learning_policy=LearningPolicy.LinGreedy(epsilon=0.01), top_k=10)
start_time = time.time()
train_mab(linGreedy_model, current_df_train, ['items_mean'])
print(f'Treino demorou {time.time() - start_time}')

print(f'Testing LinGreedy - ALS embeddings')
hits, hr, spent_time, df_recs_linGreedy = test_non_incremental(linGreedy_model, ['items_mean'], df_test_for_evaluation, interactions_by_user)
hits

Training LinGreedy - ALS embeddings
arm_to_model demorou -0.3791170120239258
reset_arm_to_status demorou -0.01372838020324707
paralel fit demorou -21.17717933654785


_set_arms_as_trained acabou em -19.91711449623108 segundos
Treino demorou 45.66488790512085
Testing LinGreedy - ALS embeddings
entrou
saiu
oi1
Gerando as predições
Gerar as predições demorou 3.181675910949707 segundos
predict_expectations demorou 3.208477020263672 segundos
oi2
criando matriz de exclusão de arms


100%|██████████| 3799/3799 [00:00<00:00, 283329.38it/s]

fazendo o restante





O restante demorou 8.084055185317993 segundos


100%|██████████| 3799/3799 [00:00<00:00, 16001.21it/s]


114