In [1]:
import pandas as pd
import numpy as np
from tqdm import tqdm

In [2]:
def load_data():
    df = pd.read_csv("/workspace/gregorio/reinforcement-learning-recsys/1-datasets/bestbuy/interactions.csv", sep=';')
    df = df.rename(columns={
        'id_user': 'user_id',
        'id_item': 'item_id',
    })
    df['response'] = 1
    df = df.sort_values(by='timestamp')
    df = df[['user_id', 'item_id', 'response']]
    df = df.iloc[:int(len(df) * 0.5)]
    df = df.reset_index(drop=True)
    return df

In [3]:
load_data()

Unnamed: 0,user_id,item_id,response
0,496cde27a7d6a3d4989c8a7143f7a7573dcad18e,1658122,1
1,9efe144125a01b1ed7301e9cba939a3f3f33ef13,2969477,1
2,c83a96b80b0fb276a93ec8b5c3cc9df57f53914d,999996500050001,1
3,3ba3fb6612eb4f198673be714dde48d14d2f9d3c,9999161700050000,1
4,2059e46ae923f4227600e8edb620ae898bd30d7d,1283795,1
...,...,...,...
932629,9ced14c84a3acea31f672863b992db9d318c0b8f,3247045,1
932630,7e0ab19bfacd80395955d81dd079fc185436802b,3340175,1
932631,8819d0cd0ba5e78674143ad1bd2ec814f6ff4de8,1708812,1
932632,ac10df6cb48036e5cdd73e9e56e9d954680e7a74,2052149,1


## Testando os modelos MAB usando concatenações de diferentes formas de fazer o contexto

In [4]:
from sklearn.preprocessing import LabelEncoder
from scipy.sparse import csr_matrix
import plotly.express as px
import time
import os
import implicit
from mab2rec import BanditRecommender, LearningPolicy

train_data = "../data/ml100k/data_train.csv"
test_data = "../data/ml100k/data_test.csv"

  from .autonotebook import tqdm as notebook_tqdm


In [5]:
FACTORS = 10

In [6]:

def train_embeddings_model(Model, df, num_users, num_items, generate_embeddings=False):
    sparse_matrix = csr_matrix((df['response'], (df['user_id'], df['item_id'])), shape=(num_users, num_items))

    model = Model(factors=FACTORS, random_state=1, num_threads=1)
    model.fit(sparse_matrix)

    if not generate_embeddings:
        return model, sparse_matrix
    
    # # Não precisamos mais do código abaixo, ele funcina para embeddings de usuário, não de itens
    # user_features_list = []

    # for user_id in df['user_id'].unique():
    #    user_factors = model.user_factors[user_id][:FACTORS]  # O BPR coloca 1 no final dos vetores latentes ?
    #    user_features_list.append([user_id] + list(user_factors))

    # df_user_features = pd.DataFrame(user_features_list, columns=['user_id'] + [f'u{i}' for i in range(FACTORS)])

    # model = model.to_cpu()
    return model, sparse_matrix, model.item_factors, model.user_factors

In [7]:

def test_embeddings_model(model, sparse_matrix, df_test):
    all_recs = []

    start_time = time.time()
    hits = 0
    for _, interaction in tqdm(df_test.iterrows(), total=len(df_test)):
        ids_recs, _ = model.recommend(userid=interaction['user_id'], user_items=sparse_matrix[interaction['user_id']], N=10)
        if interaction['item_id'] in ids_recs:
            hits += 1
        all_recs.append(ids_recs.tolist())
    
    recs_df = pd.DataFrame({
        'interaction_number': [i for i in range(len(df_test))],
        'user_id': df_test['user_id'],
        'item_id': df_test['item_id'],
        'recommendations': all_recs
    })
    
    return hits, hits/len(df_test), time.time() - start_time, recs_df

In [8]:

def train_mab(mab_algo, df_train_with_contexts, contexts_col):
    contexts = get_concat_context(df_train_with_contexts, contexts_col)
    mab_algo.fit(
        decisions=df_train_with_contexts['item_id'],
        rewards=df_train_with_contexts['response'],
        contexts=contexts
    )

In [9]:

def test_non_incremental(mab_algo, contexts_col, df_test, interactions_by_user):
    start_time = time.time()
    hits = 0

    # contexts = df_test.merge(user_features, how='left', on='user_id').drop(columns=['user_id', 'item_id', 'response']).values
    # contexts = np.array(df_test[contexts_col].tolist())
    print('entrou')
    contexts = get_concat_context(df_test, contexts_col)
    filters = df_test.merge(interactions_by_user, how='left', on='user_id')[['interactions']].values.squeeze(axis=1) 
    print('saiu')

    recomendations = mab_algo.recommend(contexts, filters, apply_sigmoid=False)

    df_test = df_test.reset_index(drop=True)

    hits = 0
    for i, interaction in tqdm(df_test.iterrows(), total=len(df_test)):
        if interaction['item_id'] in recomendations[i]:
            hits += 1
    

    recs_df = pd.DataFrame({
        'interaction_number': [i for i in range(len(df_test))],
        'user_id': df_test['user_id'],
        'item_id': df_test['item_id'],
        'recommendations': recomendations
    })

    return hits, hits/len(df_test), time.time() - start_time, recs_df

In [10]:
def group_interactions_by_user(interactions_df):
    interactions_by_user = interactions_df\
                        .groupby('user_id')[['item_id']]\
                        .apply(lambda df_user: df_user['item_id'].tolist())\
                        .reset_index(name='interactions')
    interactions_by_user = interactions_by_user.reset_index(drop=True)
    return interactions_by_user

In [11]:
def create_contexts_list_items_mean(interactions_df, items_embeddings):
    users_current_info = {}
    contexts = []

    for _, row in tqdm(interactions_df.iterrows(), total=len(interactions_df)):
        user_id = row["user_id"]
        item_id = row["item_id"]

        if user_id not in users_current_info:
            users_current_info[user_id] = {
                'acum_emb': np.zeros((items_embeddings.shape[1], )),
                'count': 0
            }
        
        contexts.append(users_current_info[user_id]['acum_emb'] / max(1, users_current_info[user_id]['count']))

        users_current_info[user_id]['acum_emb'] += items_embeddings[item_id][:items_embeddings.shape[1]]
        users_current_info[user_id]['count'] += 1

    return contexts

In [12]:
def create_contexts_list_items_concat(interactions_df, items_embeddings, window_size):
    users_current_info = {}
    contexts = []

    for _, row in interactions_df.iterrows():
        user_id = row["user_id"]
        item_id = row["item_id"]

        if user_id not in users_current_info:
            users_current_info[user_id] = np.zeros((window_size, items_embeddings.shape[1]))
        
        contexts.append(users_current_info[user_id].flatten())
        
        users_current_info[user_id][1:] = users_current_info[user_id][:-1]
        users_current_info[user_id][0] = items_embeddings[item_id][:items_embeddings.shape[1]]

    return contexts

In [13]:
def create_contexts_list_user(interactions_df, users_embeddings):
    contexts = []

    for _, row in interactions_df.iterrows():
        user_id = row["user_id"]
        contexts.append(users_embeddings[user_id][:users_embeddings.shape[1]])

    return contexts

In [14]:
def get_concat_context(interactions, context_cols):
    # Concat multiple array columns into a single array column
    return np.array(interactions[context_cols].apply(lambda x: np.concatenate((*x, [1])), axis=1).tolist())  # MUDANÇA: adiciona 1 ao final de cada vetor (bias)

In [15]:
from mabwiser.linear import _Linear
from mabwiser.utils import Num, _BaseRNG
from typing import List, Optional

class _LinearArmEncoded(_Linear):

    def __init__(self, rng: _BaseRNG, num_arms: int, n_jobs: int, backend: Optional[str],
                 alpha: Num, epsilon: Num, l2_lambda: Num, regression: str, scale: bool):
        super().__init__(rng, np.arange(num_arms).tolist(), n_jobs, backend, alpha, epsilon, l2_lambda, regression, scale)
        self.num_arms = num_arms
    
    def _vectorized_predict_context(self, contexts: np.ndarray, is_predict: bool) -> List:

        arms = np.arange(self.num_arms)

        # Initializing array with expectations for each arm
        num_contexts = contexts.shape[0]
        arm_expectations = np.empty((num_contexts, self.num_arms), dtype=float)

        # With epsilon probability, assign random flag to context
        random_values = self.rng.rand(num_contexts)
        print('random values')
        print(random_values)
        random_mask = np.array(random_values < self.epsilon)
        random_indices = random_mask.nonzero()[0]

        # For random indices, generate random expectations
        arm_expectations[random_indices] = self.rng.rand((random_indices.shape[0], self.num_arms))

        # For non-random indices, get expectations for each arm
        nonrandom_indices = np.where(~random_mask)[0]
        nonrandom_context = contexts[nonrandom_indices]
        print('Gerando as predições')
        start_time = time.time()
        arm_expectations[nonrandom_indices] = np.array([self.arm_to_model[arm].predict(nonrandom_context)
                                                        for arm in arms]).T
        print(f'Gerar as predições demorou {time.time() - start_time} segundos')

        return arm_expectations if len(arm_expectations) > 1 else arm_expectations[0]
    
    def fit(self, decisions: np.ndarray, rewards: np.ndarray, contexts: np.ndarray = None) -> None:

        start_time = time.time()
        # Initialize each model by arm
        self.num_features = contexts.shape[1]
        for arm in self.arms:
            self.arm_to_model[arm].init(num_features=self.num_features)
        print(f'arm_to_model demorou {start_time - time.time()}')


        start_time = time.time()
        # Reset warm started arms
        # self._reset_arm_to_status()
        print(f'reset_arm_to_status demorou {start_time - time.time()}')

        start_time = time.time()
        # Perform parallel fit
        self._parallel_fit(decisions, rewards, contexts)
        print(f'paralel fit demorou {start_time - time.time()}')

        # Update trained arms
        start_time = time.time()
        # Removi o código abaixo pois parece que ele não é usado para o nosso caso...
        # Ele parece ser usado apenas no contexto de tentar fazer "warm" start
        # Basicamente, copiando os mesmos parâmetros de um arm já treinado para um novo (cold) por proximidade de features...

        # Otimizar essa função não parece ser algo tão trivial, já que teria que mudar a estrutura do arm_to_status, tendo que mudar vários outros códigos por causa disso
        # self._set_arms_as_trained(decisions=decisions, is_partial=False)
        print(f'_set_arms_as_trained acabou em {start_time - time.time()} segundos')
    
    def partial_fit(self, decisions: np.ndarray, rewards: np.ndarray, contexts: np.ndarray = None) -> None:
        # Perform parallel fit
        self._parallel_fit(decisions, rewards, contexts)

        # Update trained arms
        # self._set_arms_as_trained(decisions=decisions, is_partial=True)

In [16]:
from mabwiser.mab import MAB, LearningPolicyType, NeighborhoodPolicyType, NeighborhoodPolicy
from typing import List

from mabwiser._version import __author__, __copyright__, __email__, __version__
from mabwiser.approximate import _LSHNearest
from mabwiser.clusters import _Clusters
from mabwiser.greedy import _EpsilonGreedy
from mabwiser.linear import _Linear
from mabwiser.neighbors import _KNearest, _Radius
from mabwiser.popularity import _Popularity
from mabwiser.rand import _Random
from mabwiser.softmax import _Softmax
from mabwiser.thompson import _ThompsonSampling
from mabwiser.treebandit import _TreeBandit
from mabwiser.ucb import _UCB1
from mabwiser.utils import Arm, Constants, check_true, create_rng

class MABArmEncoded(MAB):
    def __init__(self,
                 num_arms: int,  # The list of arms
                 learning_policy: LearningPolicyType,  # The learning policy
                 neighborhood_policy: NeighborhoodPolicyType = None,  # The context policy, optional
                 seed: int = Constants.default_seed,  # The random seed
                 n_jobs: int = 1,  # Number of parallel jobs
                 backend: str = None  # Parallel backend implementation
                 ):
        """Initializes a multi-armed bandit (MAB) with the given arguments.

        Validates the arguments and raises exception in case there are violations.

        Parameters
        ----------
        arms : List[Union[int, float, str]]
            The list of all the arms available for decisions.
            Arms can be integers, strings, etc.
        learning_policy : LearningPolicyType
            The learning policy.
        neighborhood_policy : NeighborhoodPolicyType, optional
            The context policy. Default value is None.
        seed : numbers.Rational, optional
            The random seed to initialize the random number generator.
            Default value is set to Constants.default_seed.value
        n_jobs: int, optional
            This is used to specify how many concurrent processes/threads should be used for parallelized routines.
            Default value is set to 1.
            If set to -1, all CPUs are used.
            If set to -2, all CPUs but one are used, and so on.
        backend: str, optional
            Specify a parallelization backend implementation supported in the joblib library. Supported options are:
            - “loky” used by default, can induce some communication and memory overhead when exchanging input and
              output data with the worker Python processes.
            - “multiprocessing” previous process-based backend based on multiprocessing.Pool. Less robust than loky.
            - “threading” is a very low-overhead backend but it suffers from the Python Global Interpreter Lock if the
              called function relies a lot on Python objects.
            Default value is None. In this case the default backend selected by joblib will be used.

        Raises
        ------
        TypeError:  Arms were not provided in a list.
        TypeError:  Learning policy type mismatch.
        TypeError:  Context policy type mismatch.
        TypeError:  Seed is not an integer.
        TypeError:  Number of parallel jobs is not an integer.
        TypeError:  Parallel backend is not a string.
        TypeError:  For EpsilonGreedy, epsilon must be integer or float.
        TypeError:  For LinGreedy, epsilon must be an integer or float.
        TypeError:  For LinGreedy, l2_lambda must be an integer or float.
        TypeError:  For LinTS, alpha must be an integer or float.
        TypeError:  For LinTS, l2_lambda must be an integer or float.
        TypeError:  For LinUCB, alpha must be an integer or float.
        TypeError:  For LinUCB, l2_lambda must be an integer or float.
        TypeError:  For Softmax, tau must be an integer or float.
        TypeError:  For ThompsonSampling, binarizer must be a callable function.
        TypeError:  For UCB, alpha must be an integer or float.
        TypeError:  For LSHNearest, n_dimensions must be an integer or float.
        TypeError:  For LSHNearest, n_tables must be an integer or float.
        TypeError:  For LSHNearest, no_nhood_prob_of_arm must be None or List that sums up to 1.0.
        TypeError:  For Clusters, n_clusters must be an integer.
        TypeError:  For Clusters, is_minibatch must be a boolean.
        TypeError:  For Radius, radius must be an integer or float.
        TypeError:  For Radius, no_nhood_prob_of_arm must be None or List that sums up to 1.0.
        TypeError:  For KNearest, k must be an integer or float.

        ValueError: Invalid number of arms.
        ValueError: Invalid values (None, NaN, Inf) in arms.
        ValueError: Duplicate values in arms.
        ValueError: Number of parallel jobs is 0.
        ValueError: For EpsilonGreedy, epsilon must be between 0 and 1.
        ValueError: For LinGreedy, epsilon must be between 0 and 1.
        ValueError: For LinGreedy, l2_lambda cannot be negative.
        ValueError: For LinTS, alpha must be greater than zero.
        ValueError: For LinTS, l2_lambda must be greater than zero.
        ValueError: For LinUCB, alpha cannot be negative.
        ValueError: For LinUCB, l2_lambda cannot be negative.
        ValueError: For Softmax, tau must be greater than zero.
        ValueError: For UCB, alpha must be greater than zero.
        ValueError: For LSHNearest, n_dimensions must be gerater than zero.
        ValueError: For LSHNearest, n_tables must be gerater than zero.
        ValueError: For LSHNearest, if given, no_nhood_prob_of_arm list should sum up to 1.0.
        ValueError: For Clusters, n_clusters cannot be less than 2.
        ValueError: For Radius and KNearest, metric is not supported by scipy.spatial.distance.cdist.
        ValueError: For Radius, radius must be greater than zero.
        ValueError: For Radius, if given, no_nhood_prob_of_arm list should sum up to 1.0.
        ValueError: For KNearest, k must be greater than zero.
        """

        # Validate arguments
        # MAB._validate_mab_args(arms, learning_policy, neighborhood_policy, seed, n_jobs, backend)

        # Save the arguments
        self.arms = np.arange(num_arms)
        self.num_arms = num_arms
        self.seed = seed
        self.n_jobs = n_jobs
        self.backend = backend

        # Create the random number generator
        self._rng = create_rng(self.seed)
        self._is_initial_fit = False

        # Create the learning policy implementor
        lp = None
        if isinstance(learning_policy, LearningPolicy.EpsilonGreedy):
            lp = _EpsilonGreedy(self._rng, self.arms, self.n_jobs, self.backend, learning_policy.epsilon)
        elif isinstance(learning_policy, LearningPolicy.Popularity):
            lp = _Popularity(self._rng, self.arms, self.n_jobs, self.backend)
        elif isinstance(learning_policy, LearningPolicy.Random):
            lp = _Random(self._rng, self.arms, self.n_jobs, self.backend)
        elif isinstance(learning_policy, LearningPolicy.Softmax):
            lp = _Softmax(self._rng, self.arms, self.n_jobs, self.backend, learning_policy.tau)
        elif isinstance(learning_policy, LearningPolicy.ThompsonSampling):
            lp = _ThompsonSampling(self._rng, self.arms, self.n_jobs, self.backend, learning_policy.binarizer)
        elif isinstance(learning_policy, LearningPolicy.UCB1):
            lp = _UCB1(self._rng, self.arms, self.n_jobs, self.backend, learning_policy.alpha)
        elif isinstance(learning_policy, LearningPolicy.LinGreedy):
            lp = _LinearArmEncoded(self._rng, num_arms, self.n_jobs, self.backend, 0, learning_policy.epsilon,
                         learning_policy.l2_lambda, "ridge", learning_policy.scale)
        elif isinstance(learning_policy, LearningPolicy.LinTS):
            lp = _LinearArmEncoded(self._rng, num_arms, self.n_jobs, self.backend, learning_policy.alpha, 0,
                         learning_policy.l2_lambda, "ts", learning_policy.scale)
        elif isinstance(learning_policy, LearningPolicy.LinUCB):
            lp = _LinearArmEncoded(self._rng, num_arms, self.n_jobs, self.backend, learning_policy.alpha, 0,
                         learning_policy.l2_lambda, "ucb", learning_policy.scale)
        else:
            check_true(False, ValueError("Undefined learning policy " + str(learning_policy)))

        # Create the mab implementor
        if neighborhood_policy:
            self.is_contextual = True

            # Do not use parallel fit or predict for Learning Policy when contextual
            lp.n_jobs = 1

            if isinstance(neighborhood_policy, NeighborhoodPolicy.Clusters):
                self._imp = _Clusters(self._rng, self.arms, self.n_jobs, self.backend, lp,
                                      neighborhood_policy.n_clusters, neighborhood_policy.is_minibatch)
            elif isinstance(neighborhood_policy, NeighborhoodPolicy.LSHNearest):
                self._imp = _LSHNearest(self._rng, self.arms, self.n_jobs, self.backend, lp,
                                        neighborhood_policy.n_dimensions, neighborhood_policy.n_tables,
                                        neighborhood_policy.no_nhood_prob_of_arm)
            elif isinstance(neighborhood_policy, NeighborhoodPolicy.KNearest):
                self._imp = _KNearest(self._rng, self.arms, self.n_jobs, self.backend, lp,
                                      neighborhood_policy.k, neighborhood_policy.metric)
            elif isinstance(neighborhood_policy, NeighborhoodPolicy.Radius):
                self._imp = _Radius(self._rng, self.arms, self.n_jobs, self.backend, lp,
                                    neighborhood_policy.radius, neighborhood_policy.metric,
                                    neighborhood_policy.no_nhood_prob_of_arm)
            elif isinstance(neighborhood_policy, NeighborhoodPolicy.TreeBandit):
                self._imp = _TreeBandit(self._rng, self.arms, self.n_jobs, self.backend, lp,
                                        neighborhood_policy.tree_parameters)
            else:
                check_true(False, ValueError("Undefined context policy " + str(neighborhood_policy)))
        else:
            self.is_contextual = isinstance(learning_policy, (LearningPolicy.LinGreedy, LearningPolicy.LinTS,
                                                              LearningPolicy.LinUCB))
            self._imp = lp

In [17]:
from typing import Dict, List, Tuple, Union
from mabwiser.utils import Arm, Num, _BaseRNG
from scipy.special import expit

class BanditRecommenderArmEncoded(BanditRecommender):
    def _init(self, num_arms: int) -> None:
        """Initializes recommender with given list of arms.

        Parameters
        ----------
        arms : List[Union[Arm]]
            The list of all of the arms available for decisions.
            Arms can be integers, strings, etc.

        Returns
        -------
        Returns nothing
        """
        self.mab = MABArmEncoded(num_arms, self.learning_policy, self.neighborhood_policy, self.seed, self.n_jobs, self.backend)
    
    def fit(self, decisions: Union[List[Arm], np.ndarray, pd.Series],
            rewards: Union[List[Num], np.ndarray, pd.Series],
            contexts: Union[None, List[List[Num]], np.ndarray, pd.Series, pd.DataFrame] = None) -> None:
        """Fits the recommender the given *decisions*, their corresponding *rewards* and *contexts*, if any.
        If the recommender arms has not been initialized using the `set_arms`, the recommender arms will be set
        to the list of arms in *decisions*.

        Validates arguments and raises exceptions in case there are violations.

        This function makes the following assumptions:
            - each decision corresponds to an arm of the bandit.
            - there are no ``None``, ``Nan``, or ``Infinity`` values in the contexts.

        Parameters
        ----------
         decisions : Union[List[Arm], np.ndarray, pd.Series]
            The decisions that are made.
         rewards : Union[List[Num], np.ndarray, pd.Series]
            The rewards that are received corresponding to the decisions.
         contexts : Union[None, List[List[Num]], np.ndarray, pd.Series, pd.DataFrame], default=None
            The context under which each decision is made.

        Returns
        -------
        Returns nothing.
        """
        if self.mab is None:
            self._init(np.unique(decisions).shape[0])
        self.mab.fit(decisions, rewards, contexts)
    
    def recommend(self, contexts: Union[None, List[List[Num]], np.ndarray, pd.Series, pd.DataFrame] = None,
                  excluded_arms: List[List[Arm]] = None, return_scores: bool = False, apply_sigmoid: bool = True) \
            -> Union[Union[List[Arm], Tuple[List[Arm], List[Num]],
                     Union[List[List[Arm]], Tuple[List[List[Arm]], List[List[Num]]]]]]:
        self._validate_mab(is_fit=True)
        self._validate_get_rec(contexts, excluded_arms)

        print('oi1')
        start_time = time.time()
        # Get predicted expectations
        num_contexts = len(contexts) if contexts is not None else 1
        if num_contexts == 1:
            expectations = [self.mab.predict_expectations(contexts)]
        else:
            expectations = self.mab.predict_expectations(contexts)
        print(f'predict_expectations demorou {time.time() - start_time} segundos')
        print('oi2')

        if apply_sigmoid:
            expectations = expit(expectations)

        # Create an exclusion mask, where exclusion_mask[context_ind][arm_ind] denotes if the arm with the
        # index arm_ind was excluded for context with the index context_ind.
        # The value will be True if it is excluded and those arms will not be returned as part of the results.
        print('criando matriz de exclusão de arms')
        arm_to_index = {arm: arm_ind for arm_ind, arm in enumerate(self.mab.arms)}
        exclude_mask = np.zeros((num_contexts, len(self.mab.arms)), dtype=bool)
        if excluded_arms is not None:
            for context_ind, excluded in tqdm(enumerate(excluded_arms), total=len(excluded_arms)):
                exclude_mask[context_ind][[arm_to_index[arm] for arm in excluded if arm in arm_to_index]] = True

        # Set excluded item scores to -1, so they automatically get placed lower in best results
        expectations[exclude_mask] = -1.

        print('fazendo a ordenação top-K')
        start_time = time.time()
        # Get best `top_k` results by sorting the expectations
        arm_inds = np.argpartition(-expectations, self.top_k - 1, axis=1)[:, :self.top_k]
        arm_inds = arm_inds[np.arange(arm_inds.shape[0]).reshape(-1, 1), np.argsort(-expectations[np.arange(expectations.shape[0]).reshape(-1, 1), arm_inds], axis=1)]
        print(f'demorou {time.time() - start_time} segundos')
        

        print('gerando lista de recomendações')
        start_time = time.time()
        # Get the list of top_k recommended items and corresponding expectations for each context
        recommendations = [[]] * num_contexts
        scores = [[]] * num_contexts
        for context_ind in range(num_contexts):
            recommendations[context_ind] = [self.mab.arms[arm_ind] for arm_ind in arm_inds[context_ind]
                                            if not exclude_mask[context_ind, arm_ind]]
            if return_scores:
                scores[context_ind] = [expectations[context_ind, arm_ind] for arm_ind in arm_inds[context_ind]
                                       if not exclude_mask[context_ind, arm_ind]]
        print(f'demorou {time.time() - start_time} segundos')
        # Return recommendations and scores
        if return_scores:
            if num_contexts > 1:
                return recommendations, scores
            else:
                return recommendations[0], scores[0]
        else:
            if num_contexts > 1:
                return recommendations
            else:
                return recommendations[0]

In [18]:

def test(test_size, train_initial_size, train_extra_increment_step_size, windows_sizes):
    '''
    - `test_size`: define o tamanho da partição de teste no train/test split inicial. Por exemplo, se for escolhido 0.1 (10%), a partição de teste terá 10% das interações e a partição de treino terá 90% das interações. O tamanho da partição de teste passará ainda por um filtro com o tamanho do treino inicial, definido no próximo parâmetro.
    - `train_initial_size`: define o tamanho inicial que será usado para treino dos modelos. Esse tamanho é uma porcentagem da partição de treino, por exemplo, 0.5 (50%) quer dizer que o treino será feito inicialmente com 50% das interações separadas para treino. Vale ressaltar que essa porcentagem é relacionada apenas à partição de treino, então, se temos uma partição de treino de 0.9 (90%) e o “train_initial_size” é definido como 0.5 (50%), então, teremos 45% (0.9 * 0.5) das interações todas para o treino inicial. Com a base de treino separada com essa porcentagem inicial, a base de teste passara por um filtro, removendo todas as interações com itens ou usuários que nunca foram vistos nesse treino inicial.
    - `train_extra_increment_step_size`: define a porcentagem do "treinamento extra" que será usado. No início a base de dados é separada em treino inicial (train_initial_size), "treinamento extra" e teste. O "treinamento extra", assim como o teste, passa por um filtro para remover interações com itens ou usuários que nunca foram vistos no treino inicial. Após o treino inicial, o "treinamento extra" é usado para treinar os modelos de embeddings e os modelos de bandit. O "treinamento extra" é incrementado a cada iteração, de acordo com o valor desse parâmetro. Por exemplo, se o `train_extra_increment_step_size` é 0.1 (10%), então, a cada iteração, 10% das interações são adicionadas ao treino, até que todo o "treinamento extra" seja usado.
    - `windows_sizes`: tamanho das janelas de contextos que serão usadas para teste. Por exemplo, se for passado [3, 5, 7], as janelas de tamanho de 3, 5 e 7 serão usadas como contexto para treinar os modelos de MAB (gerando resultados diferentes para cada tamanho de janela).
    '''
    results = []
    df_recs = pd.DataFrame(columns=['algorithm', 'interaction_number', 'user_id', 'item_id', 'recommendations'])
    # df_train = pd.read_csv(train_data)
    # df_test = pd.read_csv(test_data)

    df_full = load_data()

    df_full['user_id'] = pd.factorize(df_full['user_id'])[0]
    df_full['item_id'] = pd.factorize(df_full['item_id'])[0]

    num_users = df_full['user_id'].nunique()
    num_items = df_full['item_id'].nunique()

    split_index = int(len(df_full) * (1 - test_size))
    df_train_full = df_full[:split_index]
    df_test = df_full[split_index:]

    initial_df_train = df_train_full[:int(len(df_train_full) * train_initial_size)]
    extra_df_train = df_train_full[int(len(df_train_full) * train_initial_size):]
    extra_df_train = extra_df_train[(extra_df_train['user_id'].isin(initial_df_train['user_id'])) & (extra_df_train['item_id'].isin(initial_df_train['item_id']))]
    extra_df_train = extra_df_train.reset_index(drop=True)

    df_test = df_test[(df_test['user_id'].isin(initial_df_train['user_id'])) & (df_test['item_id'].isin(initial_df_train['item_id']))]
    df_test = df_test.reset_index(drop=True)
    df_test_for_evaluation = df_test[df_test['response'] == 1]
    df_test_for_evaluation = df_test_for_evaluation.reset_index(drop=True)

    print('Generating ALS embeddings')
    ALS_model, _, ALS_item_embeddings, ALS_user_embeddings = train_embeddings_model(implicit.als.AlternatingLeastSquares, initial_df_train, num_users, num_items, generate_embeddings=True)

    print('Generating BPR embeddings')
    BPR_model, _, BPR_item_embeddings, BPR_user_embeddings = train_embeddings_model(implicit.bpr.BayesianPersonalizedRanking, initial_df_train, num_users, num_items, generate_embeddings=True)

    '''
    for window_size in windows_sizes:
        print(f'Generating contexts for window size of {window_size} (contat items emb)')
        df_full_new = pd.concat([initial_df_train, extra_df_train, df_test_for_evaluation])
        als_contexts = create_contexts_list_items_concat(df_full_new, ALS_item_embeddings, window_size)
        bpr_contexts = create_contexts_list_items_concat(df_full_new, BPR_item_embeddings, window_size)

        initial_df_train[f'als_context_item_concat_{window_size}'] = als_contexts[:len(initial_df_train)]
        initial_df_train[f'bpr_context_item_concat_{window_size}'] = bpr_contexts[:len(initial_df_train)]

        extra_df_train[f'als_context_item_concat_{window_size}'] = als_contexts[len(initial_df_train):len(initial_df_train) + len(extra_df_train)]
        extra_df_train[f'bpr_context_item_concat_{window_size}'] = bpr_contexts[len(initial_df_train):len(initial_df_train) + len(extra_df_train)]

        df_test_for_evaluation[f'als_context_item_concat_{window_size}'] = als_contexts[len(initial_df_train) + len(extra_df_train):]
        df_test_for_evaluation[f'bpr_context_item_concat_{window_size}'] = bpr_contexts[len(initial_df_train) + len(extra_df_train):]

    print('Generating contexts for user embeddings')
    df_full_new = pd.concat([initial_df_train, extra_df_train, df_test_for_evaluation])
    als_contexts = create_contexts_list_user(df_full_new, ALS_user_embeddings)
    bpr_contexts = create_contexts_list_user(df_full_new, BPR_user_embeddings)

    initial_df_train['als_context_user'] = als_contexts[:len(initial_df_train)]
    initial_df_train['bpr_context_user'] = bpr_contexts[:len(initial_df_train)]

    extra_df_train['als_context_user'] = als_contexts[len(initial_df_train):len(initial_df_train) + len(extra_df_train)]
    extra_df_train['bpr_context_user'] = bpr_contexts[len(initial_df_train):len(initial_df_train) + len(extra_df_train)]

    df_test_for_evaluation['als_context_user'] = als_contexts[len(initial_df_train) + len(extra_df_train):]
    df_test_for_evaluation['bpr_context_user'] = bpr_contexts[len(initial_df_train) + len(extra_df_train):]
    '''
    
    print('Generating contexts for item mean embeddings')
    df_full_new = pd.concat([initial_df_train, extra_df_train, df_test_for_evaluation])
    als_contexts = create_contexts_list_items_mean(df_full_new, ALS_item_embeddings)
    bpr_contexts = create_contexts_list_items_mean(df_full_new, BPR_item_embeddings)

    initial_df_train['als_context_items_mean'] = als_contexts[:len(initial_df_train)]
    initial_df_train['bpr_context_items_mean'] = bpr_contexts[:len(initial_df_train)]

    extra_df_train['als_context_items_mean'] = als_contexts[len(initial_df_train):len(initial_df_train) + len(extra_df_train)]
    extra_df_train['bpr_context_items_mean'] = bpr_contexts[len(initial_df_train):len(initial_df_train) + len(extra_df_train)]

    df_test_for_evaluation['als_context_items_mean'] = als_contexts[len(initial_df_train) + len(extra_df_train):]
    df_test_for_evaluation['bpr_context_items_mean'] = bpr_contexts[len(initial_df_train) + len(extra_df_train):]

    algos_dict = {
        # 'item_concat': {
        #     'item_concat': True,
        #     'item_mean': False,
        #     'user': False
        # },
        'item_mean': {
            'item_concat': False,
            'item_mean': True,
            'user': False
        },
        # 'user': {
        #     'item_concat': False,
        #     'item_mean': False,
        #     'user': True
        # },
        # 'item_concat-item_mean': {
        #     'item_concat': True,
        #     'item_mean': True,
        #     'user': False
        # },
        # 'item_concat-user': {
        #     'item_concat': True,
        #     'item_mean': False,
        #     'user': True
        # },
        # 'item_mean-user': {
        #     'item_concat': False,
        #     'item_mean': True,
        #     'user': True
        # },
        # 'all': {
        #     'item_concat': True,
        #     'item_mean': True,
        #     'user': True
        # }
    }

    for algo_name, _ in algos_dict.items():
        algos_dict[algo_name]['results'] = []
        algos_dict[algo_name]['df_recs'] = pd.DataFrame(columns=['algorithm', 'interaction_number', 'user_id', 'item_id', 'recommendations'])

    def save_algo_result(algo_name, hits, hr, spent_time, df_recs_algo, current_extra_train_size, results):
        df_recs_algo['algorithm'] = algo_name
        df_recs_algo['train_size'] = current_extra_train_size
        df_recs_new = pd.concat([df_recs, df_recs_algo])
        results.append({'algorithm': algo_name, 'hits': hits, 'hr': hr, 'time': spent_time, 'train_size': current_extra_train_size})
        return df_recs_new

    current_extra_train_size = 0
    while current_extra_train_size <= 1:
        print(f"Current extra train size: {current_extra_train_size}")

        current_df_train = pd.concat([initial_df_train, extra_df_train[:int(len(extra_df_train) * current_extra_train_size)]])
        interactions_by_user = group_interactions_by_user(current_df_train)  # MUDANÇA AQUI

        # -------------- ALS -----------------
        print('Training ALS')
        ALS_model, sparse_matrix = train_embeddings_model(implicit.als.AlternatingLeastSquares, current_df_train, num_users, num_items)

        print('Testing ALS')
        hits, hr, spent_time, df_recs_als = test_embeddings_model(ALS_model, sparse_matrix, df_test_for_evaluation)
        df_recs = save_algo_result('ALS', hits, hr, spent_time, df_recs_als, current_extra_train_size, results)

        # -------------- BPR -----------------
        print('Training BPR')
        BPR_model, sparse_matrix = train_embeddings_model(implicit.bpr.BayesianPersonalizedRanking, current_df_train, num_users, num_items)

        print('Testing BPR')
        hits, hr, spent_time, df_recs_bpr = test_embeddings_model(BPR_model, sparse_matrix, df_test_for_evaluation)
        df_recs = save_algo_result('BPR', hits, hr, spent_time, df_recs_bpr, current_extra_train_size, results)
        
        for algo_name, algo_dict in algos_dict.items():
            if algo_dict['item_concat']:
                windows = windows_sizes
            else:
                windows = [None]
            
            for window_size in windows:
                als_embeddings_cols = []
                bpr_embeddings_cols = []
                print_extra = f' - {algo_name}'
                algo_name_extra = ''
                if algo_dict['item_concat']:
                    als_embeddings_cols.append(f'als_context_item_concat_{window_size}')
                    bpr_embeddings_cols.append(f'bpr_context_item_concat_{window_size}')
                    print_extra = f' - {algo_name} - {window_size}'
                    algo_name_extra = f' - {window_size}'
                if algo_dict['item_mean']:
                    als_embeddings_cols.append('als_context_items_mean')
                    bpr_embeddings_cols.append('bpr_context_items_mean')
                if algo_dict['user']:
                    als_embeddings_cols.append('als_context_user')
                    bpr_embeddings_cols.append('bpr_context_user')
                
                # ------ LinUCB - ALS embeddings -------
                print(f'Training LinUCB - ALS embeddings{print_extra}')
                linUCB_model = BanditRecommenderArmEncoded(learning_policy=LearningPolicy.LinUCB(alpha=0.1), top_k=10)
                start_time = time.time()
                train_mab(linUCB_model, current_df_train, als_embeddings_cols)  # Mudança no treinamento dos MAB
                print(f'Treinamento demorou {time.time() - start_time} segundos')

                print(f'Testing LinUCB - ALS embeddings{print_extra}')
                hits, hr, spent_time, df_recs_linUCB = test_non_incremental(linUCB_model, als_embeddings_cols, df_test_for_evaluation, interactions_by_user)
                algo_dict['df_recs'] = save_algo_result(f'LinUCB - ALS embeddings{algo_name_extra}', hits, hr, spent_time, df_recs_linUCB, current_extra_train_size, algo_dict['results'])


                # ------ LinUCB - BPR embeddings -------
                print(f'Training LinUCB - BPR embeddings{print_extra}')
                linUCB_model = BanditRecommenderArmEncoded(learning_policy=LearningPolicy.LinUCB(alpha=0.1), top_k=10)
                train_mab(linUCB_model, current_df_train, bpr_embeddings_cols)

                print(f'Testing LinUCB - BPR embeddings{print_extra}')
                hits, hr, spent_time, df_recs_linUCB = test_non_incremental(linUCB_model, bpr_embeddings_cols, df_test_for_evaluation, interactions_by_user)
                algo_dict['df_recs'] = save_algo_result(f'LinUCB - BPR embeddings{algo_name_extra}', hits, hr, spent_time, df_recs_linUCB, current_extra_train_size, algo_dict['results'])

                # ------ LinGreedy - ALS embeddings -------
                print(f'Training LinGreedy - ALS embeddings{print_extra}')
                linGreedy_model = BanditRecommenderArmEncoded(learning_policy=LearningPolicy.LinGreedy(epsilon=0.01), top_k=10)
                train_mab(linGreedy_model, current_df_train, als_embeddings_cols)

                print(f'Testing LinGreedy - ALS embeddings{print_extra}')
                hits, hr, spent_time, df_recs_linGreedy = test_non_incremental(linGreedy_model, als_embeddings_cols, df_test_for_evaluation, interactions_by_user)
                algo_dict['df_recs'] = save_algo_result(f'LinGreedy - ALS embeddings{algo_name_extra}', hits, hr, spent_time, df_recs_linGreedy, current_extra_train_size, algo_dict['results'])


                # ------ LinGreedy - BPR embeddings -------
                print(f'Training LinGreedy - BPR embeddings{print_extra}')
                linGreedy_model = BanditRecommenderArmEncoded(learning_policy=LearningPolicy.LinGreedy(epsilon=0.01), top_k=10)
                train_mab(linGreedy_model, current_df_train, bpr_embeddings_cols)

                print(f'Testing LinGreedy - BPR embeddings{print_extra}')
                hits, hr, spent_time, df_recs_linGreedy = test_non_incremental(linGreedy_model, bpr_embeddings_cols, df_test_for_evaluation, interactions_by_user)
                algo_dict['df_recs'] = save_algo_result(f'LinGreedy - BPR embeddings{algo_name_extra}', hits, hr, spent_time, df_recs_linGreedy, current_extra_train_size, algo_dict['results'])
        
        # Incrementando o tamanho do treino para próxima iteração
        current_extra_train_size = round(current_extra_train_size + train_extra_increment_step_size, 2)
    
    save_path = f'results-v15/'
    if not os.path.exists(save_path):
        os.makedirs(save_path)

    df_results_als_bpr = pd.DataFrame(results)
    for algo_name, algo_dict in algos_dict.items():
        df_results_final = pd.DataFrame(algo_dict['results'])
        df_results_final = pd.concat([df_results_final, df_results_als_bpr])
        df_results_final = df_results_final.astype({'hits': int, 'hr': float, 'time': float})
        df_results_final['test_size'] = round(test_size, 2)
        df_results_final['test_interactions'] = len(df_test_for_evaluation)

        df_results_final.to_csv(f'{save_path}/results-{algo_name}.csv', index=False)

        df_recs_final = pd.concat([df_recs, algo_dict['df_recs']])
        df_recs_final.to_csv(f'{save_path}/recs-{algo_name}.csv', index=False)

In [19]:
test(test_size=0.1, train_initial_size=0.5, train_extra_increment_step_size=10, windows_sizes=[1,2,3,4,5])

Generating ALS embeddings


100%|██████████| 15/15 [00:36<00:00,  2.45s/it]


Generating BPR embeddings


100%|██████████| 100/100 [00:01<00:00, 93.61it/s, train_auc=92.25%, skipped=0.46%]


Generating contexts for item mean embeddings


100%|██████████| 461136/461136 [00:32<00:00, 14299.98it/s]
100%|██████████| 461136/461136 [00:31<00:00, 14562.77it/s]
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  initial_df_train['als_context_items_mean'] = als_contexts[:len(initial_df_train)]
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  initial_df_train['bpr_context_items_mean'] = bpr_contexts[:len(initial_df_train)]


Current extra train size: 0
Training ALS


100%|██████████| 15/15 [00:37<00:00,  2.48s/it]


Testing ALS


100%|██████████| 3799/3799 [00:04<00:00, 790.50it/s]


Training BPR


100%|██████████| 100/100 [00:01<00:00, 86.81it/s, train_auc=92.24%, skipped=0.46%]


Testing BPR


100%|██████████| 3799/3799 [00:04<00:00, 812.65it/s]


Training LinUCB - ALS embeddings - item_mean
arm_to_model demorou -0.4441554546356201
reset_arm_to_status demorou -4.0531158447265625e-06
paralel fit demorou -22.15251326560974
_set_arms_as_trained acabou em -4.76837158203125e-07 segundos
Treinamento demorou 27.821632623672485 segundos
Testing LinUCB - ALS embeddings - item_mean
entrou
saiu
oi1
random values
[0.22733602 0.31675834 0.79736546 ... 0.11546996 0.43684616 0.5604927 ]
Gerando as predições
Gerar as predições demorou 24.15371322631836 segundos
predict_expectations demorou 24.154807090759277 segundos
oi2
criando matriz de exclusão de arms


100%|██████████| 3799/3799 [00:00<00:00, 127313.38it/s]

fazendo a ordenação top-K





demorou 1.5695433616638184 segundos
gerando lista de recomendações
demorou 0.024473190307617188 segundos


100%|██████████| 3799/3799 [00:00<00:00, 15580.50it/s]


Training LinUCB - BPR embeddings - item_mean
arm_to_model demorou -0.40575742721557617
reset_arm_to_status demorou -3.814697265625e-06
paralel fit demorou -20.364905834197998
_set_arms_as_trained acabou em -2.384185791015625e-07 segundos
Testing LinUCB - BPR embeddings - item_mean
entrou
saiu
oi1
random values
[0.22733602 0.31675834 0.79736546 ... 0.11546996 0.43684616 0.5604927 ]
Gerando as predições
Gerar as predições demorou 24.6965389251709 segundos
predict_expectations demorou 24.697410583496094 segundos
oi2
criando matriz de exclusão de arms


100%|██████████| 3799/3799 [00:00<00:00, 132687.37it/s]

fazendo a ordenação top-K





demorou 1.582766056060791 segundos
gerando lista de recomendações
demorou 0.02153182029724121 segundos


100%|██████████| 3799/3799 [00:00<00:00, 15499.40it/s]


Training LinGreedy - ALS embeddings - item_mean
arm_to_model demorou -0.35843563079833984
reset_arm_to_status demorou -1.1920928955078125e-06
paralel fit demorou -20.642829656600952
_set_arms_as_trained acabou em -4.76837158203125e-07 segundos
Testing LinGreedy - ALS embeddings - item_mean
entrou
saiu
oi1
random values
[0.22733602 0.31675834 0.79736546 ... 0.11546996 0.43684616 0.5604927 ]
Gerando as predições
Gerar as predições demorou 3.3813955783843994 segundos
predict_expectations demorou 3.40777325630188 segundos
oi2
criando matriz de exclusão de arms


100%|██████████| 3799/3799 [00:00<00:00, 159505.90it/s]

fazendo a ordenação top-K





demorou 1.553048849105835 segundos
gerando lista de recomendações
demorou 0.019666433334350586 segundos


100%|██████████| 3799/3799 [00:00<00:00, 15358.33it/s]


Training LinGreedy - BPR embeddings - item_mean
arm_to_model demorou -0.3487551212310791
reset_arm_to_status demorou -9.5367431640625e-07
paralel fit demorou -21.007943868637085
_set_arms_as_trained acabou em -2.384185791015625e-07 segundos
Testing LinGreedy - BPR embeddings - item_mean
entrou
saiu
oi1
random values
[0.22733602 0.31675834 0.79736546 ... 0.11546996 0.43684616 0.5604927 ]
Gerando as predições
Gerar as predições demorou 3.4279470443725586 segundos
predict_expectations demorou 3.453890562057495 segundos
oi2
criando matriz de exclusão de arms


100%|██████████| 3799/3799 [00:00<00:00, 162464.17it/s]

fazendo a ordenação top-K





demorou 1.555213212966919 segundos
gerando lista de recomendações
demorou 0.02155470848083496 segundos


100%|██████████| 3799/3799 [00:00<00:00, 15516.58it/s]


In [20]:
df_results = pd.read_csv('results-v15/results-item_mean.csv')

In [21]:
#fig = px.line(df_results, x="train_size", y="hr", color='algorithm', title='HR x Train size')
#fig.show()
#fig.write_html('results-v15/hr_x_train_size.html')

In [22]:
df_results

Unnamed: 0,algorithm,hits,hr,time,train_size,test_size,test_interactions
0,LinUCB - ALS embeddings,22,0.005791,26.198028,0,0.1,3799
1,LinUCB - BPR embeddings,3,0.00079,26.753603,0,0.1,3799
2,LinGreedy - ALS embeddings,114,0.030008,5.408048,0,0.1,3799
3,LinGreedy - BPR embeddings,56,0.014741,5.459213,0,0.1,3799
4,ALS,89,0.023427,4.816372,0,0.1,3799
5,BPR,25,0.006581,4.68415,0,0.1,3799


In [None]:
def remove_incremental(df_results):
    new_df = df_results[(~df_results['algorithm'].str.contains('incremental') | df_results['algorithm'].str.contains('non-incremental'))]
    new_df['algorithm'] = new_df['algorithm'].str.replace(' - non-incremental', '')
    return new_df

In [None]:
def transform_in_upper_and_lower_bounds(df_results, algo_names):
    train_sizes = sorted(df_results['train_size'].unique().tolist())
    min_train_size = train_sizes[0]
    max_train_size = train_sizes[-1]
    qnt_train_sizes = len(train_sizes)

    for algo_name in algo_names:
        algo_row_lower = df_results[(df_results['algorithm'] == algo_name) & (df_results['train_size'] == min_train_size)]
        df_lower = pd.DataFrame({
            'algorithm': [f'{algo_name} lower'] * qnt_train_sizes,
            'hits': [algo_row_lower['hits'].values[0]] * qnt_train_sizes,
            'hr': [algo_row_lower['hr'].values[0]] * qnt_train_sizes,
            'time': [algo_row_lower['time'].values[0]] * qnt_train_sizes,
            'train_size': train_sizes,
            'test_size': [algo_row_lower['test_size'].values[0]] * qnt_train_sizes,
            'test_interactions': [algo_row_lower['test_interactions'].values[0]] * qnt_train_sizes
        })
        df_results = pd.concat([df_results, df_lower])

        algo_row_upper = df_results[(df_results['algorithm'] == algo_name) & (df_results['train_size'] == max_train_size)]
        df_upper = pd.DataFrame({
            'algorithm': [f'{algo_name} upper'] * qnt_train_sizes,
            'hits': [algo_row_upper['hits'].values[0]] * qnt_train_sizes,
            'hr': [algo_row_upper['hr'].values[0]] * qnt_train_sizes,
            'time': [algo_row_upper['time'].values[0]] * qnt_train_sizes,
            'train_size': train_sizes,
            'test_size': [algo_row_upper['test_size'].values[0]] * qnt_train_sizes,
            'test_interactions': [algo_row_upper['test_interactions'].values[0]] * qnt_train_sizes
        })
        df_results = pd.concat([df_results, df_upper])

        df_results = df_results[df_results['algorithm'] != algo_name]
    
    return df_results

In [None]:
import plotly.graph_objects as go

def plot_results(df_results, save_root):
    df_results = remove_incremental(df_results)
    df_results = transform_in_upper_and_lower_bounds(df_results, ['ALS', 'BPR'])

    algos_configs = {
        'ALS upper': {'color': 'blue', 'dash': 'dash'},
        'ALS lower': {'color': 'blue', 'dash': 'dash'},
        'BPR upper': {'color': 'red', 'dash': 'dash'},
        'BPR lower': {'color': 'red', 'dash': 'dash'},
        'LinUCB - ALS embeddings': {'color': 'green', 'dash': 'solid'},
        'LinUCB - BPR embeddings': {'color': 'purple', 'dash': 'solid'},
        'LinGreedy - ALS embeddings': {'color': 'orange', 'dash': 'solid'},
        'LinGreedy - BPR embeddings': {'color': 'pink', 'dash': 'solid'}
    }

    fig = go.Figure()
    for algo_name, config in algos_configs.items():
        df_algo = df_results[df_results['algorithm'] == algo_name]
        fig.add_trace(go.Scatter(x=df_algo['train_size'], y=df_algo['hr'], mode='lines', name=algo_name, line=dict(color=config['color'], dash=config['dash'])))
    
    fig.update_layout(title='HR x Train size', xaxis_title='Train size', yaxis_title='HR')
    # fig.show()

    fig.write_html(f'{save_root}/hr_x_train_size.html')
    # fig.write_image(f'{save_root}/hr_x_train_size.png')

In [None]:
# display(df_results)

plot_results(df_results, 'results-v15')

Unnamed: 0,algorithm,hits,hr,time,train_size,test_size,test_interactions
0,LinUCB - ALS embeddings,16,0.004212,25.781378,0.0,0.1,3799
1,LinUCB - BPR embeddings,4,0.001053,25.469453,0.0,0.1,3799
2,LinGreedy - ALS embeddings,86,0.022638,5.340262,0.0,0.1,3799
3,LinGreedy - BPR embeddings,46,0.012108,5.343321,0.0,0.1,3799
4,LinUCB - ALS embeddings,16,0.004212,25.531457,0.1,0.1,3799
...,...,...,...,...,...,...,...
61,BPR,42,0.011056,4.304660,0.8,0.1,3799
62,ALS,97,0.025533,4.499696,0.9,0.1,3799
63,BPR,42,0.011056,4.294375,0.9,0.1,3799
64,ALS,96,0.025270,4.567436,1.0,0.1,3799
