# Filtrando itens já consumidos no mab2rec

## Testes iniciais de filtragem

Antes de colocar no código principal, abaixo estão alguns testes que realizei para entender melhor o funcionamento do filtro do mab2rec e como gerar os ids de itens para a filtragem.

In [1]:
import pandas as pd
from mab2rec import BanditRecommender, LearningPolicy
from mab2rec.pipeline import train

# Paths to the train/test CSV splits of the "ml100k" dataset (presumably
# MovieLens-100k), relative to the notebook's working directory.
train_data = "../data/ml100k/data_train.csv"
test_data = "../data/ml100k/data_test.csv"

In [2]:
# Toy interaction log: user 1 consumed item 1, user 2 consumed items 2-5 and
# user 3 consumed item 2. Every interaction is positive (response == 1).
df_test = pd.DataFrame(
    [
        (1, 1, 1),
        (2, 2, 1),
        (2, 3, 1),
        (2, 4, 1),
        (2, 5, 1),
        (3, 2, 1),
    ],
    columns=['user_id', 'item_id', 'response'],
)

# One-hot context vector per user: row i belongs to user i + 1.
contexts = [[1, 0, 0], [0, 1, 0], [0, 0, 1]]

# Same vectors as a user-feature table (user_id, u0, u1, u2).
contexts_df = pd.DataFrame(contexts, columns=['u0', 'u1', 'u2'])
contexts_df.insert(0, 'user_id', [1, 2, 3])
print(contexts_df)

   user_id  u0  u1  u2
0        1   1   0   0
1        2   0   1   0
2        3   0   0   1


In [3]:
# Fit a LinGreedy bandit on the toy interactions, using the one-hot user
# features as contexts. top_k=1, so each recommendation holds a single item.
rec_model = BanditRecommender(LearningPolicy.LinGreedy(0.1), top_k=1)
train(rec_model, data=df_test, user_features=contexts_df)

In [4]:
rec_model.recommend(contexts=[[1, 0, 0]])  # Returns 1, which user 1 has already consumed

[1]

In [5]:
# Same context, but item 1 is excluded: a different item is recommended instead.
rec_model.recommend(contexts=[[1, 0, 0]], excluded_arms=[[1]])

[5]

In [6]:
# Excluding both items 1 and 5 moves the recommendation to yet another item.
rec_model.recommend(contexts=[[1, 0, 0]], excluded_arms=[[1, 5]])

[4]

In [7]:
def group_interactions_by_user(interactions_df):
    """Collect, for every user, the list of item ids that user interacted with.

    Args:
        interactions_df: DataFrame with at least ``user_id`` and ``item_id``
            columns, one row per interaction.

    Returns:
        DataFrame with one row per distinct user and the columns ``user_id``
        and ``interactions``. ``interactions`` holds a Python list with that
        user's item ids in the order they appear in ``interactions_df``
        (repeated items are kept). An empty input yields an empty frame with
        the same two columns.
    """
    if interactions_df.empty:
        # Nothing to group: return the expected schema directly rather than
        # relying on how groupby/apply shapes an empty result.
        return pd.DataFrame({
            'user_id': interactions_df['user_id'].reset_index(drop=True),
            'interactions': pd.Series([], dtype=object),
        })

    # Grouping the ``item_id`` Series (not a one-column DataFrame) always
    # produces a Series, which is what ``reset_index(name=...)`` requires.
    items_per_user = interactions_df.groupby('user_id')['item_id'].apply(
        lambda user_items: user_items.tolist()
    )
    # reset_index already produces a fresh 0..n-1 index; no second reset needed.
    return items_per_user.reset_index(name='interactions')

In [8]:
# Sanity check on the toy data: one row per user with the list of consumed items.
grouped = group_interactions_by_user(df_test)
grouped

Unnamed: 0,user_id,interactions
0,1,[1]
1,2,"[2, 3, 4, 5]"
2,3,[2]


In [9]:
# The same lists as a plain list: one entry per user, not per interaction.
grouped['interactions'].tolist()

[[1], [2, 3, 4, 5], [2]]

In [10]:
# Merge the per-user lists back onto the interactions so there is one exclusion
# list per interaction row. After dropping the original columns, only the
# 'interactions' column remains.
filters = df_test.merge(grouped, on='user_id').drop(columns=['user_id', 'item_id', 'response']).values
print(filters)

[[list([1])]
 [list([2, 3, 4, 5])]
 [list([2, 3, 4, 5])]
 [list([2, 3, 4, 5])]
 [list([2, 3, 4, 5])]
 [list([2])]]


In [11]:
# Still two-dimensional at this point: (number of interactions, 1).
filters.shape

(6, 1)

In [12]:
# Drop the singleton column axis to get a 1-D array holding one list per row.
filters = filters.squeeze(axis=1)
print(filters)
print(filters.shape)

[list([1]) list([2, 3, 4, 5]) list([2, 3, 4, 5]) list([2, 3, 4, 5])
 list([2, 3, 4, 5]) list([2])]
(6,)


In [13]:
# Build contexts and exclusion filters from merges on user_id so both have one
# entry per interaction row, then request recommendations for all rows at once.
contexts = df_test.merge(contexts_df, on='user_id').drop(columns=['user_id', 'item_id', 'response']).values
filters = df_test.merge(grouped, on='user_id').drop(columns=['user_id', 'item_id', 'response']).values.squeeze(axis=1)

rec_model.recommend(contexts=contexts, excluded_arms=filters)

[[5], [1], [1], [1], [1], [5]]

In [14]:
# Single-row case, first with a bare squeeze() ...
teste_squeeze = df_test.iloc[1:2].merge(grouped, on='user_id').drop(columns=['user_id', 'item_id', 'response']).values.squeeze()
print(teste_squeeze.shape)

# ... and then with squeeze(axis=1).
teste_squeeze = df_test.iloc[1:2].merge(grouped, on='user_id').drop(columns=['user_id', 'item_id', 'response']).values.squeeze(axis=1)
teste_squeeze.shape  
# axis=1 is required: with a single row, a bare squeeze() also removes the row axis and returns a 0-d array (shape ()), which is not a valid list of filters

()


(1,)

## Testando os modelos MAB com o filtro

In [15]:
from sklearn.preprocessing import LabelEncoder
from scipy.sparse import csr_matrix
import plotly.express as px
import time
import os

# Dataset paths, re-declared with the same values used at the top of the notebook.
train_data = "../data/ml100k/data_train.csv"
test_data = "../data/ml100k/data_test.csv"

In [16]:
# Number of latent factors requested from the embedding models; it is also the
# number of u0..u{FACTORS-1} user-feature columns produced from them.
FACTORS = 10

In [17]:

import implicit.cpu


def train_embeddings_model(Model, df, num_users, num_items, generate_embeddings=False):
    """Fit an embedding model on the interactions in ``df``.

    Args:
        Model: class instantiated as ``Model(factors=FACTORS, random_state=1,
            num_threads=1)``; the instance must provide ``fit`` and, when
            embeddings are requested, a ``user_factors`` attribute indexable
            by user id.
        df: interactions with ``user_id``, ``item_id`` and ``response`` columns.
        num_users: number of rows of the user-item matrix.
        num_items: number of columns of the user-item matrix.
        generate_embeddings: when true, also return a user-feature table.

    Returns:
        ``(model, sparse_matrix)``, or ``(model, sparse_matrix, df_user_features)``
        when ``generate_embeddings`` is true. ``df_user_features`` has a
        ``user_id`` column followed by ``u0`` .. ``u{FACTORS-1}``, with one row
        per distinct user in ``df``.
    """
    row_ids, col_ids = df['user_id'], df['item_id']
    sparse_matrix = csr_matrix((df['response'], (row_ids, col_ids)), shape=(num_users, num_items))

    model = Model(factors=FACTORS, random_state=1, num_threads=1)
    model.fit(sparse_matrix)

    if generate_embeddings:
        feature_names = [f'u{i}' for i in range(FACTORS)]
        # Only the first FACTORS components are kept. The original author
        # suspected BPR appends an extra constant component to each latent
        # vector (unconfirmed), which this slice would discard.
        feature_rows = [
            [user_id, *model.user_factors[user_id][:FACTORS]]
            for user_id in df['user_id'].unique()
        ]
        df_user_features = pd.DataFrame(feature_rows, columns=['user_id'] + feature_names)
        return model, sparse_matrix, df_user_features

    return model, sparse_matrix

In [18]:

def test_embeddings_model(model, sparse_matrix, df_test):
    all_recs = []

    start_time = time.time()
    hits = 0
    for _, interaction in df_test.iterrows():
        ids_recs, _ = model.recommend(userid=interaction['user_id'], user_items=sparse_matrix[interaction['user_id']], N=10)
        if interaction['item_id'] in ids_recs:
            hits += 1
        all_recs.append(ids_recs.tolist())
    
    recs_df = pd.DataFrame({
        'interaction_number': [i for i in range(len(df_test))],
        'user_id': df_test['user_id'],
        'item_id': df_test['item_id'],
        'recommendations': all_recs
    })
    
    return hits, hits/len(df_test), time.time() - start_time, recs_df

In [19]:

def test_non_incremental(mab_algo, user_features, df_test, interactions_by_user):
    start_time = time.time()
    hits = 0

    contexts = df_test.merge(user_features, how='left', on='user_id').drop(columns=['user_id', 'item_id', 'response']).values
    filters = df_test.merge(interactions_by_user, how='left', on='user_id').drop(columns=['user_id', 'item_id', 'response']).values.squeeze(axis=1)  # MUDANÇA AQUI

    recomendations = mab_algo.recommend(contexts, filters)  # USANDO OS FILTROS

    df_test = df_test.reset_index(drop=True)

    hits = 0
    for i, interaction in df_test.iterrows():
        if interaction['item_id'] in recomendations[i]:
            hits += 1
    
    recs_df = pd.DataFrame({
        'interaction_number': [i for i in range(len(df_test))],
        'user_id': df_test['user_id'],
        'item_id': df_test['item_id'],
        'recommendations': recomendations
    })

    return hits, hits/len(df_test), time.time() - start_time, recs_df

In [20]:

def test_incremental(mab_algo, user_features, df_test, df_test_for_evaluation, batch_size, interactions_by_user):
    recs = []

    start_time = time.time()
    hits = 0

    for i in range(0, len(df_test), batch_size):
        # Fazendo recomendações para teste
        df_batch_test = df_test_for_evaluation.loc[i:i+batch_size-1]
        contexts = df_batch_test.merge(user_features, how='left', on='user_id').drop(columns=['user_id', 'item_id', 'response']).values
        filters = df_batch_test.merge(interactions_by_user, how='left', on='user_id').drop(columns=['user_id', 'item_id', 'response']).values.squeeze(axis=1)  # MUDANÇA AQUI

        if len(contexts) > 0: # Se não tiver nenhuma interação positiva, não faz sentido fazer recomendações
            recomendations = mab_algo.recommend(contexts, filters)  # USANDO OS FILTROS
            if isinstance(recomendations, list) and isinstance(recomendations[0], int):
                # Quando o contexto tem tamanho 1, a recomendação é uma lista, e não uma lista de listas
                recomendations = [recomendations]

            df_batch_test = df_batch_test.reset_index(drop=True)

            for j, interaction in df_batch_test.iterrows():
                if interaction['item_id'] in recomendations[j]:
                    hits += 1
            
            recs.extend(recomendations)
        
        # Treinando com o batch
        df_batch_train = df_test.loc[i:i+batch_size-1]
        contexts = df_batch_train.merge(user_features, how='left', on='user_id').drop(columns=['user_id', 'item_id', 'response']).values

        mab_algo.partial_fit(df_batch_train['item_id'], df_batch_train['response'], contexts)
    
    recs_df = pd.DataFrame({
        'interaction_number': [i for i in range(len(df_test_for_evaluation))],
        'user_id': df_test_for_evaluation['user_id'],
        'item_id': df_test_for_evaluation['item_id'],
        'recommendations': recs
    })

    return hits, hits/len(df_test_for_evaluation), time.time() - start_time, recs_df

In [21]:

def test(test_size, train_initial_size, train_extra_increment_step_size):
    '''
    Run the offline comparison between the embedding models (ALS, BPR) and the
    contextual bandits (LinUCB, LinGreedy; incremental and non-incremental;
    ALS or BPR user embeddings as context) and write the metrics and the
    recommendations to ``results-v5/``.

    - `test_size`: size of the test partition in the initial train/test split. For example, 0.1 (10%) puts the last 10% of the interactions in the test partition and the first 90% in the training partition. The test partition is additionally filtered against the initial training set, defined by the next parameter.
    - `train_initial_size`: fraction of the training partition used for the initial training. For example, 0.5 (50%) means the models are first trained on 50% of the training interactions. The fraction is relative to the training partition only: with a training partition of 0.9 (90%) and `train_initial_size` of 0.5 (50%), 45% (0.9 * 0.5) of all interactions are used for the initial training. Once the initial training set is fixed, the test set is filtered to remove every interaction whose user or item never appears in it.
    - `train_extra_increment_step_size`: step by which the share of "extra training" data grows. The data is split into initial training (`train_initial_size`), "extra training" and test. Like the test set, the "extra training" set is filtered to remove interactions whose user or item never appears in the initial training set. Each iteration trains the embedding and bandit models on the initial training set plus the current share of the "extra training" set; the share starts at 0 and grows by this value (for example 0.1 adds 10% per iteration) until the whole "extra training" set is used.

    Raises:
        ValueError: if `train_extra_increment_step_size` rounds (2 decimals) to
            a value <= 0, which would make the training-size loop never end.
    '''
    if round(train_extra_increment_step_size, 2) <= 0:
        raise ValueError(
            'train_extra_increment_step_size must be at least 0.01, '
            f'got {train_extra_increment_step_size}'
        )

    incremental_batch_size = 10
    recs_columns = ['algorithm', 'interaction_number', 'user_id', 'item_id', 'recommendations', 'train_size']

    results = []
    recs_frames = []  # one frame per (algorithm, train size); concatenated once at the end

    df_train = pd.read_csv(train_data)
    df_test = pd.read_csv(test_data)

    df_full = pd.concat([df_train, df_test])

    # Re-encode ids as 0..n-1 so they can index the sparse user-item matrix.
    df_full['user_id'] = LabelEncoder().fit_transform(df_full['user_id'])
    df_full['item_id'] = LabelEncoder().fit_transform(df_full['item_id'])

    num_users = df_full['user_id'].nunique()
    num_items = df_full['item_id'].nunique()

    # Positional split: the first rows train, the last rows test.
    split_index = int(len(df_full) * (1 - test_size))
    df_train_full = df_full[:split_index]
    df_test = df_full[split_index:]

    initial_cut = int(len(df_train_full) * train_initial_size)
    initial_df_train = df_train_full[:initial_cut]
    extra_df_train = df_train_full[initial_cut:]

    def seen_in_initial_train(df):
        # Rows whose user AND item both occur in the initial training set.
        return df['user_id'].isin(initial_df_train['user_id']) & df['item_id'].isin(initial_df_train['item_id'])

    extra_df_train = extra_df_train[seen_in_initial_train(extra_df_train)].reset_index(drop=True)

    df_test = df_test[seen_in_initial_train(df_test)].reset_index(drop=True)
    # Not re-indexed on purpose: test_incremental slices it by df_test's labels.
    df_test_for_evaluation = df_test[df_test['response'] == 1]

    # The user embeddings used as bandit contexts come from the initial training set only.
    print('Generating ALS embeddings')
    _, _, ALS_user_features = train_embeddings_model(implicit.als.AlternatingLeastSquares, initial_df_train, num_users, num_items, generate_embeddings=True)

    print('Generating BPR embeddings')
    _, _, BPR_user_features = train_embeddings_model(implicit.bpr.BayesianPersonalizedRanking, initial_df_train, num_users, num_items, generate_embeddings=True)

    def save_algo_result(algo_name, hits, hr, spent_time, df_recs_algo, current_extra_train_size):
        # Record the metrics row and keep the tagged recommendations for the final concat.
        recs_frames.append(df_recs_algo.assign(algorithm=algo_name, train_size=current_extra_train_size))
        results.append({'algorithm': algo_name, 'hits': hits, 'hr': hr, 'time': spent_time, 'train_size': current_extra_train_size})

    embedding_models = [
        ('ALS', implicit.als.AlternatingLeastSquares),
        ('BPR', implicit.bpr.BayesianPersonalizedRanking),
    ]
    context_features = [('ALS', ALS_user_features), ('BPR', BPR_user_features)]
    # Factories, so every run starts from a fresh policy object.
    bandit_policies = [
        ('LinUCB', lambda: LearningPolicy.LinUCB(alpha=0.1)),
        ('LinGreedy', lambda: LearningPolicy.LinGreedy(epsilon=0.01)),
    ]

    current_extra_train_size = 0
    while current_extra_train_size <= 1:
        print(f"Current extra train size: {current_extra_train_size}")

        current_df_train = pd.concat([initial_df_train, extra_df_train[:int(len(extra_df_train) * current_extra_train_size)]])
        # Items each user already consumed in the current training data; used as exclusion filters.
        interactions_by_user = group_interactions_by_user(current_df_train)

        # -------------- Embedding models (ALS, BPR) -----------------
        for embedding_name, Model in embedding_models:
            print(f'Training {embedding_name}')
            model, sparse_matrix = train_embeddings_model(Model, current_df_train, num_users, num_items)

            print(f'Testing {embedding_name}')
            outcome = test_embeddings_model(model, sparse_matrix, df_test_for_evaluation)
            save_algo_result(embedding_name, *outcome, current_extra_train_size)

        # ------ Bandits: policy x (non-)incremental x context embeddings ------
        for policy_name, make_policy in bandit_policies:
            for mode in ('non-incremental', 'incremental'):
                for embedding_name, features in context_features:
                    algo_name = f'{policy_name} - {mode} - {embedding_name} embeddings'

                    print(f'Training {algo_name}')
                    bandit = BanditRecommender(learning_policy=make_policy(), top_k=10)
                    train(bandit, data=current_df_train, user_features=features)

                    print(f'Testing {algo_name}')
                    if mode == 'incremental':
                        outcome = test_incremental(bandit, features, df_test, df_test_for_evaluation, incremental_batch_size, interactions_by_user)
                    else:
                        outcome = test_non_incremental(bandit, features, df_test_for_evaluation, interactions_by_user)
                    save_algo_result(algo_name, *outcome, current_extra_train_size)

        # Grow the training share for the next iteration (rounded to avoid float drift).
        current_extra_train_size = round(current_extra_train_size + train_extra_increment_step_size, 2)

    save_path = 'results-v5/'
    os.makedirs(save_path, exist_ok=True)

    df_results = pd.DataFrame(results)
    df_results = df_results.astype({'hits': int, 'hr': float, 'time': float})
    df_results['test_size'] = round(test_size, 2)
    df_results['test_interactions'] = len(df_test_for_evaluation)

    df_recs = pd.concat(recs_frames)[recs_columns]

    df_results.to_csv(os.path.join(save_path, 'results.csv'), index=False)
    df_recs.to_csv(os.path.join(save_path, 'recs.csv'), index=False)

In [22]:
# Full experiment: 10% of the interactions held out for testing, initial training
# on 50% of the training partition, extra training data added in 10% steps.
test(test_size=0.1, train_initial_size=0.5, train_extra_increment_step_size=0.1)

Generating ALS embeddings


  0%|          | 0/15 [00:00<?, ?it/s]

Generating BPR embeddings


  0%|          | 0/100 [00:00<?, ?it/s]

Current extra train size: 0
Training ALS


  0%|          | 0/15 [00:00<?, ?it/s]

Testing ALS
Training BPR


  0%|          | 0/100 [00:00<?, ?it/s]

Testing BPR
Training LinUCB - non-incremental - ALS embeddings
Testing LinUCB - non-incremental - ALS embeddings
Training LinUCB - non-incremental - BPR embeddings
Testing LinUCB - non-incremental - BPR embeddings
Training LinUCB - incremental - ALS embeddings
Testing LinUCB - incremental - ALS embeddings
Training LinUCB - incremental - BPR embeddings
Testing LinUCB - incremental - BPR embeddings
Training LinGreedy - non-incremental - ALS embeddings
Testing LinGreedy - non-incremental - ALS embeddings
Training LinGreedy - non-incremental - BPR embeddings
Testing LinGreedy - non-incremental - BPR embeddings
Training LinGreedy - incremental - ALS embeddings
Testing LinGreedy - incremental - ALS embeddings
Training LinGreedy - incremental - BPR embeddings
Testing LinGreedy - incremental - BPR embeddings
Current extra train size: 0.1
Training ALS


  0%|          | 0/15 [00:00<?, ?it/s]

Testing ALS
Training BPR


  0%|          | 0/100 [00:00<?, ?it/s]

Testing BPR
Training LinUCB - non-incremental - ALS embeddings
Testing LinUCB - non-incremental - ALS embeddings
Training LinUCB - non-incremental - BPR embeddings
Testing LinUCB - non-incremental - BPR embeddings
Training LinUCB - incremental - ALS embeddings
Testing LinUCB - incremental - ALS embeddings
Training LinUCB - incremental - BPR embeddings
Testing LinUCB - incremental - BPR embeddings
Training LinGreedy - non-incremental - ALS embeddings
Testing LinGreedy - non-incremental - ALS embeddings
Training LinGreedy - non-incremental - BPR embeddings
Testing LinGreedy - non-incremental - BPR embeddings
Training LinGreedy - incremental - ALS embeddings
Testing LinGreedy - incremental - ALS embeddings
Training LinGreedy - incremental - BPR embeddings
Testing LinGreedy - incremental - BPR embeddings
Current extra train size: 0.2
Training ALS


  0%|          | 0/15 [00:00<?, ?it/s]

Testing ALS
Training BPR


  0%|          | 0/100 [00:00<?, ?it/s]

Testing BPR
Training LinUCB - non-incremental - ALS embeddings
Testing LinUCB - non-incremental - ALS embeddings
Training LinUCB - non-incremental - BPR embeddings
Testing LinUCB - non-incremental - BPR embeddings
Training LinUCB - incremental - ALS embeddings
Testing LinUCB - incremental - ALS embeddings
Training LinUCB - incremental - BPR embeddings
Testing LinUCB - incremental - BPR embeddings
Training LinGreedy - non-incremental - ALS embeddings
Testing LinGreedy - non-incremental - ALS embeddings
Training LinGreedy - non-incremental - BPR embeddings
Testing LinGreedy - non-incremental - BPR embeddings
Training LinGreedy - incremental - ALS embeddings
Testing LinGreedy - incremental - ALS embeddings
Training LinGreedy - incremental - BPR embeddings
Testing LinGreedy - incremental - BPR embeddings
Current extra train size: 0.3
Training ALS


  0%|          | 0/15 [00:00<?, ?it/s]

Testing ALS
Training BPR


  0%|          | 0/100 [00:00<?, ?it/s]

Testing BPR
Training LinUCB - non-incremental - ALS embeddings
Testing LinUCB - non-incremental - ALS embeddings
Training LinUCB - non-incremental - BPR embeddings
Testing LinUCB - non-incremental - BPR embeddings
Training LinUCB - incremental - ALS embeddings
Testing LinUCB - incremental - ALS embeddings
Training LinUCB - incremental - BPR embeddings
Testing LinUCB - incremental - BPR embeddings
Training LinGreedy - non-incremental - ALS embeddings
Testing LinGreedy - non-incremental - ALS embeddings
Training LinGreedy - non-incremental - BPR embeddings
Testing LinGreedy - non-incremental - BPR embeddings
Training LinGreedy - incremental - ALS embeddings
Testing LinGreedy - incremental - ALS embeddings
Training LinGreedy - incremental - BPR embeddings
Testing LinGreedy - incremental - BPR embeddings
Current extra train size: 0.4
Training ALS


  0%|          | 0/15 [00:00<?, ?it/s]

Testing ALS
Training BPR


  0%|          | 0/100 [00:00<?, ?it/s]

Testing BPR
Training LinUCB - non-incremental - ALS embeddings
Testing LinUCB - non-incremental - ALS embeddings
Training LinUCB - non-incremental - BPR embeddings
Testing LinUCB - non-incremental - BPR embeddings
Training LinUCB - incremental - ALS embeddings
Testing LinUCB - incremental - ALS embeddings
Training LinUCB - incremental - BPR embeddings
Testing LinUCB - incremental - BPR embeddings
Training LinGreedy - non-incremental - ALS embeddings
Testing LinGreedy - non-incremental - ALS embeddings
Training LinGreedy - non-incremental - BPR embeddings
Testing LinGreedy - non-incremental - BPR embeddings
Training LinGreedy - incremental - ALS embeddings
Testing LinGreedy - incremental - ALS embeddings
Training LinGreedy - incremental - BPR embeddings
Testing LinGreedy - incremental - BPR embeddings
Current extra train size: 0.5
Training ALS


  0%|          | 0/15 [00:00<?, ?it/s]

Testing ALS
Training BPR


  0%|          | 0/100 [00:00<?, ?it/s]

Testing BPR
Training LinUCB - non-incremental - ALS embeddings
Testing LinUCB - non-incremental - ALS embeddings
Training LinUCB - non-incremental - BPR embeddings
Testing LinUCB - non-incremental - BPR embeddings
Training LinUCB - incremental - ALS embeddings
Testing LinUCB - incremental - ALS embeddings
Training LinUCB - incremental - BPR embeddings
Testing LinUCB - incremental - BPR embeddings
Training LinGreedy - non-incremental - ALS embeddings
Testing LinGreedy - non-incremental - ALS embeddings
Training LinGreedy - non-incremental - BPR embeddings
Testing LinGreedy - non-incremental - BPR embeddings
Training LinGreedy - incremental - ALS embeddings
Testing LinGreedy - incremental - ALS embeddings
Training LinGreedy - incremental - BPR embeddings
Testing LinGreedy - incremental - BPR embeddings
Current extra train size: 0.6
Training ALS


  0%|          | 0/15 [00:00<?, ?it/s]

Testing ALS
Training BPR


  0%|          | 0/100 [00:00<?, ?it/s]

Testing BPR
Training LinUCB - non-incremental - ALS embeddings
Testing LinUCB - non-incremental - ALS embeddings
Training LinUCB - non-incremental - BPR embeddings
Testing LinUCB - non-incremental - BPR embeddings
Training LinUCB - incremental - ALS embeddings
Testing LinUCB - incremental - ALS embeddings
Training LinUCB - incremental - BPR embeddings
Testing LinUCB - incremental - BPR embeddings
Training LinGreedy - non-incremental - ALS embeddings
Testing LinGreedy - non-incremental - ALS embeddings
Training LinGreedy - non-incremental - BPR embeddings
Testing LinGreedy - non-incremental - BPR embeddings
Training LinGreedy - incremental - ALS embeddings
Testing LinGreedy - incremental - ALS embeddings
Training LinGreedy - incremental - BPR embeddings
Testing LinGreedy - incremental - BPR embeddings
Current extra train size: 0.7
Training ALS


  0%|          | 0/15 [00:00<?, ?it/s]

Testing ALS
Training BPR


  0%|          | 0/100 [00:00<?, ?it/s]

Testing BPR
Training LinUCB - non-incremental - ALS embeddings
Testing LinUCB - non-incremental - ALS embeddings
Training LinUCB - non-incremental - BPR embeddings
Testing LinUCB - non-incremental - BPR embeddings
Training LinUCB - incremental - ALS embeddings
Testing LinUCB - incremental - ALS embeddings
Training LinUCB - incremental - BPR embeddings
Testing LinUCB - incremental - BPR embeddings
Training LinGreedy - non-incremental - ALS embeddings
Testing LinGreedy - non-incremental - ALS embeddings
Training LinGreedy - non-incremental - BPR embeddings
Testing LinGreedy - non-incremental - BPR embeddings
Training LinGreedy - incremental - ALS embeddings
Testing LinGreedy - incremental - ALS embeddings
Training LinGreedy - incremental - BPR embeddings
Testing LinGreedy - incremental - BPR embeddings
Current extra train size: 0.8
Training ALS


  0%|          | 0/15 [00:00<?, ?it/s]

Testing ALS
Training BPR


  0%|          | 0/100 [00:00<?, ?it/s]

Testing BPR
Training LinUCB - non-incremental - ALS embeddings
Testing LinUCB - non-incremental - ALS embeddings
Training LinUCB - non-incremental - BPR embeddings
Testing LinUCB - non-incremental - BPR embeddings
Training LinUCB - incremental - ALS embeddings
Testing LinUCB - incremental - ALS embeddings
Training LinUCB - incremental - BPR embeddings
Testing LinUCB - incremental - BPR embeddings
Training LinGreedy - non-incremental - ALS embeddings
Testing LinGreedy - non-incremental - ALS embeddings
Training LinGreedy - non-incremental - BPR embeddings
Testing LinGreedy - non-incremental - BPR embeddings
Training LinGreedy - incremental - ALS embeddings
Testing LinGreedy - incremental - ALS embeddings
Training LinGreedy - incremental - BPR embeddings
Testing LinGreedy - incremental - BPR embeddings
Current extra train size: 0.9
Training ALS


  0%|          | 0/15 [00:00<?, ?it/s]

Testing ALS
Training BPR


  0%|          | 0/100 [00:00<?, ?it/s]

Testing BPR
Training LinUCB - non-incremental - ALS embeddings
Testing LinUCB - non-incremental - ALS embeddings
Training LinUCB - non-incremental - BPR embeddings
Testing LinUCB - non-incremental - BPR embeddings
Training LinUCB - incremental - ALS embeddings
Testing LinUCB - incremental - ALS embeddings
Training LinUCB - incremental - BPR embeddings
Testing LinUCB - incremental - BPR embeddings
Training LinGreedy - non-incremental - ALS embeddings
Testing LinGreedy - non-incremental - ALS embeddings
Training LinGreedy - non-incremental - BPR embeddings
Testing LinGreedy - non-incremental - BPR embeddings
Training LinGreedy - incremental - ALS embeddings
Testing LinGreedy - incremental - ALS embeddings
Training LinGreedy - incremental - BPR embeddings
Testing LinGreedy - incremental - BPR embeddings
Current extra train size: 1.0
Training ALS


  0%|          | 0/15 [00:00<?, ?it/s]

Testing ALS
Training BPR


  0%|          | 0/100 [00:00<?, ?it/s]

Testing BPR
Training LinUCB - non-incremental - ALS embeddings
Testing LinUCB - non-incremental - ALS embeddings
Training LinUCB - non-incremental - BPR embeddings
Testing LinUCB - non-incremental - BPR embeddings
Training LinUCB - incremental - ALS embeddings
Testing LinUCB - incremental - ALS embeddings
Training LinUCB - incremental - BPR embeddings
Testing LinUCB - incremental - BPR embeddings
Training LinGreedy - non-incremental - ALS embeddings
Testing LinGreedy - non-incremental - ALS embeddings
Training LinGreedy - non-incremental - BPR embeddings
Testing LinGreedy - non-incremental - BPR embeddings
Training LinGreedy - incremental - ALS embeddings
Testing LinGreedy - incremental - ALS embeddings
Training LinGreedy - incremental - BPR embeddings
Testing LinGreedy - incremental - BPR embeddings


In [23]:
# Load the metrics CSV written by test().
df_results = pd.read_csv('results-v5/results.csv')

In [24]:
# Hit rate against the share of extra training data, one line per algorithm;
# the figure is also saved as a standalone HTML file.
fig = px.line(df_results, x="train_size", y="hr", color='algorithm', title='HR x Train size')
fig.show()
fig.write_html('results-v5/hr_x_train_size.html')