# Testes de treinamento incremental

A ideia deste notebook é verificar se treinar os algoritmos de MAB durante as interações de teste geram alguma melhora para eles, comparando-os com os resultados de um algoritmo estático (ALS).

In [1]:
import pandas as pd
from mab2rec import BanditRecommender, LearningPolicy
from mab2rec.pipeline import train
from sklearn.preprocessing import LabelEncoder
import implicit
from scipy.sparse import csr_matrix
from implicit.nearest_neighbours import bm25_weight
import plotly.express as px
import time
import os
import math

train_data = "../data/ml100k/data_train.csv"
test_data = "../data/ml100k/data_test.csv"

In [2]:
def initial_train(df, num_users, num_items):
    FACTORS = 10
    K1 = 100
    B = 0.8

    # Cria a matriz esparsa
    sparse_matrix = csr_matrix((df['response'], (df['user_id'], df['item_id'])), shape=(num_users, num_items))
    updated_sparse_matrix = bm25_weight(sparse_matrix, K1=K1, B=B)

    print('Treinando o modelo ALS')
    ALS_model = implicit.als.AlternatingLeastSquares(factors=FACTORS, random_state=1)
    ALS_model.fit(updated_sparse_matrix)

    user_features_list = []

    for user_id in df['user_id'].unique():
        user_factors = ALS_model.user_factors[user_id]
        user_features_list.append([user_id] + list(user_factors))

    df_user_features = pd.DataFrame(user_features_list, columns=['user_id'] + [f'u{i}' for i in range(FACTORS)])

    print('Treinando o modelo LinUCB')
    linUCB_model = BanditRecommender(learning_policy=LearningPolicy.LinUCB(), top_k=10)
    train(linUCB_model, data=df, user_features=df_user_features)
    

    print('Treinando o modelo LinGreedy')
    linGreedy_model = BanditRecommender(learning_policy=LearningPolicy.LinGreedy(), top_k=10)
    train(linGreedy_model, data=df, user_features=df_user_features)

    return ALS_model, linUCB_model, linGreedy_model, df_user_features, sparse_matrix

In [3]:

def test_ALS(ALS_model, sparse_matrix, df_test):
    print('Testing ALS')
    all_recs = []

    start_time = time.time()
    hits = 0
    for i, interaction in df_test.iterrows():
        ids_recs, _ = ALS_model.recommend(userid=interaction['user_id'], user_items=sparse_matrix[interaction['user_id']], N=10)
        if interaction['item_id'] in ids_recs:
            hits += 1
        all_recs.append(ids_recs.tolist())
    
    recs_df = pd.DataFrame({
        'interaction_number': [i for i in range(len(df_test))],
        'user_id': df_test['user_id'],
        'item_id': df_test['item_id'],
        'recommendations': all_recs
    })
    

    return hits, hits/len(df_test), time.time() - start_time, recs_df

In [4]:
def test_non_incremental(mab_algo, algo_name, user_features, df_test):
    start_time = time.time()
    hits = 0

    contexts = df_test.merge(user_features, on='user_id').drop(columns=['user_id', 'item_id', 'response']).values

    recomendations = mab_algo.recommend(contexts)

    df_test = df_test.reset_index(drop=True)

    hits = 0
    for i, interaction in df_test.iterrows():
        if interaction['item_id'] in recomendations[i]:
            hits += 1
    
    recs_df = pd.DataFrame({
        'interaction_number': [i for i in range(len(df_test))],
        'user_id': df_test['user_id'],
        'item_id': df_test['item_id'],
        'recommendations': recomendations
    })

    return hits, hits/len(df_test), time.time() - start_time, recs_df

In [5]:
def test_incremental(mab_algo, algo_name, user_features, df_test, df_test_for_evaluation, batch_size):
    recs = []

    start_time = time.time()
    hits = 0

    for i in range(0, len(df_test), batch_size):
        # Fazendo recomendações para teste
        df_batch_test = df_test_for_evaluation.loc[i:i+batch_size-1]
        contexts = df_batch_test.merge(user_features, on='user_id').drop(columns=['user_id', 'item_id', 'response']).values

        if len(contexts) > 0: # Se não tiver nenhuma interação positiva, não faz sentido fazer recomendações
            recomendations = mab_algo.recommend(contexts)
            if isinstance(recomendations, list) and isinstance(recomendations[0], int):
                # Quando o contexto tem tamanho 1, a recomendação é uma lista, e não uma lista de listas
                recomendations = [recomendations]

            df_batch_test = df_batch_test.reset_index(drop=True)

            for j, interaction in df_batch_test.iterrows():
                if interaction['item_id'] in recomendations[j]:
                    hits += 1
            
            recs.extend(recomendations)
        
        # Treinando com o batch
        df_batch_train = df_test.loc[i:i+batch_size-1]
        contexts = df_batch_train.merge(user_features, on='user_id').drop(columns=['user_id', 'item_id', 'response']).values

        mab_algo.partial_fit(df_batch_train['item_id'], df_batch_train['response'], contexts)
    
    recs_df = pd.DataFrame({
        'interaction_number': [i for i in range(len(df_test_for_evaluation))],
        'user_id': df_test_for_evaluation['user_id'],
        'item_id': df_test_for_evaluation['item_id'],
        'recommendations': recs
    })

    return hits, hits/len(df_test_for_evaluation), time.time() - start_time, recs_df

In [6]:
def test(train_size, batchs_sizes, linUCBAlphas, linGreedyEpsilons):
    save_path = f'results-v2/{round(train_size * 100):02}-{round((1 - train_size) * 100):02}'
    if not os.path.exists(save_path):
        os.makedirs(save_path)
    
    results = []
    df_recs = pd.DataFrame(columns=['algorithm', 'hiperparams', 'batch_size', 'interaction_number', 'user_id', 'item_id', 'recommendations'])
    df_train = pd.read_csv(train_data)
    df_test = pd.read_csv(test_data)

    df_full = pd.concat([df_train, df_test])

    df_full['user_id'] = LabelEncoder().fit_transform(df_full['user_id'])
    df_full['item_id'] = LabelEncoder().fit_transform(df_full['item_id'])

    num_users = df_full['user_id'].nunique()
    num_items = df_full['item_id'].nunique()

    split_index = int(len(df_full) * train_size)
    df_train = df_full[:split_index]
    df_test = df_full[split_index:]

    df_test = df_test[(df_test['user_id'].isin(df_train['user_id'])) & (df_test['item_id'].isin(df_train['item_id']))]
    df_test = df_test.reset_index(drop=True)
    df_test_for_evaluation = df_test[df_test['response'] == 1]

    ALS_model, linUCB_model, linGreedy_model, df_user_features, sparse_matrix = initial_train(df_train, num_users, num_items)

    hits, hr, spent_time, df_recs_als = test_ALS(ALS_model, sparse_matrix, df_test_for_evaluation)
    df_recs_als['algorithm'] = 'ALS'
    df_recs_als['hiperparams'] = [{} for _ in range(len(df_recs_als))]
    df_recs = pd.concat([df_recs, df_recs_als])
    results.append({'algorithm': 'ALS', 'hits': hits, 'hr': hr, 'time': spent_time, 'hiperparams': {}})

    print('\nTesting non-incremental\n')

    for alpha in linUCBAlphas:
        print(f'Treinando o modelo LinUCB com alpha = {alpha}')

        linUCB_model = BanditRecommender(learning_policy=LearningPolicy.LinUCB(alpha=alpha), top_k=10)
        train(linUCB_model, data=df_train, user_features=df_user_features)

        hits, hr, spent_time, df_recs_linUCB = test_non_incremental(linUCB_model, 'LinUCB', df_user_features, df_test_for_evaluation)
        df_recs_linUCB['algorithm'] = 'LinUCB (non_incremental)'
        df_recs_linUCB['hiperparams'] = [{'alpha': alpha} for _ in range(len(df_recs_linUCB))]
        df_recs = pd.concat([df_recs, df_recs_linUCB])
        results.append({'algorithm': 'LinUCB (non_incremental)', 'hits': hits, 'hr': hr, 'time': spent_time, 'hiperparams': {'alpha': alpha}})

    for epsilon in linGreedyEpsilons:
        print(f'Treinando o modelo LinGreedy com epsilon = {epsilon}')

        linGreedy_model = BanditRecommender(learning_policy=LearningPolicy.LinGreedy(epsilon=epsilon), top_k=10)
        train(linGreedy_model, data=df_train, user_features=df_user_features)

        hits, hr, spent_time, df_recs_linGreedy = test_non_incremental(linGreedy_model, 'LinGreedy', df_user_features, df_test_for_evaluation)
        df_recs_linGreedy['algorithm'] = 'LinGreedy (non_incremental)'
        df_recs_linGreedy['hiperparams'] = [{'epsilon': epsilon} for _ in range(len(df_recs_linGreedy))]
        df_recs = pd.concat([df_recs, df_recs_linGreedy])
        results.append({'algorithm': 'LinGreedy (non_incremental)', 'hits': hits, 'hr': hr, 'time': spent_time, 'hiperparams': {'epsilon': epsilon}})

    print('\nTesting incremental\n')
    
    for batch_size in batchs_sizes:
        print('Testing batch size: ', batch_size)

        for alpha in linUCBAlphas:
            print(f'Treinando o modelo LinUCB com alpha = {alpha}')

            linUCB_model = BanditRecommender(learning_policy=LearningPolicy.LinUCB(alpha=alpha), top_k=10)
            train(linUCB_model, data=df_train, user_features=df_user_features)

            hits, hr, spent_time, df_recs_linUCB = test_incremental(linUCB_model, 'LinUCB', df_user_features, df_test, df_test_for_evaluation, batch_size)
            df_recs_linUCB['algorithm'] = 'LinUCB (incremental)'
            df_recs_linUCB['hiperparams'] = [{'alpha': alpha} for _ in range(len(df_recs_linUCB))]
            df_recs_linUCB['batch_size'] = batch_size
            df_recs = pd.concat([df_recs, df_recs_linUCB])
            results.append({'algorithm': 'LinUCB (incremental)', 'hits': hits, 'hr': hr, 'time': spent_time, 'batch_size': batch_size, 'hiperparams': {'alpha': alpha}})

        for epsilon in linGreedyEpsilons:
            print(f'Treinando o modelo LinGreedy com epsilon = {epsilon}')

            linGreedy_model = BanditRecommender(learning_policy=LearningPolicy.LinGreedy(epsilon=epsilon), top_k=10)
            train(linGreedy_model, data=df_train, user_features=df_user_features)

            hits, hr, spent_time, df_recs_linGreedy = test_incremental(linGreedy_model, 'LinGreedy', df_user_features, df_test, df_test_for_evaluation, batch_size)
            df_recs_linGreedy['algorithm'] = 'LinGreedy (incremental)'
            df_recs_linGreedy['hiperparams'] = [{'epsilon': epsilon} for _ in range(len(df_recs_linGreedy))]
            df_recs_linGreedy['batch_size'] = batch_size
            df_recs = pd.concat([df_recs, df_recs_linGreedy])
            results.append({'algorithm': 'LinGreedy (incremental)', 'hits': hits, 'hr': hr, 'time': spent_time, 'batch_size': batch_size, 'hiperparams': {'epsilon': epsilon}})
    
    df_results = pd.DataFrame(results)
    df_results = df_results.astype({'hits': int, 'hr': float, 'time': float})
    df_results['test_size'] = round(1 - train_size, 2)
    df_results['test_interactions'] = len(df_test_for_evaluation)

    df_results.to_csv(f'{save_path}/results.csv', index=False)
    df_recs.to_csv(f'{save_path}/recs.csv', index=False)

In [10]:
train_sizes = [0.95, 0.90, 0.85, 0.80, 0.75, 0.70, 0.65, 0.60, 0.55, 0.50, 0.45, 0.40, 0.35, 0.30, 0.25, 0.20, 0.15, 0.10, 0.05]
batch_sizes = [10, 100, 1000]
linUCBAlphas = [0.1, 0.5, 1, 1.5, 2]
linGreedyEpsilons = [0.01, 0.05, 0.1, 0.25, 0.5]

for train_size in train_sizes:
    test(
        train_size=train_size, 
        batchs_sizes=batch_sizes,
        linUCBAlphas=linUCBAlphas,
        linGreedyEpsilons=linGreedyEpsilons
    )

Treinando o modelo ALS




  0%|          | 0/15 [00:00<?, ?it/s]

Treinando o modelo LinUCB
Treinando o modelo LinGreedy
Testing ALS

Testing non-incremental

Treinando o modelo LinUCB com alpha = 0.1
Treinando o modelo LinUCB com alpha = 0.5
Treinando o modelo LinUCB com alpha = 1
Treinando o modelo LinUCB com alpha = 1.5
Treinando o modelo LinUCB com alpha = 2
Treinando o modelo LinGreedy com epsilon = 0.01
Treinando o modelo LinGreedy com epsilon = 0.05
Treinando o modelo LinGreedy com epsilon = 0.1
Treinando o modelo LinGreedy com epsilon = 0.25
Treinando o modelo LinGreedy com epsilon = 0.5

Testing incremental

Testing batch size:  10
Treinando o modelo LinUCB com alpha = 0.1
Treinando o modelo LinUCB com alpha = 0.5
Treinando o modelo LinUCB com alpha = 1
Treinando o modelo LinUCB com alpha = 1.5
Treinando o modelo LinUCB com alpha = 2
Treinando o modelo LinGreedy com epsilon = 0.01
Treinando o modelo LinGreedy com epsilon = 0.05
Treinando o modelo LinGreedy com epsilon = 0.1
Treinando o modelo LinGreedy com epsilon = 0.25
Treinando o modelo L



  0%|          | 0/15 [00:00<?, ?it/s]

Treinando o modelo LinUCB
Treinando o modelo LinGreedy
Testing ALS

Testing non-incremental

Treinando o modelo LinUCB com alpha = 0.1
Treinando o modelo LinUCB com alpha = 0.5
Treinando o modelo LinUCB com alpha = 1
Treinando o modelo LinUCB com alpha = 1.5
Treinando o modelo LinUCB com alpha = 2
Treinando o modelo LinGreedy com epsilon = 0.01
Treinando o modelo LinGreedy com epsilon = 0.05
Treinando o modelo LinGreedy com epsilon = 0.1
Treinando o modelo LinGreedy com epsilon = 0.25
Treinando o modelo LinGreedy com epsilon = 0.5

Testing incremental

Testing batch size:  10
Treinando o modelo LinUCB com alpha = 0.1
Treinando o modelo LinUCB com alpha = 0.5
Treinando o modelo LinUCB com alpha = 1
Treinando o modelo LinUCB com alpha = 1.5
Treinando o modelo LinUCB com alpha = 2
Treinando o modelo LinGreedy com epsilon = 0.01
Treinando o modelo LinGreedy com epsilon = 0.05
Treinando o modelo LinGreedy com epsilon = 0.1
Treinando o modelo LinGreedy com epsilon = 0.25
Treinando o modelo L



  0%|          | 0/15 [00:00<?, ?it/s]

Treinando o modelo LinUCB
Treinando o modelo LinGreedy
Testing ALS

Testing non-incremental

Treinando o modelo LinUCB com alpha = 0.1
Treinando o modelo LinUCB com alpha = 0.5
Treinando o modelo LinUCB com alpha = 1
Treinando o modelo LinUCB com alpha = 1.5
Treinando o modelo LinUCB com alpha = 2
Treinando o modelo LinGreedy com epsilon = 0.01
Treinando o modelo LinGreedy com epsilon = 0.05
Treinando o modelo LinGreedy com epsilon = 0.1
Treinando o modelo LinGreedy com epsilon = 0.25
Treinando o modelo LinGreedy com epsilon = 0.5

Testing incremental

Testing batch size:  10
Treinando o modelo LinUCB com alpha = 0.1
Treinando o modelo LinUCB com alpha = 0.5
Treinando o modelo LinUCB com alpha = 1
Treinando o modelo LinUCB com alpha = 1.5
Treinando o modelo LinUCB com alpha = 2
Treinando o modelo LinGreedy com epsilon = 0.01
Treinando o modelo LinGreedy com epsilon = 0.05
Treinando o modelo LinGreedy com epsilon = 0.1
Treinando o modelo LinGreedy com epsilon = 0.25
Treinando o modelo L



  0%|          | 0/15 [00:00<?, ?it/s]

Treinando o modelo LinUCB
Treinando o modelo LinGreedy
Testing ALS

Testing non-incremental

Treinando o modelo LinUCB com alpha = 0.1
Treinando o modelo LinUCB com alpha = 0.5
Treinando o modelo LinUCB com alpha = 1
Treinando o modelo LinUCB com alpha = 1.5
Treinando o modelo LinUCB com alpha = 2
Treinando o modelo LinGreedy com epsilon = 0.01
Treinando o modelo LinGreedy com epsilon = 0.05
Treinando o modelo LinGreedy com epsilon = 0.1
Treinando o modelo LinGreedy com epsilon = 0.25
Treinando o modelo LinGreedy com epsilon = 0.5

Testing incremental

Testing batch size:  10
Treinando o modelo LinUCB com alpha = 0.1
Treinando o modelo LinUCB com alpha = 0.5
Treinando o modelo LinUCB com alpha = 1
Treinando o modelo LinUCB com alpha = 1.5
Treinando o modelo LinUCB com alpha = 2
Treinando o modelo LinGreedy com epsilon = 0.01
Treinando o modelo LinGreedy com epsilon = 0.05
Treinando o modelo LinGreedy com epsilon = 0.1
Treinando o modelo LinGreedy com epsilon = 0.25
Treinando o modelo L



  0%|          | 0/15 [00:00<?, ?it/s]

Treinando o modelo LinUCB
Treinando o modelo LinGreedy
Testing ALS

Testing non-incremental

Treinando o modelo LinUCB com alpha = 0.1
Treinando o modelo LinUCB com alpha = 0.5
Treinando o modelo LinUCB com alpha = 1
Treinando o modelo LinUCB com alpha = 1.5
Treinando o modelo LinUCB com alpha = 2
Treinando o modelo LinGreedy com epsilon = 0.01
Treinando o modelo LinGreedy com epsilon = 0.05
Treinando o modelo LinGreedy com epsilon = 0.1
Treinando o modelo LinGreedy com epsilon = 0.25
Treinando o modelo LinGreedy com epsilon = 0.5

Testing incremental

Testing batch size:  10
Treinando o modelo LinUCB com alpha = 0.1
Treinando o modelo LinUCB com alpha = 0.5
Treinando o modelo LinUCB com alpha = 1
Treinando o modelo LinUCB com alpha = 1.5
Treinando o modelo LinUCB com alpha = 2
Treinando o modelo LinGreedy com epsilon = 0.01
Treinando o modelo LinGreedy com epsilon = 0.05
Treinando o modelo LinGreedy com epsilon = 0.1
Treinando o modelo LinGreedy com epsilon = 0.25
Treinando o modelo L



  0%|          | 0/15 [00:00<?, ?it/s]

Treinando o modelo LinUCB
Treinando o modelo LinGreedy
Testing ALS

Testing non-incremental

Treinando o modelo LinUCB com alpha = 0.1
Treinando o modelo LinUCB com alpha = 0.5
Treinando o modelo LinUCB com alpha = 1
Treinando o modelo LinUCB com alpha = 1.5
Treinando o modelo LinUCB com alpha = 2
Treinando o modelo LinGreedy com epsilon = 0.01
Treinando o modelo LinGreedy com epsilon = 0.05
Treinando o modelo LinGreedy com epsilon = 0.1
Treinando o modelo LinGreedy com epsilon = 0.25
Treinando o modelo LinGreedy com epsilon = 0.5

Testing incremental

Testing batch size:  10
Treinando o modelo LinUCB com alpha = 0.1
Treinando o modelo LinUCB com alpha = 0.5
Treinando o modelo LinUCB com alpha = 1
Treinando o modelo LinUCB com alpha = 1.5
Treinando o modelo LinUCB com alpha = 2
Treinando o modelo LinGreedy com epsilon = 0.01
Treinando o modelo LinGreedy com epsilon = 0.05
Treinando o modelo LinGreedy com epsilon = 0.1
Treinando o modelo LinGreedy com epsilon = 0.25
Treinando o modelo L



  0%|          | 0/15 [00:00<?, ?it/s]

Treinando o modelo LinUCB
Treinando o modelo LinGreedy
Testing ALS

Testing non-incremental

Treinando o modelo LinUCB com alpha = 0.1
Treinando o modelo LinUCB com alpha = 0.5
Treinando o modelo LinUCB com alpha = 1
Treinando o modelo LinUCB com alpha = 1.5
Treinando o modelo LinUCB com alpha = 2
Treinando o modelo LinGreedy com epsilon = 0.01
Treinando o modelo LinGreedy com epsilon = 0.05
Treinando o modelo LinGreedy com epsilon = 0.1
Treinando o modelo LinGreedy com epsilon = 0.25
Treinando o modelo LinGreedy com epsilon = 0.5

Testing incremental

Testing batch size:  10
Treinando o modelo LinUCB com alpha = 0.1
Treinando o modelo LinUCB com alpha = 0.5
Treinando o modelo LinUCB com alpha = 1
Treinando o modelo LinUCB com alpha = 1.5
Treinando o modelo LinUCB com alpha = 2
Treinando o modelo LinGreedy com epsilon = 0.01
Treinando o modelo LinGreedy com epsilon = 0.05
Treinando o modelo LinGreedy com epsilon = 0.1
Treinando o modelo LinGreedy com epsilon = 0.25
Treinando o modelo L



  0%|          | 0/15 [00:00<?, ?it/s]

Treinando o modelo LinUCB
Treinando o modelo LinGreedy
Testing ALS

Testing non-incremental

Treinando o modelo LinUCB com alpha = 0.1
Treinando o modelo LinUCB com alpha = 0.5
Treinando o modelo LinUCB com alpha = 1
Treinando o modelo LinUCB com alpha = 1.5
Treinando o modelo LinUCB com alpha = 2
Treinando o modelo LinGreedy com epsilon = 0.01
Treinando o modelo LinGreedy com epsilon = 0.05
Treinando o modelo LinGreedy com epsilon = 0.1
Treinando o modelo LinGreedy com epsilon = 0.25
Treinando o modelo LinGreedy com epsilon = 0.5

Testing incremental

Testing batch size:  10
Treinando o modelo LinUCB com alpha = 0.1
Treinando o modelo LinUCB com alpha = 0.5
Treinando o modelo LinUCB com alpha = 1
Treinando o modelo LinUCB com alpha = 1.5
Treinando o modelo LinUCB com alpha = 2
Treinando o modelo LinGreedy com epsilon = 0.01
Treinando o modelo LinGreedy com epsilon = 0.05
Treinando o modelo LinGreedy com epsilon = 0.1
Treinando o modelo LinGreedy com epsilon = 0.25
Treinando o modelo L



  0%|          | 0/15 [00:00<?, ?it/s]

Treinando o modelo LinUCB
Treinando o modelo LinGreedy
Testing ALS

Testing non-incremental

Treinando o modelo LinUCB com alpha = 0.1
Treinando o modelo LinUCB com alpha = 0.5
Treinando o modelo LinUCB com alpha = 1
Treinando o modelo LinUCB com alpha = 1.5
Treinando o modelo LinUCB com alpha = 2
Treinando o modelo LinGreedy com epsilon = 0.01
Treinando o modelo LinGreedy com epsilon = 0.05
Treinando o modelo LinGreedy com epsilon = 0.1
Treinando o modelo LinGreedy com epsilon = 0.25
Treinando o modelo LinGreedy com epsilon = 0.5

Testing incremental

Testing batch size:  10
Treinando o modelo LinUCB com alpha = 0.1
Treinando o modelo LinUCB com alpha = 0.5
Treinando o modelo LinUCB com alpha = 1
Treinando o modelo LinUCB com alpha = 1.5
Treinando o modelo LinUCB com alpha = 2
Treinando o modelo LinGreedy com epsilon = 0.01
Treinando o modelo LinGreedy com epsilon = 0.05
Treinando o modelo LinGreedy com epsilon = 0.1
Treinando o modelo LinGreedy com epsilon = 0.25
Treinando o modelo L



  0%|          | 0/15 [00:00<?, ?it/s]

Treinando o modelo LinUCB
Treinando o modelo LinGreedy
Testing ALS

Testing non-incremental

Treinando o modelo LinUCB com alpha = 0.1
Treinando o modelo LinUCB com alpha = 0.5
Treinando o modelo LinUCB com alpha = 1
Treinando o modelo LinUCB com alpha = 1.5
Treinando o modelo LinUCB com alpha = 2
Treinando o modelo LinGreedy com epsilon = 0.01
Treinando o modelo LinGreedy com epsilon = 0.05
Treinando o modelo LinGreedy com epsilon = 0.1
Treinando o modelo LinGreedy com epsilon = 0.25
Treinando o modelo LinGreedy com epsilon = 0.5

Testing incremental

Testing batch size:  10
Treinando o modelo LinUCB com alpha = 0.1
Treinando o modelo LinUCB com alpha = 0.5
Treinando o modelo LinUCB com alpha = 1
Treinando o modelo LinUCB com alpha = 1.5
Treinando o modelo LinUCB com alpha = 2
Treinando o modelo LinGreedy com epsilon = 0.01
Treinando o modelo LinGreedy com epsilon = 0.05
Treinando o modelo LinGreedy com epsilon = 0.1
Treinando o modelo LinGreedy com epsilon = 0.25
Treinando o modelo L



  0%|          | 0/15 [00:00<?, ?it/s]

Treinando o modelo LinUCB
Treinando o modelo LinGreedy
Testing ALS

Testing non-incremental

Treinando o modelo LinUCB com alpha = 0.1
Treinando o modelo LinUCB com alpha = 0.5
Treinando o modelo LinUCB com alpha = 1
Treinando o modelo LinUCB com alpha = 1.5
Treinando o modelo LinUCB com alpha = 2
Treinando o modelo LinGreedy com epsilon = 0.01
Treinando o modelo LinGreedy com epsilon = 0.05
Treinando o modelo LinGreedy com epsilon = 0.1
Treinando o modelo LinGreedy com epsilon = 0.25
Treinando o modelo LinGreedy com epsilon = 0.5

Testing incremental

Testing batch size:  10
Treinando o modelo LinUCB com alpha = 0.1
Treinando o modelo LinUCB com alpha = 0.5
Treinando o modelo LinUCB com alpha = 1
Treinando o modelo LinUCB com alpha = 1.5
Treinando o modelo LinUCB com alpha = 2
Treinando o modelo LinGreedy com epsilon = 0.01
Treinando o modelo LinGreedy com epsilon = 0.05
Treinando o modelo LinGreedy com epsilon = 0.1
Treinando o modelo LinGreedy com epsilon = 0.25
Treinando o modelo L



  0%|          | 0/15 [00:00<?, ?it/s]

Treinando o modelo LinUCB
Treinando o modelo LinGreedy
Testing ALS

Testing non-incremental

Treinando o modelo LinUCB com alpha = 0.1
Treinando o modelo LinUCB com alpha = 0.5
Treinando o modelo LinUCB com alpha = 1
Treinando o modelo LinUCB com alpha = 1.5
Treinando o modelo LinUCB com alpha = 2
Treinando o modelo LinGreedy com epsilon = 0.01
Treinando o modelo LinGreedy com epsilon = 0.05
Treinando o modelo LinGreedy com epsilon = 0.1
Treinando o modelo LinGreedy com epsilon = 0.25
Treinando o modelo LinGreedy com epsilon = 0.5

Testing incremental

Testing batch size:  10
Treinando o modelo LinUCB com alpha = 0.1
Treinando o modelo LinUCB com alpha = 0.5
Treinando o modelo LinUCB com alpha = 1
Treinando o modelo LinUCB com alpha = 1.5
Treinando o modelo LinUCB com alpha = 2
Treinando o modelo LinGreedy com epsilon = 0.01
Treinando o modelo LinGreedy com epsilon = 0.05
Treinando o modelo LinGreedy com epsilon = 0.1
Treinando o modelo LinGreedy com epsilon = 0.25
Treinando o modelo L



  0%|          | 0/15 [00:00<?, ?it/s]

Treinando o modelo LinUCB
Treinando o modelo LinGreedy
Testing ALS

Testing non-incremental

Treinando o modelo LinUCB com alpha = 0.1
Treinando o modelo LinUCB com alpha = 0.5
Treinando o modelo LinUCB com alpha = 1
Treinando o modelo LinUCB com alpha = 1.5
Treinando o modelo LinUCB com alpha = 2
Treinando o modelo LinGreedy com epsilon = 0.01
Treinando o modelo LinGreedy com epsilon = 0.05
Treinando o modelo LinGreedy com epsilon = 0.1
Treinando o modelo LinGreedy com epsilon = 0.25
Treinando o modelo LinGreedy com epsilon = 0.5

Testing incremental

Testing batch size:  10
Treinando o modelo LinUCB com alpha = 0.1
Treinando o modelo LinUCB com alpha = 0.5
Treinando o modelo LinUCB com alpha = 1
Treinando o modelo LinUCB com alpha = 1.5
Treinando o modelo LinUCB com alpha = 2
Treinando o modelo LinGreedy com epsilon = 0.01
Treinando o modelo LinGreedy com epsilon = 0.05
Treinando o modelo LinGreedy com epsilon = 0.1
Treinando o modelo LinGreedy com epsilon = 0.25
Treinando o modelo L



  0%|          | 0/15 [00:00<?, ?it/s]

Treinando o modelo LinUCB
Treinando o modelo LinGreedy
Testing ALS

Testing non-incremental

Treinando o modelo LinUCB com alpha = 0.1
Treinando o modelo LinUCB com alpha = 0.5
Treinando o modelo LinUCB com alpha = 1
Treinando o modelo LinUCB com alpha = 1.5
Treinando o modelo LinUCB com alpha = 2
Treinando o modelo LinGreedy com epsilon = 0.01
Treinando o modelo LinGreedy com epsilon = 0.05
Treinando o modelo LinGreedy com epsilon = 0.1
Treinando o modelo LinGreedy com epsilon = 0.25
Treinando o modelo LinGreedy com epsilon = 0.5

Testing incremental

Testing batch size:  10
Treinando o modelo LinUCB com alpha = 0.1
Treinando o modelo LinUCB com alpha = 0.5
Treinando o modelo LinUCB com alpha = 1
Treinando o modelo LinUCB com alpha = 1.5
Treinando o modelo LinUCB com alpha = 2
Treinando o modelo LinGreedy com epsilon = 0.01
Treinando o modelo LinGreedy com epsilon = 0.05
Treinando o modelo LinGreedy com epsilon = 0.1
Treinando o modelo LinGreedy com epsilon = 0.25
Treinando o modelo L



  0%|          | 0/15 [00:00<?, ?it/s]

Treinando o modelo LinUCB
Treinando o modelo LinGreedy
Testing ALS

Testing non-incremental

Treinando o modelo LinUCB com alpha = 0.1
Treinando o modelo LinUCB com alpha = 0.5
Treinando o modelo LinUCB com alpha = 1
Treinando o modelo LinUCB com alpha = 1.5
Treinando o modelo LinUCB com alpha = 2
Treinando o modelo LinGreedy com epsilon = 0.01
Treinando o modelo LinGreedy com epsilon = 0.05
Treinando o modelo LinGreedy com epsilon = 0.1
Treinando o modelo LinGreedy com epsilon = 0.25
Treinando o modelo LinGreedy com epsilon = 0.5

Testing incremental

Testing batch size:  10
Treinando o modelo LinUCB com alpha = 0.1
Treinando o modelo LinUCB com alpha = 0.5
Treinando o modelo LinUCB com alpha = 1
Treinando o modelo LinUCB com alpha = 1.5
Treinando o modelo LinUCB com alpha = 2
Treinando o modelo LinGreedy com epsilon = 0.01
Treinando o modelo LinGreedy com epsilon = 0.05
Treinando o modelo LinGreedy com epsilon = 0.1
Treinando o modelo LinGreedy com epsilon = 0.25
Treinando o modelo L



  0%|          | 0/15 [00:00<?, ?it/s]

Treinando o modelo LinUCB
Treinando o modelo LinGreedy
Testing ALS

Testing non-incremental

Treinando o modelo LinUCB com alpha = 0.1
Treinando o modelo LinUCB com alpha = 0.5
Treinando o modelo LinUCB com alpha = 1
Treinando o modelo LinUCB com alpha = 1.5
Treinando o modelo LinUCB com alpha = 2
Treinando o modelo LinGreedy com epsilon = 0.01
Treinando o modelo LinGreedy com epsilon = 0.05
Treinando o modelo LinGreedy com epsilon = 0.1
Treinando o modelo LinGreedy com epsilon = 0.25
Treinando o modelo LinGreedy com epsilon = 0.5

Testing incremental

Testing batch size:  10
Treinando o modelo LinUCB com alpha = 0.1
Treinando o modelo LinUCB com alpha = 0.5
Treinando o modelo LinUCB com alpha = 1
Treinando o modelo LinUCB com alpha = 1.5
Treinando o modelo LinUCB com alpha = 2
Treinando o modelo LinGreedy com epsilon = 0.01
Treinando o modelo LinGreedy com epsilon = 0.05
Treinando o modelo LinGreedy com epsilon = 0.1
Treinando o modelo LinGreedy com epsilon = 0.25
Treinando o modelo L



  0%|          | 0/15 [00:00<?, ?it/s]

Treinando o modelo LinUCB
Treinando o modelo LinGreedy
Testing ALS

Testing non-incremental

Treinando o modelo LinUCB com alpha = 0.1
Treinando o modelo LinUCB com alpha = 0.5
Treinando o modelo LinUCB com alpha = 1
Treinando o modelo LinUCB com alpha = 1.5
Treinando o modelo LinUCB com alpha = 2
Treinando o modelo LinGreedy com epsilon = 0.01
Treinando o modelo LinGreedy com epsilon = 0.05
Treinando o modelo LinGreedy com epsilon = 0.1
Treinando o modelo LinGreedy com epsilon = 0.25
Treinando o modelo LinGreedy com epsilon = 0.5

Testing incremental

Testing batch size:  10
Treinando o modelo LinUCB com alpha = 0.1
Treinando o modelo LinUCB com alpha = 0.5
Treinando o modelo LinUCB com alpha = 1
Treinando o modelo LinUCB com alpha = 1.5
Treinando o modelo LinUCB com alpha = 2
Treinando o modelo LinGreedy com epsilon = 0.01
Treinando o modelo LinGreedy com epsilon = 0.05
Treinando o modelo LinGreedy com epsilon = 0.1
Treinando o modelo LinGreedy com epsilon = 0.25
Treinando o modelo L



  0%|          | 0/15 [00:00<?, ?it/s]

Treinando o modelo LinUCB
Treinando o modelo LinGreedy
Testing ALS

Testing non-incremental

Treinando o modelo LinUCB com alpha = 0.1
Treinando o modelo LinUCB com alpha = 0.5
Treinando o modelo LinUCB com alpha = 1
Treinando o modelo LinUCB com alpha = 1.5
Treinando o modelo LinUCB com alpha = 2
Treinando o modelo LinGreedy com epsilon = 0.01
Treinando o modelo LinGreedy com epsilon = 0.05
Treinando o modelo LinGreedy com epsilon = 0.1
Treinando o modelo LinGreedy com epsilon = 0.25
Treinando o modelo LinGreedy com epsilon = 0.5

Testing incremental

Testing batch size:  10
Treinando o modelo LinUCB com alpha = 0.1
Treinando o modelo LinUCB com alpha = 0.5
Treinando o modelo LinUCB com alpha = 1
Treinando o modelo LinUCB com alpha = 1.5
Treinando o modelo LinUCB com alpha = 2
Treinando o modelo LinGreedy com epsilon = 0.01
Treinando o modelo LinGreedy com epsilon = 0.05
Treinando o modelo LinGreedy com epsilon = 0.1
Treinando o modelo LinGreedy com epsilon = 0.25
Treinando o modelo L



  0%|          | 0/15 [00:00<?, ?it/s]

Treinando o modelo LinUCB
Treinando o modelo LinGreedy
Testing ALS

Testing non-incremental

Treinando o modelo LinUCB com alpha = 0.1
Treinando o modelo LinUCB com alpha = 0.5
Treinando o modelo LinUCB com alpha = 1
Treinando o modelo LinUCB com alpha = 1.5
Treinando o modelo LinUCB com alpha = 2
Treinando o modelo LinGreedy com epsilon = 0.01
Treinando o modelo LinGreedy com epsilon = 0.05
Treinando o modelo LinGreedy com epsilon = 0.1
Treinando o modelo LinGreedy com epsilon = 0.25
Treinando o modelo LinGreedy com epsilon = 0.5

Testing incremental

Testing batch size:  10
Treinando o modelo LinUCB com alpha = 0.1
Treinando o modelo LinUCB com alpha = 0.5
Treinando o modelo LinUCB com alpha = 1
Treinando o modelo LinUCB com alpha = 1.5
Treinando o modelo LinUCB com alpha = 2
Treinando o modelo LinGreedy com epsilon = 0.01
Treinando o modelo LinGreedy com epsilon = 0.05
Treinando o modelo LinGreedy com epsilon = 0.1
Treinando o modelo LinGreedy com epsilon = 0.25
Treinando o modelo L

In [11]:
all_dfs = []

for train_size in train_sizes:
    save_path = f'results-v2/{round(train_size * 100):02}-{round((1 - train_size) * 100):02}/results.csv'
    all_dfs.append(pd.read_csv(save_path))
    
df_results = pd.concat(all_dfs)
df_results

Unnamed: 0,algorithm,hits,hr,time,hiperparams,batch_size,test_size,test_interactions
0,ALS,46,0.153846,0.121034,{},,0.05,299
1,LinUCB (non_incremental),22,0.073579,0.084711,{'alpha': 0.1},,0.05,299
2,LinUCB (non_incremental),21,0.070234,0.090569,{'alpha': 0.5},,0.05,299
3,LinUCB (non_incremental),22,0.073579,0.102270,{'alpha': 1},,0.05,299
4,LinUCB (non_incremental),19,0.063545,0.088679,{'alpha': 1.5},,0.05,299
...,...,...,...,...,...,...,...,...
36,LinGreedy (incremental),621,0.075346,4.632085,{'epsilon': 0.01},1000.0,0.95,8242
37,LinGreedy (incremental),616,0.074739,5.307563,{'epsilon': 0.05},1000.0,0.95,8242
38,LinGreedy (incremental),602,0.073041,4.762714,{'epsilon': 0.1},1000.0,0.95,8242
39,LinGreedy (incremental),561,0.068066,5.075955,{'epsilon': 0.25},1000.0,0.95,8242


In [12]:
df_results.to_csv('results-v2/concat_results.csv', index=False)

In [13]:
df_results_incremental = df_results.dropna()
df_results_incremental

Unnamed: 0,algorithm,hits,hr,time,hiperparams,batch_size,test_size,test_interactions
11,LinUCB (incremental),17,0.056856,9.878619,{'alpha': 0.1},10.0,0.05,299
12,LinUCB (incremental),20,0.066890,11.607202,{'alpha': 0.5},10.0,0.05,299
13,LinUCB (incremental),19,0.063545,10.025658,{'alpha': 1},10.0,0.05,299
14,LinUCB (incremental),18,0.060201,9.174750,{'alpha': 1.5},10.0,0.05,299
15,LinUCB (incremental),17,0.056856,9.066746,{'alpha': 2},10.0,0.05,299
...,...,...,...,...,...,...,...,...
36,LinGreedy (incremental),621,0.075346,4.632085,{'epsilon': 0.01},1000.0,0.95,8242
37,LinGreedy (incremental),616,0.074739,5.307563,{'epsilon': 0.05},1000.0,0.95,8242
38,LinGreedy (incremental),602,0.073041,4.762714,{'epsilon': 0.1},1000.0,0.95,8242
39,LinGreedy (incremental),561,0.068066,5.075955,{'epsilon': 0.25},1000.0,0.95,8242


In [14]:
df_results_incremental['algo_batch_params'] = df_results_incremental['algorithm'] + ' - ' + df_results_incremental['batch_size'].astype(str) + ' - ' + df_results_incremental['hiperparams'].apply(str)
df_results_incremental

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df_results_incremental['algo_batch_params'] = df_results_incremental['algorithm'] + ' - ' + df_results_incremental['batch_size'].astype(str) + ' - ' + df_results_incremental['hiperparams'].apply(str)


Unnamed: 0,algorithm,hits,hr,time,hiperparams,batch_size,test_size,test_interactions,algo_batch_params
11,LinUCB (incremental),17,0.056856,9.878619,{'alpha': 0.1},10.0,0.05,299,LinUCB (incremental) - 10.0 - {'alpha': 0.1}
12,LinUCB (incremental),20,0.066890,11.607202,{'alpha': 0.5},10.0,0.05,299,LinUCB (incremental) - 10.0 - {'alpha': 0.5}
13,LinUCB (incremental),19,0.063545,10.025658,{'alpha': 1},10.0,0.05,299,LinUCB (incremental) - 10.0 - {'alpha': 1}
14,LinUCB (incremental),18,0.060201,9.174750,{'alpha': 1.5},10.0,0.05,299,LinUCB (incremental) - 10.0 - {'alpha': 1.5}
15,LinUCB (incremental),17,0.056856,9.066746,{'alpha': 2},10.0,0.05,299,LinUCB (incremental) - 10.0 - {'alpha': 2}
...,...,...,...,...,...,...,...,...,...
36,LinGreedy (incremental),621,0.075346,4.632085,{'epsilon': 0.01},1000.0,0.95,8242,LinGreedy (incremental) - 1000.0 - {'epsilon':...
37,LinGreedy (incremental),616,0.074739,5.307563,{'epsilon': 0.05},1000.0,0.95,8242,LinGreedy (incremental) - 1000.0 - {'epsilon':...
38,LinGreedy (incremental),602,0.073041,4.762714,{'epsilon': 0.1},1000.0,0.95,8242,LinGreedy (incremental) - 1000.0 - {'epsilon':...
39,LinGreedy (incremental),561,0.068066,5.075955,{'epsilon': 0.25},1000.0,0.95,8242,LinGreedy (incremental) - 1000.0 - {'epsilon':...


In [23]:
fig = px.line(df_results_incremental, x="test_size", y="hr", color='algo_batch_params', title='HR x Test size (incremental batch size)')
fig.show()
fig.write_html('results-v2/hr_x_test_size_incremental_batch_size.html')

In [16]:
df_results_non_incremental = df_results[df_results['batch_size'].isna()]
df_results_non_incremental = df_results_non_incremental[df_results_non_incremental['algorithm'] != 'ALS']
df_results_non_incremental

Unnamed: 0,algorithm,hits,hr,time,hiperparams,batch_size,test_size,test_interactions
1,LinUCB (non_incremental),22,0.073579,0.084711,{'alpha': 0.1},,0.05,299
2,LinUCB (non_incremental),21,0.070234,0.090569,{'alpha': 0.5},,0.05,299
3,LinUCB (non_incremental),22,0.073579,0.102270,{'alpha': 1},,0.05,299
4,LinUCB (non_incremental),19,0.063545,0.088679,{'alpha': 1.5},,0.05,299
5,LinUCB (non_incremental),17,0.056856,0.080642,{'alpha': 2},,0.05,299
...,...,...,...,...,...,...,...,...
6,LinGreedy (non_incremental),579,0.070250,1.512720,{'epsilon': 0.01},,0.95,8242
7,LinGreedy (non_incremental),566,0.068673,2.340154,{'epsilon': 0.05},,0.95,8242
8,LinGreedy (non_incremental),556,0.067459,1.864986,{'epsilon': 0.1},,0.95,8242
9,LinGreedy (non_incremental),521,0.063213,1.936434,{'epsilon': 0.25},,0.95,8242


In [24]:
df_results_non_incremental['algo_params'] = df_results_non_incremental['algorithm'] + ' - ' + df_results_non_incremental['hiperparams'].apply(str)

In [25]:
fig = px.line(df_results_non_incremental, x="test_size", y="hr", color='algo_params', title='HR x Test size (non-incremental)')
fig.show()
fig.write_html('results-v2/hr_x_test_size_non_incremental.html')

In [26]:
def filter_best_results(df_results):  # Mudar essa função depois de analisar os resultados
    df_results_filtered = df_results[
        ((df_results['algorithm'] == 'LinUCB (non_incremental)') & (df_results['hiperparams'] == str({'alpha': 0.1}))) |
        ((df_results['algorithm'] == 'LinGreedy (non_incremental)') & (df_results['hiperparams'] == str({'epsilon': 0.01}))) |
        (df_results['algorithm'] == 'ALS') |
        ((df_results['algorithm'] == 'LinUCB (incremental)') & (df_results['batch_size'] == 10.0) & (df_results['hiperparams'] == str({'alpha': 0.1}))) |
        ((df_results['algorithm'] == 'LinGreedy (incremental)') & (df_results['batch_size'] == 10.0) & (df_results['hiperparams'] == str({'epsilon': 0.01})))
    ]
    return df_results_filtered

In [27]:
df_results_filtered = filter_best_results(df_results)
df_results_filtered

Unnamed: 0,algorithm,hits,hr,time,hiperparams,batch_size,test_size,test_interactions
0,ALS,46,0.153846,0.121034,{},,0.05,299
1,LinUCB (non_incremental),22,0.073579,0.084711,{'alpha': 0.1},,0.05,299
6,LinGreedy (non_incremental),22,0.073579,0.070667,{'epsilon': 0.01},,0.05,299
11,LinUCB (incremental),17,0.056856,9.878619,{'alpha': 0.1},10.0,0.05,299
16,LinGreedy (incremental),18,0.060201,8.758266,{'epsilon': 0.01},10.0,0.05,299
...,...,...,...,...,...,...,...,...
0,ALS,749,0.090876,3.871593,{},,0.95,8242
1,LinUCB (non_incremental),580,0.070371,1.803205,{'alpha': 0.1},,0.95,8242
6,LinGreedy (non_incremental),579,0.070250,1.512720,{'epsilon': 0.01},,0.95,8242
11,LinUCB (incremental),611,0.074132,215.646211,{'alpha': 0.1},10.0,0.95,8242


In [29]:
fig = px.line(df_results_filtered, x="test_size", y="hr", color='algorithm', title='HR x Test size')
fig.show()
fig.write_html('results-v2/hr_x_test_size.html')

In [40]:
train_size = 0.15
save_path = f'results-v2/{round(train_size * 100):02}-{round((1 - train_size) * 100):02}/recs.csv'

df_recs = pd.read_csv(save_path)
df_recs = filter_best_results(df_recs)

new_algo_names = []
for i, row in df_recs.iterrows():
    new_name = row['algorithm']
    if not math.isnan(row['batch_size']):
        new_name += f' - {row["batch_size"]}'
    if row['hiperparams'] != '{}':
        new_name += f' - {row["hiperparams"]}'
    new_algo_names.append(new_name)

df_recs['algorithm'] = new_algo_names

df_hrs = pd.DataFrame(columns=['algorithm', 'interaction_number', 'hr'])
for algo in df_recs['algorithm'].unique():
    curr_interactions = df_recs[df_recs['algorithm'] == algo]
    curr_interactions = curr_interactions.sort_values(by='interaction_number')

    hrs = []
    hits = 0
    total = 0
    for i, interaction in curr_interactions.iterrows():
        total += 1
        if interaction['item_id'] in eval(interaction['recommendations']):
            hits += 1
        hrs.append(hits / total)
    
    df_hrs = pd.concat([df_hrs, pd.DataFrame({'algorithm': [algo for _ in range(len(hrs))], 'interaction_number': curr_interactions['interaction_number'], 'hr': hrs})])

fig = px.line(df_hrs, x='interaction_number', y='hr', color='algorithm', title='HR x Interaction number')
fig.show()