## Configuração inicial

In [5]:
import pickle
import pandas as pd

from Utils import templates, utils, recommender

In [6]:
## Base settings

# Select the prompt template.
prompt_template = templates.PROMPT_TEMPLATE_2

# Select the prompt used to format the final answer.
#prompt_format = templates.PROMPT_TEMPLATE_ESTRUCTURE
prompt_format = ""  # empty string: do not use it

# Experiment configuration. The prompt entries go last so the key order is
# the same as adding them to the dict after the base settings.
config = {
    #"runtime": "ROCm llama.cpp v1.23.0",
    #"runtime": "CPU llama.cpp v1.22.2", # poor performance
    "LLM_runtime": "Vulkan llama.cpp v1.23.0",  # best option
    "dataset": "ml_100k",
    "nsu": 18,       # number of users for collaborative filtering      | SSBD :12  | Default :18   |
    "nci": 24,       # number of items for collaborative filtering      | SSBD :19  | Default :24   | Max : 1682
    "lenlimit": 24,  # size limit for the watched-movies list           | SSBD : 8  | Default :24   | Max : 1682
    "test_run": 0,   # sets the number of recommendations               |           | Default :0    | Max : 943
    "obs": "base",
    "prompt_template": prompt_template,
    "prompt_format": prompt_format,
}

# load movie lens 100k dataset
dataset = utils.read_json("Data/ML100K_clean.json")
print(f'Quantidade de Usuários: {len(dataset)}')

Quantidade de Usuários: 943


---------   
## Escolha do modelo

### mistral-7b-instruct-v0.3

In [7]:
# Model settings for mistral-7b-instruct-v0.3. Each assignment overwrites the
# value left in `config` by any previously executed model cell.
config["model_name"] = "mistral-7b-instruct-v0.3"
config["Arch"] = "llama"
config["Quantization"] = "Q4_K_M"
config["Temperature"] = 0.1
config["max_tokens"] = -1  # Default : 4096
config["GPU Offload"] = 34
config["CPU Thread Pool Size"] = 6
config["Evaluation Batch Size"] = 512
config["Flash Attention"] = False  # saw no advantage in using it

### gemma-3-4b-it

In [None]:
# Model settings for gemma-3-4b-it. Each assignment overwrites the value left
# in `config` by any previously executed model cell.
config["model_name"] = "gemma-3-4b-it"
config["Arch"] = "gemma3"
config["Quantization"] = "Q4_K_M"
config["Temperature"] = 0.1
config["max_tokens"] = -1  # Default : 4096
config["GPU Offload"] = 34
config["CPU Thread Pool Size"] = 6
config["Evaluation Batch Size"] = 512
config["Flash Attention"] = False  # saw no advantage in using it

### meta-llama-3.1-8b-instruct

In [None]:
# Model settings for meta-llama-3.1-8b-instruct. Each assignment overwrites the
# value left in `config` by any previously executed model cell.
config["model_name"] = "meta-llama-3.1-8b-instruct"
config["Arch"] = "llama"
config["Quantization"] = "Q4_K_M"
config["Temperature"] = 0.1
config["max_tokens"] = 4096
config["GPU Offload"] = 34
config["CPU Thread Pool Size"] = 6
config["Evaluation Batch Size"] = 512
config["Flash Attention"] = False

### llama-3.2-3b-instruct

In [None]:
# Model settings for llama-3.2-3b-instruct. Each assignment overwrites the
# value left in `config` by any previously executed model cell.
config["model_name"] = "llama-3.2-3b-instruct"
config["Arch"] = "llama"
config["Quantization"] = "Q8_0"
config["Temperature"] = 0
config["max_tokens"] = -1  # Default : 4096
config["GPU Offload"] = 34
config["CPU Thread Pool Size"] = 6
config["Evaluation Batch Size"] = 512
config["Flash Attention"] = False

In [None]:
def get_candidate_ids_list(data, id_list, user_matrix_sim, num_u, num_i):
    """Return the ids from ``id_list`` whose "gt" entry is among their candidates.

    For each id ``i`` the watched list is ``data[i][0]`` split on ``' | '`` and
    the "gt" value (presumably the ground-truth movie) is ``data[i][-1]``.
    Candidates are obtained from ``utils.sort_user_filtering_items(data,
    watched, user_matrix_sim[i], num_u, num_i)``.

    Args:
        data: Indexable collection of per-user records.
        id_list: Iterable of indexes into ``data`` and ``user_matrix_sim``.
        user_matrix_sim: Indexable collection; row ``i`` is forwarded to the helper.
        num_u: Forwarded unchanged to ``utils.sort_user_filtering_items``.
        num_i: Forwarded unchanged to ``utils.sort_user_filtering_items``.

    Returns:
        list: The ids, in ``id_list`` order, whose last record element is
        contained in the candidate items.

    TODO: integrate this into the project pipeline.
    """
    def _gt_is_candidate(user_id):
        # True when this user's last record element appears in the candidates.
        record = data[user_id]
        watched = record[0].split(' | ')
        candidates = utils.sort_user_filtering_items(
            data, watched, user_matrix_sim[user_id], num_u, num_i
        )
        return record[-1] in candidates

    return [user_id for user_id in id_list if _gt_is_candidate(user_id)]

In [None]:
# One id per record in the dataset.
id_list = list(range(len(dataset)))
#assert(len(id_list) == 943) # checks that the list has exactly this many entries

# Build the movie-name index and the user similarity matrix.
movie_idx = utils.build_moviename_index_dict(dataset)
user_sim_matrix = utils.build_user_similarity_matrix(dataset, movie_idx)


# To apply the filtering over the movies
#pop_dict = utils.build_movie_popularity_dict(dataset)
#item_sim_matrix = utils.build_item_similarity_matrix(dataset)

## Execução

In [8]:
# Run the recommendation workflow with the currently selected configuration.
# The returned value is opened as a pickle file in the results section.
result_pkl = recommender.recommendation_workflow(
    config=config,
    dataset=dataset,
    prompt_template=prompt_template,
    prompt_format=prompt_format,
)

Processando:   0%|          | 0/943 [00:00<?, ?it/s]

## Execução com várias configurações

In [None]:
import pickle
import traceback

import pandas as pd

from Utils import templates, utils, recommender

# Select the prompt template.
prompt_template = templates.PROMPT_TEMPLATE_2

# Select the prompt used to format the final answer.
#prompt_format = templates.PROMPT_TEMPLATE_ESTRUCTURE
prompt_format = ""  # empty string: do not use it

# load movie lens 100k dataset
dataset = utils.read_json("Data/ML100K_clean.json")
print(f'Quantidade de Usuários: {len(dataset)}')

# Base configuration shared by every run of the sweep (gemma-3-4b-it).
# Key order is kept: prompt entries, then base settings, then model settings.
config = {
    "prompt_template": prompt_template,
    "prompt_format": prompt_format,

    #"runtime": "ROCm llama.cpp v1.23.0",
    #"runtime": "CPU llama.cpp v1.22.2", # poor performance
    "LLM_runtime": "Vulkan llama.cpp v1.23.0",  # best option
    "dataset": "ml_100k",
    "nsu": 18,       # number of users for collaborative filtering      | SSBD :12  | Default :18   |
    "nci": 24,       # number of items for collaborative filtering      | SSBD :19  | Default :24   | Max : 1682
    "lenlimit": 24,  # size limit for the watched-movies list           | SSBD : 8  | Default :24   | Max : 1682
    "test_run": 0,   # sets the number of recommendations               |           | Default :0    | Max : 943
    "obs": "base",

    "model_name": "gemma-3-4b-it",
    "Arch": "gemma3",
    "Quantization": "Q4_K_M",
    "Temperature": 0.1,
    "max_tokens": -1,  # Default : 4096
    "GPU Offload": 34,
    "CPU Thread Pool Size": 6,
    "Evaluation Batch Size": 512,
    "Flash Attention": False,  # saw no advantage in using it
}

# (Temperature, lenlimit) pairs to run, in execution order.
# The pair (0.1, 24) equals the base values above and is not part of the sweep.
sweep_grid = [
    (0.0, 24),
    (0.1, 16),
    (0.0, 16),
    (0.1, 8),
    (0.0, 8),
]

# One independent (shallow) copy of the base configuration per grid entry.
config_list = []
for temperature, lenlimit in sweep_grid:
    run_config = config.copy()
    run_config.update({
        "obs": "testando lenlimits e temperatura",
        "Temperature": temperature,
        "lenlimit": lenlimit,
    })
    config_list.append(run_config)

total_runs = len(config_list)

# The loop variable is not named `config`, so the base configuration stays
# bound to `config` after the sweep. Progress is reported 1-based.
for run_number, run_config in enumerate(config_list, start=1):
    print(f'Rodando configuração {run_number} de {total_runs}')
    try:
        result_pkl = recommender.recommendation_workflow(
            config=run_config,
            dataset=dataset,
            prompt_template=prompt_template,
            prompt_format=prompt_format,
        )
        print(f'Configuração {run_number} de {total_runs} finalizada')
    except Exception:
        # Best effort: a failed run must not stop the remaining ones, but the
        # full traceback is kept so the failure can be diagnosed afterwards.
        print(f'Erro na configuração {run_number} de {total_runs}')
        traceback.print_exc()

Quantidade de Usuários: 943
24
16
16
8
8


In [3]:
# Print every configuration in the sweep. The loop variable is deliberately
# not named `config`, so the `config` dict used by other cells is not rebound
# to the last sweep entry.
for sweep_config in config_list:
    print(sweep_config)

{'prompt_template': {'System_prompt': "You are a movie expert provide the answer for the question based on the given context. If you don't know the answer to a question, please don't share false information.", 'Preference': '\n    ### MY WATCHED MOVIES LIST: {}.\n\n    ### QUESTION: Based on my watched movies list. Tell me what features are most important to me when selecting movies (Summarize my preferences briefly)?\n\n    ### ANSWER:\n    ', 'Featured_movies': '\n\n    ### MY WATCHED MOVIES LIST: {}.\n\n    ### MY MOVIE PREFERENCES: {}.\n\n    ### QUESTION: Create an enumerated list selecting the five most featured movies from the watched movies according to my movie preferences.\n\n    ### ANSWER:\n    ', 'Recommendation': '\n\n    ### CANDIDATE MOVIE SET: {}.\n\n    ### MY WATCHED MOVIES LIST: {}.\n\n    ### MY MOVIE PREFERENCES: {}.\n\n    ### MY FIVE MOST FEATURED MOVIES: {}.\n\n    ### INSTRUCTIONS:\n    Recommend exactly **10 movies** from the "Candidate Movie Set" that are mo

## Resultados

In [None]:
# Load the pickle referenced by `result_pkl` (the value returned by the
# recommendation workflow).
# NOTE(review): pickle.load can execute arbitrary code; only open files that
# were produced locally by this project.
with open(f'{result_pkl}', 'rb') as results_file:
    data = pickle.load(results_file)

In [None]:
# Output column -> key read from each experiment record.
# Commented entries are columns that are currently left out of the table.
column_to_key = {
    'Candidates': 'candidate_set',
    'Ground Truth': 'ground_truth',
    'gt_in_candidate_set': 'gt_in_candidate_set',
    #'Input 1': 'input_1',
    #'Predictions 1': 'predictions_1',
    #'Input 2': 'input_2',
    #'Predictions 2': 'predictions_2',
    'Input 3': 'input_3',
    'Predictions 3': 'predictions_3',
    #'Recommendations': 'recommendations',
    'rec_HitRate@10': 'rec_HitRate@10',
    #'Precision': 'precision',
    #'Recall': 'recall',
    'rec_NDCG@10': 'rec_NDCG@10',
}

# Keep only the experiment entries: integer keys whose value is a dict.
# Missing fields fall back to an empty string.
results = [
    {column: record.get(field, '') for column, field in column_to_key.items()}
    for entry_key, record in data.items()
    if isinstance(entry_key, int) and isinstance(record, dict)
]

df_results = pd.DataFrame(results)

df_results