In [1]:
%cd ../..

/home/matheus/Desktop/Itens/Projetos/llm2vec-embeddings-classification


In [6]:
import os

def remove_empty_dirs(path):
    # Percorre todas as subpastas do diretório especificado
    for dirpath, dirnames, filenames in os.walk(path, topdown=False):
        # Se a pasta estiver vazia, remove-a
        if not os.listdir(dirpath):
            os.rmdir(dirpath)
            print(f"Removido: {dirpath}")

# Exemplo de uso
diretorio_alvo = "results"
remove_empty_dirs(diretorio_alvo)


Removido: results/Dmoz-Science.csv/llama_cpp/bartowski_Mistral-Nemo-Instruct-2407-GGUF/instruction_summary_prompt/knn
Removido: results/Dmoz-Science.csv/llama_cpp/bartowski_Mistral-Nemo-Instruct-2407-GGUF/instruction_summary_prompt
Removido: results/Dmoz-Science.csv/llama_cpp/bartowski_Mistral-Nemo-Instruct-2407-GGUF/instruction_classification_prompt/knn
Removido: results/Dmoz-Science.csv/llama_cpp/bartowski_Mistral-Nemo-Instruct-2407-GGUF/instruction_classification_prompt
Removido: results/Dmoz-Science.csv/llama_cpp/bartowski_Mistral-Nemo-Instruct-2407-GGUF/base_prompt/knn
Removido: results/Dmoz-Science.csv/llama_cpp/bartowski_Mistral-Nemo-Instruct-2407-GGUF/base_prompt
Removido: results/Dmoz-Science.csv/llama_cpp/bartowski_Mistral-Nemo-Instruct-2407-GGUF
Removido: results/Dmoz-Science.csv/llama_cpp/lmstudio-community_Llama-3.2-3B-Instruct-GGUF/instruction_summary_prompt/knn
Removido: results/Dmoz-Science.csv/llama_cpp/lmstudio-community_Llama-3.2-3B-Instruct-GGUF/instruction_summary_

In [2]:
import os
import numpy as np
import pandas as pd
from src.core.utils import read_json

# Configura o Pandas para exibir todas as colunas
pd.set_option('display.max_columns', None)

def load_results_to_dataframe(base_path: str) -> pd.DataFrame:
    """
    Load results from JSON files into a pandas DataFrame.
    """
    results = []
    
    # Traverse the directory structure
    for dataset_name in os.listdir(base_path):
        dataset_path = os.path.join(base_path, dataset_name)
        if os.path.isdir(dataset_path):
            for model_type in os.listdir(dataset_path):
                model_type_path = os.path.join(dataset_path, model_type)
                
                if os.path.isdir(model_type_path):
                    for model_name in os.listdir(model_type_path):
                        model_name_path = os.path.join(model_type_path, model_name)
                        
                        # Define paths based on whether prompt_name is needed
                        if model_type != "bert":
                            subdirs = [os.path.join(model_name_path, prompt) for prompt in os.listdir(model_name_path)]
                        else:
                            subdirs = [model_name_path]
                        
                        # Process results.json files from determined paths
                        for subdir in subdirs:
                            for classifier in os.listdir(subdir):
                                classifier_path = os.path.join(subdir, classifier)
                                
                                # Check for the results.json in the classifier path
                                json_file_path = os.path.join(classifier_path, 'results.json')
                                
                                if os.path.isfile(json_file_path):
                                    result_data = read_json(json_file_path)

                                    keys_to_extract = ['mean_test_accuracy', 'mean_test_precision', 'mean_test_recall', 'mean_test_f1_score', 'embedding_generation_time', 'embedding_generation_size']
    
                                    # Extrai apenas as chaves especificadas
                                    result_data= {key: result_data.get(key) for key in keys_to_extract}
                                    
                                    # Add metadata to the result data
                                    result_data['dataset_name'] = dataset_name
                                    result_data['model_type'] = model_type
                                    result_data['model_name'] = model_name
                                    result_data['classifier'] = classifier
                                    
                                    # Add prompt_name if applicable
                                    if model_type != "bert":
                                        result_data['prompt_name'] = os.path.basename(subdir)
                                    else:
                                        result_data['prompt_name'] = None
                                    
                                    results.append(result_data)

    # Create a DataFrame from the results
    results_df = pd.DataFrame(results)

    # Specify the order of the columns
    columns_first = ['dataset_name', 'model_type', 'model_name', 'classifier']
    if 'prompt_name' in results_df.columns:
        columns_first.append('prompt_name')
    column_order = columns_first + [col for col in results_df.columns if col not in columns_first]
    results_df = results_df[column_order]
    
    return results_df



In [3]:
# Usage
base_path = 'results' 
results_df = load_results_to_dataframe(base_path)

print("DataFrame of Results:")
display(results_df)

# Save the DataFrame to a CSV file
results_df.to_csv(f'{base_path}/resume/result_resume.csv', index=False)  # Include index=False to avoid saving the index as a column

DataFrame of Results:


Unnamed: 0,dataset_name,model_type,model_name,classifier,prompt_name,mean_test_accuracy,mean_test_precision,mean_test_recall,mean_test_f1_score,embedding_generation_time,embedding_generation_size
0,Dmoz-Science.csv,bert,sentence-transformers_all-distilroberta-v1,knn,,0.793333,0.801164,0.793333,0.791258,4.378122,18432128
1,Dmoz-Science.csv,bert,sentence-transformers_all-MiniLM-L12-v2,knn,,0.799000,0.806622,0.799000,0.795900,2.609955,9216128
2,Dmoz-Science.csv,bert,sentence-transformers_all-MiniLM-L6-v2,knn,,0.801500,0.808912,0.801500,0.798932,2.709429,9216128
3,Dmoz-Science.csv,bert,sentence-transformers_all-mpnet-base-v2,knn,,0.824333,0.828456,0.824333,0.822382,3.978258,18432128
4,Dmoz-Science.csv,llama_cpp,bartowski_aya-expanse-8b-GGUF,knn,instruction_summary_prompt,0.083333,0.006944,0.083333,0.012821,181.245077,53080
...,...,...,...,...,...,...,...,...,...,...,...
192,SyskillWebert.csv,llama_cpp,bartowski_aya-expanse-8b-GGUF,knn,instruction_classification_prompt,0.194618,0.048654,0.250000,0.081456,11.609850,2872
193,SyskillWebert.csv,llama_cpp,bartowski_aya-expanse-8b-GGUF,knn,base_prompt,0.194618,0.048654,0.250000,0.081456,10.640329,2872
194,SyskillWebert.csv,llama_cpp,SanctumAI_gemma-2-9b-it-GGUF,knn,instruction_summary_prompt,0.799593,0.766043,0.767854,0.759954,22.862018,2872
195,SyskillWebert.csv,llama_cpp,SanctumAI_gemma-2-9b-it-GGUF,knn,instruction_classification_prompt,0.811352,0.774754,0.781627,0.775362,23.037518,2872


In [5]:
datasets = results_df['dataset_name'].unique()

for dataset in datasets:
    print(f"Dataset: {dataset}")
    sorted_df = results_df[results_df['dataset_name'] == dataset].sort_values(by="mean_test_f1_score", ascending=False)
    sorted_df = sorted_df.drop(["dataset_name", "classifier", "embedding_generation_size"], axis=1)
    
    # Renomeia as colunas usando os nomes já modificados no DataFrame anterior
    sorted_df = sorted_df.rename(columns={
        'model_type': "Tipo de Modelo",
        "model_name": "Nome do Modelo",
        "prompt_name": "Prompt",
        'mean_test_accuracy': 'Acurácia',
        'mean_test_precision': 'Precisão',
        'mean_test_recall': 'Recall',
        'mean_test_f1_score': 'F1 Score',
        'embedding_generation_time': "Tempo de geração de embeddings"
    })
    
    # Arredonda as colunas numéricas para 3 casas decimais
    sorted_df[['Acurácia', 'Precisão', 'Recall', 'F1 Score', 'Tempo de geração de embeddings']] = sorted_df[['Acurácia', 'Precisão', 'Recall', 'F1 Score', 'Tempo de geração de embeddings']].round(3)
    sorted_df = sorted_df.fillna('-')
#     replace = {
#     'base_prompt': 'base prompt',
#     'instruction_classification_prompt': 'instruction classification prompt',
#     'instruction_summary_prompt': 'instruction summary prompt'
# }

#     # Substituir todas as ocorrências conforme o dicionário
#     sorted_df= sorted_df.replace(replace)

    sorted_df = sorted_df.replace({'sentence-transformers_': '',
                                   'McGill-NLP_LLM2Vec-': '',
                                   'base_prompt': 'BP',
                                   'instruction_classification_prompt': 'ICP',
                                   'instruction_summary_prompt': 'ISP'}, 
                                   regex=True)
    display(sorted_df)
    
    # Salva o DataFrame em CSV
    sorted_df.to_csv(f'{base_path}/resume/{dataset}', index=False)

Dataset: Dmoz-Science.csv


Unnamed: 0,Tipo de Modelo,Nome do Modelo,Prompt,Acurácia,Precisão,Recall,F1 Score,Tempo de geração de embeddings
18,llm2vec,Meta-Llama-3-8B-Instruct-mntp-supervised,BP,0.852,0.858,0.852,0.851,57.135
25,llm2vec,Sheared-LLaMA-mntp-supervised,ISP,0.834,0.84,0.834,0.831,32.557
23,llm2vec,Mistral-7B-Instruct-v2-mntp-unsup-simcse,ICP,0.833,0.837,0.833,0.83,171.946
15,llm2vec,Mistral-7B-Instruct-v2-mntp-supervised,BP,0.832,0.84,0.832,0.829,60.97
14,llm2vec,Mistral-7B-Instruct-v2-mntp-supervised,ICP,0.829,0.835,0.829,0.826,154.041
27,llm2vec,Sheared-LLaMA-mntp-supervised,BP,0.828,0.835,0.828,0.825,21.297
3,bert,all-mpnet-base-v2,-,0.824,0.828,0.824,0.822,3.978
12,llm2vec,Meta-Llama-3-8B-Instruct-mntp-unsup-simcse,BP,0.824,0.829,0.824,0.822,51.425
24,llm2vec,Mistral-7B-Instruct-v2-mntp-unsup-simcse,BP,0.821,0.826,0.821,0.818,59.234
22,llm2vec,Mistral-7B-Instruct-v2-mntp-unsup-simcse,ISP,0.817,0.823,0.817,0.814,117.917


Dataset: re8.csv


Unnamed: 0,Tipo de Modelo,Nome do Modelo,Prompt,Acurácia,Precisão,Recall,F1 Score,Tempo de geração de embeddings
31,bert,all-mpnet-base-v2,-,0.968,0.937,0.925,0.93,9.976
29,bert,all-MiniLM-L12-v2,-,0.958,0.934,0.915,0.924,4.679
28,bert,all-distilroberta-v1,-,0.96,0.926,0.899,0.911,6.699
30,bert,all-MiniLM-L6-v2,-,0.954,0.925,0.893,0.906,4.205
36,llama_cpp,SanctumAI_gemma-2-9b-it-GGUF,ICP,0.958,0.909,0.854,0.876,363.213
35,llama_cpp,SanctumAI_gemma-2-9b-it-GGUF,ISP,0.956,0.898,0.859,0.875,346.053
37,llama_cpp,SanctumAI_gemma-2-9b-it-GGUF,BP,0.954,0.907,0.849,0.872,344.423
32,llama_cpp,bartowski_aya-expanse-8b-GGUF,ISP,0.299,0.037,0.125,0.057,276.558
33,llama_cpp,bartowski_aya-expanse-8b-GGUF,ICP,0.299,0.037,0.125,0.057,260.701
34,llama_cpp,bartowski_aya-expanse-8b-GGUF,BP,0.299,0.037,0.125,0.057,210.111


Dataset: Dmoz-Computers.csv


Unnamed: 0,Tipo de Modelo,Nome do Modelo,Prompt,Acurácia,Precisão,Recall,F1 Score,Tempo de geração de embeddings
52,llm2vec,Meta-Llama-3-8B-Instruct-mntp-supervised,BP,0.809,0.809,0.804,0.797,75.441
58,llm2vec,Mistral-7B-Instruct-v2-mntp-unsup-simcse,BP,0.796,0.794,0.792,0.785,87.927
49,llm2vec,Mistral-7B-Instruct-v2-mntp-supervised,BP,0.797,0.798,0.793,0.783,131.421
57,llm2vec,Mistral-7B-Instruct-v2-mntp-unsup-simcse,ICP,0.791,0.789,0.787,0.778,258.517
56,llm2vec,Mistral-7B-Instruct-v2-mntp-unsup-simcse,ISP,0.79,0.787,0.786,0.778,153.138
46,llm2vec,Meta-Llama-3-8B-Instruct-mntp-unsup-simcse,BP,0.788,0.786,0.785,0.777,76.674
44,llm2vec,Meta-Llama-3-8B-Instruct-mntp-unsup-simcse,ISP,0.783,0.785,0.779,0.772,130.051
45,llm2vec,Meta-Llama-3-8B-Instruct-mntp-unsup-simcse,ICP,0.781,0.779,0.778,0.77,215.624
59,llm2vec,Sheared-LLaMA-mntp-supervised,ISP,0.779,0.778,0.775,0.769,41.051
61,llm2vec,Sheared-LLaMA-mntp-supervised,BP,0.78,0.777,0.775,0.768,47.868


Dataset: NSF.csv


Unnamed: 0,Tipo de Modelo,Nome do Modelo,Prompt,Acurácia,Precisão,Recall,F1 Score,Tempo de geração de embeddings
80,llm2vec,Meta-Llama-3-8B-Instruct-mntp-supervised,BP,0.898,0.905,0.892,0.896,55.15
74,llm2vec,Meta-Llama-3-8B-Instruct-mntp-unsup-simcse,BP,0.894,0.892,0.89,0.89,60.675
72,llm2vec,Meta-Llama-3-8B-Instruct-mntp-unsup-simcse,ISP,0.889,0.891,0.886,0.887,117.35
78,llm2vec,Meta-Llama-3-8B-Instruct-mntp-supervised,ISP,0.888,0.894,0.882,0.886,137.55
65,bert,all-mpnet-base-v2,-,0.885,0.889,0.883,0.885,3.712
84,llm2vec,Mistral-7B-Instruct-v2-mntp-unsup-simcse,ISP,0.884,0.886,0.878,0.881,131.128
63,bert,all-MiniLM-L12-v2,-,0.881,0.886,0.876,0.879,2.771
77,llm2vec,Mistral-7B-Instruct-v2-mntp-supervised,BP,0.882,0.887,0.875,0.879,79.218
76,llm2vec,Mistral-7B-Instruct-v2-mntp-supervised,ICP,0.881,0.885,0.876,0.878,237.751
73,llm2vec,Meta-Llama-3-8B-Instruct-mntp-unsup-simcse,ICP,0.88,0.885,0.874,0.877,215.68


Dataset: Industry Sector.csv


Unnamed: 0,Tipo de Modelo,Nome do Modelo,Prompt,Acurácia,Precisão,Recall,F1 Score,Tempo de geração de embeddings
108,llm2vec,Meta-Llama-3-8B-Instruct-mntp-supervised,BP,0.938,0.941,0.932,0.936,512.522
105,llm2vec,Mistral-7B-Instruct-v2-mntp-supervised,BP,0.932,0.937,0.924,0.93,565.922
106,llm2vec,Meta-Llama-3-8B-Instruct-mntp-supervised,ISP,0.925,0.923,0.918,0.92,540.101
104,llm2vec,Mistral-7B-Instruct-v2-mntp-supervised,ICP,0.918,0.923,0.909,0.915,636.968
107,llm2vec,Meta-Llama-3-8B-Instruct-mntp-supervised,ICP,0.914,0.921,0.905,0.912,587.214
115,llm2vec,Sheared-LLaMA-mntp-supervised,ISP,0.905,0.909,0.893,0.9,197.315
117,llm2vec,Sheared-LLaMA-mntp-supervised,BP,0.899,0.917,0.883,0.898,179.943
103,llm2vec,Mistral-7B-Instruct-v2-mntp-supervised,ISP,0.901,0.907,0.889,0.897,588.889
116,llm2vec,Sheared-LLaMA-mntp-supervised,ICP,0.883,0.913,0.862,0.885,188.775
91,bert,all-MiniLM-L12-v2,-,0.873,0.895,0.854,0.873,10.267


Dataset: Dmoz-Sports.csv


Unnamed: 0,Tipo de Modelo,Nome do Modelo,Prompt,Acurácia,Precisão,Recall,F1 Score,Tempo de geração de embeddings
123,llama_cpp,SanctumAI_gemma-2-9b-it-GGUF,ICP,0.899,0.904,0.899,0.9,628.259
121,bert,all-mpnet-base-v2,-,0.877,0.882,0.877,0.876,9.356
119,bert,all-MiniLM-L12-v2,-,0.872,0.876,0.872,0.871,6.374
118,bert,all-distilroberta-v1,-,0.871,0.873,0.871,0.87,6.791
124,llama_cpp,SanctumAI_gemma-2-9b-it-GGUF,BP,0.862,0.87,0.862,0.862,469.587
120,bert,all-MiniLM-L6-v2,-,0.861,0.865,0.861,0.86,4.534
122,llama_cpp,SanctumAI_gemma-2-9b-it-GGUF,ISP,0.855,0.863,0.855,0.854,542.581


Dataset: review_polarity.csv


Unnamed: 0,Tipo de Modelo,Nome do Modelo,Prompt,Acurácia,Precisão,Recall,F1 Score,Tempo de geração de embeddings
139,llm2vec,Mistral-7B-Instruct-v2-mntp-supervised,ICP,0.85,0.868,0.85,0.849,158.961
138,llm2vec,Mistral-7B-Instruct-v2-mntp-supervised,ISP,0.844,0.854,0.844,0.843,162.43
142,llm2vec,Meta-Llama-3-8B-Instruct-mntp-supervised,ICP,0.844,0.853,0.844,0.842,169.862
141,llm2vec,Meta-Llama-3-8B-Instruct-mntp-supervised,ISP,0.817,0.822,0.816,0.816,218.696
143,llm2vec,Meta-Llama-3-8B-Instruct-mntp-supervised,BP,0.798,0.801,0.798,0.798,192.309
140,llm2vec,Mistral-7B-Instruct-v2-mntp-supervised,BP,0.796,0.802,0.796,0.794,146.74
133,llama_cpp,SanctumAI_gemma-2-9b-it-GGUF,ICP,0.785,0.807,0.785,0.78,152.869
148,llm2vec,Mistral-7B-Instruct-v2-mntp-unsup-simcse,ICP,0.777,0.79,0.777,0.774,158.887
149,llm2vec,Mistral-7B-Instruct-v2-mntp-unsup-simcse,BP,0.772,0.787,0.772,0.769,147.026
135,llm2vec,Meta-Llama-3-8B-Instruct-mntp-unsup-simcse,ISP,0.764,0.782,0.764,0.76,160.104


Dataset: CSTR.csv


Unnamed: 0,Tipo de Modelo,Nome do Modelo,Prompt,Acurácia,Precisão,Recall,F1 Score,Tempo de geração de embeddings
156,bert,all-mpnet-base-v2,-,0.896,0.917,0.92,0.914,2.318
153,bert,all-distilroberta-v1,-,0.893,0.907,0.918,0.909,2.011
162,llama_cpp,SanctumAI_gemma-2-9b-it-GGUF,BP,0.876,0.903,0.9,0.898,23.307
154,bert,all-MiniLM-L12-v2,-,0.87,0.898,0.899,0.896,1.778
161,llama_cpp,SanctumAI_gemma-2-9b-it-GGUF,ICP,0.866,0.898,0.89,0.89,19.305
160,llama_cpp,SanctumAI_gemma-2-9b-it-GGUF,ISP,0.856,0.891,0.884,0.884,18.994
155,bert,all-MiniLM-L6-v2,-,0.869,0.881,0.893,0.883,1.787
157,llama_cpp,bartowski_aya-expanse-8b-GGUF,ISP,0.428,0.107,0.25,0.15,10.476
158,llama_cpp,bartowski_aya-expanse-8b-GGUF,ICP,0.428,0.107,0.25,0.15,10.633
159,llama_cpp,bartowski_aya-expanse-8b-GGUF,BP,0.428,0.107,0.25,0.15,14.614


Dataset: Dmoz-Health.csv


Unnamed: 0,Tipo de Modelo,Nome do Modelo,Prompt,Acurácia,Precisão,Recall,F1 Score,Tempo de geração de embeddings
166,bert,all-mpnet-base-v2,-,0.881,0.882,0.881,0.88,6.436
164,bert,all-MiniLM-L12-v2,-,0.873,0.873,0.873,0.872,4.156
165,bert,all-MiniLM-L6-v2,-,0.871,0.871,0.871,0.87,4.138
163,bert,all-distilroberta-v1,-,0.863,0.863,0.863,0.861,5.279


Dataset: webkb-parsed.csv


Unnamed: 0,Tipo de Modelo,Nome do Modelo,Prompt,Acurácia,Precisão,Recall,F1 Score,Tempo de geração de embeddings
174,llama_cpp,SanctumAI_gemma-2-9b-it-GGUF,ISP,0.781,0.719,0.689,0.677,537.236
176,llama_cpp,SanctumAI_gemma-2-9b-it-GGUF,BP,0.775,0.715,0.681,0.671,535.921
175,llama_cpp,SanctumAI_gemma-2-9b-it-GGUF,ICP,0.786,0.709,0.681,0.671,546.612
168,bert,all-MiniLM-L12-v2,-,0.676,0.58,0.559,0.544,10.62
169,bert,all-MiniLM-L6-v2,-,0.659,0.556,0.569,0.535,10.893
167,bert,all-distilroberta-v1,-,0.648,0.533,0.559,0.524,22.957
170,bert,all-mpnet-base-v2,-,0.608,0.487,0.496,0.471,29.644
171,llama_cpp,bartowski_aya-expanse-8b-GGUF,ISP,0.198,0.028,0.143,0.047,277.232
172,llama_cpp,bartowski_aya-expanse-8b-GGUF,ICP,0.198,0.028,0.143,0.047,265.916
173,llama_cpp,bartowski_aya-expanse-8b-GGUF,BP,0.198,0.028,0.143,0.047,226.918


Dataset: classic4.csv


Unnamed: 0,Tipo de Modelo,Nome do Modelo,Prompt,Acurácia,Precisão,Recall,F1 Score,Tempo de geração de embeddings
185,llama_cpp,SanctumAI_gemma-2-9b-it-GGUF,ICP,0.983,0.986,0.985,0.985,358.21
186,llama_cpp,SanctumAI_gemma-2-9b-it-GGUF,BP,0.982,0.984,0.984,0.984,356.022
184,llama_cpp,SanctumAI_gemma-2-9b-it-GGUF,ISP,0.982,0.984,0.983,0.984,371.398
178,bert,all-MiniLM-L12-v2,-,0.977,0.979,0.98,0.979,7.623
179,bert,all-MiniLM-L6-v2,-,0.977,0.978,0.981,0.979,5.117
177,bert,all-distilroberta-v1,-,0.977,0.977,0.981,0.979,8.978
180,bert,all-mpnet-base-v2,-,0.976,0.976,0.981,0.978,15.024
181,llama_cpp,bartowski_aya-expanse-8b-GGUF,ISP,0.452,0.113,0.25,0.156,277.439
182,llama_cpp,bartowski_aya-expanse-8b-GGUF,ICP,0.452,0.113,0.25,0.156,215.116
183,llama_cpp,bartowski_aya-expanse-8b-GGUF,BP,0.452,0.113,0.25,0.156,194.114


Dataset: SyskillWebert.csv


Unnamed: 0,Tipo de Modelo,Nome do Modelo,Prompt,Acurácia,Precisão,Recall,F1 Score,Tempo de geração de embeddings
188,bert,all-MiniLM-L12-v2,-,0.925,0.926,0.913,0.916,2.737
189,bert,all-MiniLM-L6-v2,-,0.925,0.918,0.912,0.913,2.396
190,bert,all-mpnet-base-v2,-,0.916,0.911,0.9,0.903,5.311
187,bert,all-distilroberta-v1,-,0.913,0.909,0.895,0.899,4.097
196,llama_cpp,SanctumAI_gemma-2-9b-it-GGUF,BP,0.815,0.788,0.782,0.776,21.993
195,llama_cpp,SanctumAI_gemma-2-9b-it-GGUF,ICP,0.811,0.775,0.782,0.775,23.038
194,llama_cpp,SanctumAI_gemma-2-9b-it-GGUF,ISP,0.8,0.766,0.768,0.76,22.862
191,llama_cpp,bartowski_aya-expanse-8b-GGUF,ISP,0.195,0.049,0.25,0.081,11.508
192,llama_cpp,bartowski_aya-expanse-8b-GGUF,ICP,0.195,0.049,0.25,0.081,11.61
193,llama_cpp,bartowski_aya-expanse-8b-GGUF,BP,0.195,0.049,0.25,0.081,10.64


In [None]:
results_df.columns

Index(['dataset_name', 'model_type', 'model_name', 'classifier', 'prompt_name',
       'mean_test_accuracy', 'mean_test_precision', 'mean_test_recall',
       'mean_test_f1_score', 'embedding_generation_time',
       'embedding_generation_size'],
      dtype='object')

In [None]:
import pdfkit

# Lista de datasets únicos
datasets = results_df['dataset_name'].unique()

columns = ['model_type', 'model_name', 'fit_time', 'score_time', 'test_accuracy',  'test_precision', 'test_recall', 'test_f1_score','embedding_generation_time', 'embeddings_size']

# HTML para o PDF consolidado
html_content = ""

for dataset in datasets:
    # Filtra o DataFrame por dataset
    filtered_df = results_df[results_df['dataset_name'] == dataset]
    filtered_df = filtered_df[columns]
    filtered_df = filtered_df.sort_values(by = "test_f1_score", ascending = False)
    
    # Adiciona título e conteúdo do DataFrame ao HTML
    html_content += f"<h2>Dataset: {dataset}</h2>"
    html_content += filtered_df.to_html(index=False)
    html_content += "<br><br>"  # Adiciona um espaço entre os datasets

# Gera o PDF consolidado
pdf_filename = "consolidated_report.pdf"
pdfkit.from_string(html_content, pdf_filename)
print(f"PDF gerado: {pdf_filename}")

KeyError: "['fit_time', 'score_time', 'test_accuracy', 'test_precision', 'test_recall', 'test_f1_score', 'embeddings_size'] not in index"