In [1]:
%cd ../..

/home/matheus/Desktop/Itens/Projetos/llm2vec-embeddings-classification


In [2]:
import os

def remove_empty_dirs(path):
    # Percorre todas as subpastas do diretório especificado
    for dirpath, dirnames, filenames in os.walk(path, topdown=False):
        # Se a pasta estiver vazia, remove-a
        if not os.listdir(dirpath):
            os.rmdir(dirpath)
            print(f"Removido: {dirpath}")

# Exemplo de uso
diretorio_alvo = "results"
remove_empty_dirs(diretorio_alvo)


In [3]:
import os
import numpy as np
import pandas as pd
from src.core.utils import read_json

# Configura o Pandas para exibir todas as colunas
pd.set_option('display.max_columns', None)

def load_results_to_dataframe(base_path: str) -> pd.DataFrame:
    """
    Load results from JSON files into a pandas DataFrame.
    """
    results = []
    
    # Traverse the directory structure
    for dataset_name in os.listdir(base_path):
        dataset_path = os.path.join(base_path, dataset_name)
        if os.path.isdir(dataset_path):
            for model_type in os.listdir(dataset_path):
                model_type_path = os.path.join(dataset_path, model_type)
                
                if os.path.isdir(model_type_path):
                    for model_name in os.listdir(model_type_path):
                        model_name_path = os.path.join(model_type_path, model_name)
                        
                        # Define paths based on whether prompt_name is needed
                        if model_type != "bert":
                            subdirs = [os.path.join(model_name_path, prompt) for prompt in os.listdir(model_name_path)]
                        else:
                            subdirs = [model_name_path]
                        
                        # Process results.json files from determined paths
                        for subdir in subdirs:
                            embedding_time = read_json(f"{subdir}/embedding_time.json")['embedding_generation_time']
                            for classifier in os.listdir(subdir):
                                classifier_path = os.path.join(subdir, classifier)
                                
                                # Check for the results.json in the classifier path
                                json_file_path = os.path.join(classifier_path, 'results.json')
                                
                                if os.path.isfile(json_file_path):
                                    result_data = read_json(json_file_path)

                                    #keys_to_extract = ['mean_test_accuracy', 'mean_test_precision', 'mean_test_recall', 'mean_test_f1_score', 'mean_score_time']
                                    keys_to_extract = ['mean_test_accuracy', 'mean_test_precision', 'mean_test_recall', 'mean_test_f1_score']
                                    # Extrai apenas as chaves especificadas
                                    result_data= {key: result_data.get(key) for key in keys_to_extract}
                                    
                                    # Add metadata to the result data
                                    result_data['dataset_name'] = dataset_name
                                    result_data['model_type'] = model_type
                                    result_data['model_name'] = model_name
                                    result_data['classifier'] = classifier
                                    #result_data['embedding_time'] = embedding_time
                                    
                                    # Add prompt_name if applicable
                                    if model_type != "bert":
                                        result_data['prompt_name'] = os.path.basename(subdir)
                                    else:
                                        result_data['prompt_name'] = None
                                    
                                    
                                    results.append(result_data)

    # Create a DataFrame from the results
    results_df = pd.DataFrame(results)

    # Specify the order of the columns
    columns_first = ['dataset_name', 'model_type', 'model_name', 'classifier']
    if 'prompt_name' in results_df.columns:
        columns_first.append('prompt_name')
    column_order = columns_first + [col for col in results_df.columns if col not in columns_first]
    results_df = results_df[column_order]
    
    return results_df



In [4]:
# Usage
base_path = 'results' 
results_df = load_results_to_dataframe(base_path)

print("DataFrame of Results:")
display(results_df)

# Save the DataFrame to a CSV file
results_df.to_csv(f'{base_path}/resume/result_resume.csv', index=False)  # Include index=False to avoid saving the index as a column

DataFrame of Results:


Unnamed: 0,dataset_name,model_type,model_name,classifier,prompt_name,mean_test_accuracy,mean_test_precision,mean_test_recall,mean_test_f1_score
0,Dmoz-Science.csv,ollama,phi3.5:3.8b,knn,instruction_summary_prompt,0.564167,0.565877,0.564167,0.550025
1,Dmoz-Science.csv,ollama,phi3.5:3.8b,knn,instruction_classification_prompt,0.570167,0.571426,0.570167,0.559885
2,Dmoz-Science.csv,ollama,phi3.5:3.8b,knn,base_prompt,0.486000,0.480398,0.486000,0.469586
3,Dmoz-Science.csv,ollama,mistral:7b,knn,instruction_summary_prompt,0.595000,0.598839,0.595000,0.585548
4,Dmoz-Science.csv,ollama,mistral:7b,knn,instruction_classification_prompt,0.650000,0.652898,0.650000,0.642536
...,...,...,...,...,...,...,...,...,...
395,SyskillWebert.csv,llm2vec,McGill-NLP_LLM2Vec-Mistral-7B-Instruct-v2-mntp...,knn,instruction_classification_prompt,0.867843,0.869383,0.838807,0.845828
396,SyskillWebert.csv,llm2vec,McGill-NLP_LLM2Vec-Mistral-7B-Instruct-v2-mntp...,knn,base_prompt,0.882813,0.901404,0.854945,0.869171
397,SyskillWebert.csv,llm2vec,McGill-NLP_LLM2Vec-Sheared-LLaMA-mntp-supervised,knn,instruction_summary_prompt,0.909950,0.922364,0.888690,0.900603
398,SyskillWebert.csv,llm2vec,McGill-NLP_LLM2Vec-Sheared-LLaMA-mntp-supervised,knn,instruction_classification_prompt,0.816870,0.849021,0.774954,0.789926


In [5]:
from IPython.display import display
pd.set_option('display.max_colwidth', None)

datasets = results_df['dataset_name'].unique()
ignore_models = ["phi3.5:3.8b"]

for dataset in datasets:
    print(f"Dataset: {dataset}")
    
    # Filtra o DataFrame pelo dataset atual
    sorted_df = results_df[results_df['dataset_name'] == dataset].sort_values(by="mean_test_f1_score", ascending=False)
    
    # Remove colunas desnecessárias e modelos ignorados
    sorted_df = sorted_df.drop(["dataset_name", "classifier"], axis=1)
    sorted_df = sorted_df[~sorted_df['model_name'].isin(ignore_models)]
    
    # Renomeia as colunas
    sorted_df = sorted_df.rename(columns={
        'model_type': "Model Type",
        "model_name": "Model Name",
        "prompt_name": "Prompt",
        'mean_test_accuracy': 'Accuracy',
        'mean_test_precision': 'Precision',
        'mean_test_recall': 'Recall',
        'mean_test_f1_score': 'F1',
        'embedding_time': "Embedding Time"
    })
    
    # Arredonda as colunas numéricas para 3 casas decimais
    sorted_df = sorted_df.round(3)
    
    # Preenche valores NaN com '-'
    sorted_df = sorted_df.fillna('-')
    
    # Substituições de strings
    sorted_df = sorted_df.replace({
        'sentence-transformers_': '',
        'McGill-NLP_LLM2Vec-': '',
        'base_prompt': 'BP',
        'instruction_classification_prompt': 'ICP',
        'instruction_summary_prompt': 'ISP',
        'bert': 'BERT',
        'ollama': 'LLM',
        'llm2vec': 'LLM2Vec',
        'Sheared-LLaMA-mntp-supervised': 'Sheared-LLaMA-1.3B-mntp-supervised',
        'Sheared-LLaMA-mntp-unsup-simcse': 'Sheared-LLaMA-1.3B-mntp-unsup-simcse',
        'qwen2.5:7b': 'Qwen 2.5 - 7B',
        'gemma2:9b': 'Gemma2 - 9B',
        'mistral:7b': 'Mistral - 7B',
        'llama3.2:3b': 'Llama 3.2 - 3B',
        '-mntp': '',
    }, regex=True)
    
    # Encontrando os modelos com os valores máximos para cada métrica
    max_accuracy_df = sorted_df[sorted_df['Accuracy'] == sorted_df['Accuracy'].max()]
    max_precision_df = sorted_df[sorted_df['Precision'] == sorted_df['Precision'].max()]
    max_recall_df = sorted_df[sorted_df['Recall'] == sorted_df['Recall'].max()]
    max_f1_df = sorted_df[sorted_df['F1'] == sorted_df['F1'].max()]
    
    # Organiza os resultados em um DataFrame mais legível
    max_metrics_df = pd.DataFrame({
        'Metric': ['Accuracy', 'Precision', 'Recall', 'F1'],
        'Model Name': [
            ', '.join(max_accuracy_df['Model Name']),
            ', '.join(max_precision_df['Model Name']),
            ', '.join(max_recall_df['Model Name']),
            ', '.join(max_f1_df['Model Name'])
        ],
        'Prompt': [
            ', '.join(max_accuracy_df['Prompt']),
            ', '.join(max_precision_df['Prompt']),
            ', '.join(max_recall_df['Prompt']),
            ', '.join(max_f1_df['Prompt'])
        ],
        'Value': [
            max_accuracy_df['Accuracy'].max(),
            max_precision_df['Precision'].max(),
            max_recall_df['Recall'].max(),
            max_f1_df['F1'].max()
        ]
    })
    
    # Exibe os resultados de forma mais clara
    display(max_metrics_df)
    print("-" * 50)


Dataset: Dmoz-Science.csv


Unnamed: 0,Metric,Model Name,Prompt,Value
0,Accuracy,Sheared-LLaMA-1.3B-supervised,ISP,0.836
1,Precision,Meta-Llama-3-8B-Instruct-supervised,BP,0.842
2,Recall,Sheared-LLaMA-1.3B-supervised,ISP,0.836
3,F1,Sheared-LLaMA-1.3B-supervised,ISP,0.833


--------------------------------------------------
Dataset: re8.csv


Unnamed: 0,Metric,Model Name,Prompt,Value
0,Accuracy,Mistral-7B-Instruct-v2-supervised,BP,0.97
1,Precision,"Meta-Llama-3-8B-Instruct-supervised, Mistral-7B-Instruct-v2-supervised","ISP, ICP",0.939
2,Recall,all-mpnet-base-v2,-,0.925
3,F1,all-mpnet-base-v2,-,0.93


--------------------------------------------------
Dataset: Dmoz-Computers.csv


Unnamed: 0,Metric,Model Name,Prompt,Value
0,Accuracy,Mistral-7B-Instruct-v2-unsup-simcse,BP,0.794
1,Precision,Mistral-7B-Instruct-v2-unsup-simcse,BP,0.791
2,Recall,Mistral-7B-Instruct-v2-unsup-simcse,BP,0.79
3,F1,Mistral-7B-Instruct-v2-unsup-simcse,BP,0.783


--------------------------------------------------
Dataset: NSF.csv


Unnamed: 0,Metric,Model Name,Prompt,Value
0,Accuracy,Meta-Llama-3-8B-Instruct-supervised,BP,0.89
1,Precision,Meta-Llama-3-8B-Instruct-supervised,BP,0.895
2,Recall,Meta-Llama-3-8B-Instruct-supervised,BP,0.886
3,F1,Meta-Llama-3-8B-Instruct-supervised,BP,0.889


--------------------------------------------------
Dataset: Industry Sector.csv


Unnamed: 0,Metric,Model Name,Prompt,Value
0,Accuracy,Meta-Llama-3-8B-Instruct-supervised,BP,0.942
1,Precision,Meta-Llama-3-8B-Instruct-supervised,BP,0.946
2,Recall,Meta-Llama-3-8B-Instruct-supervised,BP,0.934
3,F1,Meta-Llama-3-8B-Instruct-supervised,BP,0.94


--------------------------------------------------
Dataset: Dmoz-Sports.csv


Unnamed: 0,Metric,Model Name,Prompt,Value
0,Accuracy,Meta-Llama-3-8B-Instruct-supervised,BP,0.95
1,Precision,Meta-Llama-3-8B-Instruct-supervised,BP,0.951
2,Recall,Meta-Llama-3-8B-Instruct-supervised,BP,0.95
3,F1,Meta-Llama-3-8B-Instruct-supervised,BP,0.95


--------------------------------------------------
Dataset: review_polarity.csv


Unnamed: 0,Metric,Model Name,Prompt,Value
0,Accuracy,Mistral - 7B,BP,0.934
1,Precision,Mistral - 7B,BP,0.935
2,Recall,Mistral - 7B,BP,0.934
3,F1,Mistral - 7B,BP,0.934


--------------------------------------------------
Dataset: CSTR.csv


Unnamed: 0,Metric,Model Name,Prompt,Value
0,Accuracy,"Mistral-7B-Instruct-v2-unsup-simcse, all-mpnet-base-v2","ICP, -",0.896
1,Precision,Mistral-7B-Instruct-v2-supervised,BP,0.929
2,Recall,all-mpnet-base-v2,-,0.92
3,F1,Meta-Llama-3-8B-Instruct-supervised,BP,0.918


--------------------------------------------------
Dataset: Dmoz-Health.csv


Unnamed: 0,Metric,Model Name,Prompt,Value
0,Accuracy,Meta-Llama-3-8B-Instruct-supervised,BP,0.912
1,Precision,Meta-Llama-3-8B-Instruct-supervised,BP,0.914
2,Recall,Meta-Llama-3-8B-Instruct-supervised,BP,0.912
3,F1,Meta-Llama-3-8B-Instruct-supervised,BP,0.911


--------------------------------------------------
Dataset: classic4.csv


Unnamed: 0,Metric,Model Name,Prompt,Value
0,Accuracy,"Llama 3.2 - 3B, Llama 3.2 - 3B, Mistral - 7B","BP, ICP, ICP",0.988
1,Precision,Mistral - 7B,ICP,0.991
2,Recall,Qwen 2.5 - 7B,BP,0.989
3,F1,"Llama 3.2 - 3B, Llama 3.2 - 3B, Qwen 2.5 - 7B, Mistral - 7B","BP, ICP, BP, ICP",0.989


--------------------------------------------------
Dataset: SyskillWebert.csv


Unnamed: 0,Metric,Model Name,Prompt,Value
0,Accuracy,all-MiniLM-L12-v2,-,0.925
1,Precision,Mistral-7B-Instruct-v2-supervised,ISP,0.932
2,Recall,all-MiniLM-L12-v2,-,0.911
3,F1,all-MiniLM-L12-v2,-,0.914


--------------------------------------------------


In [None]:
datasets = results_df['dataset_name'].unique()
ignore_models = ["phi3.5:3.8b"]

for dataset in datasets:
    print(f"Dataset: {dataset}")
    
    # Filtra o DataFrame pelo dataset atual
    sorted_df = results_df[results_df['dataset_name'] == dataset]
    
    # Remove colunas desnecessárias e modelos ignorados
    sorted_df = sorted_df.drop(["dataset_name", "classifier"], axis=1)
    sorted_df = sorted_df[~sorted_df['model_name'].isin(ignore_models)]
    
    # Renomeia as colunas
    sorted_df = sorted_df.rename(columns={
        'model_type': "Model Type",
        "model_name": "Model Name",
        "prompt_name": "Prompt",
        'mean_test_accuracy': 'Accuracy',
        'mean_test_precision': 'Precision',
        'mean_test_recall': 'Recall',
        'mean_test_f1_score': 'F1',
        'embedding_time': "Embedding Time"
    })
    
    # Arredonda as colunas numéricas para 3 casas decimais
    sorted_df = sorted_df.round(3)
    sorted_df[sorted_df.select_dtypes(include='number').columns] = sorted_df.select_dtypes(include='number').applymap(lambda x: f"{x:.3f}")
    
    # Preenche valores NaN com '-'
    sorted_df = sorted_df.fillna('-')
    
    # Substituições de strings
    sorted_df = sorted_df.replace({
        'sentence-transformers_': '',
        'McGill-NLP_LLM2Vec-': '',
        'base_prompt': 'BP',
        'instruction_classification_prompt': 'ICP',
        'instruction_summary_prompt': 'ISP',
        'bert': 'BERT',
        'ollama': 'LLM',
        'llm2vec': 'LLM2Vec',
        'Sheared-LLaMA-mntp-supervised': 'Sheared-LLaMA-1.3B-mntp-supervised',
        'Sheared-LLaMA-mntp-unsup-simcse': 'Sheared-LLaMA-1.3B-mntp-unsup-simcse',
        'qwen2.5:7b': 'Qwen 2.5 - 7B',
        'gemma2:9b': 'Gemma2 - 9B',
        'mistral:7b': 'Mistral - 7B',
        'llama3.2:3b': 'Llama 3.2 - 3B',
        '-mntp': '',
    }, regex=True)
    
    # Garante que os nomes dos modelos estejam corretos
    sorted_df['Model Name'] = sorted_df['Model Name'].str.replace('_', ' ').str.title()

    # Definir as ordens
    model_type_order = ['BERT', 'LLM', 'LLM2Vec']
    model_name_order = {
        'BERT': ['All-Distilroberta-V1', 'All-Minilm-L12-V2', 'All-Minilm-L6-V2', 'All-Mpnet-Base-V2'],
        'LLM': ['Gemma2 - 9B', 'Qwen 2.5 - 7B', 'Mistral - 7B', 'Llama 3.2 - 3B'],
        'LLM2Vec': [
            'Sheared-Llama-1.3B-Unsup-Simcse',
            'Sheared-Llama-1.3B-Supervised',
            'Meta-Llama-3-8B-Instruct-Unsup-Simcse',    # Removido "-Mntp"
            'Meta-Llama-3-8B-Instruct-Supervised',     # Removido "-Mntp"
            'Mistral-7B-Instruct-V2-Unsup-Simcse', # Removido "-Mntp"
            'Mistral-7B-Instruct-V2-Supervised'    # Removido "-Mntp"
        ]
    }
    prompt_order = ['BP', 'ISP', 'ICP']
    
    # Criar DataFrame de referência com a ordem desejada
    ref_data = []
    for model_type in model_type_order:
        for model_name in model_name_order[model_type]:
            if model_type == 'BERT':
                # BERT não tem prompts
                ref_data.append({'Model Type': model_type, 'Model Name': model_name, 'Prompt': '-'})
            else:
                for prompt in prompt_order:
                    ref_data.append({'Model Type': model_type, 'Model Name': model_name, 'Prompt': prompt})
    
    ref_df = pd.DataFrame(ref_data)
    
    # Adicionar uma coluna de prioridade ao DataFrame de referência
    ref_df['priority'] = ref_df.index
    
    # Mesclar o DataFrame original com o de referência
    merged_df = pd.merge(
        sorted_df,
        ref_df,
        on=['Model Type', 'Model Name', 'Prompt'],
        how='left'
    )
    
    # Ordenar pelo campo 'priority' e remover a coluna temporária
    df_final = merged_df.sort_values(by='priority').drop(columns='priority')
    
    
    # Exibir o DataFrame ordenado
    display(df_final)
    
    # Salva o DataFrame em CSV (usando df_final, que está ordenado)
    df_final.to_csv(f'{base_path}/resume/{dataset}', index=False)

In [26]:
import pandas as pd

pd.set_option('display.max_colwidth', 500)

ignore_models = ["phi3.5:3.8b"]

datasets = results_df['dataset_name'].unique()

# DataFrame para armazenar os melhores resultados
best_results_df = pd.DataFrame()

# Loop para cada dataset
for dataset in datasets:    
    # Filtra os dados para o dataset atual
    dataset_df = results_df[results_df['dataset_name'] == dataset]
    dataset_df = dataset_df[~dataset_df['model_name'].isin(ignore_models)]
    
    # Filtra cada tipo de modelo (model_type)
    model_types = dataset_df['model_type'].unique()
    for model_type in model_types:
        # Filtra os dados para o tipo de modelo atual
        model_type_df = dataset_df[dataset_df['model_type'] == model_type]
        
        # Ordena os resultados pelo F1 Score
        sorted_df = model_type_df.sort_values(by="mean_test_f1_score", ascending=False)
        
        # Remove colunas irrelevantes
        sorted_df = sorted_df.drop(["dataset_name", "classifier"], axis=1)
        
        # Renomeia as colunas para os nomes desejados
        sorted_df = sorted_df.rename(columns={
            'model_type': "Tipo de Modelo",
            "model_name": "Nome do Modelo",
            "prompt_name": "Prompt",
            'mean_test_accuracy': 'Acurácia',
            'mean_test_precision': 'Precisão',
            'mean_test_recall': 'Recall',
            'mean_test_f1_score': 'F1 Score',
        })
        
        sorted_df = sorted_df.replace({
            'sentence-transformers_': '',
            'McGill-NLP_LLM2Vec-': '',
            'base_prompt': 'BP',
            'instruction_classification_prompt': 'ICP',
            'instruction_summary_prompt': 'ISP',
            'bert': 'BERT',
            'ollama': 'LLM',
            'llm2vec': 'LLM2Vec',
            'Sheared-LLaMA-mntp-supervised': 'Sheared-LLaMA-1.3B-mntp-supervised',
            'Sheared-LLaMA-mntp-unsup-simcse': 'Sheared-LLaMA-1.3B-mntp-unsup-simcse',
            'qwen2.5:7b': 'Qwen 2.5 - 7B',
            'gemma2:9b': 'Gemma2 - 9B',
            'mistral:7b': 'Mistral - 7B',
            'llama3.2:3b': 'Llama 3.2 - 3B',
            '-mntp': '',
        }, regex=True)

        # Arredonda as colunas numéricas para 3 casas decimais
        sorted_df = sorted_df.round(3)
        sorted_df[sorted_df.select_dtypes(include='number').columns] = sorted_df.select_dtypes(include='number').applymap(lambda x: f"{x:.3f}")
        
        # Preenche valores NaN com '-'
        sorted_df = sorted_df.fillna('-')
        
        # Seleciona o melhor resultado para o tipo de modelo atual
        best_row = sorted_df.iloc[0][['Prompt', 'Acurácia', 'Precisão', 'Recall', 'F1 Score']]
        
        # Adiciona o Nome do Dataset e Tipo de Modelo
        best_row['Dataset'] = dataset[:-4]
        best_row['Tipo de Modelo'] = sorted_df.iloc[0]['Tipo de Modelo']
        best_row['Nome do Modelo'] = sorted_df.iloc[0]['Nome do Modelo']
        
        # Adiciona o melhor resultado na tabela
        best_results_df = pd.concat([best_results_df, best_row.to_frame().T], ignore_index=True)

# Ordena o DataFrame final por Dataset e F1 Score em ordem decrescente
#best_results_df = best_results_df.loc[best_results_df.groupby("Dataset")["F1 Score"].idxmax()]
best_results_df = best_results_df.sort_values(by=["Dataset", "F1 Score"], ascending=[True, False])

# Ordena as colunas na ordem desejada
columns_order = ['Dataset', 'Tipo de Modelo', 'Nome do Modelo', 'Prompt', 'Acurácia', 'Precisão', 'Recall', 'F1 Score']
best_results_df = best_results_df[columns_order]

#Define a ordem personalizada dos datasets
dataset_order = [
    "CSTR", "SyskillWebert", "review_polarity", "Dmoz-Science", "Dmoz-Health", 
    "classic4", "re8", "Industry Sector", "Dmoz-Computers", "NSF", "Dmoz-Sports"
]

# Define a coluna "Dataset" como uma categoria ordenada
best_results_df["Dataset"] = pd.Categorical(best_results_df["Dataset"], categories=dataset_order, ordered=True)

# Define a ordem personalizada dos tipos de modelo
model_order = ["BERT", "LLM", "LLM2Vec"]

# Define a coluna "Tipo de Modelo" como uma categoria ordenada
best_results_df["Tipo de Modelo"] = pd.Categorical(best_results_df["Tipo de Modelo"], categories=model_order, ordered=True)

# Ordena o DataFrame de acordo com a ordem desejada primeiro por Dataset e depois por Tipo de Modelo
best_results_df = best_results_df.sort_values(by=["Dataset", "Tipo de Modelo"])

# Exibe a tabela final
display(best_results_df)

# Salva o DataFrame com os melhores resultados em CSV
best_results_df.to_csv(f'{base_path}/resume/best_results.csv', index=False)


  sorted_df[sorted_df.select_dtypes(include='number').columns] = sorted_df.select_dtypes(include='number').applymap(lambda x: f"{x:.3f}")
  sorted_df[sorted_df.select_dtypes(include='number').columns] = sorted_df.select_dtypes(include='number').applymap(lambda x: f"{x:.3f}")
  sorted_df[sorted_df.select_dtypes(include='number').columns] = sorted_df.select_dtypes(include='number').applymap(lambda x: f"{x:.3f}")
  sorted_df[sorted_df.select_dtypes(include='number').columns] = sorted_df.select_dtypes(include='number').applymap(lambda x: f"{x:.3f}")
  sorted_df[sorted_df.select_dtypes(include='number').columns] = sorted_df.select_dtypes(include='number').applymap(lambda x: f"{x:.3f}")
  sorted_df[sorted_df.select_dtypes(include='number').columns] = sorted_df.select_dtypes(include='number').applymap(lambda x: f"{x:.3f}")
  sorted_df[sorted_df.select_dtypes(include='number').columns] = sorted_df.select_dtypes(include='number').applymap(lambda x: f"{x:.3f}")
  sorted_df[sorted_df.select_dtype

Unnamed: 0,Dataset,Tipo de Modelo,Nome do Modelo,Prompt,Acurácia,Precisão,Recall,F1 Score
22,CSTR,BERT,all-mpnet-base-v2,-,0.896,0.917,0.92,0.914
21,CSTR,LLM,Mistral - 7B,BP,0.789,0.834,0.825,0.825
23,CSTR,LLM2Vec,Meta-Llama-3-8B-Instruct-supervised,BP,0.89,0.927,0.915,0.918
31,SyskillWebert,BERT,all-MiniLM-L12-v2,-,0.925,0.931,0.911,0.914
30,SyskillWebert,LLM,Gemma2 - 9B,BP,0.796,0.81,0.751,0.76
32,SyskillWebert,LLM2Vec,Mistral-7B-Instruct-v2-supervised,ISP,0.913,0.932,0.893,0.907
19,review_polarity,BERT,all-mpnet-base-v2,-,0.709,0.719,0.709,0.706
18,review_polarity,LLM,Mistral - 7B,BP,0.934,0.935,0.934,0.934
20,review_polarity,LLM2Vec,Mistral-7B-Instruct-v2-supervised,ICP,0.857,0.869,0.857,0.856
1,Dmoz-Science,BERT,all-mpnet-base-v2,-,0.824,0.828,0.824,0.822


In [9]:
import pandas as pd

pd.set_option('display.max_colwidth', 500)

datasets = results_df['dataset_name'].unique()

# DataFrame para armazenar os resultados com média e desvio padrão
avg_results_df = pd.DataFrame()

# Loop para cada dataset
for dataset in datasets:
    # Filtra os dados para o dataset atual
    dataset_df = results_df[results_df['dataset_name'] == dataset]
    
    # Filtra cada tipo de modelo (model_type)
    model_types = dataset_df['model_type'].unique()
    for model_type in model_types:
        # Filtra os dados para o tipo de modelo atual
        model_type_df = dataset_df[dataset_df['model_type'] == model_type]
        
        # Calcula a média para as métricas relevantes
        avg_metrics = model_type_df[['mean_test_accuracy', 'mean_test_precision', 'mean_test_recall', 
                                     'mean_test_f1_score', 'embedding_generation_time']].mean()
        
        # Calcula o desvio padrão para as métricas relevantes
        std_metrics = model_type_df[['mean_test_accuracy', 'mean_test_precision', 'mean_test_recall', 
                                     'mean_test_f1_score', 'embedding_generation_time']].std()
        
        # Renomeia as colunas para as métricas com "média" e "desvio padrão"
        avg_metrics.index = [f'{col}_mean' for col in avg_metrics.index]
        std_metrics.index = [f'{col}_std' for col in std_metrics.index]
        
        # Cria uma nova linha com a média e o desvio padrão usando concatenação
        avg_row = pd.concat([avg_metrics, std_metrics])
        
        # Adiciona o Nome do Dataset e Tipo de Modelo
        avg_row['Dataset'] = dataset
        avg_row['Tipo de Modelo'] = model_type
        
        # Pega o modelo e o prompt correspondentes ao modelo atual
        best_model_info = model_type_df.iloc[0][['model_name', 'prompt_name']]
        avg_row['Nome do Modelo'] = best_model_info['model_name']
        avg_row['Prompt'] = best_model_info['prompt_name']
        
        # Combina média e desvio padrão em uma única coluna
        for metric in ['mean_test_accuracy', 'mean_test_precision', 'mean_test_recall', 
                       'mean_test_f1_score', 'embedding_generation_time']:
            mean_col = f'{metric}_mean'
            std_col = f'{metric}_std'
            avg_row[f'{metric}'] = f"{avg_row[mean_col]:.4f} +/- {avg_row[std_col]:.4f}"
        
        # Remove as colunas de média e desvio padrão separadas
        avg_row = avg_row.drop(columns=[f'{metric}_mean' for metric in ['mean_test_accuracy', 'mean_test_precision', 'mean_test_recall', 
                                                                        'mean_test_f1_score', 'embedding_generation_time']] + 
                                    [f'{metric}_std' for metric in ['mean_test_accuracy', 'mean_test_precision', 'mean_test_recall', 
                                                                     'mean_test_f1_score', 'embedding_generation_time']])
        
        # Adiciona a linha com os resultados médios e desvios padrão na tabela
        avg_results_df = pd.concat([avg_results_df, avg_row.to_frame().T], ignore_index=True)

# Ordena o DataFrame final por Dataset e F1 Score médio em ordem decrescente
avg_results_df = avg_results_df.sort_values(by=["Dataset", "mean_test_f1_score"], ascending=[True, False])

# Organiza as colunas para as 4 primeiras colunas desejadas
columns_order = ['Dataset', 'Tipo de Modelo', 'Nome do Modelo', 'Prompt', 
                 'mean_test_accuracy', 'mean_test_precision', 'mean_test_recall', 
                 'mean_test_f1_score', 'embedding_generation_time']

avg_results_df = avg_results_df[columns_order]

# Exibe a tabela final com médias e desvios padrão combinados
display(avg_results_df)

# Salva o DataFrame com os resultados médios e desvios padrão em CSV
avg_results_df.to_csv(f'{base_path}/resume/average_results.csv', index=False)


Unnamed: 0,Dataset,Tipo de Modelo,Nome do Modelo,Prompt,mean_test_accuracy,mean_test_precision,mean_test_recall,mean_test_f1_score,embedding_generation_time
23,CSTR.csv,llm2vec,McGill-NLP_LLM2Vec-Meta-Llama-3-8B-Instruct-mntp-unsup-simcse,instruction_summary_prompt,0.8774 +/- 0.0169,0.9083 +/- 0.0177,0.9015 +/- 0.0171,0.9006 +/- 0.0173,nan +/- nan
22,CSTR.csv,bert,sentence-transformers_all-distilroberta-v1,,0.8821 +/- 0.0146,0.9005 +/- 0.0153,0.9073 +/- 0.0138,0.9003 +/- 0.0142,nan +/- nan
21,CSTR.csv,ollama,phi3.5:3.8b,instruction_summary_prompt,0.7295 +/- 0.0474,0.7974 +/- 0.0392,0.6986 +/- 0.0825,0.7201 +/- 0.0757,nan +/- nan
8,Dmoz-Computers.csv,llm2vec,McGill-NLP_LLM2Vec-Meta-Llama-3-8B-Instruct-mntp-unsup-simcse,instruction_summary_prompt,0.7718 +/- 0.0176,0.7709 +/- 0.0170,0.7675 +/- 0.0174,0.7594 +/- 0.0170,nan +/- nan
7,Dmoz-Computers.csv,bert,sentence-transformers_all-distilroberta-v1,,0.7301 +/- 0.0064,0.7230 +/- 0.0046,0.7243 +/- 0.0064,0.7150 +/- 0.0069,18.1915 +/- nan
6,Dmoz-Computers.csv,ollama,phi3.5:3.8b,instruction_summary_prompt,0.5993 +/- 0.0494,0.6136 +/- 0.0460,0.5982 +/- 0.0489,0.5955 +/- 0.0480,nan +/- nan
26,Dmoz-Health.csv,llm2vec,McGill-NLP_LLM2Vec-Meta-Llama-3-8B-Instruct-mntp-unsup-simcse,instruction_summary_prompt,0.8863 +/- 0.0217,0.8902 +/- 0.0193,0.8863 +/- 0.0217,0.8861 +/- 0.0211,nan +/- nan
25,Dmoz-Health.csv,bert,sentence-transformers_all-distilroberta-v1,,0.8722 +/- 0.0076,0.8723 +/- 0.0077,0.8722 +/- 0.0076,0.8709 +/- 0.0077,nan +/- nan
24,Dmoz-Health.csv,ollama,phi3.5:3.8b,instruction_summary_prompt,0.7200 +/- 0.0521,0.7369 +/- 0.0508,0.7200 +/- 0.0521,0.7208 +/- 0.0533,nan +/- nan
2,Dmoz-Science.csv,llm2vec,McGill-NLP_LLM2Vec-Meta-Llama-3-8B-Instruct-mntp-unsup-simcse,instruction_summary_prompt,0.8113 +/- 0.0218,0.8174 +/- 0.0228,0.8113 +/- 0.0218,0.8080 +/- 0.0221,nan +/- nan


In [10]:
datasets = results_df['dataset_name'].unique()

# DataFrame para consolidar os resultados de todos os datasets
consolidated_df = pd.DataFrame()

for dataset in datasets:
    # Filtra e organiza os dados do dataset atual
    sorted_df = results_df[results_df['dataset_name'] == dataset].sort_values(by="mean_test_f1_score", ascending=False)
    sorted_df = sorted_df.drop(["dataset_name", "classifier", "embedding_generation_size"], axis=1)
    sorted_df = sorted_df[sorted_df['model_type'] != "bartowski_aya-expanse-8b-GGUF"]
    
    # Renomeia as colunas para um formato mais descritivo
    sorted_df = sorted_df.rename(columns={
        'model_type': "Tipo de Modelo",
        "model_name": "Nome do Modelo",
        "prompt_name": "Prompt",
        'mean_test_accuracy': 'Acurácia',
        'mean_test_precision': 'Precisão',
        'mean_test_recall': 'Recall',
        'mean_test_f1_score': 'F1 Score',
        'embedding_generation_time': "Tempo de geração de embeddings"
    })
    
    # Arredonda as colunas numéricas para 3 casas decimais
    sorted_df[['Acurácia', 'Precisão', 'Recall', 'F1 Score', 'Tempo de geração de embeddings']] = \
        sorted_df[['Acurácia', 'Precisão', 'Recall', 'F1 Score', 'Tempo de geração de embeddings']].round(3)
    
    # Substitui valores nulos por "-"
    sorted_df = sorted_df.fillna('-')

    # Adiciona o nome do dataset como uma nova coluna
    sorted_df['Dataset'] = dataset
    
    # Adiciona o DataFrame do dataset atual ao consolidado
    consolidated_df = pd.concat([consolidated_df, sorted_df], ignore_index=True)

print(consolidated_df.columns)

consolidated_df = consolidated_df[['Dataset', 'Tipo de Modelo', 'Nome do Modelo', 'Prompt', 'Acurácia', 'Precisão', 'Recall', 'F1 Score', 'Tempo de geração de embeddings']]

# Exibe o DataFrame consolidado
display(consolidated_df)

# Salva o DataFrame consolidado em um arquivo CSV
consolidated_df.to_csv(f'{base_path}/resume/all_results.csv', index=False)


Index(['Tipo de Modelo', 'Nome do Modelo', 'Prompt', 'Acurácia', 'Precisão',
       'Recall', 'F1 Score', 'Tempo de geração de embeddings',
       'mean_score_time', 'Dataset'],
      dtype='object')


Unnamed: 0,Dataset,Tipo de Modelo,Nome do Modelo,Prompt,Acurácia,Precisão,Recall,F1 Score,Tempo de geração de embeddings
0,Dmoz-Science.csv,llm2vec,McGill-NLP_LLM2Vec-Sheared-LLaMA-mntp-supervised,instruction_summary_prompt,0.836,0.841,0.836,0.833,-
1,Dmoz-Science.csv,llm2vec,McGill-NLP_LLM2Vec-Mistral-7B-Instruct-v2-mntp-supervised,base_prompt,0.833,0.839,0.833,0.830,-
2,Dmoz-Science.csv,llm2vec,McGill-NLP_LLM2Vec-Sheared-LLaMA-mntp-supervised,base_prompt,0.832,0.839,0.832,0.830,-
3,Dmoz-Science.csv,llm2vec,McGill-NLP_LLM2Vec-Meta-Llama-3-8B-Instruct-mntp-supervised,base_prompt,0.832,0.842,0.832,0.829,-
4,Dmoz-Science.csv,llm2vec,McGill-NLP_LLM2Vec-Mistral-7B-Instruct-v2-mntp-unsup-simcse,instruction_classification_prompt,0.826,0.832,0.826,0.824,-
...,...,...,...,...,...,...,...,...,...
395,SyskillWebert.csv,ollama,mistral:7b,instruction_classification_prompt,0.763,0.762,0.709,0.706,-
396,SyskillWebert.csv,ollama,qwen2.5:7b,instruction_summary_prompt,0.757,0.763,0.704,0.698,-
397,SyskillWebert.csv,ollama,mistral:7b,base_prompt,0.745,0.758,0.686,0.675,-
398,SyskillWebert.csv,llm2vec,McGill-NLP_LLM2Vec-Mistral-7B-Instruct-v2-mntp-supervised,instruction_classification_prompt,0.604,0.796,0.495,0.525,-
