In [None]:
%cd ../..

: 

In [2]:
import os
import numpy as np
import pandas as pd
from src.core.utils import read_json

# Configura o Pandas para exibir todas as colunas
pd.set_option('display.max_columns', None)

def load_results_to_dataframe(base_path: str) -> pd.DataFrame:
    """
    Load results from JSON files into a pandas DataFrame.
    """
    results = []
    
    # Traverse the directory structure
    for dataset_name in os.listdir(base_path):
        dataset_path = os.path.join(base_path, dataset_name)
        if os.path.isdir(dataset_path):
            for model_type in os.listdir(dataset_path):
                model_type_path = os.path.join(dataset_path, model_type)
                
                if os.path.isdir(model_type_path):
                    for model_name in os.listdir(model_type_path):
                        model_name_path = os.path.join(model_type_path, model_name)
                        
                        # Define paths based on whether prompt_name is needed
                        if model_type != "bert":
                            subdirs = [os.path.join(model_name_path, prompt) for prompt in os.listdir(model_name_path)]
                        else:
                            subdirs = [model_name_path]
                        
                        # Process results.json files from determined paths
                        for subdir in subdirs:
                            for classifier in os.listdir(subdir):
                                classifier_path = os.path.join(subdir, classifier)
                                
                                # Check for the results.json in the classifier path
                                json_file_path = os.path.join(classifier_path, 'results.json')
                                
                                if os.path.isfile(json_file_path):
                                    result_data = read_json(json_file_path)

                                    keys_to_extract = ["split0_test_f1_score", "split1_test_f1_score", "split2_test_f1_score", "split3_test_f1_score", "split4_test_f1_score", 'embedding_generation_time', 'embedding_generation_size']
    
                                    # Extrai apenas as chaves especificadas
                                    result_data= {key: result_data.get(key) for key in keys_to_extract}
                                    
                                    # Add metadata to the result data
                                    result_data['dataset_name'] = dataset_name
                                    result_data['model_type'] = model_type
                                    result_data['model_name'] = model_name
                                    result_data['classifier'] = classifier
                                    
                                    # Add prompt_name if applicable
                                    if model_type != "bert":
                                        result_data['prompt_name'] = os.path.basename(subdir)
                                    else:
                                        result_data['prompt_name'] = None
                                    
                                    results.append(result_data)

    # Create a DataFrame from the results
    results_df = pd.DataFrame(results)

    # Specify the order of the columns
    columns_first = ['dataset_name', 'model_type', 'model_name', 'classifier']
    if 'prompt_name' in results_df.columns:
        columns_first.append('prompt_name')
    column_order = columns_first + [col for col in results_df.columns if col not in columns_first]
    results_df = results_df[column_order]
    
    return results_df

In [3]:
base_path = 'results' 

df = load_results_to_dataframe(base_path)
df

Unnamed: 0,dataset_name,model_type,model_name,classifier,prompt_name,split0_test_f1_score,split1_test_f1_score,split2_test_f1_score,split3_test_f1_score,split4_test_f1_score,embedding_generation_time,embedding_generation_size
0,Dmoz-Computers.csv,bert,sentence-transformers_all-distilroberta-v1,knn,,0.708965,0.698970,0.707780,0.716755,0.708086,4.272044,29184128
1,Dmoz-Computers.csv,bert,sentence-transformers_all-MiniLM-L6-v2,knn,,0.711310,0.708279,0.706688,0.719293,0.729984,3.669558,14592128
2,Dmoz-Computers.csv,bert,sentence-transformers_all-MiniLM-L12-v2,knn,,0.703686,0.708847,0.716030,0.722475,0.711265,4.991892,14592128
3,Dmoz-Computers.csv,bert,sentence-transformers_all-mpnet-base-v2,knn,,0.725192,0.721513,0.710816,0.731876,0.733004,9.260681,29184128
4,Dmoz-Computers.csv,llm2vec,McGill-NLP_LLM2Vec-Sheared-LLaMA-mntp-supervised,knn,instruction_summary_prompt,0.775330,0.752068,0.771297,0.771387,0.775164,41.051134,128
...,...,...,...,...,...,...,...,...,...,...,...,...
105,NSF.csv,llm2vec,McGill-NLP_LLM2Vec-Mistral-7B-Instruct-v2-mntp...,knn,instruction_classification_prompt,0.883162,0.874623,0.868259,0.873625,0.874772,239.332735,128
106,NSF.csv,llm2vec,McGill-NLP_LLM2Vec-Mistral-7B-Instruct-v2-mntp...,knn,base_prompt,0.880013,0.878815,0.863143,0.880184,0.879590,63.920336,128
107,NSF.csv,llm2vec,McGill-NLP_LLM2Vec-Meta-Llama-3-8B-Instruct-mn...,knn,instruction_summary_prompt,0.889105,0.878020,0.885468,0.889903,0.888813,137.549927,128
108,NSF.csv,llm2vec,McGill-NLP_LLM2Vec-Meta-Llama-3-8B-Instruct-mn...,knn,instruction_classification_prompt,0.878154,0.867611,0.866053,0.877906,0.878015,221.728992,128


In [15]:
import pandas as pd

# Exemplo de DataFrame (substitua pelo seu df real)
# df = pd.DataFrame({...})

# Defina os splits que você quer combinar
splits = ['split0_test_f1_score', 
          'split1_test_f1_score', 
          'split2_test_f1_score', 
          'split3_test_f1_score', 
          'split4_test_f1_score']

# Agrupar pelo 'prompt_name' e combinar os splits em uma lista para cada 'prompt_name'
df_combined = df.groupby('prompt_name')[splits].apply(lambda x: x.values.flatten().tolist()).reset_index()

# Exibir o resultado
df_combined  # A coluna 0 contém a lista dos splits combinados


Unnamed: 0,prompt_name,0
0,base_prompt,"[0.7723636763722447, 0.763658005103482, 0.7634..."
1,instruction_classification_prompt,"[0.753274119106956, 0.7408529426699233, 0.7467..."
2,instruction_summary_prompt,"[0.7753301888686077, 0.7520678321654, 0.771296..."


In [26]:
import numpy as np
from scipy.stats import f_oneway

base_prompt = df_combined[df_combined['prompt_name'] == "base_prompt"][0].tolist()[0]
instruction_summary_prompt = df_combined[df_combined['prompt_name'] == "instruction_summary_prompt"][0].tolist()[0]
instruction_classification_prompt = df_combined[df_combined['prompt_name'] == "instruction_classification_prompt"][0].tolist()[0]

# Calcule as variâncias para cada grupo
var_base = np.var(base_prompt)
var_summary = np.var(instruction_summary_prompt)
var_classification = np.var(instruction_classification_prompt)

print("Variância para base_prompt:", var_base)
print("Variância para instruction_summary_prompt:", var_summary)
print("Variância para instruction_classification_prompt:", var_classification)

# Execute o ANOVA clássico para comparar as médias dos grupos
anova_result = f_oneway(base_prompt, instruction_summary_prompt, instruction_classification_prompt)

print("\nResultado do ANOVA:")
print("Statistic:", anova_result.statistic, "p-value:", anova_result.pvalue)


Variância para base_prompt: 0.004436530205440216
Variância para instruction_summary_prompt: 0.004257131661144218
Variância para instruction_classification_prompt: 0.004086945446258278

Resultado do ANOVA:
Statistic: 0.3848971214194376 p-value: 0.6807459620446685


In [5]:
from scipy.stats import ttest_ind, mannwhitneyu

# Separando os tempos por modelo
time_llm2vec = df[df['model_type'] == 'llm2vec']['embedding_generation_time']
time_bert = df[df['model_type'] == 'bert']['embedding_generation_time']

# Teste de Mann-Whitney
mw_stat, mw_pval = mannwhitneyu(time_llm2vec, time_bert)
print(f"Teste Mann-Whitney: estatística={mw_stat}, p-valor={mw_pval}")


Teste Mann-Whitney: estatística=1793.0, p-valor=4.621730739866808e-12


In [6]:
# Separando os tempos por modelo
tempos_llm2vec = df[df['model_type'] == 'llm2vec']['mean_test_f1_score']
tempos_bert = df[df['model_type'] == 'bert']['mean_test_f1_score']

# Teste de Mann-Whitney
mw_stat, mw_pval = mannwhitneyu(tempos_llm2vec, tempos_bert)
print(f"Teste Mann-Whitney: estatística={mw_stat}, p-valor={mw_pval}")


KeyError: 'mean_test_f1_score'

In [None]:
import pandas as pd
from scipy.stats import shapiro

# Supondo que 'df' seja o seu DataFrame
# Separe os dados de 'mean_test_f1_score' para cada prompt
base_prompt = df[df['prompt_name'] == 'base_prompt']['mean_test_f1_score']
instruction_summary_prompt = df[df['prompt_name'] == 'instruction_summary_prompt']['mean_test_f1_score']
instruction_classification_prompt = df[df['prompt_name'] == 'instruction_classification_prompt']['mean_test_f1_score']

# Realize o Teste de Shapiro-Wilk para cada grupo
shapiro_base = shapiro(base_prompt)
shapiro_summary = shapiro(instruction_summary_prompt)
shapiro_classification = shapiro(instruction_classification_prompt)

# Exibe os resultados do teste
print("Teste de Shapiro-Wilk para base_prompt:")
print("Statistic:", shapiro_base.statistic, "p-value:", shapiro_base.pvalue)

print("\nTeste de Shapiro-Wilk para instruction_summary_prompt:")
print("Statistic:", shapiro_summary.statistic, "p-value:", shapiro_summary.pvalue)

print("\nTeste de Shapiro-Wilk para instruction_classification_prompt:")
print("Statistic:", shapiro_classification.statistic, "p-value:", shapiro_classification.pvalue)


Teste de Shapiro-Wilk para base_prompt:
Statistic: 0.9736904972818591 p-value: 0.6442262521848311

Teste de Shapiro-Wilk para instruction_summary_prompt:
Statistic: 0.9560738779996244 p-value: 0.24506938060597977

Teste de Shapiro-Wilk para instruction_classification_prompt:
Statistic: 0.9602047894188402 p-value: 0.3135277844008573


In [None]:
import pandas as pd
from scipy.stats import f_oneway

# Calcule as variâncias para cada grupo
var_base = base_prompt.var()
var_summary = instruction_summary_prompt.var()
var_classification = instruction_classification_prompt.var()

print("Variância para base_prompt:", var_base)
print("Variância para instruction_summary_prompt:", var_summary)
print("Variância para instruction_classification_prompt:", var_classification)

# Execute o ANOVA clássico para comparar as médias dos grupos
anova_result = f_oneway(base_prompt, instruction_summary_prompt, instruction_classification_prompt)

print("\nResultado do ANOVA:")
print("Statistic:", anova_result.statistic, "p-value:", anova_result.pvalue)


Variância para base_prompt: 0.004456345961475927
Variância para instruction_summary_prompt: 0.004256975670279224
Variância para instruction_classification_prompt: 0.004104426742356053

Resultado do ANOVA:
Statistic: 0.07727150998046275 p-value: 0.9257019538195259
