In [1]:
import pandas as pd
import os
import matplotlib.pyplot as plt
import re
import plotly.express as px
import plotly.graph_objects as go
from plotly.subplots import make_subplots
import plotly.io as pio

pd.set_option('display.max_colwidth', None)

In [2]:
dir_path = '../UNSLOTH/V1/GLOSA_PT'
answer_path = '/home/annap/Documentos/chatbot_copy/DATASETS/MMLU/0_mmlu_prompt_glosa_pt.csv'

models_map = {
    'Llama-3.1-8B-unsloth-bnb-4bit-V1.csv': 'Llama-3.1-8B-V1',
    'Llama-3.2-3B-Instruct-unsloth-bnb-4bit-V1.csv': 'Llama-3.2-3B-V1',
    'phi-4-unsloth-bnb-4bit-V1.csv': 'Phi-4-14B-V1',
    'zephyr-sft-bnb-4bit-V1.csv': 'Zephyr-7B-V1',
    'Qwen2.5-7B-Instruct-bnb-4bit-V1.csv': 'Qwen2.5-7B-V1',
    'Qwen2.5-14B-Instruct-unsloth-bnb-4bit-V1.csv': 'Qwen2.5-14B-V1',
}

df_answer = pd.read_csv(answer_path)[['ID', 'ANSWER']]
answer_mapping = {0: 'A', 1: 'B', 2: 'C', 3: 'D'}
df_answer['ANSWER'] = df_answer['ANSWER'].map(answer_mapping)
subject = pd.read_csv("/home/annap/Documentos/chatbot_copy/DATASETS/MMLU/0_mmlu.csv")
df_answer = df_answer.merge(subject[['question_id', 'subject']], left_on='ID', right_on='question_id', how='left')


def extrair_model_answer(texto):
    if not isinstance(texto, str):
        return 'invalid'
    match = re.search(r'[ABCD]', texto)
    return match.group(0) if match else 'invalid'

def processar_dataframe(filepath):
    df = pd.read_csv(filepath)
    df['output_model'] = df['output_model'].str.upper()
    
    df['last_answer'] = df['output_model'].str.extract(
        r'(?:ASSISTANT:|ANSWER:|RESPOSTA:)\s*(.*)', flags=re.IGNORECASE
    ).fillna('invalid')
    
    df['model_answer'] = df['last_answer'].apply(extrair_model_answer)
    df['len_prompt'] = df['prompt'].apply(lambda x: len(x) if isinstance(x, str) else 0)
    df['len_output'] = df['output_model'].apply(lambda x: len(x) if isinstance(x, str) else 0)
    
    df = df.merge(df_answer, on='ID', how='left')
    
    df['is_error'] = (df['model_answer'] != df['ANSWER']).astype(int)
    
    return df


resultados_estatisticas = []

for filename in os.listdir(dir_path):
    if filename.endswith('.csv') and filename in models_map:
        model_name = models_map[filename]
        file_path = os.path.join(dir_path, filename)
        print(f"Processando: {model_name}")
        
        df = processar_dataframe(file_path)
        
        stats = df.groupby('is_error')[['len_prompt', 'len_output']].agg(
            len_prompt_mean=('len_prompt', 'mean'),
            len_prompt_median=('len_prompt', 'median'),
            len_prompt_std=('len_prompt', 'std'),
            len_output_mean=('len_output', 'mean'),
            len_output_median=('len_output', 'median'),
            len_output_std=('len_output', 'std'),
            total=('len_prompt', 'count')
        ).reset_index()
        
        stats['modelo'] = model_name
        resultados_estatisticas.append(stats)


df_estatisticas = pd.concat(resultados_estatisticas, ignore_index=True)
cols = ['modelo', 'is_error'] + [col for col in df_estatisticas.columns if col not in ['modelo', 'is_error']]
df_estatisticas = df_estatisticas[cols]
df_estatisticas.to_csv('estatisticas_erros_modelos_v1.csv', index=False)

Processando: Llama-3.2-3B-V1
Processando: Phi-4-14B-V1
Processando: Zephyr-7B-V1
Processando: Llama-3.1-8B-V1
Processando: Qwen2.5-7B-V1
Processando: Qwen2.5-14B-V1


## Violin

In [3]:
'''import os
import pandas as pd
import plotly.express as px
from plotly.subplots import make_subplots
import re

def salvar_html(fig, caminho):
    try:
        os.makedirs(os.path.dirname(caminho), exist_ok=True)
        fig.write_html(caminho)
        print(f"Salvo HTML em: {caminho}")
    except Exception as e:
        print(f"Erro ao salvar HTML: {e}")

def extrair_model_answer(texto):
    if not isinstance(texto, str):
        return 'invalid'
    match = re.search(r'[ABCD]', texto)
    return match.group(0) if match else 'invalid'

def processar_dataframe(filepath):
    df = pd.read_csv(filepath)
    df['output_model'] = df['output_model'].str.upper()
    df['last_answer'] = df['output_model'].str.extract(
        r'(?:ASSISTANT:|ANSWER:|RESPOSTA:)\s*(.*)', flags=re.IGNORECASE
    ).fillna('invalid')
    df['model_answer'] = df['last_answer'].apply(extrair_model_answer)
    df['len_prompt'] = df['prompt'].apply(lambda x: len(x) if isinstance(x, str) else 0)
    df['len_output'] = df['output_model'].apply(lambda x: len(x) if isinstance(x, str) else 0)
    df = df.merge(df_answer, on='ID', how='left')
    df['is_error'] = (df['model_answer'] != df['ANSWER']).astype(int)
    df['resultado'] = df['is_error'].map({0: 'Acerto', 1: 'Erro'})
    return df

def criar_violin_plot(df, coluna, row, fig):
    color_map = {
        "Acerto": "#1f77b4",  # Azul
        "Erro": "#d62728"     # Vermelho
    }
    violin = px.violin(
        df,
        y=coluna,
        x="resultado",
        color="resultado",
        box=True,
        points="all",
        color_discrete_map=color_map
    )
    for trace in violin.data:
        fig.add_trace(trace, row=row, col=1)
    fig.update_yaxes(title_text="Tamanho", row=row, col=1)


def plot_violin_duplo(df, model_name):
    fig = make_subplots(
        rows=2,
        cols=1,
        shared_xaxes=True,
        vertical_spacing=0.1,
        subplot_titles=[
            "Distribuição do Tamanho do Prompt de Entrada",
            "Distribuição do Tamanho do Prompt de Saída do Modelo"
        ]
    )

    criar_violin_plot(df, "len_prompt", row=1, fig=fig)
    criar_violin_plot(df, "len_output", row=2, fig=fig)

    fig.update_layout(
        height=800,
        width=1000,
        title_text=f"Distribuições - {model_name}",
        showlegend=False,
        font=dict(color="black", size=16),
        plot_bgcolor="#f9f9f9",
        paper_bgcolor="white"
    )
    fig.update_xaxes(title_text="Resultado (Acerto ou Erro)", row=2, col=1)

    salvar_html(fig, f"/home/annap/Downloads/01_plots_html/dist_{model_name}_violin.html")

# Caminhos
dir_path = '../UNSLOTH/V1/GLOSA_PT'
answer_path = '/home/annap/Documentos/chatbot_copy/DATASETS/MMLU/0_mmlu_prompt_glosa_pt.csv'

# Mapeamento dos nomes
models_map = {
    'Llama-3.1-8B-unsloth-bnb-4bit-V1.csv': 'Llama-3.1-8B-V1',
    'Llama-3.2-3B-Instruct-unsloth-bnb-4bit-V1.csv': 'Llama-3.2-3B-V1',
    'phi-4-unsloth-bnb-4bit-V1.csv': 'Phi-4-14B-V1',
    'zephyr-sft-bnb-4bit-V1.csv': 'Zephyr-7B-V1',
    'Qwen2.5-7B-Instruct-bnb-4bit-V1.csv': 'Qwen2.5-7B-V1',
    'Qwen2.5-14B-Instruct-unsloth-bnb-4bit-V1.csv': 'Qwen2.5-14B-V1',
}

# Carrega respostas
df_answer = pd.read_csv(answer_path)[['ID', 'ANSWER']]
df_answer['ANSWER'] = df_answer['ANSWER'].map({0: 'A', 1: 'B', 2: 'C', 3: 'D'})

# Loop para processar cada modelo
for filename in os.listdir(dir_path):
    if filename.endswith('.csv') and filename in models_map:
        model_name = models_map[filename]
        print(f"Processando: {model_name}")
        file_path = os.path.join(dir_path, filename)
        df = processar_dataframe(file_path)
        plot_violin_duplo(df, model_name)
'''

  '''import os


'import os\nimport pandas as pd\nimport plotly.express as px\nfrom plotly.subplots import make_subplots\nimport re\n\ndef salvar_html(fig, caminho):\n    try:\n        os.makedirs(os.path.dirname(caminho), exist_ok=True)\n        fig.write_html(caminho)\n        print(f"Salvo HTML em: {caminho}")\n    except Exception as e:\n        print(f"Erro ao salvar HTML: {e}")\n\ndef extrair_model_answer(texto):\n    if not isinstance(texto, str):\n        return \'invalid\'\n    match = re.search(r\'[ABCD]\', texto)\n    return match.group(0) if match else \'invalid\'\n\ndef processar_dataframe(filepath):\n    df = pd.read_csv(filepath)\n    df[\'output_model\'] = df[\'output_model\'].str.upper()\n    df[\'last_answer\'] = df[\'output_model\'].str.extract(\n        r\'(?:ASSISTANT:|ANSWER:|RESPOSTA:)\\s*(.*)\', flags=re.IGNORECASE\n    ).fillna(\'invalid\')\n    df[\'model_answer\'] = df[\'last_answer\'].apply(extrair_model_answer)\n    df[\'len_prompt\'] = df[\'prompt\'].apply(lambda x: len(x)

In [4]:
import pandas as pd
import plotly.express as px
from plotly.subplots import make_subplots
import plotly.graph_objects as go
import os
import re

def extrair_model_answer(texto):
    if not isinstance(texto, str):
        return 'invalid'
    match = re.search(r'[ABCD]', texto)
    return match.group(0) if match else 'invalid'

def processar_dataframe(filepath, versao, modelo_base):
    df = pd.read_csv(filepath)
    df['output_model'] = df['output_model'].str.upper()
    df['last_answer'] = df['output_model'].str.extract(r'(?:ASSISTANT:|ANSWER:|RESPOSTA:)\s*(.*)', flags=re.IGNORECASE).fillna('invalid')
    df['model_answer'] = df['last_answer'].apply(extrair_model_answer)
    df['len_prompt'] = df['prompt'].apply(lambda x: len(x) if isinstance(x, str) else 0)
    df['len_output'] = df['output_model'].apply(lambda x: len(x) if isinstance(x, str) else 0)
    df = df.merge(df_answer, on='ID', how='left')
    df['is_error'] = (df['model_answer'] != df['ANSWER']).astype(int)
    df['resultado'] = df['is_error'].map({0: 'Acerto', 1: 'Erro'})
    df['versao'] = versao
    df['modelo_base'] = modelo_base
    return df

def plot_comparativo_violin(df_model, modelo_base):
    fig = make_subplots(
        rows=1, cols=2,
        subplot_titles=["Tamanho do Prompt", "Tamanho da Saída"],
        shared_yaxes=False,
        horizontal_spacing=0.15
    )

    for col, col_idx in zip(["len_prompt", "len_output"], [1, 2]):
        fig_tmp = px.violin(
            df_model,
            y=col,
            x="versao",
            color="resultado",
            box=True,
            points="all",
            category_orders={"versao": ["V0", "V1"]}
        )
        for trace in fig_tmp.data:
            fig.add_trace(trace, row=1, col=col_idx)

        fig.update_yaxes(title_text="Tamanho", row=1, col=col_idx)
        fig.update_xaxes(title_text="Versão", row=1, col=col_idx)

    fig.update_layout(
        title_text=f"Distribuição de Prompt e Output - {modelo_base}",
        font=dict(size=14),
        height=500,
        width=1000,
        plot_bgcolor="#f9f9f9",
        paper_bgcolor="white"
    )

    output_path = f"/home/annap/Downloads/01_plots_html/compare_{modelo_base}.html"
    fig.write_html(output_path)
    print(f"Salvo: {output_path}")

# Caminhos
dir_path = "../UNSLOTH/V1/GLOSA_PT"
answer_path = "/home/annap/Documentos/chatbot_copy/DATASETS/MMLU/0_mmlu_prompt_glosa_pt.csv"

# Respostas
df_answer = pd.read_csv(answer_path)[['ID', 'ANSWER']]
df_answer['ANSWER'] = df_answer['ANSWER'].map({0: 'A', 1: 'B', 2: 'C', 3: 'D'})

# Mapeamento por modelo base
modelos_base = {
    "Llama-3.1-8B": ["Llama-3.1-8B-unsloth-bnb-4bit.csv", "Llama-3.1-8B-unsloth-bnb-4bit-V1.csv"],
    "Llama-3.2-3B": ["Llama-3.2-3B-Instruct-unsloth-bnb-4bit.csv", "Llama-3.2-3B-Instruct-unsloth-bnb-4bit-V1.csv"],
    "Qwen2.5-7B": ["Qwen2.5-7B-Instruct-bnb-4bit.csv", "Qwen2.5-7B-Instruct-bnb-4bit-V1.csv"],
    "Qwen2.5-14B": ["Qwen2.5-14B-Instruct-unsloth-bnb-4bit.csv", "Qwen2.5-14B-Instruct-unsloth-bnb-4bit-V1.csv"],
    "Phi-4-14B": ["phi-4-unsloth-bnb-4bit.csv", "phi-4-unsloth-bnb-4bit-V1.csv"],
    "Zephyr-7B": ["zephyr-sft-bnb-4bit.csv", "zephyr-sft-bnb-4bit-V1.csv"],
}

# Gera os plots
for modelo_base, arquivos in modelos_base.items():
    dfs = []
    for i, arquivo in enumerate(arquivos):
        versao = f"V{i}"
        path = os.path.join(dir_path, arquivo)
        if os.path.exists(path):
            df = processar_dataframe(path, versao, modelo_base)
            dfs.append(df)
    if dfs:
        df_modelo = pd.concat(dfs, ignore_index=True)
        plot_comparativo_violin(df_modelo, modelo_base)


Salvo: /home/annap/Downloads/01_plots_html/compare_Llama-3.1-8B.html
Salvo: /home/annap/Downloads/01_plots_html/compare_Llama-3.2-3B.html
Salvo: /home/annap/Downloads/01_plots_html/compare_Qwen2.5-7B.html
Salvo: /home/annap/Downloads/01_plots_html/compare_Qwen2.5-14B.html
Salvo: /home/annap/Downloads/01_plots_html/compare_Phi-4-14B.html
Salvo: /home/annap/Downloads/01_plots_html/compare_Zephyr-7B.html


In [5]:
'''import os
import pandas as pd
import plotly.express as px
from plotly.subplots import make_subplots
import plotly.graph_objects as go
import re

def extrair_model_answer(texto):
    if not isinstance(texto, str):
        return 'invalid'
    match = re.search(r'[ABCD]', texto)
    return match.group(0) if match else 'invalid'

def processar_dataframe(filepath, versao, modelo_base, df_answer):
    df = pd.read_csv(filepath)
    df['output_model'] = df['output_model'].str.upper()
    df['last_answer'] = df['output_model'].str.extract(r'(?:ASSISTANT:|ANSWER:|RESPOSTA:)\s*(.*)', flags=re.IGNORECASE).fillna('invalid')
    df['model_answer'] = df['last_answer'].apply(extrair_model_answer)
    df['len_prompt'] = df['prompt'].apply(lambda x: len(x) if isinstance(x, str) else 0)
    df['len_output'] = df['output_model'].apply(lambda x: len(x) if isinstance(x, str) else 0)
    df = df.merge(df_answer, on='ID', how='left')
    df['is_error'] = (df['model_answer'] != df['ANSWER']).astype(int)
    df['versao'] = versao
    df['modelo_base'] = modelo_base
    return df[df['is_error'] == 1]  # <-- apenas erros!

def plot_violin_erros(df_model, modelo_base):
    fig = make_subplots(
        rows=1, cols=2,
        subplot_titles=["Tamanho do Prompt (Erro)", "Tamanho do Output (Erro)"],
        shared_yaxes=False,
        horizontal_spacing=0.15
    )

    for col, idx in zip(["len_prompt", "len_output"], [1, 2]):
        fig_tmp = px.violin(
            df_model,
            y=col,
            x="versao",
            color="versao",
            box=True,
            points="all",
            category_orders={"versao": ["V0", "V1"]},
            color_discrete_map={"V0": "#636EFA", "V1": "#EF553B"}
        )
        for trace in fig_tmp.data:
            fig.add_trace(trace, row=1, col=idx)
        fig.update_yaxes(title_text="Tamanho", row=1, col=idx)
        fig.update_xaxes(title_text="Versão", row=1, col=idx)

    fig.update_layout(
        title_text=f"Distribuições de Tamanho (Erros) - {modelo_base}",
        font=dict(size=14),
        height=500,
        width=1000,
        plot_bgcolor="#f9f9f9",
        paper_bgcolor="white",
        showlegend=False
    )

    output_path = f"/home/annap/Downloads/01_plots_html/erros_{modelo_base}.html"
    os.makedirs(os.path.dirname(output_path), exist_ok=True)
    fig.write_html(output_path)
    print(f"Salvo: {output_path}")

# Caminhos e arquivos
v0_dir = '../UNSLOTH/NO_FINE_TUNE/GLOSA_PT'
v1_dir = '../UNSLOTH/V1/GLOSA_PT'
answer_path = '/home/annap/Documentos/chatbot_copy/DATASETS/MMLU/0_mmlu_prompt_glosa_pt.csv'

# Mapeamento dos arquivos para cada modelo base
modelos_base = {
    "Llama-3.1-8B": ["Llama-3.1-8B-unsloth-bnb-4bit.csv", "Llama-3.1-8B-unsloth-bnb-4bit-V1.csv"],
    "Llama-3.2-3B": ["Llama-3.2-3B-Instruct-unsloth-bnb-4bit.csv", "Llama-3.2-3B-Instruct-unsloth-bnb-4bit-V1.csv"],
    "Qwen2.5-7B": ["Qwen2.5-7B-Instruct-bnb-4bit.csv", "Qwen2.5-7B-Instruct-bnb-4bit-V1.csv"],
    "Qwen2.5-14B": ["Qwen2.5-14B-Instruct-unsloth-bnb-4bit.csv", "Qwen2.5-14B-Instruct-unsloth-bnb-4bit-V1.csv"],
    "Phi-4-14B": ["phi-4-unsloth-bnb-4bit.csv", "phi-4-unsloth-bnb-4bit-V1.csv"],
    "Zephyr-7B": ["zephyr-sft-bnb-4bit.csv", "zephyr-sft-bnb-4bit-V1.csv"],
}

# Carrega respostas
df_answer = pd.read_csv(answer_path)[['ID', 'ANSWER']]
df_answer['ANSWER'] = df_answer['ANSWER'].map({0: 'A', 1: 'B', 2: 'C', 3: 'D'})

# Processa e plota por modelo
for modelo_base, (v0_file, v1_file) in modelos_base.items():
    dfs = []
    v0_path = os.path.join(v0_dir, v0_file)
    v1_path = os.path.join(v1_dir, v1_file)

    if os.path.exists(v0_path):
        dfs.append(processar_dataframe(v0_path, 'V0', modelo_base, df_answer))
    if os.path.exists(v1_path):
        dfs.append(processar_dataframe(v1_path, 'V1', modelo_base, df_answer))

    if dfs:
        df_model = pd.concat(dfs, ignore_index=True)
        plot_violin_erros(df_model, modelo_base)
'''


invalid escape sequence '\s'


invalid escape sequence '\s'


invalid escape sequence '\s'



'import os\nimport pandas as pd\nimport plotly.express as px\nfrom plotly.subplots import make_subplots\nimport plotly.graph_objects as go\nimport re\n\ndef extrair_model_answer(texto):\n    if not isinstance(texto, str):\n        return \'invalid\'\n    match = re.search(r\'[ABCD]\', texto)\n    return match.group(0) if match else \'invalid\'\n\ndef processar_dataframe(filepath, versao, modelo_base, df_answer):\n    df = pd.read_csv(filepath)\n    df[\'output_model\'] = df[\'output_model\'].str.upper()\n    df[\'last_answer\'] = df[\'output_model\'].str.extract(r\'(?:ASSISTANT:|ANSWER:|RESPOSTA:)\\s*(.*)\', flags=re.IGNORECASE).fillna(\'invalid\')\n    df[\'model_answer\'] = df[\'last_answer\'].apply(extrair_model_answer)\n    df[\'len_prompt\'] = df[\'prompt\'].apply(lambda x: len(x) if isinstance(x, str) else 0)\n    df[\'len_output\'] = df[\'output_model\'].apply(lambda x: len(x) if isinstance(x, str) else 0)\n    df = df.merge(df_answer, on=\'ID\', how=\'left\')\n    df[\'is_error

## V0

In [6]:
'''import os
import pandas as pd
import plotly.express as px
from plotly.subplots import make_subplots
import plotly.graph_objects as go
import re

def extrair_model_answer(texto):
    if not isinstance(texto, str):
        return 'invalid'
    match = re.search(r'[ABCD]', texto)
    return match.group(0) if match else 'invalid'

def processar_dataframe(filepath, versao, modelo_base, df_answer):
    df = pd.read_csv(filepath)
    df['output_model'] = df['output_model'].str.upper()
    df['last_answer'] = df['output_model'].str.extract(r'(?:ASSISTANT:|ANSWER:|RESPOSTA:)\s*(.*)', flags=re.IGNORECASE).fillna('invalid')
    df['model_answer'] = df['last_answer'].apply(extrair_model_answer)
    df['len_prompt'] = df['prompt'].apply(lambda x: len(x) if isinstance(x, str) else 0)
    df['len_output'] = df['output_model'].apply(lambda x: len(x) if isinstance(x, str) else 0)
    df = df.merge(df_answer, on='ID', how='left')
    df['is_error'] = (df['model_answer'] != df['ANSWER']).astype(int)
    df['versao'] = versao
    df['modelo_base'] = modelo_base
    return df[df['is_error'] == 1]  # Apenas erros

def plot_violin_erros(df_model, modelo_base):
    fig = make_subplots(
        rows=1, cols=2,
        subplot_titles=["Tamanho do Prompt dos Erros", "Tamanho do Output dos Erros"],
        horizontal_spacing=0.15
    )

    color_map = {"V0": "#636EFA", "V1": "#EF553B"}

    for col, idx in zip(["len_prompt", "len_output"], [1, 2]):
        violin_fig = px.violin(
            df_model,
            y=col,
            color="versao",
            box=True,
            points="all",
            color_discrete_map=color_map,
            category_orders={"versao": ["V0", "V1"]}
        )
        for trace in violin_fig.data:
            fig.add_trace(trace, row=1, col=idx)

        fig.update_yaxes(title_text="Tamanho", row=1, col=idx)
        fig.update_xaxes(title_text="", row=1, col=idx, showticklabels=False)

    fig.update_layout(
        title_text=f"Distribuição de Tamanhos (Erros) - {modelo_base}",
        font=dict(size=14),
        height=500,
        width=1000,
        plot_bgcolor="#f9f9f9",
        paper_bgcolor="white",
        showlegend=True,
        legend=dict(title="Versão", orientation="h", y=-0.2, x=0.25)
    )

    output_path = f"/home/annap/Downloads/01_plots_html/erros_{modelo_base}.html"
    os.makedirs(os.path.dirname(output_path), exist_ok=True)
    fig.write_html(output_path)
    print(f"Salvo: {output_path}")

# Caminhos
v0_dir = '../UNSLOTH/NO_FINE_TUNE/GLOSA_PT'
v1_dir = '../UNSLOTH/V1/GLOSA_PT'
answer_path = '/home/annap/Documentos/chatbot_copy/DATASETS/MMLU/0_mmlu_prompt_glosa_pt.csv'

# Mapeamento dos arquivos para cada modelo base
modelos_base = {
    "Llama-3.1-8B": ["Llama-3.1-8B-unsloth-bnb-4bit.csv", "Llama-3.1-8B-unsloth-bnb-4bit-V1.csv"],
    "Llama-3.2-3B": ["Llama-3.2-3B-Instruct-unsloth-bnb-4bit.csv", "Llama-3.2-3B-Instruct-unsloth-bnb-4bit-V1.csv"],
    "Qwen2.5-7B": ["Qwen2.5-7B-Instruct-bnb-4bit.csv", "Qwen2.5-7B-Instruct-bnb-4bit-V1.csv"],
    "Qwen2.5-14B": ["Qwen2.5-14B-Instruct-unsloth-bnb-4bit.csv", "Qwen2.5-14B-Instruct-unsloth-bnb-4bit-V1.csv"],
    "Phi-4-14B": ["phi-4-unsloth-bnb-4bit.csv", "phi-4-unsloth-bnb-4bit-V1.csv"],
    "Zephyr-7B": ["zephyr-sft-bnb-4bit.csv", "zephyr-sft-bnb-4bit-V1.csv"],
}

# Respostas corretas
df_answer = pd.read_csv(answer_path)[['ID', 'ANSWER']]
df_answer['ANSWER'] = df_answer['ANSWER'].map({0: 'A', 1: 'B', 2: 'C', 3: 'D'})

# Loop principal
for modelo_base, (v0_file, v1_file) in modelos_base.items():
    dfs = []
    v0_path = os.path.join(v0_dir, v0_file)
    v1_path = os.path.join(v1_dir, v1_file)

    if os.path.exists(v0_path):
        dfs.append(processar_dataframe(v0_path, 'V0', modelo_base, df_answer))
    if os.path.exists(v1_path):
        dfs.append(processar_dataframe(v1_path, 'V1', modelo_base, df_answer))

    if dfs:
        df_model = pd.concat(dfs, ignore_index=True)
        plot_violin_erros(df_model, modelo_base)
'''


invalid escape sequence '\s'


invalid escape sequence '\s'


invalid escape sequence '\s'



'import os\nimport pandas as pd\nimport plotly.express as px\nfrom plotly.subplots import make_subplots\nimport plotly.graph_objects as go\nimport re\n\ndef extrair_model_answer(texto):\n    if not isinstance(texto, str):\n        return \'invalid\'\n    match = re.search(r\'[ABCD]\', texto)\n    return match.group(0) if match else \'invalid\'\n\ndef processar_dataframe(filepath, versao, modelo_base, df_answer):\n    df = pd.read_csv(filepath)\n    df[\'output_model\'] = df[\'output_model\'].str.upper()\n    df[\'last_answer\'] = df[\'output_model\'].str.extract(r\'(?:ASSISTANT:|ANSWER:|RESPOSTA:)\\s*(.*)\', flags=re.IGNORECASE).fillna(\'invalid\')\n    df[\'model_answer\'] = df[\'last_answer\'].apply(extrair_model_answer)\n    df[\'len_prompt\'] = df[\'prompt\'].apply(lambda x: len(x) if isinstance(x, str) else 0)\n    df[\'len_output\'] = df[\'output_model\'].apply(lambda x: len(x) if isinstance(x, str) else 0)\n    df = df.merge(df_answer, on=\'ID\', how=\'left\')\n    df[\'is_error

In [None]:

resultados_estatisticas = []

for filename in os.listdir(dir_path):
    if filename.endswith('.csv') and filename in models_map:
        model_name = models_map[filename]
        file_path = os.path.join(dir_path, filename)
        print(f"Processando: {model_name}")
        
        df = processar_dataframe(file_path)
        
        stats = df.groupby('is_error')[['len_prompt', 'len_output']].agg(
            len_prompt_mean=('len_prompt', 'mean'),
            len_prompt_median=('len_prompt', 'median'),
            len_prompt_std=('len_prompt', 'std'),
            len_output_mean=('len_output', 'mean'),
            len_output_median=('len_output', 'median'),
            len_output_std=('len_output', 'std'),
            total=('len_prompt', 'count')
        ).reset_index()
        
        stats['modelo'] = model_name
        resultados_estatisticas.append(stats)


df_estatisticas = pd.concat(resultados_estatisticas, ignore_index=True)
cols = ['modelo', 'is_error'] + [col for col in df_estatisticas.columns if col not in ['modelo', 'is_error']]
df_estatisticas = df_estatisticas[cols]
#df_estatisticas.to_csv('estatisticas_erros_modelos.csv', index=False)

Processando: Llama-3.2-3B-V1


TypeError: processar_dataframe() missing 2 required positional arguments: 'versao' and 'modelo_base'

Acertos x Erros

In [8]:
df_estatisticas = pd.read_csv('estatisticas_erros_modelos.csv')
df_estatisticas.head()

Unnamed: 0,modelo,is_error,len_prompt_mean,len_prompt_median,len_prompt_std,len_output_mean,len_output_median,len_output_std,total
0,Qwen2.5-7B,0,505.291798,423.0,251.167655,516.436122,434.0,251.195772,7303
1,Qwen2.5-7B,1,529.720408,422.0,300.767668,535.435136,433.0,275.446209,7747
2,Llama-3.2-3B,0,506.782668,426.0,248.496444,518.537935,438.0,248.627879,5839
3,Llama-3.2-3B,1,524.892628,420.0,295.079523,531.713278,432.0,273.790782,9211
4,Zephyr-7B,0,516.122513,427.0,271.97094,516.237238,434.0,244.20052,5779


In [None]:
'''def processar_dataframe(filepath, versao, modelo_base, df_answer):
    df = pd.read_csv(filepath)
    df['output_model'] = df['output_model'].str.upper()
    df['last_answer'] = df['output_model'].str.extract(r'(?:ASSISTANT:|ANSWER:|RESPOSTA:)\s*(.*)', flags=re.IGNORECASE).fillna('invalid')
    df['model_answer'] = df['last_answer'].apply(extrair_model_answer)
    df['len_prompt'] = df['prompt'].apply(lambda x: len(x) if isinstance(x, str) else 0)
    df['len_output'] = df['output_model'].apply(lambda x: len(x) if isinstance(x, str) else 0)
    df = df.merge(df_answer, on='ID', how='left')
    df['is_error'] = (df['model_answer'] != df['ANSWER']).astype(int)
    df['versao'] = versao
    df['modelo_base'] = modelo_base
    return df  # ⬅️ Agora retorna todos (acertos + erros)


def plot_violin_acertos_erros(df_model, modelo_base):
    fig = make_subplots(
        rows=1, cols=2,
        subplot_titles=["Tamanho do Prompt", "Tamanho do Output"],
        horizontal_spacing=0.15
    )

    color_map = {0: "#2ca02c", 1: "#d62728"}  # verde para acerto, vermelho para erro

    for col, idx in zip(["len_prompt", "len_output"], [1, 2]):
        violin_fig = px.violin(
            df_model,
            y=col,
            color="is_error",
            box=True,
            points="all",
            color_discrete_map=color_map,
            category_orders={"is_error": [0, 1]},
            labels={"is_error": "Erro"}
        )
        for trace in violin_fig.data:
            fig.add_trace(trace, row=1, col=idx)

        fig.update_yaxes(title_text="Tamanho", row=1, col=idx)
        fig.update_xaxes(title_text="", row=1, col=idx, showticklabels=False)

    fig.update_layout(
        title_text=f"Acertos vs. Erros - {modelo_base}",
        font=dict(size=14),
        height=500,
        width=1000,
        plot_bgcolor="#f9f9f9",
        paper_bgcolor="white",
        showlegend=True,
        legend=dict(title="Acerto (0) / Erro (1)", orientation="h", y=-0.2, x=0.25)
    )

    output_path = f"/home/annap/Downloads/01_plots_html/acertos_erros_{modelo_base}.html"
    os.makedirs(os.path.dirname(output_path), exist_ok=True)
    fig.write_html(output_path)
    print(f"Salvo: {output_path}")



# Loop principal
for modelo_base, (v0_file, v1_file) in modelos_base.items():
    dfs = []
    v0_path = os.path.join(v0_dir, v0_file)
    v1_path = os.path.join(v1_dir, v1_file)

    if os.path.exists(v0_path):
        dfs.append(processar_dataframe(v0_path, 'V0', modelo_base, df_answer))
    if os.path.exists(v1_path):
        dfs.append(processar_dataframe(v1_path, 'V1', modelo_base, df_answer))

    if dfs:
        df_model = pd.concat(dfs, ignore_index=True)
        plot_violin_acertos_erros(df_model, modelo_base)
'''

Salvo: /home/annap/Downloads/01_plots_html/acertos_erros_Llama-3.1-8B.html
Salvo: /home/annap/Downloads/01_plots_html/acertos_erros_Llama-3.2-3B.html
Salvo: /home/annap/Downloads/01_plots_html/acertos_erros_Qwen2.5-7B.html
Salvo: /home/annap/Downloads/01_plots_html/acertos_erros_Qwen2.5-14B.html
Salvo: /home/annap/Downloads/01_plots_html/acertos_erros_Phi-4-14B.html
Salvo: /home/annap/Downloads/01_plots_html/acertos_erros_Zephyr-7B.html


In [None]:
'''import os
import pandas as pd
import plotly.express as px
from plotly.subplots import make_subplots
import re

def salvar_html(fig, caminho):
    try:
        os.makedirs(os.path.dirname(caminho), exist_ok=True)
        fig.write_html(caminho)
        print(f"Salvo HTML em: {caminho}")
    except Exception as e:
        print(f"Erro ao salvar HTML: {e}")

def extrair_model_answer(texto):
    if not isinstance(texto, str):
        return 'invalid'
    match = re.search(r'[ABCD]', texto)
    return match.group(0) if match else 'invalid'

def processar_dataframe(filepath, versao):
    df = pd.read_csv(filepath)
    df['output_model'] = df['output_model'].str.upper()
    df['last_answer'] = df['output_model'].str.extract(
        r'(?:ASSISTANT:|ANSWER:|RESPOSTA:)\s*(.*)', flags=re.IGNORECASE
    ).fillna('invalid')
    df['model_answer'] = df['last_answer'].apply(extrair_model_answer)
    df['len_prompt'] = df['prompt'].apply(lambda x: len(x) if isinstance(x, str) else 0)
    df['len_output'] = df['output_model'].apply(lambda x: len(x) if isinstance(x, str) else 0)
    df = df.merge(df_answer, on='ID', how='left')
    df['is_error'] = (df['model_answer'] != df['ANSWER']).astype(int)
    df['resultado'] = df['is_error'].map({0: 'Acerto', 1: 'Erro'})
    df['versao'] = versao
    df['grupo'] = df['versao'] + " - " + df['resultado']
    return df

def criar_violin_plot(df, coluna, row, fig):
    color_map = {
        "V0 - Acerto": "#4c78a8",  # Azul claro
        "V1 - Acerto": "#6baed6",  # Azul mais claro
        "V0 - Erro": "#e45756",    # Vermelho vivo
        "V1 - Erro": "#f28e2b",    # Vermelho alaranjado
    }
    violin = px.violin(
        df,
        y=coluna,
        x="grupo",
        color="grupo",
        box=True,
        points="all",
        color_discrete_map=color_map
    )
    for trace in violin.data:
        fig.add_trace(trace, row=row, col=1)

    fig.update_yaxes(title_text="Tamanho", row=row, col=1)
    fig.update_xaxes(title_text="", row=row, col=1, showticklabels=True)

def plot_violin_duplo(df, model_name):
    fig = make_subplots(
        rows=2,
        cols=1,
        shared_xaxes=False,  # Importante: não compartilhar eixo x se queremos exibir ticks em ambos
        vertical_spacing=0.1,
        subplot_titles=[
            "Distribuição do Tamanho do Prompt de Entrada",
            "Distribuição do Tamanho do Prompt de Saída do Modelo"
        ]
    )

    criar_violin_plot(df, "len_prompt", row=1, fig=fig)
    criar_violin_plot(df, "len_output", row=2, fig=fig)

    fig.update_layout(
        height=850,
        width=1000,
        title_text=f"Distribuições por Versão e Resultado - {model_name}",
        showlegend=False,
        font=dict(color="black", size=16),
        plot_bgcolor="#f9f9f9",
        paper_bgcolor="white"
    )

    salvar_html(fig, f"/home/annap/Downloads/01_plots_html/dist_{model_name}_V0_V1_violin.html")

# Caminhos
v0_dir = '../UNSLOTH/NO_FINE_TUNE/GLOSA_PT'
v1_dir = '../UNSLOTH/V1/GLOSA_PT'
answer_path = '/home/annap/Documentos/chatbot_copy/DATASETS/MMLU/0_mmlu_prompt_glosa_pt.csv'

# Mapeamento dos modelos
modelos_base = {
    "Llama-3.1-8B": ["Llama-3.1-8B-unsloth-bnb-4bit.csv", "Llama-3.1-8B-unsloth-bnb-4bit-V1.csv"],
    "Llama-3.2-3B": ["Llama-3.2-3B-Instruct-unsloth-bnb-4bit.csv", "Llama-3.2-3B-Instruct-unsloth-bnb-4bit-V1.csv"],
    "phi-4-14B": ["phi-4-unsloth-bnb-4bit.csv", "phi-4-unsloth-bnb-4bit-V1.csv"],
    "Zephyr-7B": ["zephyr-sft-bnb-4bit.csv", "zephyr-sft-bnb-4bit-V1.csv"],
    "Qwen2.5-7B": ["Qwen2.5-7B-Instruct-bnb-4bit.csv", "Qwen2.5-7B-Instruct-bnb-4bit-V1.csv"],
    "Qwen2.5-14B": ["Qwen2.5-14B-Instruct-unsloth-bnb-4bit.csv", "Qwen2.5-14B-Instruct-unsloth-bnb-4bit-V1.csv"],
}

# Respostas
df_answer = pd.read_csv(answer_path)[['ID', 'ANSWER']]
df_answer['ANSWER'] = df_answer['ANSWER'].map({0: 'A', 1: 'B', 2: 'C', 3: 'D'})

# Loop principal
for model_name, (v0_file, v1_file) in modelos_base.items():
    v0_path = os.path.join(v0_dir, v0_file)
    v1_path = os.path.join(v1_dir, v1_file)

    if not os.path.exists(v0_path) or not os.path.exists(v1_path):
        print(f"[!] Arquivo ausente para {model_name}, pulando.")
        continue

    df_v0 = processar_dataframe(v0_path, "V0")
    df_v1 = processar_dataframe(v1_path, "V1")
    df_combined = pd.concat([df_v0, df_v1], ignore_index=True)

    plot_violin_duplo(df_combined, model_name)
'''

Salvo HTML em: /home/annap/Downloads/01_plots_html/dist_Llama-3.1-8B_V0_V1_violin.html
Salvo HTML em: /home/annap/Downloads/01_plots_html/dist_Llama-3.2-3B_V0_V1_violin.html
Salvo HTML em: /home/annap/Downloads/01_plots_html/dist_phi-4-14B_V0_V1_violin.html
Salvo HTML em: /home/annap/Downloads/01_plots_html/dist_Zephyr-7B_V0_V1_violin.html
Salvo HTML em: /home/annap/Downloads/01_plots_html/dist_Qwen2.5-7B_V0_V1_violin.html
Salvo HTML em: /home/annap/Downloads/01_plots_html/dist_Qwen2.5-14B_V0_V1_violin.html


In [None]:
'''import os
import pandas as pd
import plotly.express as px
from plotly.subplots import make_subplots
import re

def salvar_html(fig, caminho):
    try:
        os.makedirs(os.path.dirname(caminho), exist_ok=True)
        fig.write_html(caminho)
        print(f"Salvo HTML em: {caminho}")
    except Exception as e:
        print(f"Erro ao salvar HTML: {e}")

def extrair_model_answer(texto):
    if not isinstance(texto, str):
        return 'invalid'
    match = re.search(r'[ABCD]', texto)
    return match.group(0) if match else 'invalid'

def processar_dataframe(filepath, versao):
    df = pd.read_csv(filepath)
    df['output_model'] = df['output_model'].str.upper()
    df['last_answer'] = df['output_model'].str.extract(
        r'(?:ASSISTANT:|ANSWER:|RESPOSTA:)\s*(.*)', flags=re.IGNORECASE
    ).fillna('invalid')
    df['model_answer'] = df['last_answer'].apply(extrair_model_answer)
    df['len_prompt'] = df['prompt'].apply(lambda x: len(x) if isinstance(x, str) else 0)
    df['len_output'] = df['output_model'].apply(lambda x: len(x) if isinstance(x, str) else 0)
    df = df.merge(df_answer, on='ID', how='left')
    df['is_error'] = (df['model_answer'] != df['ANSWER']).astype(int)
    df['resultado'] = df['is_error'].map({0: 'Acerto', 1: 'Erro'})
    df['versao'] = versao
    df['grupo'] = df['versao'] + " - " + df['resultado']
    return df

def criar_box_plot(df, coluna, row, fig):
    color_map = {
        "V0 - Acerto": "#4c78a8",
        "V1 - Acerto": "#6baed6",
        "V0 - Erro": "#e45756",
        "V1 - Erro": "#f28e2b",
    }
    box = px.box(
        df,
        y=coluna,
        x="grupo",
        color="grupo",
        points="all",
        color_discrete_map=color_map
    )
    for trace in box.data:
        fig.add_trace(trace, row=row, col=1)

    fig.update_yaxes(title_text="Tamanho", row=row, col=1)
    fig.update_xaxes(title_text="", row=row, col=1, showticklabels=True)

def plot_box_duplo(df, model_name):
    fig = make_subplots(
        rows=2,
        cols=1,
        shared_xaxes=False,
        vertical_spacing=0.1,
        subplot_titles=[
            "Distribuição do Tamanho do Prompt de Entrada",
            "Distribuição do Tamanho do Prompt de Saída do Modelo"
        ]
    )

    criar_box_plot(df, "len_prompt", row=1, fig=fig)
    criar_box_plot(df, "len_output", row=2, fig=fig)

    fig.update_layout(
        height=850,
        width=1000,
        title_text=f"Distribuições por Versão e Resultado (Box Plot) - {model_name}",
        showlegend=False,
        font=dict(color="black", size=16),
        plot_bgcolor="#f9f9f9",
        paper_bgcolor="white"
    )

    salvar_html(fig, f"/home/annap/Downloads/01_plots_html/box_{model_name}_V0_V1.html")

# Caminhos
v0_dir = '../UNSLOTH/NO_FINE_TUNE/GLOSA_PT'
v1_dir = '../UNSLOTH/V1/GLOSA_PT'
answer_path = '/home/annap/Documentos/chatbot_copy/DATASETS/MMLU/0_mmlu_prompt_glosa_pt.csv'

# Mapeamento dos modelos
modelos_base = {
    "Llama-3.1-8B": ["Llama-3.1-8B-unsloth-bnb-4bit.csv", "Llama-3.1-8B-unsloth-bnb-4bit-V1.csv"],
    "Llama-3.2-3B": ["Llama-3.2-3B-Instruct-unsloth-bnb-4bit.csv", "Llama-3.2-3B-Instruct-unsloth-bnb-4bit-V1.csv"],
    "phi-4-14B": ["phi-4-unsloth-bnb-4bit.csv", "phi-4-unsloth-bnb-4bit-V1.csv"],
    "Zephyr-7B": ["zephyr-sft-bnb-4bit.csv", "zephyr-sft-bnb-4bit-V1.csv"],
    "Qwen2.5-7B": ["Qwen2.5-7B-Instruct-bnb-4bit.csv", "Qwen2.5-7B-Instruct-bnb-4bit-V1.csv"],
    "Qwen2.5-14B": ["Qwen2.5-14B-Instruct-unsloth-bnb-4bit.csv", "Qwen2.5-14B-Instruct-unsloth-bnb-4bit-V1.csv"],
}

# Respostas
df_answer = pd.read_csv(answer_path)[['ID', 'ANSWER']]
df_answer['ANSWER'] = df_answer['ANSWER'].map({0: 'A', 1: 'B', 2: 'C', 3: 'D'})

# Loop principal
for model_name, (v0_file, v1_file) in modelos_base.items():
    v0_path = os.path.join(v0_dir, v0_file)
    v1_path = os.path.join(v1_dir, v1_file)

    if not os.path.exists(v0_path) or not os.path.exists(v1_path):
        print(f"[!] Arquivo ausente para {model_name}, pulando.")
        continue

    df_v0 = processar_dataframe(v0_path, "V0")
    df_v1 = processar_dataframe(v1_path, "V1")
    df_combined = pd.concat([df_v0, df_v1], ignore_index=True)

    plot_box_duplo(df_combined, model_name)
'''

Salvo HTML em: /home/annap/Downloads/01_plots_html/box_Llama-3.1-8B_V0_V1.html
Salvo HTML em: /home/annap/Downloads/01_plots_html/box_Llama-3.2-3B_V0_V1.html
Salvo HTML em: /home/annap/Downloads/01_plots_html/box_phi-4-14B_V0_V1.html
Salvo HTML em: /home/annap/Downloads/01_plots_html/box_Zephyr-7B_V0_V1.html
Salvo HTML em: /home/annap/Downloads/01_plots_html/box_Qwen2.5-7B_V0_V1.html
Salvo HTML em: /home/annap/Downloads/01_plots_html/box_Qwen2.5-14B_V0_V1.html


In [9]:
import os
import pandas as pd
import plotly.express as px
from plotly.subplots import make_subplots
import re

def salvar_html(fig, caminho):
    try:
        os.makedirs(os.path.dirname(caminho), exist_ok=True)
        fig.write_html(caminho)
        print(f"Salvo HTML em: {caminho}")
    except Exception as e:
        print(f"Erro ao salvar HTML: {e}")

def extrair_model_answer(texto):
    if not isinstance(texto, str):
        return 'invalid'
    match = re.search(r'[ABCD]', texto)
    return match.group(0) if match else 'invalid'

def processar_dataframe(filepath, versao):
    df = pd.read_csv(filepath)
    df['output_model'] = df['output_model'].str.upper()
    df['last_answer'] = df['output_model'].str.extract(
        r'(?:ASSISTANT:|ANSWER:|RESPOSTA:)\s*(.*)', flags=re.IGNORECASE
    ).fillna('invalid')
    df['model_answer'] = df['last_answer'].apply(extrair_model_answer)
    df['len_prompt'] = df['prompt'].apply(lambda x: len(x) if isinstance(x, str) else 0)
    df['len_output'] = df['output_model'].apply(lambda x: len(x) if isinstance(x, str) else 0)
    df = df.merge(df_answer, on='ID', how='left')
    df['is_error'] = (df['model_answer'] != df['ANSWER']).astype(int)
    df['resultado'] = df['is_error'].map({0: 'Acerto', 1: 'Erro'})
    df['versao'] = versao
    df['grupo'] = df['versao'] + " - " + df['resultado']
    return df

def criar_box_plot(df, coluna, row, fig):
    ordem_grupo = ["V0 - Erro", "V0 - Acerto", "V1 - Acerto", "V1 - Erro"]
    color_map = {
        "V0 - Erro": "#e45756",
        "V0 - Acerto": "#4c78a8",
        "V1 - Acerto": "#6baed6",
        "V1 - Erro": "#f22b67",
    }

    box = px.box(
        df,
        y=coluna,
        x="grupo",
        color="grupo",
        points="all",
        color_discrete_map=color_map,
        category_orders={"grupo": ordem_grupo}
    )
    for trace in box.data:
        fig.add_trace(trace, row=row, col=1)

    fig.update_yaxes(title_text="Tamanho", row=row, col=1)
    fig.update_xaxes(title_text="", row=row, col=1, showticklabels=True)


def plot_box_duplo(df, model_name):
    fig = make_subplots(
        rows=2,
        cols=1,
        shared_xaxes=False,
        vertical_spacing=0.1,
        subplot_titles=[
            "Distribuição do Tamanho do Prompt de Entrada do Modelo",
            "Distribuição do Tamanho do Prompt de Saída do Modelo"
        ]
    )

    criar_box_plot(df, "len_prompt", row=1, fig=fig)
    criar_box_plot(df, "len_output", row=2, fig=fig)

    fig.update_layout(
        height=850,
        width=1000,
        title_text=f"Distribuições por Versão e Resultado (Box Plot) - {model_name}",
        showlegend=False,
        font=dict(color="black", size=16),
        plot_bgcolor="#f9f9f9",
        paper_bgcolor="white"
    )

    salvar_html(fig, f"/home/annap/Downloads/01_plots_html/box_{model_name}_V0_V1.html")

# Caminhos
v0_dir = '../UNSLOTH/NO_FINE_TUNE/GLOSA_PT'
v1_dir = '../UNSLOTH/V1/GLOSA_PT'
answer_path = '/home/annap/Documentos/chatbot_copy/DATASETS/MMLU/0_mmlu_prompt_glosa_pt.csv'

# Mapeamento dos modelos
modelos_base = {
    "Llama-3.1-8B": ["Llama-3.1-8B-unsloth-bnb-4bit.csv", "Llama-3.1-8B-unsloth-bnb-4bit-V1.csv"],
    "Llama-3.2-3B": ["Llama-3.2-3B-Instruct-unsloth-bnb-4bit.csv", "Llama-3.2-3B-Instruct-unsloth-bnb-4bit-V1.csv"],
    "phi-4-14B": ["phi-4-unsloth-bnb-4bit.csv", "phi-4-unsloth-bnb-4bit-V1.csv"],
    "Zephyr-7B": ["zephyr-sft-bnb-4bit.csv", "zephyr-sft-bnb-4bit-V1.csv"],
    "Qwen2.5-7B": ["Qwen2.5-7B-Instruct-bnb-4bit.csv", "Qwen2.5-7B-Instruct-bnb-4bit-V1.csv"],
    "Qwen2.5-14B": ["Qwen2.5-14B-Instruct-unsloth-bnb-4bit.csv", "Qwen2.5-14B-Instruct-unsloth-bnb-4bit-V1.csv"],
}

# Respostas
df_answer = pd.read_csv(answer_path)[['ID', 'ANSWER']]
df_answer['ANSWER'] = df_answer['ANSWER'].map({0: 'A', 1: 'B', 2: 'C', 3: 'D'})

# Loop principal
for model_name, (v0_file, v1_file) in modelos_base.items():
    v0_path = os.path.join(v0_dir, v0_file)
    v1_path = os.path.join(v1_dir, v1_file)

    if not os.path.exists(v0_path) or not os.path.exists(v1_path):
        print(f"[!] Arquivo ausente para {model_name}, pulando.")
        continue

    df_v0 = processar_dataframe(v0_path, "V0")
    df_v1 = processar_dataframe(v1_path, "V1")
    df_combined = pd.concat([df_v0, df_v1], ignore_index=True)

    plot_box_duplo(df_combined, model_name)


Salvo HTML em: /home/annap/Downloads/01_plots_html/box_Llama-3.1-8B_V0_V1.html
Salvo HTML em: /home/annap/Downloads/01_plots_html/box_Llama-3.2-3B_V0_V1.html
Salvo HTML em: /home/annap/Downloads/01_plots_html/box_phi-4-14B_V0_V1.html
Salvo HTML em: /home/annap/Downloads/01_plots_html/box_Zephyr-7B_V0_V1.html
Salvo HTML em: /home/annap/Downloads/01_plots_html/box_Qwen2.5-7B_V0_V1.html
Salvo HTML em: /home/annap/Downloads/01_plots_html/box_Qwen2.5-14B_V0_V1.html


# Subjects

In [57]:
dir_path = '../UNSLOTH/V1/GLOSA_PT'
answer_path = '/home/annap/Documentos/chatbot_copy/DATASETS/MMLU/0_mmlu_prompt_glosa_pt.csv'

models_map = {
    'Llama-3.1-8B-unsloth-bnb-4bit-V1.csv': 'Llama-3.1-8B-V1',
    'Llama-3.2-3B-Instruct-unsloth-bnb-4bit-V1.csv': 'Llama-3.2-3B-V1',
    'phi-4-unsloth-bnb-4bit-V1.csv': 'Phi-4-14B-V1',
    'zephyr-sft-bnb-4bit-V1.csv': 'Zephyr-7B-V1',
    'Qwen2.5-7B-Instruct-bnb-4bit-V1.csv': 'Qwen2.5-7B-V1',
    'Qwen2.5-14B-Instruct-unsloth-bnb-4bit-V1.csv': 'Qwen2.5-14B-V1',
}

df_answer = pd.read_csv(answer_path)[['ID', 'ANSWER']]
answer_mapping = {0: 'A', 1: 'B', 2: 'C', 3: 'D'}
df_answer['ANSWER'] = df_answer['ANSWER'].map(answer_mapping)
df_answer.head()

Unnamed: 0,ID,ANSWER
0,0,B
1,1,C
2,2,D
3,3,B
4,4,B


In [None]:
import pandas as pd
import os

def processar_dataframe(filepath, versao):
    df = pd.read_csv(filepath)
    df['output_model'] = df['output_model'].str.upper()
    df['last_answer'] = df['output_model'].str.extract(
        r'(?:ASSISTANT:|ANSWER:|RESPOSTA:)\s*(.*)', flags=re.IGNORECASE
    ).fillna('invalid')
    df['model_answer'] = df['last_answer'].apply(lambda x: re.search(r'[ABCD]', x).group(0) if isinstance(x, str) and re.search(r'[ABCD]', x) else 'invalid')
    df['len_prompt'] = df['prompt'].apply(lambda x: len(x) if isinstance(x, str) else 0)
    df['len_output'] = df['output_model'].apply(lambda x: len(x) if isinstance(x, str) else 0)
    df = df.merge(df_answer, on='ID', how='left')
    df['is_error'] = (df['model_answer'] != df['ANSWER']).astype(int)
    df['resultado'] = df['is_error'].map({0: 'Acerto', 1: 'Erro'})
    df['versao'] = versao
    df['grupo'] = df['versao'] + " - " + df['resultado']
    return df

def gerar_tabela_subject_erros(modelos_base, v0_dir, v1_dir, df_answer_base, subject_path):
    # Carrega os assuntos por questão
    subject_df = pd.read_csv(subject_path)[['question_id', 'subject']]

    resultados = {}

    for model_name, (v0_file, v1_file) in modelos_base.items():
        v0_path = os.path.join(v0_dir, v0_file)
        v1_path = os.path.join(v1_dir, v1_file)

        if not os.path.exists(v0_path) or not os.path.exists(v1_path):
            print(f"[!] Arquivo ausente para {model_name}, pulando.")
            continue

        # Merge dos assuntos com as respostas esperadas
        df_answer = df_answer_base.copy()
        df_answer = df_answer.merge(subject_df, left_on='ID', right_on='question_id', how='left')

        # Processa os dataframes das duas versões
        df_v0 = processar_dataframe(v0_path, "V0").merge(subject_df, left_on='ID', right_on='question_id', how='left')
        df_v1 = processar_dataframe(v1_path, "V1").merge(subject_df, left_on='ID', right_on='question_id', how='left')

        # Filtra apenas os erros
        erros_v0 = df_v0[df_v0['is_error'] == 1]['subject'].value_counts()
        erros_v1 = df_v1[df_v1['is_error'] == 1]['subject'].value_counts()

        # Seleciona os 10 assuntos mais problemáticos (soma total dos dois)
        top_subjects = (erros_v0.add(erros_v1, fill_value=0)).sort_values(ascending=False).head(10).index

        for subject in top_subjects:
            chave_v0 = f"{model_name}-V0"
            chave_v1 = f"{model_name}-V1"

            if subject not in resultados:
                resultados[subject] = {}

            resultados[subject][chave_v0] = int(erros_v0.get(subject, 0))
            resultados[subject][chave_v1] = int(erros_v1.get(subject, 0))

    # Gera o DataFrame final com os resultados
    df_result = pd.DataFrame(resultados).T.fillna(0).astype(int)
    df_result = df_result.sort_index(axis=1)  # ordena colunas alfabeticamente
    df_result.reset_index(inplace=True)
    df_result.rename(columns={'index': 'subject'}, inplace=True)

    return df_result

# Caminhos
v0_dir = '../UNSLOTH/NO_FINE_TUNE/GLOSA_PT'
v1_dir = '../UNSLOTH/V1/GLOSA_PT'
answer_path = '/home/annap/Documentos/chatbot_copy/DATASETS/MMLU/0_mmlu_prompt_glosa_pt.csv'
subject_path = "/home/annap/Documentos/chatbot_copy/DATASETS/MMLU/0_mmlu.csv"

# Mapeamento dos modelos
modelos_base = {
    "Llama-3.1-8B": ["Llama-3.1-8B-unsloth-bnb-4bit.csv", "Llama-3.1-8B-unsloth-bnb-4bit-V1.csv"],
    "Llama-3.2-3B": ["Llama-3.2-3B-Instruct-unsloth-bnb-4bit.csv", "Llama-3.2-3B-Instruct-unsloth-bnb-4bit-V1.csv"],
    "phi-4-14B": ["phi-4-unsloth-bnb-4bit.csv", "phi-4-unsloth-bnb-4bit-V1.csv"],
    "Zephyr-7B": ["zephyr-sft-bnb-4bit.csv", "zephyr-sft-bnb-4bit-V1.csv"],
    "Qwen2.5-7B": ["Qwen2.5-7B-Instruct-bnb-4bit.csv", "Qwen2.5-7B-Instruct-bnb-4bit-V1.csv"],
    "Qwen2.5-14B": ["Qwen2.5-14B-Instruct-unsloth-bnb-4bit.csv", "Qwen2.5-14B-Instruct-unsloth-bnb-4bit-V1.csv"],
}

# Respostas
df_answer = pd.read_csv(answer_path)[['ID', 'ANSWER']]
df_answer['ANSWER'] = df_answer['ANSWER'].map({0: 'A', 1: 'B', 2: 'C', 3: 'D'})

# Gera a tabela
tabela_subject_erro = gerar_tabela_subject_erros(
    modelos_base, v0_dir, v1_dir, df_answer, subject_path
)

print(tabela_subject_erro)

# (Opcional) Salvar em CSV
tabela_subject_erro.to_csv("tabela_subject_erro.csv", index=False)

                       subject  Llama-3.1-8B-V0  Llama-3.1-8B-V1  \
0             professional_law             1390             1171   
1              moral_scenarios              983              728   
2                miscellaneous              579              454   
3      professional_psychology              574              409   
4       high_school_psychology              464              302   
5   high_school_macroeconomics              392              268   
6       elementary_mathematics              366              262   
7               moral_disputes              319              235   
8                   philosophy              300              205   
9      professional_accounting              292              212   
10                  prehistory                0                0   
11     high_school_mathematics                0                0   

    Llama-3.2-3B-V0  Llama-3.2-3B-V1  Qwen2.5-14B-V0  Qwen2.5-14B-V1  \
0              1166             1140       

In [64]:
tabela_subject_erro.head(10)

Unnamed: 0,subject,Llama-3.1-8B-V0,Llama-3.1-8B-V1,Llama-3.2-3B-V0,Llama-3.2-3B-V1,Qwen2.5-14B-V0,Qwen2.5-14B-V1,Qwen2.5-7B-V0,Qwen2.5-7B-V1,Zephyr-7B-V0,Zephyr-7B-V1,phi-4-14B-V0,phi-4-14B-V1
0,professional_law,1390,1171,1166,1140,1109,1058,1095,1094,1128,1139,1666,1026
1,moral_scenarios,983,728,721,714,722,708,726,744,753,737,995,741
2,miscellaneous,579,454,480,484,339,308,360,345,457,451,865,308
3,professional_psychology,574,409,424,419,337,306,359,345,448,443,667,317
4,high_school_psychology,464,302,308,311,222,168,231,209,306,304,601,195
5,high_school_macroeconomics,392,268,280,296,200,158,206,193,265,254,431,177
6,elementary_mathematics,366,262,250,275,210,166,208,193,257,256,380,184
7,moral_disputes,319,235,224,221,207,183,194,196,238,242,383,170
8,philosophy,300,205,201,202,162,144,0,0,225,204,0,0
9,professional_accounting,292,212,216,213,194,183,201,196,216,223,311,190
