### Imports

In [1]:
import os
import glob
import pandas as pd
import matplotlib.pyplot as plt

## Redução de linhas por modelo

## Tempo médio de cada modelo por fontes

## Precisão dos modelos por fontes

### Inferências

In [None]:
import pandas as pd
import json
import glob
import re
from pathlib import Path

def extract_analysis_data(analysis_str):
    """Extract the JSON verdict embedded in a model's free-text analysis.

    Two layouts are tried, in order:

    1. a fenced ```json ... ``` block, whose brace-delimited payload is parsed;
    2. a flat ``{...}`` object (no nested braces) that contains a
       ``"CLASSIFICATION"`` key, anywhere in the text.

    Args:
        analysis_str: Raw analysis text. Non-string values (for example ``None``
            coming from a JSON ``null``) are treated as "nothing to extract".

    Returns:
        The decoded JSON value, or ``None`` when no candidate is found or the
        candidate is not valid JSON.
    """
    if not isinstance(analysis_str, str):
        return None

    fenced = re.search(r"```json\s*(\{.*?\})\s*```", analysis_str, re.DOTALL)
    if fenced:
        payload = fenced.group(1)
    else:
        bare = re.search(r"\{[^{}]*\"CLASSIFICATION\"[^{}]*\}", analysis_str, re.DOTALL)
        if not bare:
            return None
        # The fallback pattern has no capture group, so the whole match is the
        # payload even when the matched text itself contains backticks.
        payload = bare.group(0)

    try:
        return json.loads(payload)
    except json.JSONDecodeError:
        return None

# Collect one plain dict per JSONL line and build the DataFrame once at the end.
# Concatenating inside the loop re-copies the whole frame for every record,
# which makes the load quadratic in the number of records.
records = []

for test in range(12):
    files = glob.glob(f'{test}/inference/**/*.jsonl', recursive=True)
    print(f"Test {test}: Encontrados {len(files)} arquivos JSONL.")

    # Expected layout: {test}/inference/{model}/{data|filtering}/{font}/{type}/data.jsonl
    for file_path in files:
        path = Path(file_path)
        model = path.parts[2]
        # NOTE(review): in the "data" branch the font is the literal "data" and
        # the type comes from the next path component, which does not match the
        # layout comment above - confirm against the directory tree.
        if path.parts[3] == "data":
            font = path.parts[3]
            sample_type = path.parts[4]  # not named `type`, to keep the builtin usable
        else:
            font = path.parts[4]
            sample_type = path.parts[5]

        with open(file_path, 'r', encoding='utf-8') as f:
            for line in f:
                try:
                    record = json.loads(line)
                    analysis_data = extract_analysis_data(record.get('analysis', ''))

                    # Records without a parsable verdict default to
                    # "NOT INTERESTING" with confidence 0.
                    records.append({
                        'model': model,
                        'type': sample_type,
                        'font': font,
                        'source_file': record.get('file', ''),
                        'classification': analysis_data.get('CLASSIFICATION', "NOT INTERESTING") if analysis_data else "NOT INTERESTING",
                        'confidence': analysis_data.get('CONFIDENCE', 0) if analysis_data else 0,
                        'test': test
                    })

                except Exception as e:
                    # Best effort: a malformed line is reported and skipped.
                    print(f"Erro ao processar linha do arquivo {file_path}: {str(e)}")

all_data = pd.DataFrame(records)

print("-----------------------------------")
print(f"Total de registros processados: {len(all_data)}")
print("\nPrimeiros registros:")
display(all_data)
all_data.to_csv("classifications.csv", index=False)


Test 0: Encontrados 2099 arquivos JSONL.
Test 1: Encontrados 2100 arquivos JSONL.
Test 2: Encontrados 2100 arquivos JSONL.


KeyboardInterrupt: 

### Métricas

### Gerando inferências concretas

In [None]:
import pandas as pd

def process_group(group):
    """Collapse every inference in ``group`` into a single consolidated row.

    The label is the most frequent ``classification`` in the group; when
    several labels share the top count and ``'INTERESTING'`` is one of them,
    ``'INTERESTING'`` wins. ``confidence`` is the group mean and the remaining
    columns are copied from the group's first row.

    Args:
        group: DataFrame holding the columns ``model``, ``font``,
            ``source_file``, ``type``, ``classification``, ``confidence`` and
            ``test``.

    Returns:
        A one-row DataFrame with those seven columns, or an empty DataFrame
        when ``group`` is empty.
    """
    if group.empty:
        return pd.DataFrame()

    def first_of(column):
        # Value of `column` in the first row of the group.
        return group[column].iloc[0]

    # Frequency of each label and the labels tied for the highest frequency.
    counts = group['classification'].value_counts()
    leaders = list(counts[counts == counts.max()].index)

    # Tie-break: prefer 'INTERESTING' whenever it is among the leaders.
    if len(leaders) > 1 and 'INTERESTING' in leaders:
        winner = 'INTERESTING'
    else:
        winner = counts.idxmax()

    return pd.DataFrame({
        'model': [first_of('model')],
        'font': [first_of('font')],
        'source_file': [first_of('source_file')],
        'type': [first_of('type')],
        'classification': [winner],
        'confidence': [group['confidence'].mean()],
        'test': [first_of('test')],
    })

# Load the raw per-run classifications. Rows whose confidence is not numeric
# are discarded so the mean confidence computed per group is well defined.
df = pd.read_csv('classifications.csv')
df['confidence'] = pd.to_numeric(df['confidence'], errors='coerce')
df = df.dropna(subset=['confidence'])

# Consolidate each (model, test, source_file) group into one row. The groups
# are iterated explicitly instead of going through GroupBy.apply: passing the
# grouping columns to the applied function is deprecated in pandas, and
# process_group reads 'model', 'test' and 'source_file' from the group.
consolidated_columns = ['model', 'font', 'source_file', 'type', 'classification', 'confidence', 'test']
consolidated_rows = [
    process_group(group)
    for _, group in df.groupby(['model', 'test', 'source_file'])
]
processed = (
    pd.concat(consolidated_rows, ignore_index=True)
    if consolidated_rows
    else pd.DataFrame(columns=consolidated_columns)  # nothing to consolidate
)

# Save the result
final_df = processed[consolidated_columns]
final_df.to_csv('consolidated_classifications.csv', index=False)


  .apply(process_group)


## Comitê

#### Voto Majoritário

In [None]:
# Normalise two model names in the consolidated CSV (written back in place),
# then enumerate every committee of two and of three models.
import itertools

df = pd.read_csv('consolidated_classifications.csv')
df = df.assign(
    model=df["model"].replace({
        "deepseek-r1_14b": "deepseek-r1_14b_no_think",
        "qwen3_14b": "qwen3_14b_no_think"
    })
)
df.to_csv('consolidated_classifications.csv', index=False)

models = df['model'].unique()

# Lists of tuples, one tuple per committee.
comb_2, comb_3 = (
    list(itertools.combinations(models, size)) for size in (2, 3)
)

In [None]:
import pandas as pd

def _majority_label(labels):
    """Return the most frequent label of a Series.

    When several labels share the top count and 'INTERESTING' is one of them,
    'INTERESTING' wins. This is the same tie-break used when consolidating the
    per-model inferences, and it keeps the result independent of row order.
    """
    counts = labels.value_counts()
    leaders = counts[counts == counts.max()].index
    if len(leaders) > 1 and "INTERESTING" in leaders:
        return "INTERESTING"
    return counts.idxmax()


# 1) Read the consolidated per-model classifications
df = pd.read_csv("consolidated_classifications.csv")

# 2) One frame per committee; concatenated once after the loop instead of
#    growing a DataFrame on every iteration.
committee_frames = []

# 3) Loop over the 3-model committees (comb_3 is a list of tuples)
for model_1, model_2, model_3 in comb_3:
    # Keep only the rows produced by the models of this committee
    df_comm = df[df["model"].isin([model_1, model_2, model_3])]
    if df_comm.empty:
        continue

    # A single groupby yields both the vote and the metadata, so the two can
    # never drift out of alignment.
    per_file = (
        df_comm
        .groupby(["test", "source_file"])
        .agg(
            font=("font", "first"),
            type=("type", "first"),
            classification=("classification", _majority_label),
        )
        .reset_index()
    )

    # Output rows for this committee, in the column order of the votes CSV
    vote_df = per_file.assign(
        vote="majority_vote",
        model_1=model_1,
        model_2=model_2,
        model_3=model_3,
    )[[
        "vote", "model_1", "model_2", "model_3", "test",
        "font", "source_file", "type", "classification"
    ]]

    committee_frames.append(vote_df)

results_vote_majority = (
    pd.concat(committee_frames, ignore_index=True)
    if committee_frames
    else pd.DataFrame()
)

# 4) Save the final result
results_vote_majority.to_csv("consolidated_classifications_with_vote.csv", index=False, encoding="utf-8")


#### Votação Ponderada por Confiança

In [None]:
import pandas as pd

def weighted_vote_classification(group: pd.DataFrame) -> str:
    """
    Pick the classification whose rows carry the largest total confidence.

    Args:
        group: DataFrame with a ``classification`` and a ``confidence`` column.

    Returns:
        The label with the highest summed confidence within ``group``.
    """
    confidence_by_label = group["confidence"].groupby(group["classification"]).sum()
    winner = confidence_by_label.idxmax()
    return winner

INPUT_CSV_CLASSIFICATIONS = "consolidated_classifications.csv"
OUTPUT_CSV_VOTES          = "consolidated_classifications_with_vote.csv"

# 1) Per-model inferences used to compute the weighted votes; rows whose
#    confidence cannot be parsed as a number are dropped.
df = (
    pd.read_csv(INPUT_CSV_CLASSIFICATIONS)
    .assign(confidence=lambda frame: pd.to_numeric(frame["confidence"], errors="coerce"))
    .dropna(subset=["confidence"])
)

# 2) Votes already on disk (the majority votes)
df_existing_votes = pd.read_csv(OUTPUT_CSV_VOTES)

# 3) Accumulator for the new weighted votes
results_weighted = pd.DataFrame()

# 4) Função para processar cada comitê
def process_combination(models, is_comb_3=True):
    """Compute the confidence-weighted vote of one committee.

    Reads the module-level ``df`` of per-model inferences, keeps the rows of
    the committee's models and, for every (test, source_file) pair, selects the
    label returned by ``weighted_vote_classification``.

    Args:
        models: Sequence with the committee's model names (two or three).
        is_comb_3: When true, ``models[2]`` fills ``model_3``; otherwise
            ``model_3`` is the empty string.

    Returns:
        DataFrame with the columns ``model_1``, ``model_2``, ``model_3``,
        ``test``, ``font``, ``source_file``, ``type``, ``classification`` and
        ``vote`` (always ``"weighted_vote"``), or an empty DataFrame when none
        of the models has rows in ``df``.
    """
    df_comm = df[df["model"].isin(models)]
    if df_comm.empty:
        return pd.DataFrame()

    grouped = df_comm.groupby(["test", "source_file"])

    # Only the two columns the voter reads are handed to apply. Applying over
    # the whole frame would also pass the grouping columns, which pandas has
    # deprecated.
    vote = (
        grouped[["classification", "confidence"]]
        .apply(weighted_vote_classification)
        .rename("classification")
    )

    # Same groupby object, so `meta` and `vote` share index and row order.
    meta = grouped.agg({
        "font": "first",
        "type": "first"
    })

    model_1 = models[0]
    model_2 = models[1]
    model_3 = models[2] if is_comb_3 else ""

    result = pd.DataFrame({
        "model_1": model_1,
        "model_2": model_2,
        "model_3": model_3,
        "test": meta.index.get_level_values("test"),
        "font": meta["font"].values,
        "source_file": meta.index.get_level_values("source_file"),
        "type": meta["type"].values,
        "classification": vote.values,
        "vote": "weighted_vote"  # marks the row as a weighted vote
    })

    return result

# 5) + 6) Run every 3-model committee, then every 2-model committee. Frames are
#    collected in a list and concatenated once; empty results are left out.
weighted_frames = [process_combination(comb, is_comb_3=True) for comb in comb_3]
weighted_frames += [process_combination(comb, is_comb_3=False) for comb in comb_2]
weighted_frames = [frame for frame in weighted_frames if not frame.empty]
results_weighted = (
    pd.concat(weighted_frames, ignore_index=True)
    if weighted_frames
    else pd.DataFrame()
)

# 7) Append the weighted votes to the votes already on disk. Weighted votes
#    left by an earlier run of this cell are dropped first, so re-running the
#    cell does not duplicate rows in the output file.
previous_votes = df_existing_votes
if "vote" in previous_votes.columns:
    previous_votes = previous_votes[previous_votes["vote"] != "weighted_vote"]
df_final = pd.concat([previous_votes, results_weighted], ignore_index=True)

# 8) Save to the same file
df_final.to_csv(OUTPUT_CSV_VOTES, index=False, encoding="utf-8")


#### Seleção Dinâmica de Modelo

In [None]:
import pandas as pd

INPUT_CSV_CLASSIFICATIONS = "consolidated_classifications.csv"
OUTPUT_CSV_VOTES          = "consolidated_classifications_with_vote.csv"

# 1) Per-model inferences; rows whose confidence cannot be parsed as a number
#    are dropped.
df = (
    pd.read_csv(INPUT_CSV_CLASSIFICATIONS)
    .assign(confidence=lambda frame: pd.to_numeric(frame["confidence"], errors="coerce"))
    .dropna(subset=["confidence"])
)

# 2) Votes already on disk (majority and weighted votes)
df_existing_votes = pd.read_csv(OUTPUT_CSV_VOTES)

# 3) Accumulator for the new dynamic-selection votes
results_dynamic = pd.DataFrame()

# 4) Função para processar cada comitê
def process_combination_dynamic(models, is_comb_3=True):
    """Dynamic model selection for one committee.

    Reads the module-level ``df`` of per-model inferences and, for every
    (test, source_file) pair, keeps the row of the committee member that
    reported the highest confidence.

    Args:
        models: Sequence with the committee's model names (two or three).
        is_comb_3: When true, ``models[2]`` fills ``model_3``; otherwise
            ``model_3`` is the empty string.

    Returns:
        DataFrame with the columns ``model_1``, ``model_2``, ``model_3``,
        ``test``, ``font``, ``source_file``, ``type``, ``classification`` and
        ``vote`` (always ``"dynamic_selection"``), or an empty DataFrame when
        none of the models has rows in ``df``.
    """
    df_comm = df[df["model"].isin(models)]
    if df_comm.empty:
        return pd.DataFrame()

    keys = ["test", "source_file"]

    # Sort by descending confidence inside each key and keep the first row of
    # every key. Whole rows are kept on purpose: GroupBy.first() returns the
    # first non-null value of each column independently, which can stitch
    # together values that come from different models.
    df_selected = (
        df_comm
        .dropna(subset=keys)  # rows without a key belong to no group
        .sort_values(keys + ["confidence"], ascending=[True, True, False])
        .drop_duplicates(subset=keys, keep="first")
        .assign(
            vote="dynamic_selection",
            # Committee identification columns
            model_1=models[0],
            model_2=models[1],
            model_3=models[2] if is_comb_3 else "",
        )
        .reset_index(drop=True)
    )

    return df_selected[[
        "model_1", "model_2", "model_3", "test",
        "font", "source_file", "type", "classification", "vote"
    ]]

# 5) + 6) Run every 3-model committee, then every 2-model committee. Frames are
#    collected in a list and concatenated once; empty results are left out.
dynamic_frames = [process_combination_dynamic(comb, is_comb_3=True) for comb in comb_3]
dynamic_frames += [process_combination_dynamic(comb, is_comb_3=False) for comb in comb_2]
dynamic_frames = [frame for frame in dynamic_frames if not frame.empty]
results_dynamic = (
    pd.concat(dynamic_frames, ignore_index=True)
    if dynamic_frames
    else pd.DataFrame()
)

# 7) Append to the previous votes and save. Dynamic-selection rows left by an
#    earlier run of this cell are dropped first, so re-running the cell does
#    not duplicate rows in the output file.
previous_votes = df_existing_votes
if "vote" in previous_votes.columns:
    previous_votes = previous_votes[previous_votes["vote"] != "dynamic_selection"]
df_final = pd.concat([previous_votes, results_dynamic], ignore_index=True)
df_final.to_csv(OUTPUT_CSV_VOTES, index=False, encoding="utf-8")

  df_existing_votes = pd.read_csv(OUTPUT_CSV_VOTES)


In [None]:
import pandas as pd

# Inputs and outputs
INPUT_INFERENCES_CSV = "consolidated_classifications.csv"
VOTES_CSV            = "consolidated_classifications_with_vote.csv"
OUTPUT_CSV           = VOTES_CSV  # overwrites the same file

# 1) Load the per-model inferences; rows whose confidence is not numeric are dropped
df_infer = pd.read_csv(INPUT_INFERENCES_CSV)
df_infer["confidence"] = pd.to_numeric(df_infer["confidence"], errors="coerce")
df_infer = df_infer.dropna(subset=["confidence"])

# 2) Add the committee columns so single models share the layout of the votes:
#    a single model is a "committee" of one, tagged with vote == "original".
df_infer["model_1"] = df_infer["model"]
df_infer["model_2"] = ""
df_infer["model_3"] = ""
df_infer["vote"] = "original"

# Same column order as the votes CSV
df_infer_final = df_infer[[
    "model_1", "model_2", "model_3", "test",
    "font", "source_file", "type", "classification", "vote"
]]

# 3) Load the votes already on disk. "original" rows left by an earlier run of
#    this cell are dropped so re-running it does not duplicate them.
df_votes = pd.read_csv(VOTES_CSV)
if "vote" in df_votes.columns:
    df_votes = df_votes[df_votes["vote"] != "original"]

# 4) Put everything together
df_all = pd.concat([df_votes, df_infer_final], ignore_index=True)

# 5) Save
df_all.to_csv(OUTPUT_CSV, index=False, encoding="utf-8")


  df_votes = pd.read_csv(VOTES_CSV)


✅ Inferências individuais adicionadas a: consolidated_classifications_with_vote.csv


## Resultados

### Gerando métricas finais

In [None]:
import pandas as pd
import numpy as np

INPUT_FILE = "consolidated_classifications_with_vote.csv"
OUTPUT_FILE = "consolidated_classifications_with_vote.csv"

# Reload the votes and flag every row with its confusion-matrix cell.
final_df = pd.read_csv(INPUT_FILE)

# Each flag is 1 when the row's `type` and `classification` match the pair
# below, else 0. "INTERESTING" is the positive prediction and "attack" the
# positive ground truth.
for flag_column, truth, predicted in (
    ("false_positive", "safe", "INTERESTING"),
    ("false_negative", "attack", "NOT INTERESTING"),
    ("true_positive", "attack", "INTERESTING"),
    ("true_negative", "safe", "NOT INTERESTING"),
):
    hit = (final_df["type"] == truth) & (final_df["classification"] == predicted)
    final_df[flag_column] = np.where(hit, 1, 0)

# Write the flagged table back to the same CSV.
final_df.to_csv(OUTPUT_FILE, index=False)


  final_df = pd.read_csv(INPUT_FILE)


In [None]:
import pandas as pd

# Load the CSV
df = pd.read_csv("consolidated_classifications_with_vote.csv")

# Make sure model_2 and model_3 exist and hold strings (possibly empty), so
# that missing committee members do not show up as NaN.
for col in ["model_2", "model_3"]:
    df[col] = df[col].fillna("").astype(str) if col in df.columns else ""

# Função para calcular métricas por grupo (voto + modelos + fonte + teste)
def calcular_metricas(grupo):
    """Compute accuracy, precision, recall and F1 for one group of rows.

    Args:
        grupo: DataFrame with the 0/1 columns ``true_positive``,
            ``true_negative``, ``false_positive`` and ``false_negative``.

    Returns:
        Series indexed by ``accuracy``, ``precision``, ``recall`` and
        ``f1_score``. Any metric whose denominator is zero is reported as 0.
    """
    tp, tn, fp, fn = (
        grupo[column].sum()
        for column in ("true_positive", "true_negative", "false_positive", "false_negative")
    )

    def ratio(numerator, denominator):
        # Division that yields 0 instead of failing on an empty denominator.
        return numerator / denominator if denominator else 0

    accuracy = ratio(tp + tn, tp + tn + fp + fn)
    precision = ratio(tp, tp + fp)
    recall = ratio(tp, tp + fn)
    f1_score = ratio(2 * (precision * recall), precision + recall)

    return pd.Series({
        "accuracy": accuracy,
        "precision": precision,
        "recall": recall,
        "f1_score": f1_score
    })

# Group by vote + committee members + font.
# NOTE(review): "test" is not one of the grouping keys, so the metrics pool the
# rows of every test run - confirm this is intended.
group_cols = ["vote", "model_1", "model_2", "model_3", "font"]

# Only the four count columns are handed to apply. Applying over the whole
# frame would also pass the grouping columns, which pandas has deprecated.
count_cols = ["true_positive", "true_negative", "false_positive", "false_negative"]

metricas_por_grupo = (
    df.groupby(group_cols)[count_cols]
    .apply(calcular_metricas)
    .reset_index()
)

# Export (semicolon-separated, comma as decimal mark)
metricas_por_grupo.to_csv("result_table.csv", decimal=",", sep=";", index=False)


  df = pd.read_csv("consolidated_classifications_with_vote.csv")
  .apply(calcular_metricas)


In [None]:
import pandas as pd

df = pd.read_csv("consolidated_classifications.csv")

# The consolidation step writes only model/font/source_file/type/
# classification/confidence/test to this CSV, so the confusion-matrix flags
# are derived here when they are missing instead of failing with a KeyError.
# Flags already present in the file are used as they are.
# NOTE(review): the saved output of this cell lists committee names in `model`,
# which suggests it came from an older version of this CSV - confirm which file
# this table is meant to summarise.
confusion_rules = {
    "false_positive": ("safe", "INTERESTING"),
    "false_negative": ("attack", "NOT INTERESTING"),
    "true_positive": ("attack", "INTERESTING"),
    "true_negative": ("safe", "NOT INTERESTING"),
}
for flag_column, (truth, predicted) in confusion_rules.items():
    if flag_column not in df.columns:
        df[flag_column] = (
            (df["type"] == truth) & (df["classification"] == predicted)
        ).astype(int)

# Raw confusion-matrix counts per model and font
grouped = df.groupby(["model", "font"])[
    ["false_positive", "false_negative", "true_positive", "true_negative"]
].sum().reset_index()

display(grouped)
grouped.to_csv("result_table_brute.csv", decimal=',', sep=';', index=False)

Unnamed: 0,model,font,false_positive,false_negative,true_positive,true_negative
0,deepseek-r1_14b,data,8,4,37,11
1,gemma3_12b,data,18,0,41,1
2,llama3.1,data,19,0,41,0
3,mistral-nemo,data,19,0,41,0
4,phi4,data,16,0,41,3
5,qwen3_14b,data,8,3,38,11
6,selecao_dinamica,data,13,1,40,6
7,voto_majoritario,data,11,1,40,8
8,voto_ponderado,data,11,1,40,8


In [None]:
# Sanity check: number of distinct source files, test runs and fonts in the
# final votes table.
df = pd.read_csv("consolidated_classifications_with_vote.csv")
for column in ("source_file", "test", "font"):
    display(df[column].nunique())

  df = pd.read_csv("consolidated_classifications_with_vote.csv")


300

6

5