In [1]:
# Move the kernel's working directory two levels up.
# NOTE(review): presumably so the relative "datasets/..." paths used in the
# cells below resolve from the project root — confirm against the repo layout.
%cd ../..

/home/matheus/Desktop/Itens./Itens/Projetos/paper-weak-llm


In [2]:
import os
import pandas as pd
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score

def compute_metrics_for_csvs(root_path):
    """Compute and display classification metrics for every CSV under ``root_path``.

    The directory tree is walked recursively. Every ``.csv`` file that has both
    a ``class`` column (ground truth) and a ``predicted_class`` column is scored
    with accuracy and macro-averaged precision / recall / F1. Files lacking
    either column are skipped silently.

    Results are grouped by the basename of the directory containing each file
    and displayed per group, best F1 first.

    Args:
        root_path: Directory to search for prediction CSVs.

    Returns:
        None. Results are printed / displayed as a side effect.
    """
    results = []

    for subdir, _, files in os.walk(root_path):
        for file in files:
            if not file.endswith(".csv"):
                continue

            file_path = os.path.join(subdir, file)
            df = pd.read_csv(file_path)

            if 'class' not in df.columns or 'predicted_class' not in df.columns:
                continue

            # The missing-prediction mask must be taken BEFORE casting to str:
            # astype(str) turns NaN into the literal "nan", after which notna()
            # is always True and percent_none would always be reported as 0.
            non_null_mask = df['predicted_class'].notna()
            percent_none = 100 * (1 - non_null_mask.mean())  # in %

            # Labels are compared as strings so numeric and textual class ids
            # match regardless of how pandas inferred the column dtype.
            # Missing predictions become the label "nan" and count as errors.
            y_true = df['class'].astype(str)
            y_pred = df['predicted_class'].astype(str)

            # Only score the file if there is at least one real prediction.
            if non_null_mask.any():
                acc = accuracy_score(y_true, y_pred)
                # zero_division=0 yields the same value as sklearn's default
                # ("warn" -> 0) without emitting UndefinedMetricWarning for
                # classes that never appear among the predictions / targets.
                precision = precision_score(y_true, y_pred, average='macro', zero_division=0)
                recall = recall_score(y_true, y_pred, average='macro', zero_division=0)
                f1 = f1_score(y_true, y_pred, average='macro', zero_division=0)
            else:
                acc = precision = recall = f1 = 0.0

            results.append({
                'file': file,
                'folder': os.path.basename(subdir),
                'accuracy': acc,
                'precision': precision,
                'recall': recall,
                'f1_score': f1,
                'percent_none': round(percent_none, 4)
            })
            print(percent_none)

    # Without this guard an empty result list produces a column-less DataFrame
    # and groupby('folder') raises KeyError.
    if not results:
        print(f"No CSV files with 'class' and 'predicted_class' columns found under {root_path}")
        return

    metrics_df = pd.DataFrame(results)

    for folder, group in metrics_df.groupby('folder'):
        print(f"Folder: {folder}")
        display(group.sort_values(by='f1_score', ascending=False))
        print("-" * 50)

# Example usage: score every prediction CSV found under the zero-shot results
# directory (the path is relative to the current working directory).
root_directory = "datasets/llm_predict/zero_shot"
compute_metrics_for_csvs(root_directory)


0.0
0.0
0.0
Folder: Dmoz-Computers


  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))


Unnamed: 0,file,folder,accuracy,precision,recall,f1_score,percent_none
0,deepseek-r1-0528-qwen3-8b.csv,Dmoz-Computers,0.406421,0.515326,0.425667,0.435544,0.0
2,mistral-nemo.csv,Dmoz-Computers,0.385895,0.457137,0.382632,0.382732,0.0
1,lfm-7b.csv,Dmoz-Computers,0.327789,0.474677,0.326316,0.333211,0.0


--------------------------------------------------


In [3]:
import os
import pandas as pd
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score
from reportlab.lib.pagesizes import A4
from reportlab.lib import colors
from reportlab.platypus import SimpleDocTemplate, Table, TableStyle, Paragraph, Spacer
from reportlab.lib.styles import getSampleStyleSheet

def compute_metrics_for_csvs(root_path):
    """Collect classification metrics for every prediction CSV under ``root_path``.

    NOTE: this redefines the function of the same name from the earlier cell.
    This version drops rows predicted as ``"other"`` before scoring and returns
    the metrics instead of displaying them.

    The directory tree is walked recursively. Every ``.csv`` file that has both
    a ``class`` and a ``predicted_class`` column is scored with accuracy and
    macro-averaged precision / recall / F1 on the rows whose prediction is not
    ``"other"``. Files lacking either column, or with no rows left after the
    filter, are skipped.

    Args:
        root_path: Directory to search for prediction CSVs.

    Returns:
        pandas.DataFrame with one row per scored file and the columns ``file``,
        ``folder`` (basename of the containing directory), ``accuracy``,
        ``precision``, ``recall`` and ``f1_score``. The metric columns hold
        strings formatted with four decimals. The frame is empty (no columns)
        when nothing was scored.
    """
    results = []

    for subdir, _, files in os.walk(root_path):
        for file in files:
            if not file.endswith(".csv"):
                continue

            df = pd.read_csv(os.path.join(subdir, file))

            if 'class' not in df.columns or 'predicted_class' not in df.columns:
                continue

            # Discard rows the model labelled "other"; NaN predictions are kept
            # (NaN != "other") and are scored as the label "nan" below.
            kept = df[df['predicted_class'] != "other"]

            # Nothing left to score: metrics on zero samples are undefined, so
            # skip the file rather than hand empty arrays to sklearn.
            if kept.empty:
                continue

            # Cast into fresh Series instead of assigning columns on the
            # filtered slice, which triggers pandas' SettingWithCopyWarning.
            y_true = kept['class'].astype(str)
            y_pred = kept['predicted_class'].astype(str)

            acc = accuracy_score(y_true, y_pred)
            # zero_division=0 yields the same value as sklearn's default
            # ("warn" -> 0) without emitting UndefinedMetricWarning.
            precision = precision_score(y_true, y_pred, average='macro', zero_division=0)
            recall = recall_score(y_true, y_pred, average='macro', zero_division=0)
            f1 = f1_score(y_true, y_pred, average='macro', zero_division=0)

            results.append({
                'file': file,
                'folder': os.path.basename(subdir),
                'accuracy': f"{acc:.4f}",
                'precision': f"{precision:.4f}",
                'recall': f"{recall:.4f}",
                'f1_score': f"{f1:.4f}"
            })

    return pd.DataFrame(results)

def generate_pdf(metrics_df, output_path="metrics_report.pdf"):
    """Render the metrics table as a PDF report, one section per folder.

    Args:
        metrics_df: DataFrame with the columns ``file``, ``folder``,
            ``accuracy``, ``precision``, ``recall`` and ``f1_score``.
        output_path: Destination path of the PDF file.

    Returns:
        None. The PDF is written to ``output_path`` and a confirmation line is
        printed. If ``metrics_df`` is empty, no file is written.
    """
    # An empty frame (e.g. no CSV was found) has no 'folder' column, so the
    # groupby below would raise KeyError; bail out before creating the document.
    if metrics_df.empty:
        print("Nenhuma métrica para reportar; PDF não gerado.")
        return

    doc = SimpleDocTemplate(output_path, pagesize=A4)
    elements = []
    styles = getSampleStyleSheet()

    for folder, group in metrics_df.groupby('folder'):
        # Section title
        elements.append(Paragraph(f"Folder: {folder}", styles['Heading2']))
        elements.append(Spacer(1, 10))

        # Table data: header row followed by one row per file, best F1 first.
        # NOTE(review): when f1_score holds strings this sort is lexicographic;
        # it matches numeric order only for fixed-width values such as "0.1234".
        data = [["Arquivo", "Accuracy", "Precision", "Recall", "F1-score"]]
        for _, row in group.sort_values(by='f1_score', ascending=False).iterrows():
            data.append([row['file'], row['accuracy'], row['precision'], row['recall'], row['f1_score']])

        # Build the styled table: grey bold header, beige body, black grid.
        table = Table(data)
        style = TableStyle([
            ('BACKGROUND', (0, 0), (-1, 0), colors.grey),
            ('TEXTCOLOR', (0, 0), (-1, 0), colors.whitesmoke),
            ('ALIGN', (0, 0), (-1, -1), 'CENTER'),
            ('FONTNAME', (0, 0), (-1, 0), 'Helvetica-Bold'),
            ('BOTTOMPADDING', (0, 0), (-1, 0), 8),
            ('BACKGROUND', (0, 1), (-1, -1), colors.beige),
            ('GRID', (0, 0), (-1, -1), 1, colors.black)
        ])
        table.setStyle(style)
        elements.append(table)
        elements.append(Spacer(1, 20))  # Gap between sections

    # Write the PDF
    doc.build(elements)
    print(f"Relatório salvo em {output_path}")

# Example usage: collect metrics for every prediction CSV under the results
# directory and write them to a PDF report in the current working directory.
root_directory = "datasets/llm_predict"
metrics_df = compute_metrics_for_csvs(root_directory)
generate_pdf(metrics_df, "metrics_report.pdf")


ModuleNotFoundError: No module named 'reportlab'