In [7]:
import pandas as pd
import matplotlib.pyplot as plt
from fpdf import FPDF
import os
import ast
from collections import defaultdict

# Vulnerability categories; the table emits one row per entry, in this order.
vulnerabilities = [
    "denial_of_service",
    "other",
    "unchecked_low_level_calls",
    "access_control",
    "reentrancy",
    "front_running",
    "bad_randomness",
    "arithmetic",
    "time_manipulation",
]

# Model identifiers; input CSV files are matched to a model by substring.
models = [
    "FLAMES20k",
    "FLAMES100k",
    "CodeLlama7B",
]

# Names of the per-strategy result columns read from each CSV.
injection_strategies = [
    "VL",
    "pre_post",
    "pre",
    "post",
    "pre_VL_post",
    "pre_VL",
    "VL_post",
]

# Where aggregated CSVs are read from and where rendered tables are written.
INPUT_FOLDER = "data/aggregated"
OUTPUT_FOLDER = "figures/table"

# Make sure the output directory exists before anything tries to write to it.
os.makedirs(OUTPUT_FOLDER, exist_ok=True)


def parse_result_cell_to_tuple(cell):
    """Convert one raw result cell into ``(sanity, exploit, accepted_patch)``.

    The cell is expected to hold the text of a Python dict literal with the
    boolean-like keys ``'Sanity_Test_Success'`` and ``'Exploit_Covered'``.

    Args:
        cell: Raw cell value. Typically a string, but may be ``None`` or a
            float NaN when the cell was empty.

    Returns:
        A tuple of three 0/1 integers: sanity test passed, exploit covered,
        and accepted patch (1 only when both other flags are 1). Empty,
        missing, or unparsable cells yield ``(0, 0, 0)``.
    """
    # pandas represents an empty CSV cell as float NaN, which is truthy and
    # not equal to the string 'nan'; NaN is the only float with x != x.
    is_nan = isinstance(cell, float) and cell != cell
    if not cell or is_nan:
        return (0, 0, 0)

    text = cell
    if isinstance(cell, str):
        text = cell.strip()
        if not text or text.lower() == 'nan':
            return (0, 0, 0)

    try:
        # literal_eval only accepts literals, so it is safe on file content.
        result_dict = ast.literal_eval(text)
    except (ValueError, SyntaxError, TypeError, MemoryError, RecursionError) as e:
        print(f"Error parsing cell: {cell} -> {e}")
        return (0, 0, 0)

    if not isinstance(result_dict, dict):
        print(
            f"Error parsing cell: {cell} -> expected a dict, "
            f"got {type(result_dict).__name__}"
        )
        return (0, 0, 0)

    sanity = 1 if result_dict.get('Sanity_Test_Success', False) else 0
    exploit = 1 if result_dict.get('Exploit_Covered', False) else 0
    # A patch counts as accepted only when it passes both checks.
    accepted_patch = 1 if (sanity == 1 and exploit == 1) else 0
    return (sanity, exploit, accepted_patch)

def tuple_to_str(t):
    """Format the first three items of *t* as ``"a / b / c"``."""
    first, second, third = t[0], t[1], t[2]
    return "{} / {} / {}".format(first, second, third)

def generate_columns():
    """Return one ``"<model> | <strategy>"`` label per model/strategy pair.

    Labels are ordered model-first: all strategies of the first model, then
    all strategies of the second model, and so on.
    """
    labels = []
    for model in models:
        for strategy in injection_strategies:
            labels.append(f"{model} | {strategy}")
    return labels


def create_empty_table():
    """Build the result table skeleton: one row per vulnerability, no values.

    The first column is ``"Vulnerability"``; the remaining columns are the
    labels produced by ``generate_columns()`` and start out unfilled.
    """
    header = ["Vulnerability"] + generate_columns()
    skeleton = pd.DataFrame(columns=header)
    # Assigning the list to the empty frame creates one row per vulnerability.
    skeleton["Vulnerability"] = vulnerabilities
    return skeleton

def wrap_text(text, width=15):
    """Hard-wrap ``str(text)`` into newline-separated chunks of *width* chars.

    Text that already fits within *width* is returned unchanged. Splitting is
    purely positional; word boundaries are not considered.
    """
    content = str(text)
    if len(content) > width:
        pieces = []
        for start in range(0, len(content), width):
            pieces.append(content[start:start + width])
        return '\n'.join(pieces)
    return content

def save_df_as_image(df, filename, dpi=100):
    """Render *df* as a matplotlib table and save it to *filename*.

    Args:
        df: DataFrame whose column names become the header row and whose
            values become the table cells.
        filename: Destination path handed to matplotlib's ``savefig``.
        dpi: Resolution passed to ``savefig``.
    """
    header = list(df.columns)
    cells = df.values

    # A very large canvas so the wide table fits on one image.
    figure, axes = plt.subplots(figsize=(70, 30))
    axes.axis('off')

    rendered = axes.table(
        cellText=cells,
        colLabels=header,
        cellLoc='center',
        loc='center',
    )
    # Use a fixed font size instead of matplotlib's automatic sizing.
    rendered.auto_set_font_size(False)
    rendered.set_fontsize(26)
    rendered.scale(2.5, 3.5)

    figure.savefig(filename, dpi=dpi, bbox_inches='tight')
    plt.close(figure)

def create_pdf_from_images(image_files, output_pdf):
    """Write a PDF to *output_pdf* with one page per image in *image_files*.

    Each image is placed 10 units from the top-left corner and scaled to the
    page width minus 10 units on each side.
    """
    margin = 10
    document = FPDF()
    for image_path in image_files:
        document.add_page()
        usable_width = document.w - 2 * margin
        document.image(image_path, x=margin, y=margin, w=usable_width)
    document.output(output_pdf)

def load_all_csv_data(input_folder):
    """Load every model CSV found directly inside *input_folder*.

    A file is loaded when its name ends with ``.csv`` and contains one of the
    names in ``models``; the first matching model (in ``models`` order) wins.
    Files are read as semicolon-delimited CSV. Skipped and loaded files are
    reported on stdout.

    Returns:
        A list of ``(model_name, DataFrame)`` pairs, in directory-listing
        order.
    """
    loaded = []
    files = os.listdir(input_folder)
    print(f"Found files: {files}")

    for filename in files:
        if not filename.endswith(".csv"):
            print(f"Skipping {filename} (not a CSV)")
            continue

        # First model whose name occurs anywhere in the file name, if any.
        model_found = next((name for name in models if name in filename), None)
        if model_found is None:
            print(f"Skipping {filename} (no model match)")
            continue

        print(f"Reading {filename} for model {model_found}")
        frame = pd.read_csv(os.path.join(input_folder, filename), delimiter=';')
        loaded.append((model_found, frame))

    return loaded

def populate_table_with_counters(data_list):
    """Aggregate per-row results into one summary table.

    Args:
        data_list: Iterable of ``(model_name, DataFrame)`` pairs. Each frame
            must have a ``"vulnerability_type"`` column and may have one
            column per entry of ``injection_strategies`` holding raw result
            cells understood by ``parse_result_cell_to_tuple``.

    Returns:
        A DataFrame with one row per entry of ``vulnerabilities``. The
        ``"Vulnerability"`` column holds ``"<name> (<count>)"``; every
        ``"<model> | <strategy>"`` column holds
        ``"<sanity> / <exploit> / <accepted>"`` totals.

    Raises:
        KeyError: If a frame has no ``"vulnerability_type"`` column.
    """
    # counters[vuln][model][strategy] -> [sanity, exploit, accepted] totals.
    counters = defaultdict(lambda: defaultdict(lambda: defaultdict(lambda: [0, 0, 0])))
    vuln_occurrences = defaultdict(int)
    seen_models = set()

    for model_name, df in data_list:
        seen_models.add(model_name)
        for _, row in df.iterrows():
            vuln = row["vulnerability_type"]
            if vuln not in vulnerabilities:
                continue
            vuln_occurrences[vuln] += 1

            for strategy in injection_strategies:
                cell = row.get(strategy, None)
                if cell is None:
                    # Strategy column absent from this file.
                    continue
                sanity, exploit, accepted = parse_result_cell_to_tuple(cell)
                counts = counters[vuln][model_name][strategy]
                counts[0] += sanity
                counts[1] += exploit
                counts[2] += accepted

    # Occurrences are summed over all models, so divide by the number of
    # models actually loaded to get a per-model count. This was a hard-coded
    # 3, which misreports the count whenever a model file is missing.
    # NOTE(review): presumably every model file lists the same samples - confirm.
    num_models = len(seen_models) or 1

    table_df = create_empty_table()
    table_df["Vulnerability"] = [
        f"{vuln} ({vuln_occurrences[vuln] // num_models})" for vuln in vulnerabilities
    ]

    # Rows are in `vulnerabilities` order, so address them by position rather
    # than by a label-prefix match (which would hit several rows if one
    # vulnerability name were a prefix of another).
    for row_label, vuln in zip(table_df.index, vulnerabilities):
        for model in models:
            for strategy in injection_strategies:
                col_name = f"{model} | {strategy}"
                table_df.at[row_label, col_name] = tuple_to_str(counters[vuln][model][strategy])

    return table_df

# Driver: load every model CSV, aggregate the results, and render the table.
all_data = load_all_csv_data(INPUT_FOLDER)

# One row per vulnerability, one column per model/strategy pair.
aggregated_table = populate_table_with_counters(all_data)  

# Only a PNG is written; the PDF export below is currently disabled.
save_df_as_image(aggregated_table, os.path.join(OUTPUT_FOLDER, "aggregated_table.png"))

# Disabled PDF export. NOTE(review): it points at "isolated_table.png", which
# this cell never writes - confirm the intended file before re-enabling.
#create_pdf_from_images([os.path.join(OUTPUT_FOLDER, "isolated_table.png")], os.path.join(OUTPUT_FOLDER, "isolated_table.pdf"))

# NOTE(review): the message mentions PDFs although no PDF is created above.
print("Tables generated and PDFs created successfully!")


Found files: ['validation_results_CodeLlama7B_aggregated.csv', 'validation_results_FLAMES20k_aggregated.csv', 'validation_results_FLAMES100k_aggregated.csv']
Reading validation_results_CodeLlama7B_aggregated.csv for model CodeLlama7B
Reading validation_results_FLAMES20k_aggregated.csv for model FLAMES20k
Reading validation_results_FLAMES100k_aggregated.csv for model FLAMES100k
Tables generated and PDFs created successfully!
