# In [None]:
import pandas as pd
import matplotlib.pyplot as plt
from fpdf import FPDF
import os
import ast
from collections import defaultdict

# Vulnerability categories; each one becomes a row of the summary table.
vulnerabilities = [
    "denial_of_service",
    "other",
    "unchecked_low_level_calls",
    "access_control",
    "reentrancy",
    "front_running",
    "bad_randomness",
    "arithmetic",
    "time_manipulation",
]

# Model names; an input CSV is attributed to a model when the name occurs
# in its file name.
models = [
    "FLAMES20k",
    "FLAMES100k",
    "CodeLlama7B",
]

# Names of the per-strategy result columns read from every CSV row.
injection_strategies = [
    "VL",
    "pre_post",
    "pre",
    "post",
    "pre_VL_post",
    "pre_VL",
    "VL_post",
]

# Folder the result CSVs are read from, and folder the rendered tables go to.
INPUT_FOLDER = "data/isolated"
OUTPUT_FOLDER = "figures/table"

# Make sure the output folder exists before anything is written into it.
os.makedirs(OUTPUT_FOLDER, exist_ok=True)

def parse_result_cell_to_tuple(cell):
    """Convert one raw result cell into ``(sanity, exploit, accepted_patch)``.

    Args:
        cell: The text of a Python dict literal, an already-parsed dict, or a
            missing value (``None``, a float ``NaN``, an empty string, or the
            string ``'nan'``).

    Returns:
        A tuple of three ints, each 0 or 1:
        ``sanity`` is 1 when ``'Sanity_Test_Success'`` is truthy,
        ``exploit`` is 1 when ``'Exploit_Covered'`` is truthy, and
        ``accepted_patch`` is 1 only when both of the others are 1.
        Missing, unparsable or non-dict input yields ``(0, 0, 0)``.
    """
    if isinstance(cell, dict):
        result_dict = cell
    elif isinstance(cell, str):
        text = cell.strip()
        if not text or text == 'nan':
            return (0, 0, 0)
        try:
            # literal_eval only accepts Python literals; it never runs code.
            result_dict = ast.literal_eval(text)
        except (ValueError, SyntaxError, TypeError, MemoryError, RecursionError):
            # Best effort: a malformed cell counts as "no result".
            return (0, 0, 0)
        if not isinstance(result_dict, dict):
            return (0, 0, 0)
    else:
        # None, float NaN and any other non-text value mean "no result".
        return (0, 0, 0)

    sanity = 1 if result_dict.get('Sanity_Test_Success', False) else 0
    exploit = 1 if result_dict.get('Exploit_Covered', False) else 0
    accepted_patch = 1 if (sanity == 1 and exploit == 1) else 0
    return (sanity, exploit, accepted_patch)

def tuple_to_str(t):
    """Render the first three items of *t* as ``"a / b / c"``."""
    first, second, third = t[0], t[1], t[2]
    return "{} / {} / {}".format(first, second, third)

def generate_columns():
    """Return every ``"<model>|<strategy>"`` column name, grouped by model."""
    column_names = []
    for model in models:
        for strategy in injection_strategies:
            column_names.append(f"{model}|{strategy}")
    return column_names

def create_empty_table():
    """Build the result table skeleton.

    Returns:
        A DataFrame with a ``Vulnerability`` column holding the entries of
        ``vulnerabilities`` followed by one still-unfilled column per name
        returned by ``generate_columns``.
    """
    header = ["Vulnerability", *generate_columns()]
    table = pd.DataFrame(columns=header)
    # Assigning the list to the empty frame creates one row per vulnerability.
    table["Vulnerability"] = vulnerabilities
    return table

def load_all_csv_data(input_folder):
    """Read every model CSV found directly inside *input_folder*.

    A file is loaded when its name ends in ``.csv`` and contains one of the
    names in ``models``; the first matching name wins. Files are parsed with
    ``;`` as the field delimiter.

    Args:
        input_folder: Directory to scan (not recursive).

    Returns:
        A list of ``(model_name, DataFrame)`` pairs in directory-listing order.
    """
    loaded = []
    for entry in os.listdir(input_folder):
        if not entry.endswith(".csv"):
            continue
        # First model whose name appears in the file name, if any.
        matching_model = next((name for name in models if name in entry), None)
        if matching_model is None:
            continue
        csv_path = os.path.join(input_folder, entry)
        frame = pd.read_csv(csv_path, delimiter=';')
        loaded.append((matching_model, frame))
    return loaded

def populate_table_with_counters(data_list):
    """Aggregate per-row results into one summary table.

    Every row whose ``vulnerability_type`` is listed in ``vulnerabilities``
    adds its parsed (sanity, exploit, accepted) flags to a running total kept
    per vulnerability, model and injection strategy.

    Args:
        data_list: Iterable of ``(model_name, DataFrame)`` pairs. Each frame
            is read row by row; a strategy column that is absent from a row
            is passed to the parser as ``None``.

    Returns:
        The table from ``create_empty_table`` with the ``Vulnerability``
        column relabelled ``"<name> (<rows per model>)"`` and every
        ``"<model>|<strategy>"`` cell set to the ``"sanity / exploit /
        accepted"`` totals for that combination.

    Raises:
        KeyError: If a non-empty frame has no ``vulnerability_type`` column.
    """
    counters = defaultdict(lambda: defaultdict(lambda: defaultdict(lambda: [0, 0, 0])))
    vuln_occurrences = defaultdict(int)
    # Models that actually supplied rows for each vulnerability. Used to turn
    # the summed row count into a per-model count without assuming that
    # exactly three models were loaded.
    vuln_models = defaultdict(set)

    for model_name, df in data_list:
        for _, row in df.iterrows():
            vuln = row["vulnerability_type"]
            if vuln not in vulnerabilities:
                continue
            vuln_occurrences[vuln] += 1
            vuln_models[vuln].add(model_name)
            for strategy in injection_strategies:
                cell = row.get(strategy, None)
                sanity, exploit, accepted = parse_result_cell_to_tuple(cell)
                counts = counters[vuln][model_name][strategy]
                counts[0] += sanity
                counts[1] += exploit
                counts[2] += accepted

    # Row labels: rows are summed over all models, so divide by the number of
    # contributing models to show the (floored) average rows per model.
    labels = []
    for vuln in vulnerabilities:
        contributing = len(vuln_models[vuln])
        per_model = vuln_occurrences[vuln] // contributing if contributing else 0
        labels.append(f"{vuln} ({per_model})")

    table_df = create_empty_table()
    table_df["Vulnerability"] = labels

    # Fill one whole column at a time; rows follow the order of
    # ``vulnerabilities``, which is also the row order of the table.
    for model in models:
        for strategy in injection_strategies:
            table_df[f"{model}|{strategy}"] = [
                tuple_to_str(counters[vuln][model][strategy])
                for vuln in vulnerabilities
            ]

    return table_df

def wrap_vulnerability_name(vuln_name):
    """Replace the first underscore in *vuln_name* with a line break.

    Names without an underscore are returned unchanged.
    """
    head, separator, tail = vuln_name.partition('_')
    if not separator:
        return vuln_name
    return f"{head}\n{tail}"

def wrap_column_name(col_name):
    """Turn every ``|`` in *col_name* into ``" |"`` followed by a line break.

    Names without a pipe are returned unchanged.
    """
    pieces = col_name.split('|')
    return ' |\n'.join(pieces)

def save_df_as_image_multi_blocks(df, filename_prefix, block_size=7, dpi=400):
    """Render *df* as a series of PNG tables, ``block_size`` data columns each.

    Every column after the first is treated as a data column. The
    ``Vulnerability`` column is repeated at the left of each image, with its
    values and the column headers line-wrapped for display.

    Args:
        df: Table to render; must contain a ``Vulnerability`` column.
        filename_prefix: Path prefix; images are written to
            ``"<prefix>_part<k>.png"`` with ``k`` starting at 1.
        block_size: Number of data columns per image.
        dpi: Resolution passed to the figure writer.

    Returns:
        The list of image paths written, in order.
    """
    value_columns = df.columns[1:]
    saved_paths = []

    block_starts = range(0, len(value_columns), block_size)
    for part_no, start in enumerate(block_starts, start=1):
        chunk = list(value_columns[start:start + block_size])

        # Work on a copy so wrapping the labels does not touch the caller's frame.
        view = df[["Vulnerability"] + chunk].copy()
        view["Vulnerability"] = view["Vulnerability"].apply(wrap_vulnerability_name)
        header = ["Vulnerability"] + [wrap_column_name(name) for name in chunk]

        # Figure width grows with the number of columns in this chunk.
        fig_width = 1.5 + len(chunk) * 2.2
        fig, ax = plt.subplots(figsize=(fig_width, 10))
        ax.axis('off')

        rendered = ax.table(
            cellText=view.values,
            colLabels=header,
            cellLoc='center',
            loc='center',
        )
        rendered.auto_set_font_size(False)
        rendered.set_fontsize(15)
        rendered.scale(1.3, 3.5)
        fig.tight_layout()

        target = f"{filename_prefix}_part{part_no}.png"
        saved_paths.append(target)
        fig.savefig(target, dpi=dpi, bbox_inches='tight')
        plt.close(fig)

    return saved_paths

def create_pdf_from_images(image_files, output_pdf):
    """Write a PDF to *output_pdf* with one page per image in *image_files*.

    Each image is placed 10 units from the top-left corner and scaled to the
    page width minus 10 units on either side.
    """
    margin = 10
    document = FPDF()
    for image_path in image_files:
        document.add_page()
        document.image(image_path, x=margin, y=margin, w=document.w - 2 * margin)
    document.output(output_pdf)

# Script entry: load every per-model CSV from INPUT_FOLDER, aggregate the
# results into one table, then render that table as PNG images whose paths
# start with OUTPUT_FOLDER/isolated_table.
all_data = load_all_csv_data(INPUT_FOLDER)
aggregated_table = populate_table_with_counters(all_data)
image_files = save_df_as_image_multi_blocks(aggregated_table, os.path.join(OUTPUT_FOLDER, "isolated_table"))
# PDF export is currently disabled; uncomment the next line to bundle the
# generated images into a single PDF.
#create_pdf_from_images(image_files, os.path.join(OUTPUT_FOLDER, "isolated_table.pdf"))

print("✅ Table images successfully created.")
