In [None]:
import re
import pandas as pd
from collections import Counter

def parse_claims(claims):
    """
    Extract the specification, name and metric from each claim line.

    A claim is expected to look like
    ``claim <n>|{Specification, Name} Metric: Value|``. Lines that do not
    match this layout from their first character are skipped. The value
    part is matched but not returned.

    Args:
        claims: Iterable of claim strings (for example the lines of a file).

    Returns:
        A tuple ``(specifications, names, metrics)`` of three parallel lists,
        holding one entry per claim that matched, in input order.
    """
    # Layout: claim N|{Specification, Name} Metric: Value|
    claim_pattern = r"claim \d+\|\{([^,]+), ([^}]+)\} ([^:]+): (.+)\|"

    # Keep only the captured groups of the lines that actually matched.
    matches = (re.match(claim_pattern, line) for line in claims)
    parsed = [found.groups() for found in matches if found]

    # Each group tuple is (specification, name, metric, value).
    specifications = [fields[0] for fields in parsed]
    names = [fields[1] for fields in parsed]
    metrics = [fields[2] for fields in parsed]

    return specifications, names, metrics

def create_profiling(specifications, names, metrics, filename):
    """
    Count how often each specification, name and metric occurs and save
    the counts to a CSV file.

    The CSV has two columns, ``Key`` and ``Count``. Each key is prefixed
    with its category (``Specification: ``, ``Name: `` or ``Metric: ``).
    Rows are written in that category order and, within a category, in
    order of first occurrence.

    Args:
        specifications: Iterable of hashable specification labels.
        names: Iterable of hashable name labels.
        metrics: Iterable of hashable metric labels.
        filename: Destination passed to ``DataFrame.to_csv``.

    Returns:
        A tuple ``(spec_distribution, name_distribution, metric_distribution)``
        of ``collections.Counter`` objects.
    """
    # Count the occurrences of every distinct value per category.
    spec_distribution = Counter(specifications)
    name_distribution = Counter(names)
    metric_distribution = Counter(metrics)

    # One row per distinct value, labelled with the category it came from.
    labelled_distributions = (
        ("Specification", spec_distribution),
        ("Name", name_distribution),
        ("Metric", metric_distribution),
    )
    profiling_data = [
        {"Key": f"{label}: {value}", "Count": count}
        for label, distribution in labelled_distributions
        for value, count in distribution.items()
    ]

    # Name the columns explicitly: with no rows pandas would otherwise build
    # a column-less frame and the CSV would be written without its header.
    profiling_df = pd.DataFrame(profiling_data, columns=["Key", "Count"])

    # Save the results to the CSV file.
    profiling_df.to_csv(filename, index=False)

    return spec_distribution, name_distribution, metric_distribution

In [6]:
def main(claims_path='claims_test.txt', output_path="NAME_PROFILING.csv"):
    """
    Profile a claims file and print the resulting distributions.

    Reads the claims line by line, extracts specification, name and metric
    from each one with ``parse_claims``, writes the counts to a CSV through
    ``create_profiling`` and prints the three distributions to stdout.

    Args:
        claims_path: Text file with one claim per line. Defaults to
            ``claims_test.txt``.
        output_path: CSV file that receives the profiling. Defaults to
            ``NAME_PROFILING.csv``.
    """
    # Claims can contain non-ASCII symbols (e.g. Greek letters in names), so
    # decode explicitly instead of relying on the platform default encoding.
    # NOTE(review): assumes the claims file is stored as UTF-8 - confirm.
    with open(claims_path, 'r', encoding='utf-8') as file:
        claims = file.readlines()

    # Extract the information from the claims.
    specifications, names, metrics = parse_claims(claims)

    # Build the profiling CSV and get the distributions back.
    spec_distribution, name_distribution, metric_distribution = create_profiling(
        specifications, names, metrics, output_path
    )

    # Print the distributions; later headings start with a blank line so the
    # sections are visually separated.
    sections = (
        ("Distribuzione delle Specifiche:", spec_distribution),
        ("\nDistribuzione dei Nomi:", name_distribution),
        ("\nDistribuzione delle Metriche:", metric_distribution),
    )
    for heading, distribution in sections:
        print(heading)
        for key, count in distribution.items():
            print(f"{key}: {count}")

# Entry point: run the profiling only when this code executes as the main
# module, so importing these definitions elsewhere does not trigger file I/O.
if __name__ == "__main__":
    main()

Distribuzione delle Specifiche:
Models: 30
Discriminators: 42

Distribuzione dei Nomi:
EXALT Baseline: 3
ZSEC-gpt4turbo: 3
ZSEC-gpt4o: 3
MBCAWF: 3
MIAWF-3444built on ZSEC-gpt4o and Ensemble-9: 3
MIAWF-5555built on MIAWF-4 (which is built on MIAWF-3 and Ensemble-8) and Ensemble-8: 3
Ensemble-9666Ensemble of 9 models (see Table3in AppendixB): 3
Ensemble-8777Ensemble of 8 models (see Table3in AppendixB): 3
Ensemble-17888Ensemble of 17 models (see Table3in AppendixB): 3
Ensemble-19999Ensemble of 19 models (see Table3in AppendixB): 3
CodeLlama-13B: 6
GPT-3.5-Turbo: 6
CodeLlama-13B-FT: 6
CodeLlama-13BE: 6
GPT-3.5-TurboE: 6
CodeLlama-13B-FTE: 6
Oracle Simulation (τ=1.0𝜏1.0\tau=1.0): 6

Distribuzione delle Metriche:
F1-score: 10
Precision: 10
Recall: 10
Spider (Greedy Gen = 62.3) Re-ranking: 7
Spider (Greedy Gen = 62.3) Iter. Correct.: 7
Spider (Greedy Gen = 62.3) Tree Search: 7
Bird (Greedy Gen = 16.0) Re-ranking: 7
Bird (Greedy Gen = 16.0) Iter. Correct.: 7
Bird (Greedy Gen = 16.0) Tree Sear