In [18]:
import pandas as pd
import random
import os
from synthcity.metrics.eval_statistical import AlphaPrecision
from synthcity.plugins.core.dataloader import GenericDataLoader

# Load the real dataset once; the encoded loader built here is reused for
# every synthetic dataset that gets compared against it.
real = pd.read_csv("Raw Data/heart_failure_data.csv")

# Add a dummy binary column named 'tar' before building the loader.
# The values come from a dedicated generator with a fixed seed, so a fresh
# kernel run reproduces the same column. (Drawing from the unseeded global
# `random` state made this column, and anything computed from it, differ on
# every run.) A private generator also leaves the global `random` state alone.
REAL_TAR_SEED = 42
real['tar'] = random.Random(REAL_TAR_SEED).choices([0, 1], k=len(real))

# Wrap the frame in a synthcity loader; only the first element of encode()'s
# result (the encoded loader) is kept.
# NOTE(review): whatever else encode() returns is discarded here — confirm
# that is intended if synthetic data should share the same encoding.
data_loader_real = GenericDataLoader(real)
data_loader_real_encoded = data_loader_real.encode()[0]

In [19]:
methods = ["synthpop", "arf", "privbayes", "ctgan", "tvae", "tabsyn"]
num_datasets = 5

# Base seed for the dummy 'tar' column attached to each synthetic dataset.
SYN_TAR_SEED = 42

# results[method] is a list with one entry per synthetic dataset that was
# found on disk; each entry is a list of three metric values.
results = {}

for method in methods:
    method_results = []
    for i in range(1, num_datasets + 1):
        syn_path = f"Data/{method}/syn_heart_failure_{method}_{i}.csv"

        if not os.path.exists(syn_path):
            # Best effort: report the missing file and keep going. Skipped
            # files leave no placeholder, so positions in method_results only
            # match the file number i when no file is missing.
            print(f"File not found: {syn_path}")
            continue

        syn = pd.read_csv(syn_path)

        # Dummy binary 'tar' column, mirroring what is done for the real data.
        # A private generator seeded from (base seed, method, index) makes the
        # column reproducible across runs and independent of which other files
        # happen to exist. The unseeded global `random` state used before
        # produced a different column on every run.
        tar_rng = random.Random(f"{SYN_TAR_SEED}:{method}:{i}")
        syn['tar'] = tar_rng.choices([0, 1], k=len(syn))

        # NOTE(review): the synthetic data is encoded independently of the
        # real data — confirm both encodings are comparable.
        data_loader_syn = GenericDataLoader(syn)
        data_loader_syn_encoded = data_loader_syn.encode()[0]

        # NOTE(review): _evaluate is underscore-prefixed, i.e. presumably not
        # part of the public API — confirm it is the intended entry point.
        metric_pra = AlphaPrecision()
        pra = metric_pra._evaluate(data_loader_real_encoded, data_loader_syn_encoded)

        # Keep the first three values of the returned mapping, by position.
        # NOTE(review): this relies on the mapping's insertion order —
        # verify which three metrics these are.
        pra_oc = list(pra.values())[:3]

        method_results.append(pra_oc)

    results[method] = method_results

In [26]:
# Dump the raw results dict (method name -> list of per-dataset metric lists).
print(results)

{'synthpop': [[0.9691883905541783, 0.5166303558460422, 0.47167755991285404], [0.9790799589312098, 0.4954248366013071, 0.4869281045751634], [0.9791926476848721, 0.49658678286129265, 0.5043572984749455], [0.9816667918763929, 0.48714596949891065, 0.48366013071895425], [0.9598627701399844, 0.4838053740014524, 0.49782135076252726]], 'arf': [[0.9170660856935366, 0.44996368917937546, 0.5315904139433552], [0.9142338416848221, 0.4042846768336965, 0.5337690631808278], [0.9406505897378108, 0.43071895424836604, 0.5065359477124183], [0.9275311146169835, 0.44974582425562815, 0.528322440087146], [0.8742193173565722, 0.40137981118373267, 0.5206971677559913]], 'privbayes': [[0.7201161946259986, 0.25061728395061733, 0.6318082788671024], [0.677923021060276, 0.24444444444444458, 0.6612200435729847], [0.6453885257806826, 0.22810457516339844, 0.6427015250544662], [0.6351488743645606, 0.22962962962962963, 0.6350762527233116], [0.6374001452432825, 0.2404502541757444, 0.664488017429194]], 'ctgan': [[0.85596373

In [23]:
# Labels applied, in order, to the values stored for each synthetic dataset
metric_names = ["alpha_precision", "beta_recall", "authenticity"]

# Flatten the nested results into one record per (method, dataset, metric).
# dataset_index is the 1-based position within each method's list; a metric
# with no stored value at its position is recorded as None.
rows = [
    {
        "method": method_name,
        "dataset_index": dataset_pos,
        "metric": label,
        "value": scores[slot] if slot < len(scores) else None,
    }
    for method_name, runs in results.items()
    for dataset_pos, scores in enumerate(runs, start=1)
    for slot, label in enumerate(metric_names)
]

# Assemble the long-format table in a single step
df_long = pd.DataFrame(rows)

# Persist without the index column and confirm on stdout
df_long.to_csv("alpha_precision_results_long.csv", index=False)
print("Long-format results saved to alpha_precision_results_long.csv")

Long-format results saved to alpha_precision_results_long.csv


In [24]:
# Preview the first rows of the long-format table as plain text.
print(df_long.head())

     method  dataset_index           metric     value
0  synthpop              1  alpha_precision  0.969188
1  synthpop              1      beta_recall  0.516630
2  synthpop              1     authenticity  0.471678
3  synthpop              2  alpha_precision  0.979080
4  synthpop              2      beta_recall  0.495425


In [25]:
# Average each metric per method, then reshape to a wide table:
# one row per method, one column per metric.
mean_df = (
    df_long
    .groupby(["method", "metric"])["value"]
    .mean()
    .reset_index()
    .pivot(index="method", columns="metric", values="value")
    .reset_index()
)

# Write the per-method means to CSV (index column omitted)
mean_df.to_csv("alpha_precision_results_means.csv", index=False)
print("Mittelwerte pro Methode gespeichert in alpha_precision_results_means.csv")

Mittelwerte pro Methode gespeichert in alpha_precision_results_means.csv
