In [3]:
import pandas as pd
import random
import os
from synthcity.metrics.eval_statistical import AlphaPrecision
from synthcity.plugins.core.dataloader import GenericDataLoader

# Load the real dataset once; every synthetic dataset below is compared to it.
real = pd.read_csv("Data/medical_insurance_2.csv")

# Attach a random binary 'tar' column (presumably a placeholder target for the
# loader -- TODO confirm). A dedicated, seeded generator is used instead of
# the unseeded global RNG so the column, and everything computed from it, is
# identical after Restart Kernel -> Run All.
real['tar'] = random.Random(42).choices([0, 1], k=len(real))

# Wrap the frame in a synthcity loader and keep the encoded loader
# (first element of the tuple returned by encode()).
data_loader_real = GenericDataLoader(real)
data_loader_real_encoded = data_loader_real.encode()[0]

In [4]:
# Synthesizers to evaluate and the number of synthetic datasets per synthesizer.
methods = ["synthpop", "arf", "privbayes", "ctgan", "tvae", "tabsyn"]
num_datasets = 5

# Encoders fitted on the real data. They are reused for every synthetic
# dataset so that categorical values map to the same integer codes on both
# sides of the comparison; fitting fresh encoders on each synthetic file can
# assign different codes whenever its set of categories differs from the real
# data. A category that never occurs in the real data will make the transform
# raise, which surfaces the mismatch instead of hiding it.
_, real_encoders = data_loader_real.encode()

# Store results: method name -> list of 3-element metric lists, one entry per
# synthetic dataset that was found on disk.
results = {}

for method in methods:
    method_results = []
    for i in range(1, num_datasets + 1):
        syn_path = f"Data/{method}/syn_medical_insurance_{method}_{i}.csv"

        # Missing files are reported and skipped, so a method may end up with
        # fewer than num_datasets entries.
        if not os.path.exists(syn_path):
            print(f"File not found: {syn_path}")
            continue

        syn = pd.read_csv(syn_path)
        # Random binary 'tar' column, mirroring the real data. The generator
        # is seeded per file so the result is reproducible and does not depend
        # on which other files exist or on cell execution order.
        syn['tar'] = random.Random(f"{method}-{i}").choices([0, 1], k=len(syn))
        data_loader_syn = GenericDataLoader(syn)
        data_loader_syn_encoded = data_loader_syn.encode(real_encoders)[0]

        # NOTE(review): _evaluate is a private method and the first three
        # values of the returned dict are taken by position (presumably the
        # "OC" variants of precision, recall and authenticity) -- confirm
        # against the installed synthcity version.
        metric_pra = AlphaPrecision()
        pra = metric_pra._evaluate(data_loader_real_encoded, data_loader_syn_encoded)
        pra_oc = list(pra.values())[:3]

        method_results.append(pra_oc)

    results[method] = method_results

In [7]:
# Quick sanity check: dump the raw metric lists collected for each method.
print(str(results))

{'synthpop': [[0.9902572523262179, 0.31467051467051455, 0.70995670995671], [0.9841667910633428, 0.32311207311207313, 0.6962481962481962], [0.981065996583238, 0.31185666185666194, 0.6984126984126984], [0.9893275945000083, 0.31945646945646944, 0.6991341991341992], [0.9824907531804083, 0.3232082732082733, 0.6854256854256854]], 'arf': [[0.9475991441508683, 0.2172919672919672, 0.7842712842712842], [0.9513351909903635, 0.22443482443482465, 0.7954545454545454], [0.9632880529432254, 0.22979797979797978, 0.7723665223665224], [0.9717934683451925, 0.21899951899951897, 0.7781385281385281], [0.9696015989119438, 0.21659451659451656, 0.7846320346320347]], 'privbayes': [[0.7245310245310246, 0.1539923039923039, 0.8376623376623377], [0.7178210678210677, 0.16048581048581034, 0.838023088023088], [0.7313131313131314, 0.1544492544492544, 0.8459595959595959], [0.7138047138047139, 0.1505291005291005, 0.816017316017316], [0.737999037999038, 0.1527176527176528, 0.8448773448773449]], 'ctgan': [[0.932645834369972

In [5]:
# Labels for the three values stored per synthetic dataset, in stored order.
metric_names = ["alpha_precision", "beta_recall", "authenticity"]

# One record per (method, dataset, metric). A metric whose value is missing
# from a short result list is recorded as None.
# Note: dataset_index is the 1-based position within the collected results,
# not necessarily the file suffix -- files skipped as missing during
# evaluation shift the positions of the ones that follow.
rows = [
    {
        "method": method_name,
        "dataset_index": position,
        "metric": label,
        "value": scores[slot] if slot < len(scores) else None,
    }
    for method_name, per_dataset in results.items()
    for position, scores in enumerate(per_dataset, start=1)
    for slot, label in enumerate(metric_names)
]

# Materialise the records as a long-format frame and persist it.
df_long = pd.DataFrame(rows)
df_long.to_csv("alpha_precision_results_long.csv", index=False)
print("Long-format results saved to alpha_precision_results_long.csv")

Long-format results saved to alpha_precision_results_long.csv


In [6]:
# Average each metric per method, then reshape to a wide table with one row
# per method and one column per metric.
mean_df = (
    df_long
    .groupby(["method", "metric"])["value"]
    .mean()
    .reset_index()
    .pivot(index="method", columns="metric", values="value")
    .reset_index()
)

# Save the per-method means as CSV.
mean_df.to_csv("alpha_precision_results_means.csv", index=False)
print("Mittelwerte pro Methode gespeichert in alpha_precision_results_means.csv")

Mittelwerte pro Methode gespeichert in alpha_precision_results_means.csv
