In [11]:
import pandas as pd
import random
import os
from synthcity.metrics.eval_statistical import AlphaPrecision
from synthcity.plugins.core.dataloader import GenericDataLoader

# Load real data once
real = pd.read_csv("Raw Data/data_small.csv")
real['tar'] = random.choices([0, 1], k=len(real))
data_loader_real = GenericDataLoader(real)
data_loader_real_encoded = data_loader_real.encode()[0]



In [2]:
print(real.head())

   outcome itt_treat  nihss  randdelay vis_infarct   age
0        2     rt-PA     10   3.616667          No  81.0
1        1   Placebo     18   1.866667          No  92.0
2        1   Placebo      4   5.083333          No  75.0
3        1     rt-PA     13   4.333333         Yes  60.0
4        2     rt-PA      6   1.916667          No  88.0


In [13]:
methods = ["synthpop", "arf", "privbayes", "ctgan", "tvae", "tabsyn"]
num_datasets = 5

# Store results
results = {}

for method in methods:
    method_results = []
    for i in range(1, num_datasets + 1):
        syn_path = f"Data/{method}/syn_data_small_{method}_{i}.csv"
        
        if not os.path.exists(syn_path):
            print(f"File not found: {syn_path}")
            continue
        
        syn = pd.read_csv(syn_path)
        syn['tar'] = random.choices([0, 1], k=len(syn))
        data_loader_syn = GenericDataLoader(syn)
        data_loader_syn_encoded = data_loader_syn.encode()[0]

        metric_pra = AlphaPrecision()
        pra = metric_pra._evaluate(data_loader_real_encoded, data_loader_syn_encoded)
        pra_oc = list(pra.values())[:3]
        
        method_results.append(pra_oc)
    
    results[method] = method_results

# Example: print results for one method
print("Results for tabsyn:", results["tabsyn"])

Results for tabsyn: [[0.9871680963472136, 0.492300933552993, 0.5024711696869851], [0.9803336552481584, 0.48733662822624935, 0.49028006589785833], [0.9796920979378515, 0.48085667215815486, 0.5090609555189456], [0.9771538942225757, 0.5115211422295443, 0.49357495881383856], [0.9774621750080479, 0.5011751784733663, 0.5064250411861615]]


In [14]:
print(results)

{'synthpop': [[0.9873476112026359, 0.5822075782537067, 0.4408566721581549], [0.9818084038705523, 0.5923997803404724, 0.44217462932454693], [0.9840405991402981, 0.5838769906644701, 0.4270181219110379], [0.9865886496619894, 0.5874135090609556, 0.42800658978583195], [0.9914575167111667, 0.6010543657331137, 0.44349258649093903]], 'arf': [[0.9682069344240565, 0.5008896210873147, 0.4932454695222405], [0.9708276998238937, 0.5018561230093355, 0.485667215815486], [0.9578541536480524, 0.4954200988467875, 0.49654036243822075], [0.9532784184514004, 0.50277869302581, 0.49719934102141683], [0.9448654585392642, 0.4805930807248765, 0.5126853377265239]], 'privbayes': [[0.6674135090609555, 0.32518396485447565, 0.6319604612850083], [0.6676990664470072, 0.32628226249313563, 0.6217462932454695], [0.6598572213069742, 0.31507962657880273, 0.6260296540362438], [0.6825480505216914, 0.31967051070840213, 0.6088962108731466], [0.6725755079626579, 0.31079626578802855, 0.6164744645799012]], 'ctgan': [[0.95258838455

In [15]:
# Define metric names
metric_names = ["alpha_precision", "beta_recall", "authenticity"]

# Build long-format rows
rows = []
for method, method_results in results.items():
    for i, pra_values in enumerate(method_results, 1):
        for metric_index, metric_name in enumerate(metric_names):
            value = pra_values[metric_index] if len(pra_values) > metric_index else None
            rows.append({
                "method": method,
                "dataset_index": i,
                "metric": metric_name,
                "value": value
            })

# Create DataFrame
df_long = pd.DataFrame(rows)

# Save to CSV
df_long.to_csv("alpha_precision_results_long.csv", index=False)
print("Long-format results saved to alpha_precision_results_long.csv")

Long-format results saved to alpha_precision_results_long.csv


In [17]:
# Berechne den Mittelwert pro Methode und Metrik
mean_df = df_long.groupby(["method", "metric"])["value"].mean().reset_index()

# Optional: schöner formatieren
mean_df = mean_df.pivot(index="method", columns="metric", values="value").reset_index()

# Speichern als CSV
mean_df.to_csv("alpha_precision_results_means.csv", index=False)
print("Mittelwerte pro Methode gespeichert in alpha_precision_results_means.csv")


Mittelwerte pro Methode gespeichert in alpha_precision_results_means.csv
