In [1]:
import pandas as pd
import random
import os
from synthcity.metrics.eval_statistical import AlphaPrecision
from synthcity.plugins.core.dataloader import GenericDataLoader

# Load real data once
real = pd.read_csv("Raw Data/data_small.csv")
real['tar'] = random.choices([0, 1], k=len(real))
data_loader_real = GenericDataLoader(real)
data_loader_real_encoded = data_loader_real.encode()[0]

    The default C++ compiler could not be found on your system.
    You need to either define the CXX environment variable or a symlink to the g++ command.
    For example if g++-8 is the command you can do
      import os
      os.environ['CXX'] = 'g++-8'
    


In [2]:
print(real.head())

   outcome itt_treat  nihss  randdelay vis_infarct   age  tar
0        2     rt-PA     10   3.616667          No  81.0    0
1        1   Placebo     18   1.866667          No  92.0    1
2        1   Placebo      4   5.083333          No  75.0    1
3        1     rt-PA     13   4.333333         Yes  60.0    0
4        2     rt-PA      6   1.916667          No  88.0    0


In [3]:
methods = ["synthpop", "arf", "privbayes", "ctgan", "tvae", "tabsyn"]
num_datasets = 5

# Store results
results = {}

for method in methods:
    method_results = []
    for i in range(1, num_datasets + 1):
        syn_path = f"Data/{method}/syn_data_small_{method}_{i}.csv"
        
        if not os.path.exists(syn_path):
            print(f"File not found: {syn_path}")
            continue
        
        syn = pd.read_csv(syn_path)
        syn['tar'] = random.choices([0, 1], k=len(syn))
        data_loader_syn = GenericDataLoader(syn)
        data_loader_syn_encoded = data_loader_syn.encode()[0]

        metric_pra = AlphaPrecision()
        pra = metric_pra._evaluate(data_loader_real_encoded, data_loader_syn_encoded)
        pra_oc = list(pra.values())[:3]
        
        method_results.append(pra_oc)
    
    results[method] = method_results

# Example: print results for one method
print("Results for tabsyn:", results["tabsyn"])

Results for tabsyn: [[0.9794338086311045, 0.46756727073036797, 0.499835255354201], [0.9718025336590354, 0.48215266337177376, 0.4876441515650741], [0.9825257058455945, 0.4906534870950029, 0.49093904448105435], [0.9726584483705429, 0.48199890170236137, 0.5228995057660626], [0.9864030752333882, 0.4898846787479407, 0.5169686985172982]]


In [4]:
print(results)

{'synthpop': [[0.9900304872275559, 0.5360571114772104, 0.47775947281713343], [0.992483099471681, 0.5487095002745743, 0.4645799011532125], [0.9899373212899316, 0.5297968149368478, 0.47215815485996704], [0.993374614175614, 0.5348050521691379, 0.48533772652388796], [0.992130129334015, 0.5467105985722132, 0.4599670510708402]], 'arf': [[0.967966823836846, 0.477056562328391, 0.5070840197693575], [0.9696536575204984, 0.501900054914882, 0.48467874794069193], [0.9649090117214869, 0.5038769906644701, 0.4883031301482702], [0.9519740953246606, 0.48235035694673256, 0.48599670510708404], [0.9548077032324036, 0.4675013728720484, 0.5182866556836903]], 'privbayes': [[0.6770126304228445, 0.29741900054914894, 0.6230642504118616], [0.6764854475562878, 0.2695002745744097, 0.6204283360790774], [0.6583415705656233, 0.2907633168588688, 0.6171334431630972], [0.6828775398132895, 0.3047116968698518, 0.6131795716639209], [0.6704448105436572, 0.30642504118616143, 0.5957166392092257]], 'ctgan': [[0.8803840254502073

In [5]:
# Define metric names
metric_names = ["alpha_precision", "beta_recall", "authenticity"]

# Build long-format rows
rows = []
for method, method_results in results.items():
    for i, pra_values in enumerate(method_results, 1):
        for metric_index, metric_name in enumerate(metric_names):
            value = pra_values[metric_index] if len(pra_values) > metric_index else None
            rows.append({
                "method": method,
                "dataset_index": i,
                "metric": metric_name,
                "value": value
            })

# Create DataFrame
df_long = pd.DataFrame(rows)

# Save to CSV
df_long.to_csv("alpha_precision_results_long.csv", index=False)
print("Long-format results saved to alpha_precision_results_long.csv")

Long-format results saved to alpha_precision_results_long.csv


In [6]:
# Berechne den Mittelwert pro Methode und Metrik
mean_df = df_long.groupby(["method", "metric"])["value"].mean().reset_index()

# Optional: schöner formatieren
mean_df = mean_df.pivot(index="method", columns="metric", values="value").reset_index()

# Speichern als CSV
mean_df.to_csv("alpha_precision_results_means.csv", index=False)
print("Mittelwerte pro Methode gespeichert in alpha_precision_results_means.csv")


Mittelwerte pro Methode gespeichert in alpha_precision_results_means.csv
