In [1]:
import pandas as pd
import random
import os
from synthcity.metrics.eval_statistical import AlphaPrecision
from synthcity.plugins.core.dataloader import GenericDataLoader

# Load real data once; every synthetic dataset is compared against this frame.
real = pd.read_csv("Data/medical_insurance_2.csv")

# Seed the global RNG before drawing the dummy column so the notebook produces
# the same values on every Restart & Run All (the original drew unseeded).
random.seed(42)

# Dummy binary column appended as the last column and declared explicitly as
# the loader's target, instead of relying on column order.
# NOTE(review): presumably added so the loader does not treat a
# high-cardinality original column as the target -- confirm.
# NOTE(review): this random column stays in the frame handed to the metric;
# confirm it is not meant to be excluded from the comparison.
real['tar'] = random.choices([0, 1], k=len(real))
data_loader_real = GenericDataLoader(real, target_column="tar")

# Only the first element of encode()'s result (the encoded loader) is kept.
data_loader_real_encoded = data_loader_real.encode()[0]

    The default C++ compiler could not be found on your system.
    You need to either define the CXX environment variable or a symlink to the g++ command.
    For example if g++-8 is the command you can do
      import os
      os.environ['CXX'] = 'g++-8'
    


In [2]:
# Generators to evaluate and the number of synthetic datasets expected per generator.
methods = ["synthpop", "arf", "privbayes", "ctgan", "tvae", "tabsyn"]
num_datasets = 5

# Store results: method name -> list with one entry per synthetic file that was
# found, each entry holding the first three values returned by the metric.
results = {}

for method in methods:
    method_results = []
    for i in range(1, num_datasets + 1):
        syn_path = f"Data/{method}/syn_medical_insurance_{method}_{i}.csv"

        # Missing files are reported and skipped, so a method may end up with
        # fewer than num_datasets entries.
        if not os.path.exists(syn_path):
            print(f"File not found: {syn_path}")
            continue

        syn = pd.read_csv(syn_path)

        # Per-file seeded generator: the dummy column is reproducible on every
        # run and does not depend on which other files exist or on how often
        # this cell has been re-executed (the original drew unseeded).
        tar_rng = random.Random(f"{method}-{i}")
        syn['tar'] = tar_rng.choices([0, 1], k=len(syn))

        # Name the dummy column as target explicitly rather than relying on it
        # being the last column of the frame.
        data_loader_syn = GenericDataLoader(syn, target_column="tar")
        # NOTE(review): the synthetic data is encoded independently of the real
        # data; confirm both encodings map categories to the same codes.
        data_loader_syn_encoded = data_loader_syn.encode()[0]

        metric_pra = AlphaPrecision()
        # NOTE(review): this calls the underscore-prefixed _evaluate directly;
        # confirm that bypassing the public entry point is intended.
        pra = metric_pra._evaluate(data_loader_real_encoded, data_loader_syn_encoded)
        # Keep the first three values in the order the metric returns them.
        # NOTE(review): relies on the key order of the returned mapping -- verify
        # these are the three values the later cells label.
        pra_oc = list(pra.values())[:3]

        method_results.append(pra_oc)

    results[method] = method_results

ValueError: The test_size = 555 should be greater or equal to the number of classes = 1337

In [None]:
# Define metric names: labels applied, by position, to the values stored for
# each evaluated dataset.
metric_names = ["alpha_precision", "beta_recall", "authenticity"]

# Column layout of the long table; fixed up front so the frame (and the CSV
# header) is well-formed even when no dataset was evaluated.
long_columns = ["method", "dataset_index", "metric", "value"]

# Build long-format rows: one row per (method, dataset, metric).
rows = []
for method, method_results in results.items():
    # dataset_index is the 1-based position within the stored results; if the
    # producing loop skipped a missing file, it no longer equals the file number.
    for i, pra_values in enumerate(method_results, 1):
        for metric_index, metric_name in enumerate(metric_names):
            # Fewer stored values than metric names -> record a missing value.
            value = pra_values[metric_index] if len(pra_values) > metric_index else None
            rows.append({
                "method": method,
                "dataset_index": i,
                "metric": metric_name,
                "value": value
            })

# Create DataFrame; explicit columns keep the schema when `rows` is empty, so
# later group-by steps do not fail with a KeyError on a column-less frame.
df_long = pd.DataFrame(rows, columns=long_columns)

# Save to CSV
df_long.to_csv("alpha_precision_results_long.csv", index=False)
print("Long-format results saved to alpha_precision_results_long.csv")

In [None]:
# Average each metric per method, then reshape so every metric becomes its own
# column and every method one row.
mean_df = (
    df_long
    .groupby(["method", "metric"])["value"]
    .mean()
    .reset_index()
    # Optional: nicer layout (wide table, one column per metric)
    .pivot(index="method", columns="metric", values="value")
    .reset_index()
)

# Persist the per-method means as CSV
means_path = "alpha_precision_results_means.csv"
mean_df.to_csv(means_path, index=False)
print("Mittelwerte pro Methode gespeichert in alpha_precision_results_means.csv")