In [1]:
import time
import json
import os

from synthcity.plugins.core.dataloader import GenericDataLoader
from synthcity.plugins import Plugins
from synthcity.utils.reproducibility import enable_reproducible_results
import openml
from sklearn.model_selection import KFold

from metrics import evaluate

  from .autonotebook import tqdm as notebook_tqdm


    The default C++ compiler could not be found on your system.
    You need to either define the CXX environment variable or a symlink to the g++ command.
    For example if g++-8 is the command you can do
      import os
      os.environ['CXX'] = 'g++-8'
    


In [11]:

# ---------------------------------
# BENCHMARK PARAMETERS

generator = "arf"  # plugin name handed to Plugins().get(...)
metrics = [  # metric names forwarded to evaluate(...)
    "mmd",
    "wasserstein",
    "precision-recall",
    "authenticity",
    "domias",
]
cv_folds = 2  # number of KFold splits
n_init = 1  # number of generator initializations per fold
n_samples = 1000  # keep only the first n_samples rows; None keeps every row
seed = 0  # seeds the global RNG state, the KFold shuffle and the metrics
enable_reproducible_results(seed)
hparams = {}  # extra keyword arguments forwarded to the generator plugin
results = {}  # populated per fold / initialization by the benchmark loop

# load data
dataset = openml.datasets.get_dataset("Diabetes130US")
# get_data returns a tuple of which only the first element (the dataframe) is
# needed. Indexing, rather than unpacking the rest into `_`, avoids rebinding
# `_` in the notebook namespace, where IPython uses it for the last cell output.
X = dataset.get_data(dataset_format="dataframe")[0]
X = X.drop(columns=["encounter_id", "patient_nbr"])

# Explicit positional slice; the row cap is a tunable parameter (see n_samples)
# instead of a magic number hidden in the loading code.
X = X.iloc[:n_samples]

# ---------------------------------
# START BENCHMARKING
# perform k fold CV
time_start = time.perf_counter()

# Neither the plugin loader nor the splitter depends on the fold or the
# initialization index, so both are created once instead of per iteration.
plugins = Plugins()
kfold = KFold(n_splits=cv_folds, shuffle=True, random_state=seed)

for fold, (train, test) in enumerate(kfold.split(X)):
    fold_key = f"fold: {fold}"
    print(fold_key)
    results[fold_key] = {}
    # get train-test data (train / test are positional row indices from KFold)
    X_train = GenericDataLoader(data=X.iloc[train])
    X_test = GenericDataLoader(data=X.iloc[test])

    # synthesize for multiple initializations
    for i in range(n_init):
        # each initialization uses its own index as the generator's random state
        hparams["random_state"] = i
        plugin = plugins.get(generator, **hparams)
        # unconditional generation (we do not consider a specific target feature)
        plugin.fit(X_train)
        # draw as many synthetic rows as there are rows in the test split
        X_syn = plugin.generate(len(test))
        # evaluation
        results[fold_key][f"init: {i}"] = evaluate(
            X_train.dataframe(),
            X_test.dataframe(),
            X_syn.dataframe(),
            metrics,
            random_state=seed,  # we use the same random state for metrics across initializations
        )
time_end = time.perf_counter()
# wall-clock seconds for the whole loop: fitting, generation and evaluation
results["timer"] = time_end - time_start

# save results
# exist_ok=True creates the directory only when missing, without the
# check-then-create race of a separate os.path.exists() test.
os.makedirs("results", exist_ok=True)
with open(f"results/{generator}.json", "w") as f:
    json.dump(results, f, indent=4)


[2025-04-07T14:38:12.368858+0200][25748][CRITICAL] module disabled: c:\Users\jlachterberg\Documents\EvaluationMetricsSD\EvaluationMetricsSD\.conda\lib\site-packages\synthcity\plugins\generic\plugin_goggle.py
[2025-04-07T14:38:12.369858+0200][25748][CRITICAL] load failed: module 'synthcity.plugins.generic.plugin_great' has no attribute 'plugin'
[2025-04-07T14:38:12.369858+0200][25748][CRITICAL] load failed: module 'synthcity.plugins.generic.plugin_great' has no attribute 'plugin'
[2025-04-07T14:38:12.370859+0200][25748][CRITICAL] module plugin_great load failed


fold: 0
Initial accuracy is 0.652
Iteration number 1 reached accuracy of 0.459.
Epoch 1/10
[1m331/331[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 3ms/step - loss: 0.3908
Epoch 2/10
[1m331/331[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 3ms/step - loss: 0.3376
Epoch 3/10
[1m331/331[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 3ms/step - loss: 0.3348
Epoch 4/10
[1m331/331[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 4ms/step - loss: 0.3349
Epoch 5/10
[1m331/331[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 4ms/step - loss: 0.3349
Epoch 6/10
[1m331/331[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 4ms/step - loss: 0.3345
Epoch 7/10
[1m331/331[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 3ms/step - loss: 0.3335
Epoch 8/10
[1m331/331[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 3ms/step - loss: 0.3340
Epoch 9/10
[1m331/331[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 5ms/step - loss: 0.3344
Epoch 10/10

[2025-04-07T14:38:36.385775+0200][25748][CRITICAL] module disabled: c:\Users\jlachterberg\Documents\EvaluationMetricsSD\EvaluationMetricsSD\.conda\lib\site-packages\synthcity\plugins\generic\plugin_goggle.py
[2025-04-07T14:38:36.386711+0200][25748][CRITICAL] load failed: module 'synthcity.plugins.generic.plugin_great' has no attribute 'plugin'
[2025-04-07T14:38:36.387724+0200][25748][CRITICAL] load failed: module 'synthcity.plugins.generic.plugin_great' has no attribute 'plugin'
[2025-04-07T14:38:36.388729+0200][25748][CRITICAL] module plugin_great load failed


fold: 1
Initial accuracy is 0.644
Iteration number 1 reached accuracy of 0.45.
Epoch 1/10
[1m333/333[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 3ms/step - loss: 0.3951
Epoch 2/10
[1m333/333[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 3ms/step - loss: 0.3394
Epoch 3/10
[1m333/333[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 4ms/step - loss: 0.3365
Epoch 4/10
[1m333/333[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 4ms/step - loss: 0.3361
Epoch 5/10
[1m333/333[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 3ms/step - loss: 0.3358
Epoch 6/10
[1m333/333[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 4ms/step - loss: 0.3365
Epoch 7/10
[1m333/333[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 3ms/step - loss: 0.3354
Epoch 8/10
[1m333/333[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 3ms/step - loss: 0.3352
Epoch 9/10
[1m333/333[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 3ms/step - loss: 0.3347
Epoch 10/10
