# Import necessary libraries

In [1]:
import pandas as pd
import numpy as np
import torch
import random
import pyreadr
from synthcity.plugins import Plugins
import os

# synthcity absolute
from synthcity.plugins.core.dataloader import DataLoader, GenericDataLoader
from synthcity.plugins.core.distribution import Distribution
from synthcity.plugins.core.plugin import Plugin
from synthcity.plugins.core.schema import Schema

    The default C++ compiler could not be found on your system.
    You need to either define the CXX environment variable or a symlink to the g++ command.
    For example if g++-8 is the command you can do
      import os
      os.environ['CXX'] = 'g++-8'
    


# Import data sets

In [2]:
# Read the two input tables from the raw-data folder.
#   real_train: CSV file parsed with pandas.
#   data_small: .Rds file read through pyreadr; the object stored in the file
#               is taken from the `None` key of the result returned by read_r.
real_train = pd.read_csv('D:/Master Statistik/Masterarbeit/masterarbeit/IST-3 Data/Raw Data/real_train.csv')
data_small = pyreadr.read_r("D:/Master Statistik/Masterarbeit/masterarbeit/IST-3 Data/Raw Data/data_small.Rds")[None]

In [3]:
# Replace every pandas "category" column of data_small with a plain object column.
category_columns = data_small.select_dtypes(include="category").columns
data_small[category_columns] = data_small[category_columns].astype(object)

# cuDNN flags for repeatable runs: benchmark mode off, deterministic mode on.
torch.backends.cudnn.benchmark = False
torch.backends.cudnn.deterministic = True

# Synthesize data

In [4]:
# Show column names, non-null counts and dtypes of data_small
# (sanity check that no "category" dtype is left before synthesis).
data_small.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 3035 entries, 0 to 3034
Data columns (total 6 columns):
 #   Column       Non-Null Count  Dtype  
---  ------       --------------  -----  
 0   outcome      3035 non-null   object 
 1   itt_treat    3035 non-null   object 
 2   nihss        3035 non-null   float64
 3   randdelay    3035 non-null   float64
 4   vis_infarct  3035 non-null   object 
 5   age          3035 non-null   float64
dtypes: float64(3), object(3)
memory usage: 142.4+ KB


In [5]:
# Initialize the PrivBayes model from synthcity.
# The plugin is looked up by its name; no further arguments are passed here,
# so the plugin's own default settings apply.
privbayes_model = Plugins().get("privbayes")

[2025-03-17T10:38:10.492479+0100][6184][CRITICAL] module disabled: C:\Users\Julia Hoepler\anaconda3\Lib\site-packages\synthcity\plugins\generic\plugin_goggle.py


## m = 50 for data_small

In [None]:
# Directory to save the datasets
save_directory = "D:/Master Statistik/Masterarbeit/masterarbeit/IST-3 Data/Data/privbayes/"

# Ensure the save directory exists
os.makedirs(save_directory, exist_ok=True)


def setSeed(seed):
    """Seed every random number generator this notebook relies on.

    Seeds Python's `random` module, NumPy's global generator and PyTorch
    (via `torch.manual_seed` and `torch.cuda.manual_seed`).

    Parameters
    ----------
    seed : int
        Value handed unchanged to each library's seeding function.
    """
    random.seed(seed)
    np.random.seed(seed)
    torch.manual_seed(seed)
    torch.cuda.manual_seed(seed)


N_DATASETS = 50  # m: number of synthetic datasets to create
N_ROWS_DATA_SMALL = 3035  # rows per synthetic dataset; data_small.info() reports 3035 entries

for i in range(N_DATASETS):
    # Seed BEFORE training as well: fitting happens before any seeding otherwise,
    # so any randomness used while fitting would make dataset i irreproducible.
    setSeed(i)

    # Train the PrivBayes model on data_small
    privbayes_model.fit(data_small)

    # Re-seed right before sampling so generation starts from the same RNG state
    # regardless of how many random draws the fit consumed.
    setSeed(i)
    synthetic_data_small = privbayes_model.generate(count=N_ROWS_DATA_SMALL).dataframe()

    # File names are numbered 1..N_DATASETS (i + 1), while the seeds are 0..N_DATASETS-1
    filename = f"syn_data_small_privbayes_{i + 1}.csv"

    # Save the synthetic dataset
    synthetic_data_small.to_csv(os.path.join(save_directory, filename), index=False)

    print(f"Saved: {filename}")

## m = 50 for train_data

In [7]:
# Directory to save the datasets
save_directory = "D:/Master Statistik/Masterarbeit/masterarbeit/IST-3 Data/Data/train_data/privbayes/"

# Ensure the save directory exists
os.makedirs(save_directory, exist_ok=True)


def setSeed(seed):
    """Seed every random number generator this notebook relies on.

    Seeds Python's `random` module, NumPy's global generator and PyTorch
    (via `torch.manual_seed` and `torch.cuda.manual_seed`).

    Parameters
    ----------
    seed : int
        Value handed unchanged to each library's seeding function.
    """
    random.seed(seed)
    np.random.seed(seed)
    torch.manual_seed(seed)
    torch.cuda.manual_seed(seed)


N_DATASETS = 50  # m: number of synthetic datasets to create
# Rows per synthetic dataset.
# NOTE(review): presumably the number of rows in real_train - confirm.
N_ROWS_REAL_TRAIN = 2428

for i in range(N_DATASETS):
    # Seed BEFORE training as well: fitting happens before any seeding otherwise,
    # so any randomness used while fitting would make dataset i irreproducible.
    setSeed(i)

    # Train the PrivBayes model on the training split
    privbayes_model.fit(real_train)

    # Re-seed right before sampling so generation starts from the same RNG state
    # regardless of how many random draws the fit consumed.
    setSeed(i)
    # The variable keeps its historical name although it holds synthetic
    # real_train data here; renaming it could break cells that read it later.
    synthetic_data_small = privbayes_model.generate(count=N_ROWS_REAL_TRAIN).dataframe()

    # File names are numbered 1..N_DATASETS (i + 1), while the seeds are 0..N_DATASETS-1
    filename = f"syn_real_train_privbayes_{i + 1}.csv"

    # Save the synthetic dataset
    synthetic_data_small.to_csv(os.path.join(save_directory, filename), index=False)

    print(f"Saved: {filename}")

100%|██████████| 5/5 [00:00<00:00,  5.91it/s]


  0%|          | 0/6 [00:00<?, ?it/s]



  0%|          | 0/6 [00:00<?, ?it/s]



Saved: syn_real_train_privbayes_1.csv


100%|██████████| 5/5 [00:00<00:00,  5.41it/s]


  0%|          | 0/6 [00:00<?, ?it/s]



  0%|          | 0/6 [00:00<?, ?it/s]



Saved: syn_real_train_privbayes_2.csv


100%|██████████| 5/5 [00:00<00:00,  6.01it/s]


  0%|          | 0/6 [00:00<?, ?it/s]



Saved: syn_real_train_privbayes_3.csv


100%|██████████| 5/5 [00:00<00:00,  5.22it/s]


  0%|          | 0/6 [00:00<?, ?it/s]



  0%|          | 0/6 [00:00<?, ?it/s]



Saved: syn_real_train_privbayes_4.csv


100%|██████████| 5/5 [00:01<00:00,  3.92it/s]


  0%|          | 0/6 [00:00<?, ?it/s]



  0%|          | 0/6 [00:00<?, ?it/s]



Saved: syn_real_train_privbayes_5.csv


100%|██████████| 5/5 [00:00<00:00,  5.52it/s]


  0%|          | 0/6 [00:00<?, ?it/s]



  0%|          | 0/6 [00:00<?, ?it/s]



Saved: syn_real_train_privbayes_6.csv


100%|██████████| 5/5 [00:00<00:00,  6.29it/s]


  0%|          | 0/6 [00:00<?, ?it/s]



Saved: syn_real_train_privbayes_7.csv


100%|██████████| 5/5 [00:03<00:00,  1.63it/s]


  0%|          | 0/6 [00:00<?, ?it/s]



  0%|          | 0/6 [00:00<?, ?it/s]



Saved: syn_real_train_privbayes_8.csv


100%|██████████| 5/5 [00:00<00:00,  6.01it/s]


  0%|          | 0/6 [00:00<?, ?it/s]



  0%|          | 0/6 [00:00<?, ?it/s]



Saved: syn_real_train_privbayes_9.csv


100%|██████████| 5/5 [00:00<00:00,  6.14it/s]


  0%|          | 0/6 [00:00<?, ?it/s]



Saved: syn_real_train_privbayes_10.csv


100%|██████████| 5/5 [00:00<00:00,  6.31it/s]


  0%|          | 0/6 [00:00<?, ?it/s]



  0%|          | 0/6 [00:00<?, ?it/s]



Saved: syn_real_train_privbayes_11.csv


100%|██████████| 5/5 [00:02<00:00,  1.78it/s]


  0%|          | 0/6 [00:00<?, ?it/s]



  0%|          | 0/6 [00:00<?, ?it/s]



Saved: syn_real_train_privbayes_12.csv


100%|██████████| 5/5 [00:01<00:00,  4.83it/s]


  0%|          | 0/6 [00:00<?, ?it/s]



  0%|          | 0/6 [00:00<?, ?it/s]



Saved: syn_real_train_privbayes_13.csv


100%|██████████| 5/5 [00:01<00:00,  4.34it/s]


  0%|          | 0/6 [00:00<?, ?it/s]



  0%|          | 0/6 [00:00<?, ?it/s]



Saved: syn_real_train_privbayes_14.csv


100%|██████████| 5/5 [00:01<00:00,  4.66it/s]


  0%|          | 0/6 [00:00<?, ?it/s]



  0%|          | 0/6 [00:00<?, ?it/s]



Saved: syn_real_train_privbayes_15.csv


100%|██████████| 5/5 [00:01<00:00,  4.58it/s]


  0%|          | 0/6 [00:00<?, ?it/s]



Saved: syn_real_train_privbayes_16.csv


100%|██████████| 5/5 [00:01<00:00,  3.94it/s]


  0%|          | 0/6 [00:00<?, ?it/s]



  0%|          | 0/6 [00:00<?, ?it/s]



Saved: syn_real_train_privbayes_17.csv


100%|██████████| 5/5 [00:01<00:00,  3.35it/s]


  0%|          | 0/6 [00:00<?, ?it/s]



  0%|          | 0/6 [00:00<?, ?it/s]



Saved: syn_real_train_privbayes_18.csv


100%|██████████| 5/5 [00:01<00:00,  3.46it/s]


  0%|          | 0/6 [00:00<?, ?it/s]



  0%|          | 0/6 [00:00<?, ?it/s]



Saved: syn_real_train_privbayes_19.csv


100%|██████████| 5/5 [00:01<00:00,  4.26it/s]


  0%|          | 0/6 [00:00<?, ?it/s]



Saved: syn_real_train_privbayes_20.csv


100%|██████████| 5/5 [00:01<00:00,  4.83it/s]


  0%|          | 0/6 [00:00<?, ?it/s]



  0%|          | 0/6 [00:00<?, ?it/s]



Saved: syn_real_train_privbayes_21.csv


100%|██████████| 5/5 [00:01<00:00,  4.51it/s]


  0%|          | 0/6 [00:00<?, ?it/s]



  0%|          | 0/6 [00:00<?, ?it/s]



Saved: syn_real_train_privbayes_22.csv


100%|██████████| 5/5 [00:01<00:00,  4.51it/s]


  0%|          | 0/6 [00:00<?, ?it/s]



  0%|          | 0/6 [00:00<?, ?it/s]



Saved: syn_real_train_privbayes_23.csv


100%|██████████| 5/5 [00:01<00:00,  4.42it/s]


  0%|          | 0/6 [00:00<?, ?it/s]



  0%|          | 0/6 [00:00<?, ?it/s]



Saved: syn_real_train_privbayes_24.csv


100%|██████████| 5/5 [00:01<00:00,  4.34it/s]


  0%|          | 0/6 [00:00<?, ?it/s]



  0%|          | 0/6 [00:00<?, ?it/s]



Saved: syn_real_train_privbayes_25.csv


100%|██████████| 5/5 [00:01<00:00,  4.91it/s]


  0%|          | 0/6 [00:00<?, ?it/s]



Saved: syn_real_train_privbayes_26.csv


100%|██████████| 5/5 [00:01<00:00,  4.87it/s]


  0%|          | 0/6 [00:00<?, ?it/s]



  0%|          | 0/6 [00:00<?, ?it/s]



Saved: syn_real_train_privbayes_27.csv


100%|██████████| 5/5 [00:01<00:00,  3.87it/s]


  0%|          | 0/6 [00:00<?, ?it/s]



  0%|          | 0/6 [00:00<?, ?it/s]



Saved: syn_real_train_privbayes_28.csv


100%|██████████| 5/5 [00:00<00:00,  5.29it/s]


  0%|          | 0/6 [00:00<?, ?it/s]



  0%|          | 0/6 [00:00<?, ?it/s]



Saved: syn_real_train_privbayes_29.csv


100%|██████████| 5/5 [00:00<00:00,  5.32it/s]


  0%|          | 0/6 [00:00<?, ?it/s]



  0%|          | 0/6 [00:00<?, ?it/s]



Saved: syn_real_train_privbayes_30.csv


100%|██████████| 5/5 [00:01<00:00,  4.75it/s]


  0%|          | 0/6 [00:00<?, ?it/s]



  0%|          | 0/6 [00:00<?, ?it/s]



Saved: syn_real_train_privbayes_31.csv


100%|██████████| 5/5 [00:01<00:00,  4.46it/s]


  0%|          | 0/6 [00:00<?, ?it/s]



  0%|          | 0/6 [00:00<?, ?it/s]



Saved: syn_real_train_privbayes_32.csv


100%|██████████| 5/5 [00:00<00:00,  5.32it/s]


  0%|          | 0/6 [00:00<?, ?it/s]



  0%|          | 0/6 [00:00<?, ?it/s]



Saved: syn_real_train_privbayes_33.csv


100%|██████████| 5/5 [00:00<00:00,  5.35it/s]


  0%|          | 0/6 [00:00<?, ?it/s]



  0%|          | 0/6 [00:00<?, ?it/s]



Saved: syn_real_train_privbayes_34.csv


100%|██████████| 5/5 [00:00<00:00,  5.33it/s]


  0%|          | 0/6 [00:00<?, ?it/s]



Saved: syn_real_train_privbayes_35.csv


100%|██████████| 5/5 [00:00<00:00,  5.42it/s]


  0%|          | 0/6 [00:00<?, ?it/s]



  0%|          | 0/6 [00:00<?, ?it/s]



Saved: syn_real_train_privbayes_36.csv


100%|██████████| 5/5 [00:00<00:00,  5.16it/s]


  0%|          | 0/6 [00:00<?, ?it/s]



  0%|          | 0/6 [00:00<?, ?it/s]



Saved: syn_real_train_privbayes_37.csv


100%|██████████| 5/5 [00:00<00:00,  5.32it/s]


  0%|          | 0/6 [00:00<?, ?it/s]



  0%|          | 0/6 [00:00<?, ?it/s]



Saved: syn_real_train_privbayes_38.csv


100%|██████████| 5/5 [00:01<00:00,  4.85it/s]


  0%|          | 0/6 [00:00<?, ?it/s]



Saved: syn_real_train_privbayes_39.csv


100%|██████████| 5/5 [00:00<00:00,  5.09it/s]


  0%|          | 0/6 [00:00<?, ?it/s]



  0%|          | 0/6 [00:00<?, ?it/s]



Saved: syn_real_train_privbayes_40.csv


100%|██████████| 5/5 [00:00<00:00,  5.52it/s]


  0%|          | 0/6 [00:00<?, ?it/s]



  0%|          | 0/6 [00:00<?, ?it/s]



Saved: syn_real_train_privbayes_41.csv


100%|██████████| 5/5 [00:00<00:00,  6.16it/s]


  0%|          | 0/6 [00:00<?, ?it/s]



  0%|          | 0/6 [00:00<?, ?it/s]



Saved: syn_real_train_privbayes_42.csv


100%|██████████| 5/5 [00:00<00:00,  5.73it/s]


  0%|          | 0/6 [00:00<?, ?it/s]



  0%|          | 0/6 [00:00<?, ?it/s]



Saved: syn_real_train_privbayes_43.csv


100%|██████████| 5/5 [00:01<00:00,  4.66it/s]


  0%|          | 0/6 [00:00<?, ?it/s]



  0%|          | 0/6 [00:00<?, ?it/s]



Saved: syn_real_train_privbayes_44.csv


100%|██████████| 5/5 [00:00<00:00,  6.00it/s]


  0%|          | 0/6 [00:00<?, ?it/s]



  0%|          | 0/6 [00:00<?, ?it/s]



Saved: syn_real_train_privbayes_45.csv


100%|██████████| 5/5 [00:00<00:00,  5.63it/s]


  0%|          | 0/6 [00:00<?, ?it/s]



  0%|          | 0/6 [00:00<?, ?it/s]



Saved: syn_real_train_privbayes_46.csv


100%|██████████| 5/5 [00:00<00:00,  5.18it/s]


  0%|          | 0/6 [00:00<?, ?it/s]



  0%|          | 0/6 [00:00<?, ?it/s]



Saved: syn_real_train_privbayes_47.csv


100%|██████████| 5/5 [00:01<00:00,  4.79it/s]


  0%|          | 0/6 [00:00<?, ?it/s]



  0%|          | 0/6 [00:00<?, ?it/s]



Saved: syn_real_train_privbayes_48.csv


100%|██████████| 5/5 [00:00<00:00,  5.84it/s]


  0%|          | 0/6 [00:00<?, ?it/s]



Saved: syn_real_train_privbayes_49.csv


100%|██████████| 5/5 [00:00<00:00,  6.34it/s]


  0%|          | 0/6 [00:00<?, ?it/s]



  0%|          | 0/6 [00:00<?, ?it/s]



Saved: syn_real_train_privbayes_50.csv
