# Importing required libraries and setting path

In [15]:
import sdv # Synthetic data library
from sdv.metadata import SingleTableMetadata
#from sdv.datasets.local import load_csvs
from sdv.single_table import GaussianCopulaSynthesizer
import pandas as pd # For handling data as dataframes
import sys # Used for setting a path to find the dataset

sys.path.append('..') # Setting parent directory as current directory


# Data Preparation

In [14]:
#Load Dataset
adult = pd.read_csv("../datasets/adult/adult.data")  # load dataset, adjust if data is stored somewhere else

#Preparing Meta Data
adult_metadata = SingleTableMetadata() # Create blank meta data object
adult_metadata.detect_from_dataframe(adult) # Detect meta data

# Print Meta Data information
python_dict = adult_metadata.to_dict() # Convert to python dictionary
print("adult_metadata:\n", python_dict) # Print meta data


adult_metadata:
 {'METADATA_SPEC_VERSION': 'SINGLE_TABLE_V1', 'columns': {'39': {'sdtype': 'numerical'}, ' State-gov': {'sdtype': 'administrative_unit', 'pii': True}, ' 77516': {'sdtype': 'numerical'}, ' Bachelors': {'sdtype': 'categorical'}, ' 13': {'sdtype': 'numerical'}, ' Never-married': {'sdtype': 'categorical'}, ' Adm-clerical': {'sdtype': 'categorical'}, ' Not-in-family': {'sdtype': 'categorical'}, ' White': {'sdtype': 'categorical'}, ' Male': {'sdtype': 'categorical'}, ' 2174': {'sdtype': 'numerical'}, ' 0': {'sdtype': 'numerical'}, ' 40': {'sdtype': 'numerical'}, ' United-States': {'sdtype': 'categorical'}, ' <=50K': {'sdtype': 'categorical'}}}


# Modeling

In [21]:
# Step 1: Create synthesitzer object
synthesizer = GaussianCopulaSynthesizer(adult_metadata)

# Step 2: Train the synthesizer
synthesizer.fit(adult)

# Step 3: Generate synthetic data
synthetic_data = synthesizer.sample(1000)

# Print synthetic data
print(adult)
print(synthetic_data)
synthetic_data.to_csv('../../../datasets/adult/synthetic_adult.data', index=False)


       39          State-gov   77516    Bachelors   13        Never-married  \
0      50   Self-emp-not-inc   83311    Bachelors   13   Married-civ-spouse   
1      38            Private  215646      HS-grad    9             Divorced   
2      53            Private  234721         11th    7   Married-civ-spouse   
3      28            Private  338409    Bachelors   13   Married-civ-spouse   
4      37            Private  284582      Masters   14   Married-civ-spouse   
...    ..                ...     ...          ...  ...                  ...   
32555  27            Private  257302   Assoc-acdm   12   Married-civ-spouse   
32556  40            Private  154374      HS-grad    9   Married-civ-spouse   
32557  58            Private  151910      HS-grad    9              Widowed   
32558  22            Private  201490      HS-grad    9        Never-married   
32559  52       Self-emp-inc  287927      HS-grad    9   Married-civ-spouse   

             Adm-clerical   Not-in-family   White  