In [455]:
from sdv.datasets.local import load_csvs


datasets = load_csvs(folder_name='ds3/')

In [456]:
datasets

{'Address':           lexid   street_name          city state    zip
 0    8888090530      Prentice      Columbia    MO  65218
 1    8888090531      Talmadge     Rochester    NY  14652
 2    8888090532           3rd       Raleigh    NC  27690
 3    8888090533  Glacier Hill    Greensboro    NC  27425
 4    8888090534        Summit     Waterbury    CT   6705
 ..          ...           ...           ...   ...    ...
 495  8888091025      Beilfuss  Jacksonville    FL  32259
 496  8888091026        Buhler   Chattanooga    TN  37450
 497  8888091027     Heffernan    Lake Worth    FL  33462
 498  8888091028        Fordem       El Paso    TX  79955
 499  8888091029        Luster    Washington    DC  20041
 
 [500 rows x 5 columns],
 'Auto':           lexid                vin state
 0    8888090530  3GYVKMEF3AG838180    KS
 1    8888090531  2C3CCAJG0DH720555    IN
 2    8888090532  2T1KE4EE0BC703497    KS
 3    8888090533  KMHGC4DE5BU857408    TX
 4    8888090534  JN8AZ2NC4C9893335    OH
 ..   

In [422]:
from sdv.metadata import MultiTableMetadata

metadata = MultiTableMetadata()

In [423]:
metadata.detect_from_csvs(
    folder_name='ds3/'
)
metadata.lo

In [424]:
metadata

{
    "tables": {
        "Address": {
            "columns": {
                "lexid": {
                    "sdtype": "numerical"
                },
                "street_name": {
                    "sdtype": "categorical"
                },
                "city": {
                    "sdtype": "categorical"
                },
                "state": {
                    "sdtype": "categorical"
                },
                "zip": {
                    "sdtype": "numerical"
                }
            }
        },
        "Auto": {
            "columns": {
                "lexid": {
                    "sdtype": "numerical"
                },
                "vin": {
                    "sdtype": "categorical"
                },
                "state": {
                    "sdtype": "categorical"
                }
            }
        },
        "Names": {
            "columns": {
                "lexid": {
                    "sdtype": "numerical"
                }

In [425]:
metadata.update_column(
    table_name = "Address",
    column_name = "lexid",
    sdtype = "id",
    regex_format = '^[0-9]{12}$'
)

In [427]:
metadata.update_column(
    table_name = "Address",
    column_name = "street_name",
    sdtype = "street_address",
    pii = True
)

In [428]:
metadata.update_column(
    table_name = "Address",
    column_name = "city",
    sdtype = "city",
    pii = True
)

In [429]:
metadata.update_column(
    table_name = "Address",
    column_name = "state",
    sdtype = "country_code",
    pii = True
)

In [430]:
metadata.update_column(
    table_name = "Address",
    column_name = "zip",
    sdtype = "postcode",
    pii = True
)

In [431]:
metadata.update_column(
    table_name = "Auto",
    column_name = "lexid",
    sdtype = "id",
    regex_format = '^[0-9]{12}$'
)

In [432]:
metadata.update_column(
    table_name = "Names",
    column_name = "lexid",
    sdtype = "id",
    regex_format = '^[0-9]{12}$'
)

In [433]:
metadata.update_column(
    table_name = "Names",
    column_name = "first_name",
    sdtype = "first_name",
    pii = True
)

In [434]:
metadata.update_column(
    table_name = "Names",
    column_name = "last_name",
    sdtype = "last_name",
    pii = True
)

In [435]:
metadata.update_column(
    table_name = "Names",
    column_name = "ssn",
    sdtype = "ssn",
    pii = True
)

In [437]:
metadata.update_column(
    table_name = "Names",
    column_name = "dob",
    sdtype = "datetime",
    datetime_format = "%Y-%m-%d"
)

In [438]:
metadata.set_primary_key(
    table_name = "Names",
    column_name = "lexid"
)

In [439]:
metadata.add_relationship(
    parent_table_name="Names",
    child_table_name="Address",
    parent_primary_key = "lexid",
    child_foreign_key="lexid"
)

In [440]:
metadata.add_relationship(
    parent_table_name="Names",
    child_table_name="Auto",
    parent_primary_key = "lexid",
    child_foreign_key="lexid"
)

In [464]:
metadata.to_dict()

{'tables': {'Address': {'columns': {'lexid': {'sdtype': 'id',
     'regex_format': '^[0-9]{12}$'},
    'street_name': {'sdtype': 'street_address', 'pii': True},
    'city': {'sdtype': 'city', 'pii': True},
    'state': {'sdtype': 'country_code', 'pii': True},
    'zip': {'sdtype': 'postcode', 'pii': True}}},
  'Auto': {'columns': {'lexid': {'sdtype': 'id',
     'regex_format': '^[0-9]{12}$'},
    'vin': {'sdtype': 'categorical'},
    'state': {'sdtype': 'categorical'}}},
  'Names': {'primary_key': 'lexid',
   'columns': {'lexid': {'sdtype': 'id', 'regex_format': '^[0-9]{12}$'},
    'first_name': {'sdtype': 'first_name', 'pii': True},
    'last_name': {'sdtype': 'last_name', 'pii': True},
    'ssn': {'sdtype': 'ssn', 'pii': True},
    'dob': {'sdtype': 'datetime', 'datetime_format': '%Y-%m-%d'},
    'age': {'sdtype': 'numerical'},
    'gender': {'sdtype': 'categorical'}}}},
 'relationships': [{'parent_table_name': 'Names',
   'child_table_name': 'Address',
   'parent_primary_key': 'lexi

In [452]:
from sdv.multi_table import HMASynthesizer

synthesizer = HMASynthesizer(metadata)


In [453]:
synthesizer.load_custom_constraint_classes(
    filepath='Custom_Constraints_age.py',
    class_names=['MatchDOBandAge']
)

# create constraints using the class

# if has_rewards=True, the amenities_fee=0
DOBandAge = {
    'constraint_class': 'MatchDOBandAge',
    'table_name' : 'Names',
    'constraint_parameters': {
        'column_names': ['dob', 'age'],
    }
}

# apply the constraints to the synthesizer
synthesizer.add_constraints([
    DOBandAge
])

In [457]:
synthesizer.fit(datasets)
synthesized_data = synthesizer.sample(scale=2)


Preprocess Tables: 100%|██████████| 3/3 [00:01<00:00,  2.09it/s]



Learning relationships:


(1/2) Tables 'Names' and 'Auto' ('lexid'): 100%|██████████| 500/500 [00:03<00:00, 145.98it/s]
(2/2) Tables 'Names' and 'Address' ('lexid'): 100%|██████████| 500/500 [00:00<00:00, 1478.41it/s]





Modeling Tables: 100%|██████████| 1/1 [00:00<00:00,  2.88it/s]


In [458]:
synthesized_data

{'Names':      lexid first_name   last_name          ssn         dob  age  gender
 0        0      James       Jones  153-80-6198  1996-09-26   27    Male
 1        1   Michelle       Curry  739-79-0005  1960-05-05   63    Male
 2        2    Barbara       Ortiz  243-81-8687  1980-01-28   43  Female
 3        3      James      Lucero  646-97-0698  1964-06-04   59  Female
 4        4    Cynthia      Prince  742-47-3309  1958-06-10   65  Female
 ..     ...        ...         ...          ...         ...  ...     ...
 995    995     Lauren    Williams  636-21-0117  1957-02-15   66    Male
 996    996     Andrea       Frost  636-12-3017  1986-09-26   37  Female
 997    997     Travis  Williamson  518-02-2492  1946-11-02   77    Male
 998    998     Joseph       Mason  838-24-0049  1952-05-28   71    Male
 999    999    Annette        Hill  243-32-4111  1968-05-14   55  Female
 
 [1000 rows x 7 columns],
 'Auto':      lexid                vin state
 0        0  WDDHF5KB4EA378830    FL
 1   

In [461]:
synthesized_data['Names'].to_csv('Output/Names.csv')

In [462]:
synthesized_data['Address'].to_csv('Output/Address.csv')
synthesized_data['Auto'].to_csv('Output/Auto.csv')

In [None]:
my_constraint = {
    'constraint_class': 'ScalarRange',
    'table_name': 'Names', # for multi table synthesizers
    'constraint_parameters': {
        'column_name': 'dob',
        'low_value': '1940-01-01',
        'high_value': '2010-01-01',
        'strict_boundaries': True
    }
}

synthesizer.add_constraints(constraints=[
    my_constraint
])