In [None]:
!pip3 install sdv --user

LOADING DATA

In [None]:
from sdv.datasets.local import load_csvs

# Load the CSV tables found under data/ into a dict keyed by table name.
datasets = load_csvs("data/")
pokemon_types, pokemon, types = (
    datasets[table] for table in ("pokemon_types", "pokemon", "types")
)

# pokemon_types has no single-column key, so build one as "<pokemon_id>-<type_id>".
# The column is added to the same DataFrame object that `datasets` holds, so it is
# also visible through datasets["pokemon_types"].
pokemon_id_text = pokemon_types['pokemon_id'].astype(str)
type_id_text = pokemon_types['type_id'].astype(str)
pokemon_types['composite_key'] = pokemon_id_text + '-' + type_id_text

In [None]:
# Show which tables were loaded (the dict has one key per table).
datasets.keys()

CREATING METADATA

In [None]:
from sdv.metadata import MultiTableMetadata

# Start from an empty multi-table metadata object and let SDV detect the
# tables and their columns from the loaded DataFrames.
metadata = MultiTableMetadata()
metadata.detect_from_dataframes(datasets)

# Tip: print(metadata) to inspect what was detected.

MODIFY METADATA

In [None]:
# Columns taking part in a primary/foreign key relationship must use the 'id'
# sdtype. Mark both foreign-key columns of pokemon_types as 'id', and do the same
# for the composite key so that making it the primary key does not depend on
# what auto-detection guessed for that column.
for id_column in ('type_id', 'pokemon_id', 'composite_key'):
    metadata.update_column(
        table_name='pokemon_types',
        column_name=id_column,
        sdtype='id')

# Set the primary key of the pokemon_types table.
metadata.set_primary_key(
    table_name='pokemon_types',
    column_name='composite_key'
)

# Give explicit sdtypes to the columns that were detected as unknown.
metadata.update_column(table_name="pokemon", column_name="pokemon", sdtype="categorical")
metadata.update_column(table_name="pokemon", column_name="pokemon_order", sdtype="id")
metadata.update_column(table_name="types", column_name="type_name", sdtype="categorical")

# Plain-dict view of the metadata, taken after the updates so it reflects them.
metadata_dict = metadata.to_dict()

# Tip: inspect a single column with
# metadata_dict['tables']['pokemon_types']['columns']['type_id']

EDIT RELATIONSHIPS

In [None]:
# pokemon_types is the link table: each row points to one pokemon and one type.
# Declare both parent -> child relationships; every parent table is keyed by 'id'.
for parent_table, foreign_key in (('pokemon', 'pokemon_id'), ('types', 'type_id')):
    metadata.add_relationship(
        parent_table_name=parent_table,
        child_table_name='pokemon_types',
        parent_primary_key='id',
        child_foreign_key=foreign_key,
    )

VISUALISE METADATA

In [None]:
# Visual sanity check of the metadata before it is saved and used for training.
metadata.visualize()

In [None]:
from pathlib import Path

# save_to_json refuses to write over an existing file, which makes this cell fail
# on every re-run; remove any previous export first.
Path('metadata4.json').unlink(missing_ok=True)
metadata.save_to_json(filepath='metadata4.json')

# In the future, the metadata object can be reloaded from this file:
# metadata = MultiTableMetadata.load_from_json(filepath='metadata4.json')

In [None]:
# Reload the metadata from the JSON file written by this notebook
# ('metadata4.json'), so the synthesizer is trained with the metadata built here.

from sdv.metadata import MultiTableMetadata

metadata = MultiTableMetadata.load_from_json(
    filepath='metadata4.json')


In [None]:
from sdv.multi_table import HMASynthesizer

# Build the hierarchical (HMA) synthesizer from the metadata, with progress output.
synthesizer = HMASynthesizer(metadata, verbose=True)

# Learn from the real tables.
synthesizer.fit(datasets)

# Draw a synthetic dataset; the argument is a scale factor relative to the
# size of the training data, not a number of rows.
synthetic_data = synthesizer.sample(scale=1)

In [None]:
from pathlib import Path

# Persist the fitted synthesizer to disk.
synthesizer.save('synthesizer.pkl')

# Write each synthetic table to synth/<table>.csv. The output folder is created
# first because to_csv fails when the target directory does not exist.
output_dir = Path('synth')
output_dir.mkdir(parents=True, exist_ok=True)
for table_name in ('pokemon', 'pokemon_types', 'types'):
    synthetic_data[table_name].to_csv(output_dir / f'{table_name}.csv', index=False)