## Prep and Input API Key

In [None]:
%%capture
!pip install -U gretel-client
!pip install fileupload
! pip jupyter labextension install @jupyter-widgets/jupyterlab-manager


In [None]:
# Authenticate with Gretel. Sign up for a free API key at https://gretel.ai/

import pandas as pd
from gretel_client import configure_session

# NOTE(review): api_key="prompt" presumably asks for the key interactively
# instead of embedding it in the notebook -- confirm against the gretel-client docs.
configure_session(
    api_key="prompt",
    cache="yes",
    validate=True,
)

# Show full cell contents when DataFrames are displayed (no column-width limit).
pd.set_option("max_colwidth", None)


In [None]:
# Create the Gretel project used by the rest of this notebook
# (going by the helper's name, an existing project is reused when present).

from gretel_client.projects import create_or_get_unique_project

PROJECT_NAME = "synthetic-data"
project = create_or_get_unique_project(name=PROJECT_NAME)


## Create the synthetic data configuration

In [None]:
import json

from gretel_client.projects.models import read_model_config

# Start from the "synthetics/default" model configuration.
config = read_model_config("synthetics/default")

# Override the number of training epochs; 50 is the recommended value.
synthetics_params = config["models"][0]["synthetics"]["params"]
synthetics_params["epochs"] = 50

# Uncomment the line below to view the model configuration details.
#print(json.dumps(config, indent=2))


## Load and preview the source dataset


In [None]:
# Upload your CSV data file: run this cell, then choose the file with the
# upload button that is rendered below it.
from ipywidgets import FileUpload

upload = FileUpload()

# Leaving the widget as the last expression makes the notebook render it.
upload

In [None]:
# Save the first uploaded file to disk as input.csv.
#
# ipywidgets 7 stores uploads in `.value` as a dict keyed by file name (and
# offers a `.data` shortcut); ipywidgets 8 removed `.data` and makes `.value`
# a tuple of dicts. Both layouts carry the raw bytes under "content", so read
# from `.value` to work with either version.
uploaded_files = upload.value
if isinstance(uploaded_files, dict):
    uploaded_files = list(uploaded_files.values())

# Check before opening the output file so that a missing upload neither
# truncates an existing input.csv nor fails with an opaque IndexError.
if not uploaded_files:
    raise ValueError(
        "No file has been uploaded yet: choose a CSV in the upload widget "
        "above, then re-run this cell."
    )

# "content" is bytes (ipywidgets 7) or a memoryview (ipywidgets 8); both can
# be written directly to a file opened in binary mode.
with open("input.csv", "wb") as input_file:
    input_file.write(uploaded_files[0]["content"])

In [None]:
# Read the uploaded CSV and stage a copy of it as the training file.
import pandas as pd

dataset_path = "input.csv"
df = pd.read_csv(dataset_path)

# index=False keeps the pandas row index out of the training data.
df.to_csv(path_or_buf="training_data.csv", index=False)

# Uncomment the last line to preview the DataFrame that will be used to train the synthetic model.
#df

## Train the synthetic model

In this step, we will task the worker running in the Gretel cloud, or locally, to train a synthetic model on the source dataset.


In [None]:
from gretel_client.helpers import poll

# Build a model object in the project from the configuration and the staged
# training file.
model = project.create_model_obj(
    model_config=config,
    data_source="training_data.csv",
)

# Submit the training job to the Gretel cloud, then follow it with poll()
# (presumably until the job finishes -- confirm in the gretel-client docs).
model.submit_cloud()
poll(model)


In [None]:
# Load the model's "data_preview" artifact (a gzip-compressed CSV) and show it.

preview_link = model.get_artifact_link("data_preview")
synthetic_df = pd.read_csv(preview_link, compression="gzip")

synthetic_df


## View the synthetic data quality report


In [None]:
# Display the report that compares the statistical properties of the training
# data and the synthetic data.

import IPython
# Import under an alias: `from smart_open import open` would replace the
# builtin open() for every cell that runs after this one.
from smart_open import open as smart_open_file

# Read the report inside a `with` block so the handle is closed after reading.
with smart_open_file(model.get_artifact_link("report")) as report_file:
    report_html = report_file.read()

IPython.display.HTML(data=report_html)


## Generate unlimited synthetic data

You can now use the trained synthetic model to generate as much synthetic data as you like.


In [None]:
# Ask the trained model for additional synthetic records.

generation_params = {"num_records": 100, "max_invalid": 500}
record_handler = model.create_record_handler_obj(params=generation_params)

# Submit the generation job to the Gretel cloud and follow it with poll().
record_handler.submit_cloud()
poll(record_handler)


In [None]:
# Load the generated records (a gzip-compressed CSV artifact) and write them
# to a local file.

synthetic_df = pd.read_csv(record_handler.get_artifact_link("data"), compression="gzip")

# index=False keeps the pandas row index out of the file, so the synthetic CSV
# has the same columns as the data (training_data.csv is written the same way).
synthetic_df.to_csv("synth_data.csv", index=False)

# A notebook only renders the last expression of a cell, so the preview has to
# come after the file is written.
synthetic_df

