# Prepare synthetic data to define census of wards in the tower flow report
It is going to be much easier to develop if you have realistic synthetic data.
Here we take a SQL query that generates a single tabular output.
We run that query against the live identifiable data once.
We then use the [Synthetic Data Vault](https://sdv.dev/SDV/index.html) to prepare a synthetic model of those data.
The code below serves as a vignette for that process but will need adjusting to match the exact contents of the original query.

More complex examples that include multiple tables with joins and dependencies are also possible.

This notebook should be run interactively just once.

In [None]:
import os
from pathlib import Path

import pandas as pd
from sqlalchemy import create_engine

In [None]:
# Construct the PostgreSQL connection from credentials held in environment
# variables, so that no secrets are stored in the notebook itself.
from urllib.parse import quote

uds_host = os.getenv("EMAP_DB_HOST")
uds_user = os.getenv("EMAP_DB_USER")
uds_passwd = os.getenv("EMAP_DB_PASSWORD")

# Fail early with a clear message: os.getenv returns None for an unset
# variable, which would otherwise be interpolated into the URL as the
# literal text "None".
_missing = [
    name
    for name, value in (
        ("EMAP_DB_HOST", uds_host),
        ("EMAP_DB_USER", uds_user),
        ("EMAP_DB_PASSWORD", uds_passwd),
    )
    if value is None
]
if _missing:
    raise RuntimeError(f"Missing environment variable(s): {', '.join(_missing)}")

# Percent-encode the user name and password so that reserved characters
# (e.g. "@", ":" or "/") cannot corrupt the structure of the URL.
dsn = (
    f"postgresql://{quote(uds_user, safe='')}:{quote(uds_passwd, safe='')}"
    f"@{uds_host}:5432/uds"
)
emapdb_engine = create_engine(dsn)

In [None]:
from wards import wards

In [None]:
# Preview the ward list from index 49 onwards (a later cell's comment labels this slice "WMS").
wards[49:]

In [None]:
# Read the SQL file into the query string 'q' and choose the query parameters;
# the query itself is executed in the following cell.
q = Path("beds.sql").read_text()

# Toggle for testing: True restricts the run to the wards from index 49
# onwards (labelled "WMS" in the original note); False uses a copy of the
# full ward list. This replaces two back-to-back assignments where the
# first was always overwritten by the second.
_wms_only = False
_wards = wards[49:] if _wms_only else wards[:]

# NOTE(review): the query cell that follows passes an empty list for
# "locations", so this value is not used by the visible cells - confirm intent.
_locations = ["T06C^T06C BY08^BY08-36"]

In [None]:
# A single ward given as a bare string must become a one-element list.
# list("abc") would instead split it into characters ['a', 'b', 'c'].
if isinstance(_wards, str):
    _wards = [_wards]

# Run the query once against the live database and load the result.
# NOTE(review): "locations" is passed as an empty list here rather than the
# _locations value defined in the previous cell - confirm this is intended.
df = pd.read_sql_query(
    sql=q, con=emapdb_engine, params={"wards": _wards, "locations": []}
)
df.head()

## Fake Personally Identifiable Information

In [None]:
from faker import Faker

# Single Faker instance used by the following cell to generate replacement values.
# No seed is set here, so the generated values are not reproducible between runs.
fake = Faker()

In [None]:
# Overwrite each identifying column with freshly generated fake values.
# The table pairs a column name with a zero-argument generator; columns are
# processed in the order listed. Missing values are skipped by the mapping
# (na_action="ignore"), so gaps in the original data stay as gaps.
_pii_generators = {
    "encounter": lambda: int(fake.numerify("10########")),
    "mrn": lambda: int(fake.numerify("40######")),
    "lastname": lambda: fake.last_name().upper(),
    "firstname": lambda: fake.first_name().upper(),
    "date_of_birth": lambda: fake.date_of_birth(),
    "cvl_hv_id": lambda: fake.random_number(digits=6, fix_len=True),
    "ovl_hv_id": lambda: fake.random_number(digits=6, fix_len=True),
}


def _replace_non_null(column, make_value):
    """Replace every non-missing entry of df[column] with make_value()."""
    # The original value is deliberately discarded: nothing derived from
    # the real data ends up in the replacement.
    df[column] = df[column].map(lambda _original: make_value(), na_action="ignore")


for _column, _make_value in _pii_generators.items():
    _replace_non_null(_column, _make_value)

In [None]:
# inspect an example: show the first row to check the identifying fields were replaced
df.iloc[0]

### Save the synthetic data via sqlite


In [None]:
# Write the frame with faked identifiers to a local SQLite file, replacing
# any existing "beds" table and omitting the dataframe index.
engine_sqlite = create_engine("sqlite:///beds.db")
con = engine_sqlite.connect()  # left open: the following cell reads back through it
df.to_sql(
    name="beds",
    con=con,
    index=False,
    if_exists="replace",
)

In [None]:
# Read the "beds" table back over the same connection to confirm the save worked.
pd.read_sql("beds", con=con)