In [60]:
import sdv
print(sdv.__version__)
import pandas as pd
import commonfunc as common
import numpy as np

1.2.1


In [61]:
# Ignore warnings
import warnings
warnings.filterwarnings("ignore")

In [62]:
replacement_value = 'Unknown'


In [63]:
from sdv.datasets.local import load_csvs
from sdv.datasets.demo import download_demo

# This is the default folder name that the GOogle Colab notebook uses.
# Change this if you have your own folder with CSV files.
FOLDER_NAME = 'sampled_data_csv_100/'

try:
  datasets = load_csvs(folder_name='C:\\Users\\shrus\\Documents\\Synthetic-data-generation\\sampled_data_csv_100\\')
except ValueError:
  print('You have not uploaded any csv files. Using some demo data instead.')
  datasets, _ = download_demo(
    modality='multi_table',
    dataset_name='fake_hotels'
  )

In [5]:
admissions_df=datasets['admissions']
callout_df=datasets['callout']
chartevents_df=datasets['chartevents']
cptevents_df = datasets['cptevents']
icustays_df = datasets['icustays']
inputevents_mv_df = datasets['inputevents_mv']
labevents_df = datasets['labevents']
microbiologyevents_df = datasets['microbiologyevents']
outputevents_df = datasets['outputevents']
patients_df = datasets['patients']
prescriptions_df = datasets['prescriptions']
procedureevents_mv_df = datasets['procedureevents_mv']
procedures_icd_df = datasets['procedures_icd']

## Data Preprocessing

### Dropping row_id column

In [6]:
def drop_row_id_column(data):
    if 'row_id' in data.columns:
        data.drop('row_id', axis=1, inplace=True)

In [7]:
drop_row_id_column(admissions_df)
drop_row_id_column(callout_df)
drop_row_id_column(chartevents_df)
drop_row_id_column(cptevents_df)
drop_row_id_column(icustays_df)
drop_row_id_column(inputevents_mv_df)
drop_row_id_column(labevents_df)
drop_row_id_column(microbiologyevents_df)
drop_row_id_column(outputevents_df)
drop_row_id_column(patients_df)
drop_row_id_column(prescriptions_df)
drop_row_id_column(procedureevents_mv_df)
drop_row_id_column(procedures_icd_df)

## Handling Date and time objects

In [8]:
def changeObjectToDateTime(df,list):
    for i in list:
        df[i]=pd.to_datetime(df[i])

In [9]:

changeObjectToDateTime(admissions_df,['admittime','dischtime','deathtime','edregtime','edouttime'])
changeObjectToDateTime(callout_df,['createtime','updatetime','acknowledgetime','outcometime','firstreservationtime','currentreservationtime'])
changeObjectToDateTime(chartevents_df,['charttime','storetime'])
changeObjectToDateTime(icustays_df,['intime','outtime'])
changeObjectToDateTime(inputevents_mv_df,['starttime','endtime','storetime','comments_date'])
changeObjectToDateTime(labevents_df,['charttime'])
changeObjectToDateTime(microbiologyevents_df,['chartdate','charttime'])
changeObjectToDateTime(outputevents_df,['charttime','storetime'])
changeObjectToDateTime(patients_df,['dob','dod','dod_hosp','dod_ssn'])
changeObjectToDateTime(prescriptions_df,['startdate','enddate'])
changeObjectToDateTime(procedureevents_mv_df,['starttime','endtime','storetime','comments_date'])

## Preprocessing individual tables

### Patients table:

In [10]:
patients_df.head()

Unnamed: 0,subject_id,gender,dob,dod,dod_hosp,dod_ssn,expire_flag
0,87197,F,2054-10-12,NaT,NaT,NaT,0
1,1765,M,2183-05-09,NaT,NaT,NaT,0
2,21269,M,2078-07-25,2162-12-10,2162-12-10,2162-12-10,1
3,5692,M,2099-09-01,NaT,NaT,NaT,0
4,30096,F,2080-05-03,2161-10-08,NaT,2161-10-08,1


In [11]:
admission_date = admissions_df.loc[:, ['subject_id', 'admittime']]
patients_mid = pd.merge(left=patients_df, right=admission_date, how='inner', on=['subject_id'])

In [12]:
patients_mid

Unnamed: 0,subject_id,gender,dob,dod,dod_hosp,dod_ssn,expire_flag,admittime
0,87197,F,2054-10-12,NaT,NaT,NaT,0,2109-11-16 08:11:00
1,1765,M,2183-05-09,NaT,NaT,NaT,0,2183-05-09 08:35:00
2,21269,M,2078-07-25,2162-12-10,2162-12-10,2162-12-10,1,2162-10-03 17:02:00
3,5692,M,2099-09-01,NaT,NaT,NaT,0,2143-10-23 15:42:00
4,5692,M,2099-09-01,NaT,NaT,NaT,0,2147-12-15 23:57:00
...,...,...,...,...,...,...,...,...
127,58667,M,2067-06-29,2130-08-20,2130-08-20,2130-08-20,1,2130-08-19 20:03:00
128,3391,F,2143-03-01,NaT,NaT,NaT,0,2143-03-01 19:47:00
129,31650,M,2090-11-19,NaT,NaT,NaT,0,2177-06-07 17:54:00
130,15895,M,2039-10-21,2124-07-11,NaT,2124-07-11,1,2116-07-04 17:13:00


In [13]:
admission_date.drop_duplicates(subset=['subject_id'], keep='first', inplace=True)


In [14]:
admission_date

Unnamed: 0,subject_id,admittime
0,87197,2109-11-16 08:11:00
1,1765,2183-05-09 08:35:00
2,21269,2162-10-03 17:02:00
3,5692,2143-10-23 15:42:00
6,30096,2161-08-27 15:11:00
...,...,...
127,58667,2130-08-19 20:03:00
128,3391,2143-03-01 19:47:00
129,31650,2177-06-07 17:54:00
130,15895,2116-07-04 17:13:00


In [15]:
# Drop useless colums
patients_df.drop(['expire_flag', 'dod'], axis=1, inplace=True)

In [16]:
patients_df.isnull().sum()

subject_id     0
gender         0
dob            0
dod_hosp      80
dod_ssn       75
dtype: int64

In [17]:
patients_df.dtypes

subject_id             int64
gender                object
dob           datetime64[ns]
dod_hosp      datetime64[ns]
dod_ssn       datetime64[ns]
dtype: object

In [18]:
patients_df = pd.merge(left=patients_df, right=admission_date, how='inner', on=['subject_id'])

In [19]:
import datetime
import random

def adjust_age_over_90(df):
    years_100 = datetime.timedelta(days = (365 * 100 + 100/4))
    random_days_10_years = datetime.timedelta(days = random.randint(0, 10)*365)
    
    if (df['dob'] - datetime.datetime(1970,1,1)).total_seconds() < 0:
        return (df['admittime'] - years_100 + random_days_10_years)
    else:
        return df['dob']

In [20]:
patients_df['dob'] = patients_df.apply(adjust_age_over_90, axis=1)

In [21]:
# Calculate the age
def get_birth_delta(df):
    return df['admittime'] - df['dob']

# Randomly pick a time between 2001-2012
def adjust_admittime(df):
    return pd.Timestamp(year=random.randint(2001, 2012), \
        month=df['admittime'].month, day=df['admittime'].day)

# Renew the date of birth
def reset_dob(df):
    dob = df['admittime'] - df['dob']
    return pd.to_datetime(dob.date())

In [22]:
patients_df['dob'] = patients_df.apply(get_birth_delta, axis=1)
patients_df['admittime'] = patients_df.apply(adjust_admittime, axis=1)
patients_df['dob'] = patients_df.apply(reset_dob, axis=1)

In [23]:
patients_df

Unnamed: 0,subject_id,gender,dob,dod_hosp,dod_ssn,admittime
0,87197,F,1949-10-12,NaT,NaT,2004-11-16
1,1765,M,2003-05-08,NaT,NaT,2003-05-09
2,21269,M,1917-07-25,2162-12-10,2162-12-10,2001-10-03
3,5692,M,1957-09-01,NaT,NaT,2001-10-23
4,30096,F,1920-05-03,NaT,2161-10-08,2001-08-27
...,...,...,...,...,...,...
95,58667,M,1943-06-29,2130-08-20,2130-08-20,2006-08-19
96,3391,F,2006-02-28,NaT,NaT,2006-03-01
97,31650,M,1915-11-19,NaT,NaT,2002-06-07
98,15895,M,1929-10-20,NaT,2124-07-11,2006-07-04


In [24]:
patients_df.drop(['admittime'], axis=1, inplace=True)


### Admissions Table

In [25]:
admissions_df['deathtime'].unique()

array([                          'NaT', '2128-04-16T12:00:00.000000000',
       '2148-11-03T22:37:00.000000000', '2166-03-28T06:00:00.000000000',
       '2168-01-06T16:33:00.000000000', '2106-04-18T09:45:00.000000000',
       '2185-12-11T08:00:00.000000000', '2140-02-04T20:55:00.000000000',
       '2106-03-23T20:16:00.000000000', '2163-01-21T15:45:00.000000000',
       '2188-10-12T15:15:00.000000000', '2130-08-20T01:56:00.000000000'],
      dtype='datetime64[ns]')

In [26]:
admissions_df.drop(['diagnosis', 'hospital_expire_flag'], axis=1, inplace=True)

In [27]:
dob_df = patients_df.loc[:, ['subject_id', 'dob']]

In [28]:
admissions_df = pd.merge(dob_df, admissions_df, how='inner', on='subject_id')

In [29]:
admissions_df.isnull().sum()

subject_id                0
dob                       0
hadm_id                   0
admittime                 0
dischtime                 0
deathtime               121
admission_type            0
admission_location        0
discharge_location        0
insurance                 0
language                 45
religion                  1
marital_status           19
ethnicity                 0
edregtime                62
edouttime                62
has_chartevents_data      0
dtype: int64

In [30]:
admissions_df.dtypes

subject_id                       int64
dob                     datetime64[ns]
hadm_id                          int64
admittime               datetime64[ns]
dischtime               datetime64[ns]
deathtime               datetime64[ns]
admission_type                  object
admission_location              object
discharge_location              object
insurance                       object
language                        object
religion                        object
marital_status                  object
ethnicity                       object
edregtime               datetime64[ns]
edouttime               datetime64[ns]
has_chartevents_data             int64
dtype: object

In [31]:
(admissions_df['dob'] < admissions_df['admittime']).value_counts()

True    132
dtype: int64

In [32]:
admissions_df = admissions_df.drop(admissions_df.loc[(pd.isna(admissions_df['deathtime']) == False) & (admissions_df['dischtime'] != admissions_df['deathtime'])].index)

In [33]:
import datetime
import random

def adjust_age_over_90(df):
    '''
    This method is to adjust the invalid date in 'dob' (which is 18xx)
    Process: Use the admittime minus 90 - 100 years, to let the age of patients are between 90 to 100
    '''

    years_100 = datetime.timedelta(days = (365 * 100 + 100/4))
    random_days_10_years = datetime.timedelta(days = random.randint(0, 10)*365)
    
    if (df['dob'] - datetime.datetime(1970,1,1)).total_seconds() < 0:
        return (df['admittime'] - years_100 + random_days_10_years)
    else:
        return df['dob']

In [34]:
admissions_df['dob'] = admissions_df.apply(adjust_age_over_90, axis=1)

In [35]:
# admissions_df['deathtime'] = admissions_df['deathtime'].apply(lambda flag: 1 if pd.isna(flag) == False else np.NaN)

In [36]:
# admissions_df['edouttime'] = admissions_df.apply(common.time_process, args=('edregtime', 'edouttime'), axis=1)
# admissions_df['edregtime'] = admissions_df.apply(common.time_process, args=('admittime', 'edregtime'), axis=1)
# admissions_df['dischtime'] = admissions_df.apply(common.time_process, args=('admittime', 'dischtime'), axis=1)
# admissions_df['admittime'] = admissions_df.apply(common.time_process, args=('dob', 'admittime'), axis=1)

In [37]:
admissions_df.drop(['dob'], axis=1, inplace=True)

In [38]:
replacement_value = 'Unknown'

# Fill NaN values with the specified replacement_value in the column
admissions_df['language'] = admissions_df['language'].fillna(replacement_value)
admissions_df['religion'] = admissions_df['religion'].fillna(replacement_value)
admissions_df['marital_status'] = admissions_df['marital_status'].fillna(replacement_value)

In [39]:
admissions_df

Unnamed: 0,subject_id,hadm_id,admittime,dischtime,deathtime,admission_type,admission_location,discharge_location,insurance,language,religion,marital_status,ethnicity,edregtime,edouttime,has_chartevents_data
0,87197,176931,2109-11-16 08:11:00,2109-11-18 17:25:00,NaT,EMERGENCY,EMERGENCY ROOM ADMIT,HOME,Private,ENGL,NOT SPECIFIED,MARRIED,WHITE,2109-11-16 01:09:00,2109-11-16 09:26:00,1
1,1765,145878,2183-05-09 08:35:00,2183-05-16 11:50:00,NaT,NEWBORN,PHYS REFERRAL/NORMAL DELI,HOME,Private,Unknown,UNOBTAINABLE,Unknown,WHITE,NaT,NaT,1
2,21269,109697,2162-10-03 17:02:00,2162-12-01 19:30:00,NaT,EMERGENCY,EMERGENCY ROOM ADMIT,SHORT TERM HOSPITAL,Medicare,Unknown,JEWISH,MARRIED,WHITE,2162-10-03 09:24:00,2162-10-03 17:18:00,1
3,5692,151113,2143-10-23 15:42:00,2143-11-08 17:25:00,NaT,EMERGENCY,PHYS REFERRAL/NORMAL DELI,HOME HEALTH CARE,Private,ENGL,NOT SPECIFIED,MARRIED,WHITE,NaT,NaT,1
4,5692,193147,2147-12-15 23:57:00,2147-12-27 12:00:00,NaT,EMERGENCY,TRANSFER FROM HOSP/EXTRAM,HOME,Private,ENGL,NOT SPECIFIED,MARRIED,WHITE,NaT,NaT,1
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
127,58667,197356,2130-08-19 20:03:00,2130-08-20 01:56:00,2130-08-20 01:56:00,EMERGENCY,CLINIC REFERRAL/PREMATURE,DEAD/EXPIRED,Government,ENGL,PROTESTANT QUAKER,DIVORCED,WHITE,2130-08-19 15:57:00,2130-08-19 21:16:00,1
128,3391,136471,2143-03-01 19:47:00,2143-03-03 15:11:00,NaT,NEWBORN,CLINIC REFERRAL/PREMATURE,HOME,Medicaid,Unknown,UNOBTAINABLE,Unknown,BLACK/AFRICAN AMERICAN,NaT,NaT,1
129,31650,137933,2177-06-07 17:54:00,2177-06-12 14:34:00,NaT,EMERGENCY,TRANSFER FROM HOSP/EXTRAM,SNF,Medicare,Unknown,CATHOLIC,MARRIED,UNABLE TO OBTAIN,NaT,NaT,1
130,15895,135286,2116-07-04 17:13:00,2116-07-14 12:16:00,NaT,EMERGENCY,EMERGENCY ROOM ADMIT,SNF,Medicare,Unknown,PROTESTANT QUAKER,MARRIED,UNKNOWN/NOT SPECIFIED,NaT,NaT,1



### Icustays table:

In [40]:
icustays_df.head()

Unnamed: 0,subject_id,hadm_id,icustay_id,dbsource,first_careunit,last_careunit,first_wardid,last_wardid,intime,outtime,los
0,87197,176931,209341,metavision,CCU,CCU,7,7,2109-11-16 08:12:18,2109-11-17 17:39:41,1.394
1,1765,145878,254146,carevue,NICU,NICU,56,56,2183-05-09 09:29:55,2183-05-16 19:18:28,7.4087
2,21269,109697,241691,carevue,MICU,MICU,12,12,2162-10-03 17:03:08,2162-12-01 20:11:22,59.1307
3,5692,151113,279055,carevue,CSRU,CSRU,12,15,2143-10-31 14:16:32,2143-11-01 19:20:00,1.2107
4,5692,193147,241135,metavision,CCU,CCU,7,7,2147-12-16 00:00:03,2147-12-17 16:45:54,1.6985


In [41]:
icustays_df.isnull().sum()

subject_id        0
hadm_id           0
icustay_id        0
dbsource          0
first_careunit    0
last_careunit     0
first_wardid      0
last_wardid       0
intime            0
outtime           0
los               0
dtype: int64

In [42]:
admission_mid = admissions_df.loc[:, ['hadm_id', 'admittime']]

In [43]:
icustays_mid_df = pd.merge(left=icustays_df, right=admission_mid , how='left', on=['hadm_id'])

In [44]:
# icustays_mid_df['outtime'] = icustays_mid_df.apply(common.time_process, args=('intime', 'outtime'), axis=1)
# icustays_mid_df['intime'] = icustays_mid_df.apply(common.time_process, args=('admittime', 'intime'), axis=1)

In [45]:
icustays_df = icustays_mid_df.drop([ 'admittime'], axis=1)

In [46]:
icustays_df

Unnamed: 0,subject_id,hadm_id,icustay_id,dbsource,first_careunit,last_careunit,first_wardid,last_wardid,intime,outtime,los
0,87197,176931,209341,metavision,CCU,CCU,7,7,2109-11-16 08:12:18,2109-11-17 17:39:41,1.3940
1,1765,145878,254146,carevue,NICU,NICU,56,56,2183-05-09 09:29:55,2183-05-16 19:18:28,7.4087
2,21269,109697,241691,carevue,MICU,MICU,12,12,2162-10-03 17:03:08,2162-12-01 20:11:22,59.1307
3,5692,151113,279055,carevue,CSRU,CSRU,12,15,2143-10-31 14:16:32,2143-11-01 19:20:00,1.2107
4,5692,193147,241135,metavision,CCU,CCU,7,7,2147-12-16 00:00:03,2147-12-17 16:45:54,1.6985
...,...,...,...,...,...,...,...,...,...,...,...
133,58667,197356,249803,metavision,MICU,MICU,50,50,2130-08-19 20:03:38,2130-08-20 07:17:43,0.4681
134,3391,136471,265244,carevue,NICU,NICU,56,56,2143-03-01 19:51:18,2143-03-01 23:53:41,0.1683
135,31650,137933,282691,carevue,CCU,CCU,7,7,2177-06-07 19:44:18,2177-06-10 15:45:28,2.8341
136,15895,135286,236445,carevue,CSRU,CSRU,14,14,2116-07-06 10:36:00,2116-07-07 21:28:00,1.4528


### Preparing data to be fed in the model

In [47]:
datasets['admissions']=admissions_df
datasets['callout']=callout_df
datasets['chartevents']=chartevents_df
datasets['cptevents']=cptevents_df 
datasets['icustays']=icustays_df 
datasets['inputevents_mv']=inputevents_mv_df 
datasets['labevents']=labevents_df 
datasets['microbiologyevents']=microbiologyevents_df 
datasets['outputevents']=outputevents_df 
datasets['patients']=patients_df 
datasets['prescriptions']=prescriptions_df 
datasets['procedureevents_mv']=procedureevents_mv_df 
datasets['procedures_icd']=procedures_icd_df

In [48]:
dataTest={
    'admissions': admissions_df,
    'patients':patients_df,
#     'icustays':icustays_df
}

## Metadata

In [49]:
from sdv.metadata import MultiTableMetadata

metadata = MultiTableMetadata()

In [50]:
metadata.detect_table_from_dataframe(
    table_name='admissions',
    data=admissions_df
)
metadata.detect_table_from_dataframe(
    table_name='patients',
    data=patients_df
)

# metadata.detect_table_from_dataframe(
#     table_name='icustays',
#     data=icustays_df
# )

In [51]:
print('Auto detected data:\n')
metadata

Auto detected data:



{
    "tables": {
        "admissions": {
            "columns": {
                "subject_id": {
                    "sdtype": "numerical"
                },
                "hadm_id": {
                    "sdtype": "numerical"
                },
                "admittime": {
                    "sdtype": "datetime"
                },
                "dischtime": {
                    "sdtype": "datetime"
                },
                "deathtime": {
                    "sdtype": "datetime"
                },
                "admission_type": {
                    "sdtype": "categorical"
                },
                "admission_location": {
                    "sdtype": "categorical"
                },
                "discharge_location": {
                    "sdtype": "categorical"
                },
                "insurance": {
                    "sdtype": "categorical"
                },
                "language": {
                    "sdtype": "categorical"
    

In [52]:
metadata.update_column(
    table_name='admissions',
    column_name='subject_id',
    sdtype='id',
    regex_format='[0-9]{4,8}'
)

metadata.update_column(
    table_name='admissions',
    column_name='hadm_id',
    sdtype='id',
    regex_format='[0-9]{4,8}'
)

metadata.update_column(
    table_name='admissions',
    column_name='has_chartevents_data',
    sdtype='categorical'
)

In [53]:
metadata.update_column(
    table_name='patients',
    column_name='subject_id',
    sdtype='id',
    regex_format='[0-9]{4,8}'
)


In [54]:
# metadata.update_column(
#     table_name='icustays',
#     column_name='subject_id',
#     sdtype='id'
# )

# metadata.update_column(
#     table_name='icustays',
#     column_name='hadm_id',
#     sdtype='id'
# )

# metadata.update_column(
#     table_name='icustays',
#     column_name='icustay_id',
#     sdtype='id'
# )

# metadata.update_column(
#     table_name='icustays',
#     column_name='first_wardid',
#     sdtype='categorical'
# )

# metadata.update_column(
#     table_name='icustays',
#     column_name='last_wardid',
#     sdtype='categorical'
# )

## Setting primary keys

In [55]:
metadata.set_primary_key(
    table_name='patients',
    column_name='subject_id'
)

metadata.set_primary_key(
    table_name='admissions',
    column_name='hadm_id'
)
# metadata.set_primary_key(
#     table_name='icustays',
#     column_name='icustay_id'
# )


## Setting relationships

In [56]:
metadata.add_relationship(
    parent_table_name='patients',
    child_table_name='admissions',
    parent_primary_key='subject_id',
    child_foreign_key='subject_id'
)

# metadata.add_relationship(
#     parent_table_name='patients',
#     child_table_name='icustays',
#     parent_primary_key='subject_id',
#     child_foreign_key='subject_id'
# )

# metadata.add_relationship(
#     parent_table_name='admissions',
#     child_table_name='icustays',
#     parent_primary_key='hadm_id',
#     child_foreign_key='hadm_id'
# )

In [57]:
metadata.validate()

In [65]:
metadata.visualize().save()

'Metadata.gv'

In [610]:
from sdv.multi_table import HMASynthesizer

synthesizer = HMASynthesizer(metadata)
synthesizer.validate(dataTest)

In [611]:
synthesizer.fit(dataTest)
synthetic_data = synthesizer.sample(scale=1)

Preprocess Tables: 100%|██████████| 2/2 [00:00<00:00, 11.28it/s]



Learning relationships:


(1/1) Tables 'patients' and 'admissions' ('subject_id'): 100%|██████████| 100/100 [00:06<00:00, 14.75it/s]





Modeling Tables: 100%|██████████| 1/1 [00:06<00:00,  6.83s/it]


In [612]:
synthetic_data['patients']

Unnamed: 0,subject_id,gender,dob,dod_hosp,dod_ssn
0,0,F,1980-12-06,NaT,2157-08-15
1,1,M,1985-09-25,NaT,2149-07-20
2,2,F,1914-06-15,NaT,NaT
3,3,M,1971-04-07,2130-07-04,NaT
4,4,F,1963-11-20,NaT,2175-05-05
...,...,...,...,...,...
95,95,M,2003-01-26,NaT,NaT
96,96,F,1911-12-08,2167-10-01,NaT
97,97,F,1942-11-02,NaT,NaT
98,98,F,1989-01-31,2142-05-11,NaT


In [613]:
synthetic_data['admissions'][synthetic_data['admissions']['hadm_id']==90]

Unnamed: 0,subject_id,hadm_id,admittime,dischtime,deathtime,admission_type,admission_location,discharge_location,insurance,language,religion,marital_status,ethnicity,edregtime,edouttime,has_chartevents_data
90,23,90,2174-09-10 21:13:29,2173-10-28 09:58:07,NaT,URGENT,PHYS REFERRAL/NORMAL DELI,SNF,Medicare,Unknown,JEWISH,Unknown,WHITE,2180-11-21 17:14:37,NaT,1


In [614]:
synthetic_data['icustays']

KeyError: 'icustays'

In [615]:
synthetic_data['admissions']['religion'].unique()

array(['JEWISH', 'OTHER', 'UNOBTAINABLE', 'EPISCOPALIAN', 'CATHOLIC',
       'PROTESTANT QUAKER', 'NOT SPECIFIED', 'Unknown'], dtype=object)

In [616]:
admissions_df['deathtime']

0                     NaT
1                     NaT
2                     NaT
3                     NaT
4                     NaT
              ...        
127   2130-08-20 01:56:00
128                   NaT
129                   NaT
130                   NaT
131                   NaT
Name: deathtime, Length: 132, dtype: datetime64[ns]

In [617]:
from sdv.evaluation.multi_table import evaluate_quality

quality_report = evaluate_quality(
    real_data=dataTest,
    synthetic_data=synthetic_data,
    metadata=metadata)

Creating report: 100%|██████████| 5/5 [00:00<00:00,  8.61it/s]



Overall Quality Score: 46.79%

Properties:
Column Shapes: 72.16%
Column Pair Trends: 61.22%
Parent Child Relationships: 7.0%


In [618]:
from sdv.evaluation.multi_table import run_diagnostic

diagnostic_report = run_diangnostic(
    real_data=patients_df,
    synthetic_data=synthetic_data['patients'],
    metadata=metadata)

NameError: name 'run_diangnostic' is not defined

In [619]:
quality_report.get_score()

0.46794415347064416

In [620]:
from sdv.multi_table import DayZSynthesizer

synthesizer = DayZSynthesizer(metadata)
synthetic_data = synthesizer.sample(num_rows_per_table=1000)

ImportError: cannot import name 'DayZSynthesizer' from 'sdv.multi_table' (C:\Users\shrus\anaconda3\lib\site-packages\sdv\multi_table\__init__.py)

In [None]:
from sdv.evaluation.single_table import get_column_plot

fig = get_column_plot(
    real_data=dataTest,
    synthetic_data=synthetic_data,
    table_name='admissions',
    column_name='amenities_fee',
    metadata=metadata
)
    
fig.show()