In [1]:
!pip install datgan

Looking in indexes: http://mirrors.aliyun.com/pypi/simple


In [2]:
import os
import pickle
import numpy as np
import pandas as pd
import networkx as nx

from sklearn.preprocessing import OrdinalEncoder

from datgan import stats_assessment
from datgan import ml_assessment, transform_results

# For the Python notebook
%matplotlib inline
%reload_ext autoreload
%autoreload 2

In [4]:
df_orig = pd.read_csv("Portugal_Original.csv", index_col=0)
df_orig

Unnamed: 0,TOTAL_INDIVIDUALS,MALE_INDIVIDUALS,FEMALE_INDIVIDUALS,INDIVIDUALS_0_14,INDIVIDUALS_15_24,INDIVIDUALS_25_64,INDIVIDUALS_65_PLUS,CLASSIC_BUILDINGS,CLASSIC_BUILDINGS_1_2_DWELLINGS,CLASSIC_BUILDINGS_3_MORE_DWELLINGS,...,CLASSIC_FAMILY_VACANT_SECONDARY_RESIDENCES residencesN_ALOJAMENTOS_FAM_CLASS_VAGOS_OU_RESID_SECUNDARIA,HABITUAL_RESIDENCES_WHEELCHAIR_ACCESS,HABITUAL_RESIDENCES_PARKING,HABITUAL_RESIDENCES_OWNER_OCCUPIED,HABITUAL_RESIDENCES_RENTED,PRIVATE_HOUSEHOLDS,HOUSEHOLDS_1_2_PEOPLE,HOUSEHOLDS_3_MORE_PEOPLE,FAMILY_NUCLEI,FAMILY_NUCLEI_CHILDREN_UNDER_25
0,29,13,16,3,1,15,10,23,23,0,...,11,4,12,12,0,12,8,4,11,2
1,25,11,14,1,4,11,9,22,22,0,...,11,5,6,11,0,11,8,3,7,4
2,26,13,13,3,3,9,11,20,20,0,...,9,1,5,10,0,11,7,4,10,5
3,12,5,7,0,0,5,7,11,11,0,...,5,0,5,6,0,6,5,1,5,0
4,35,17,18,2,6,15,12,27,27,0,...,15,7,12,13,0,13,7,6,9,4
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
210165,72,38,34,5,7,45,15,72,64,8,...,67,13,24,30,1,33,23,10,25,9
210166,102,55,47,7,11,70,14,63,62,1,...,28,28,25,30,1,40,21,19,32,14
210167,274,132,142,20,70,140,44,143,131,12,...,155,62,71,72,13,120,74,46,73,28
210168,282,147,135,31,36,178,37,166,164,2,...,66,75,81,99,6,112,57,55,89,42


In [5]:
continuous_columns = []
i = 1
rename_cols = {}
for j in df_orig.columns.tolist():
    rename_cols[j] = f"{i:0{5}d}"
    continuous_columns.append(f"{i:0{5}d}")
    i = i + 1
continuous_columns

['00001',
 '00002',
 '00003',
 '00004',
 '00005',
 '00006',
 '00007',
 '00008',
 '00009',
 '00010',
 '00011',
 '00012',
 '00013',
 '00014',
 '00015',
 '00016',
 '00017',
 '00018',
 '00019',
 '00020',
 '00021',
 '00022',
 '00023',
 '00024',
 '00025',
 '00026',
 '00027']

In [6]:
df_orig.rename(columns = rename_cols, inplace=True)
df_orig.info()

<class 'pandas.core.frame.DataFrame'>
Index: 210170 entries, 0 to 210169
Data columns (total 27 columns):
 #   Column  Non-Null Count   Dtype
---  ------  --------------   -----
 0   00001   210170 non-null  int64
 1   00002   210170 non-null  int64
 2   00003   210170 non-null  int64
 3   00004   210170 non-null  int64
 4   00005   210170 non-null  int64
 5   00006   210170 non-null  int64
 6   00007   210170 non-null  int64
 7   00008   210170 non-null  int64
 8   00009   210170 non-null  int64
 9   00010   210170 non-null  int64
 10  00011   210170 non-null  int64
 11  00012   210170 non-null  int64
 12  00013   210170 non-null  int64
 13  00014   210170 non-null  int64
 14  00015   210170 non-null  int64
 15  00016   210170 non-null  int64
 16  00017   210170 non-null  int64
 17  00018   210170 non-null  int64
 18  00019   210170 non-null  int64
 19  00020   210170 non-null  int64
 20  00021   210170 non-null  int64
 21  00022   210170 non-null  int64
 22  00023   210170 non-null  

In [7]:
synth_files = ['datgan_dag_corrthreshold_01_b500_e250_samples.csv',
               'datgan_dag_corrthreshold_01_b500_e50_samples.csv',
               'datgan_dag_corrthreshold_01_b500_e100_samples.csv',
               'datgan_dag_corrthreshold_01_b500_e25_samples.csv'
              ]

In [8]:

results_path = r"Portugal_Model_Evaluation/results_b500/"

if not os.path.exists(results_path):
    os.makedirs(results_path)

In [9]:
stats_str = ['mae', 'rmse', 'r2', 'srmse', 'corr']

# Statistical assessment

## First level

In [10]:
pickle_name = 'stats_first_level.pickle'
aggregation_level = 1

first_lvl_stats = {}

try:
    first_lvl_stats = pickle.load(open(results_path + pickle_name, 'rb'))
    print('Found previous pickel file, using that')
except:
    print('No previous results found, starting fresh')

No previous results found, starting fresh


In [11]:
for i, f in enumerate(synth_files):

    file_name = f.split('/')[-1].split('.')[0]

    if file_name in first_lvl_stats:
        print("Results for file \033[1m{}\033[0m ({}/{}) already exists!".format(file_name, i+1, len(synth_files)))
    else:
        print("Preparing stats for file \033[1m{}\033[0m ({}/{})".format(file_name, i+1, len(synth_files)))

        first_lvl_stats[file_name] = {}

        df_synth = pd.read_csv(f, index_col=False)

        stats = stats_assessment(df_orig, df_synth, continuous_columns, aggregation_level)

        first_lvl_stats[file_name] = stats

    pickle.dump(first_lvl_stats, open(results_path + pickle_name, 'wb'))

print("\033[1mFINISHED!\033[0m")

Preparing stats for file [1mdatgan_dag_corrthreshold_01_b500_e250_samples[0m (1/4)
Preparing stats for file [1mdatgan_dag_corrthreshold_01_b500_e50_samples[0m (2/4)
Preparing stats for file [1mdatgan_dag_corrthreshold_01_b500_e100_samples[0m (3/4)
Preparing stats for file [1mdatgan_dag_corrthreshold_01_b500_e25_samples[0m (4/4)
[1mFINISHED![0m


### Human-readable results


In [12]:
res = {}

for test in ['all', 'cont', 'cat']:

    res[test] = {}

    if test == 'all':
        cols = df_orig.columns
    elif test == 'cont':
        cols = continuous_columns
    elif test == 'cat':
        cols = set(df_orig.columns) - set(continuous_columns)

    for s in stats_str:
        res[test][s] = {}

    for m in first_lvl_stats.keys():

        for s in stats_str:
            res[test][s][m] = []

            for c in cols:
                res[test][s][m].append(first_lvl_stats[m][c][s])

In [13]:
avg = {}

for test in ['all', 'cont', 'cat']:

    avg[test] = {}

    for s in stats_str:
        avg[test][s] = {}

        for m in first_lvl_stats.keys():
            avg[test][s][m] = {
                'mean': np.mean(res[test][s][m]),
                'std': np.std(res[test][s][m])
            }

  return _methods._mean(a, axis=axis, dtype=dtype,
  ret = ret.dtype.type(ret / rcount)
  ret = _var(a, axis=axis, dtype=dtype, out=out, ddof=ddof,
  arrmean = um.true_divide(arrmean, div, out=arrmean, casting='unsafe',
  ret = ret.dtype.type(ret / rcount)


In [14]:
for test in ['all', 'cont', 'cat']:

    if test == 'all':
        str_ = 'on all columns'
    elif test == 'cont':
        str_ = 'on continuous columns'
    elif test == 'cat':
        str_ = 'on categorical columns'

    for s in stats_str: #['srmse']:#stats:
        print('Ranking {} based on {}:'.format(str_, s.upper()))

        if s in ['r2', 'corr']:
            sorted_dct = {k: v for k, v in sorted(avg[test][s].items(), key=lambda item: item[1]['mean'])[::-1]}
        else:
            sorted_dct = {k: v for k, v in sorted(avg[test][s].items(), key=lambda item: item[1]['mean'])}

        for i, item in enumerate(sorted_dct):
            print('  {:>2}. {:<15} - {:.2e} ± {:.2e}'.format(i+1, item, sorted_dct[item]['mean'], sorted_dct[item]['std']))
        print()


Ranking on all columns based on MAE:
   1. datgan_dag_corrthreshold_01_b500_e100_samples - 2.54e-03 ± 1.78e-03
   2. datgan_dag_corrthreshold_01_b500_e25_samples - 4.49e-03 ± 2.43e-03
   3. datgan_dag_corrthreshold_01_b500_e250_samples - 5.44e-03 ± 2.54e-03
   4. datgan_dag_corrthreshold_01_b500_e50_samples - 7.58e-03 ± 3.52e-03

Ranking on all columns based on RMSE:
   1. datgan_dag_corrthreshold_01_b500_e100_samples - 4.68e-03 ± 3.77e-03
   2. datgan_dag_corrthreshold_01_b500_e25_samples - 8.56e-03 ± 4.79e-03
   3. datgan_dag_corrthreshold_01_b500_e250_samples - 1.05e-02 ± 5.05e-03
   4. datgan_dag_corrthreshold_01_b500_e50_samples - 1.46e-02 ± 6.93e-03

Ranking on all columns based on R2:
   1. datgan_dag_corrthreshold_01_b500_e100_samples - 1.00e+00 ± 7.78e-04
   2. datgan_dag_corrthreshold_01_b500_e25_samples - 9.99e-01 ± 1.15e-03
   3. datgan_dag_corrthreshold_01_b500_e250_samples - 9.98e-01 ± 1.31e-03
   4. datgan_dag_corrthreshold_01_b500_e50_samples - 9.97e-01 ± 2.45e-03

Rank

## Second level

In [15]:
pickle_name = 'stats_second_level.pickle'
aggregation_level = 2

second_lvl_stats = {}

try:
    second_lvl_stats = pickle.load(open(results_path + pickle_name, 'rb'))
    print('Found previous pickel file, using that')
except:
    print('No previous results found, starting fresh')

No previous results found, starting fresh


In [16]:
for i, f in enumerate(synth_files):

    file_name = f.split('/')[-1].split('.')[0]

    if file_name in second_lvl_stats:
        print("Results for file \033[1m{}\033[0m ({}/{}) already exists!".format(file_name, i+1, len(synth_files)))
    else:
        print("Preparing stats for file \033[1m{}\033[0m ({}/{})".format(file_name, i+1, len(synth_files)))

        second_lvl_stats[file_name] = {}

        df_synth = pd.read_csv(f, index_col=False)

        stats = stats_assessment(df_orig, df_synth, continuous_columns, aggregation_level)

        second_lvl_stats[file_name] = stats

    pickle.dump(second_lvl_stats, open(results_path + pickle_name, 'wb'))

print("\033[1mFINISHED!\033[0m")

Preparing stats for file [1mdatgan_dag_corrthreshold_01_b500_e250_samples[0m (1/4)
Preparing stats for file [1mdatgan_dag_corrthreshold_01_b500_e50_samples[0m (2/4)
Preparing stats for file [1mdatgan_dag_corrthreshold_01_b500_e100_samples[0m (3/4)
Preparing stats for file [1mdatgan_dag_corrthreshold_01_b500_e25_samples[0m (4/4)
[1mFINISHED![0m


### Human-readable results

In [17]:
res = {}

for s in stats_str:
    res[s] = {}

for m in second_lvl_stats.keys():

    for s in stats_str:
        res[s][m] = []

        for c in second_lvl_stats[m].keys():
            res[s][m].append(second_lvl_stats[m][c][s])

In [18]:
avg = {}

for s in stats_str:
    avg[s] = {}

    for m in second_lvl_stats.keys():
        avg[s][m] = {
            'mean': np.mean(res[s][m]),
            'std': np.std(res[s][m])
        }

In [19]:
for s in stats_str: #['srmse']:#stats:
    print('Ranking based on {} for aggregation level {}:'.format(s.upper(), aggregation_level))

    if s in ['r2', 'corr']:
        sorted_dct = {k: v for k, v in sorted(avg[s].items(), key=lambda item: item[1]['mean'])[::-1]}
    else:
        sorted_dct = {k: v for k, v in sorted(avg[s].items(), key=lambda item: item[1]['mean'])}

    for i, item in enumerate(sorted_dct):
        print('  {:>2}. {:<15} - {:.2e} ± {:.2e}'.format(i+1, item, sorted_dct[item]['mean'], sorted_dct[item]['std']))
    print()

Ranking based on MAE for aggregation level 2:
   1. datgan_dag_corrthreshold_01_b500_e100_samples - 1.24e-03 ± 6.33e-04
   2. datgan_dag_corrthreshold_01_b500_e25_samples - 1.51e-03 ± 6.21e-04
   3. datgan_dag_corrthreshold_01_b500_e250_samples - 1.83e-03 ± 5.74e-04
   4. datgan_dag_corrthreshold_01_b500_e50_samples - 2.52e-03 ± 8.46e-04

Ranking based on RMSE for aggregation level 2:
   1. datgan_dag_corrthreshold_01_b500_e100_samples - 3.86e-03 ± 2.18e-03
   2. datgan_dag_corrthreshold_01_b500_e25_samples - 5.13e-03 ± 2.06e-03
   3. datgan_dag_corrthreshold_01_b500_e250_samples - 6.38e-03 ± 1.81e-03
   4. datgan_dag_corrthreshold_01_b500_e50_samples - 9.00e-03 ± 2.40e-03

Ranking based on R2 for aggregation level 2:
   1. datgan_dag_corrthreshold_01_b500_e100_samples - 9.99e-01 ± 1.28e-03
   2. datgan_dag_corrthreshold_01_b500_e25_samples - 9.98e-01 ± 1.23e-03
   3. datgan_dag_corrthreshold_01_b500_e250_samples - 9.98e-01 ± 1.21e-03
   4. datgan_dag_corrthreshold_01_b500_e50_samples 

## Third level

In [20]:
pickle_name = 'stats_third_level.pickle'
aggregation_level = 3

third_lvl_stats = {}

try:
    third_lvl_stats = pickle.load(open(results_path + pickle_name, 'rb'))
    print('Found previous pickel file, using that')
except:
    print('No previous results found, starting fresh')

No previous results found, starting fresh


In [21]:
for i, f in enumerate(synth_files):

    file_name = f.split('/')[-1].split('.')[0]

    if file_name in third_lvl_stats:
        print("Results for file \033[1m{}\033[0m ({}/{}) already exists!".format(file_name, i+1, len(synth_files)))
    else:
        print("Preparing stats for file \033[1m{}\033[0m ({}/{})".format(file_name, i+1, len(synth_files)))

        third_lvl_stats[file_name] = {}

        df_synth = pd.read_csv(f, index_col=False)

        stats = stats_assessment(df_orig, df_synth, continuous_columns, aggregation_level)

        third_lvl_stats[file_name] = stats

    pickle.dump(third_lvl_stats, open(results_path + pickle_name, 'wb'))

print("\033[1mFINISHED!\033[0m")

Preparing stats for file [1mdatgan_dag_corrthreshold_01_b500_e250_samples[0m (1/4)
Preparing stats for file [1mdatgan_dag_corrthreshold_01_b500_e50_samples[0m (2/4)
Preparing stats for file [1mdatgan_dag_corrthreshold_01_b500_e100_samples[0m (3/4)
Preparing stats for file [1mdatgan_dag_corrthreshold_01_b500_e25_samples[0m (4/4)
[1mFINISHED![0m


### Human-readable results

In [22]:
res = {}

for s in stats_str:
    res[s] = {}

for m in third_lvl_stats.keys():

    for s in stats_str:
        res[s][m] = []

        for c in third_lvl_stats[m].keys():
            res[s][m].append(third_lvl_stats[m][c][s])

In [23]:
avg = {}

for s in stats_str:
    avg[s] = {}

    for m in third_lvl_stats.keys():
        avg[s][m] = {
            'mean': np.mean(res[s][m]),
            'std': np.std(res[s][m])
        }

In [24]:
for s in stats_str: #['srmse']:#stats:
    print('Ranking based on {} for aggregation level {}:'.format(s.upper(), aggregation_level))

    if s in ['r2', 'corr']:
        sorted_dct = {k: v for k, v in sorted(avg[s].items(), key=lambda item: item[1]['mean'])[::-1]}
    else:
        sorted_dct = {k: v for k, v in sorted(avg[s].items(), key=lambda item: item[1]['mean'])}

    for i, item in enumerate(sorted_dct):
        print('  {:>2}. {:<15} - {:.2e} ± {:.2e}'.format(i+1, item, sorted_dct[item]['mean'], sorted_dct[item]['std']))
    print()

Ranking based on MAE for aggregation level 3:
   1. datgan_dag_corrthreshold_01_b500_e25_samples - 6.61e-04 ± 2.95e-04
   2. datgan_dag_corrthreshold_01_b500_e100_samples - 6.65e-04 ± 3.03e-04
   3. datgan_dag_corrthreshold_01_b500_e250_samples - 7.94e-04 ± 2.91e-04
   4. datgan_dag_corrthreshold_01_b500_e50_samples - 1.11e-03 ± 4.14e-04

Ranking based on RMSE for aggregation level 3:
   1. datgan_dag_corrthreshold_01_b500_e100_samples - 3.06e-03 ± 1.45e-03
   2. datgan_dag_corrthreshold_01_b500_e25_samples - 3.22e-03 ± 1.28e-03
   3. datgan_dag_corrthreshold_01_b500_e250_samples - 4.14e-03 ± 1.08e-03
   4. datgan_dag_corrthreshold_01_b500_e50_samples - 5.93e-03 ± 1.41e-03

Ranking based on R2 for aggregation level 3:
   1. datgan_dag_corrthreshold_01_b500_e25_samples - 9.98e-01 ± 1.33e-03
   2. datgan_dag_corrthreshold_01_b500_e100_samples - 9.98e-01 ± 1.86e-03
   3. datgan_dag_corrthreshold_01_b500_e250_samples - 9.97e-01 ± 1.31e-03
   4. datgan_dag_corrthreshold_01_b500_e50_samples 

# Machine Learning efficacy


In [25]:
def check_low_appearing_vars(df):

    for c in df.columns:
        val = df[c].value_counts()
        if len(val) < 20:
            val = val/len(df)
            if any(val < 0.01) and c != 'choice':
                print('Variable {}: '.format(c))
                for idx, v in zip(val.index, val):
                    if v < 0.01:
                        print('  {} - {:.2f}% ({:d})'.format(idx, 100*v, int(v*len(df))))
                print()

In [26]:
check_low_appearing_vars(df_orig)

In [27]:
print(check_low_appearing_vars(df_orig))

None


In [None]:
# def replace_low_appearing_values(df):

#     dct_ = {}
#     for i in df['hh_vehicles'].unique():
#         if i >= 5:
#             dct_[i] = '5+'
#         else:
#             dct_[i] = str(i)
#     df['hh_vehicles'].replace(dct_, inplace=True)

#     dct_ = {}
#     for i in df['hh_size'].unique():
#         if i >= 6:
#             dct_[i] = '6+'
#         else:
#             dct_[i] = str(i)
#     df['hh_size'].replace(dct_, inplace=True)

#     dct_ = {}
#     for i in df['hh_bikes'].unique():
#         if i >= 6:
#             dct_[i] = '6+'
#         else:
#             dct_[i] = str(i)
#     df['hh_bikes'].replace(dct_, inplace=True)

In [None]:
# replace_low_appearing_values(df_orig)

In [28]:
check_low_appearing_vars(df_orig)

In [29]:
# Define the categorical columns
categorical_columns = list(set(df_orig.columns) - set(continuous_columns))

In [30]:
enc = OrdinalEncoder()
df_orig[categorical_columns] = enc.fit_transform(df_orig[categorical_columns])

In [31]:
pickle_name = 'ml_efficacy.pickle'

cv_modelscores = {}

try:
    cv_modelscores = pickle.load(open(results_path + pickle_name, 'rb'))
    print('Found previous pickel file, using that')
except:
    print('No previous results found, starting fresh')

No previous results found, starting fresh


In [32]:
for i, f in enumerate(synth_files):

    file_name = f.split('/')[-1].split('.')[0]

    if file_name in cv_modelscores:
        print("Results for file \033[1m{}\033[0m ({}/{}) already exists!".format(file_name, i+1, len(synth_files)))
    else:
        print("Preparing stats for file \033[1m{}\033[0m ({}/{})".format(file_name, i+1, len(synth_files)))

        cv_modelscores[file_name] = {}

        # Load the synthetic dataset
        df_synth = pd.read_csv(f, index_col=False)

        # Replace the values rarely appearing
        #replace_low_appearing_values(df_synth)

        # Encode the synthetic dataset
        df_synth[categorical_columns] = enc.transform(df_synth[categorical_columns])

        res = ml_assessment(df_orig, df_synth, continuous_columns, categorical_columns)

        cv_modelscores[file_name] = res

    pickle.dump(cv_modelscores, open(results_path + pickle_name, 'wb'))

print("\033[1mFINISHED!\033[0m")

Preparing stats for file [1mdatgan_dag_corrthreshold_01_b500_e250_samples[0m (1/4)
    Column: 00019 (1/27) -- Fold 1/5[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.009691 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 3680
[LightGBM] [Info] Number of data points in the train set: 168136, number of used features: 26
[LightGBM] [Info] Start training from score 4.687812
 -- Fold 2/5[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.010884 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 3681
[LightGBM] [Info] Number of data points in the train set: 168136, number of used features: 26
[LightGBM] [Info] Start training from score 4.644591
 -- Fold 3/5[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.010352 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightG

Raw results are a bit difficult to assess. Therefore, we provide a way to get an ordered list of the synthetic datasets tested. However, for this, you need to run the ML assessment on the original dataset with a specific key!

In [None]:
if 'original' in cv_modelscores:
    print("Results for file \033[1m{}\033[0m already exists!".format('original'))
else:
    print("Preparing stats for file \033[1m{}\033[0m".format('original'))

    res = ml_assessment(df_orig, df_orig, continuous_columns, categorical_columns)
    cv_modelscores['original'] = res
    pickle.dump(cv_modelscores, open(results_path + pickle_name, 'wb'))
    print("\033[1mFINISHED!\033[0m")

Preparing stats for file [1moriginal[0m
    Column: 00019 (1/27) -- Fold 1/5[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.012116 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 5292
[LightGBM] [Info] Number of data points in the train set: 168136, number of used features: 26
[LightGBM] [Info] Start training from score 6.672444
 -- Fold 2/5[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.008677 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 5282
[LightGBM] [Info] Number of data points in the train set: 168136, number of used features: 26
[LightGBM] [Info] Start training from score 6.711412
 -- Fold 3/5[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.010512 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 5275
[LightGBM] [Info

In [None]:
cont_sorted, cat_sorted = transform_results(cv_modelscores, continuous_columns, categorical_columns)

In [None]:
i=1
print('   | {:<30} | {:<30}'.format('categorical', 'continuous'))
print('-----------------------------------------------------------')
for a, b in zip(cat_sorted, cont_sorted):
    print('{:>2} | {:<30} | {:<30}'.format(i, '{:<12}: {:.3f}'.format(a[0], a[1]), '{:<12}: {:.3f}'.format(b[0], b[1])))
    i+=1

   | categorical                    | continuous                    
-----------------------------------------------------------
 1 | datgan_dag_corrthreshold_01_b500_e100_samples: 0.000 | original    : 54.752          
 2 | datgan_dag_corrthreshold_05_b500_e100_samples: 0.000 | datgan_dag_corrthreshold_01_b500_e500_samples: 1547.536
 3 | original    : 0.000            | datgan_dag_corrthreshold_01_b500_e250_samples: 1616.258
 4 | datgan_dag_corrthreshold_01_b500_e250_samples: 0.000 | datgan_dag_corrthreshold_01_b500_e100_samples: 2105.203
 5 | datgan_dag_corrthreshold_01_b500_e500_samples: 0.000 | datgan_dag_corrthreshold_05_b500_e100_samples: 2644.191
