# Visualizations for Models on the German Credit Dataset

In [1]:
# Render matplotlib figures inline in the cell outputs.
%matplotlib inline
# Load the autoreload extension and use mode 2, which reloads imported modules
# before each cell execution so that edits to project code are picked up
# without restarting the kernel.
%load_ext autoreload
%autoreload 2

In [2]:
import os
import warnings
# PYTHONWARNINGS is read by Python interpreters at start-up, so setting it here
# affects processes spawned from this kernel (presumably worker subprocesses of
# the project code -- TODO confirm that is the intent).
os.environ["PYTHONWARNINGS"] = "ignore"
# Silence every warning category inside the current kernel as well.
warnings.filterwarnings(action='ignore')

In [3]:
# Switch the working directory to the repository root (presumably so that the
# project-local `source.*` and `configs.*` imports in later cells resolve).
#
# os.path.basename is used instead of splitting on '/': on Windows os.getcwd()
# returns a backslash-separated path, so split('/')[-1] yields the whole path,
# the comparison below never matches, and every re-run of this cell would climb
# four more directories.
cur_folder_name = os.path.basename(os.getcwd())
if cur_folder_name != "data-cleaning-stability":
    os.chdir("../../../..")

print('Current location: ', os.getcwd())

Current location:  c:\RAI\data-cleaning-stability


In [4]:
from source.visualizations.models_viz import create_box_plots_for_diff_baseline_models, create_box_plots_for_diff_models, create_box_plots_for_diff_imputers_v2
from configs.constants import (ACS_INCOME_DATASET, LAW_SCHOOL_DATASET, GERMAN_CREDIT_DATASET,
                               CARDIOVASCULAR_DISEASE_DATASET, BANK_MARKETING_DATASET, DIABETES_DATASET, ErrorRepairMethod)

## Initialize Configs

In [5]:
# Dataset whose results are visualized by every plotting cell in this notebook.
DATASET_NAME = GERMAN_CREDIT_DATASET

# Sensitive attributes listed per dataset. Entries containing '&' presumably
# denote intersectional groups -- TODO confirm against the metric computation code.
DATASETS_SENSITIVE_ATTRS = {
    ACS_INCOME_DATASET: ['SEX', 'RAC1P', 'SEX&RAC1P'],
    LAW_SCHOOL_DATASET: ['male', 'race', 'male&race'],
    GERMAN_CREDIT_DATASET: ['sex', 'age', 'sex&age'],
    CARDIOVASCULAR_DISEASE_DATASET: ['gender'],
    BANK_MARKETING_DATASET: ['age'],
    DIABETES_DATASET: ['Gender'],
}

# Value passed as `group=` to the disparity-metric plotting calls.
SENSITIVE_ATTR_FOR_DISPARITY_METRICS = 'sex'

# Fail fast if DATASET_NAME is switched without updating the attribute above;
# otherwise the disparity plots would silently be requested for a group that is
# not listed for the selected dataset.
assert SENSITIVE_ATTR_FOR_DISPARITY_METRICS in DATASETS_SENSITIVE_ATTRS[DATASET_NAME], (
    f"'{SENSITIVE_ATTR_FOR_DISPARITY_METRICS}' is not a sensitive attribute of {DATASET_NAME}: "
    f"{DATASETS_SENSITIVE_ATTRS[DATASET_NAME]}"
)

In [6]:
from source.custom_classes.database_client import DatabaseClient

# Create the project's database client and connect it. The same `db_client`
# instance is passed to every plotting call below and is closed in the final cell.
db_client = DatabaseClient()
db_client.connect()

## Metric Visualizations

### Overall Metrics

In [None]:
# Overall metric: Accuracy box plots for the baseline models.
create_box_plots_for_diff_baseline_models(
    dataset_name=DATASET_NAME,
    metric_name='Accuracy',
    db_client=db_client,
)

In [None]:
# Overall metric: F1 box plots per model, with the median-mode null imputer.
create_box_plots_for_diff_models(
    dataset_name=DATASET_NAME,
    null_imputer_name=ErrorRepairMethod.median_mode.value,
    metric_name='F1',
    db_client=db_client,
)

In [None]:
# Overall metric: Label_Stability box plots per model, with the datawig null imputer.
create_box_plots_for_diff_models(
    dataset_name=DATASET_NAME,
    null_imputer_name=ErrorRepairMethod.datawig.value,
    metric_name='Label_Stability',
    db_client=db_client,
)

In [None]:
# Overall metric: Epistemic_Uncertainty box plots per model, with the datawig null imputer.
create_box_plots_for_diff_models(
    dataset_name=DATASET_NAME,
    null_imputer_name=ErrorRepairMethod.datawig.value,
    metric_name='Epistemic_Uncertainty',
    db_client=db_client,
)

In [None]:
# Overall metric: Aleatoric_Uncertainty box plots per model, with the datawig null imputer.
create_box_plots_for_diff_models(
    dataset_name=DATASET_NAME,
    null_imputer_name=ErrorRepairMethod.datawig.value,
    metric_name='Aleatoric_Uncertainty',
    db_client=db_client,
)

In [None]:
# Overall metric: accuracy box plots per imputer for the 'gandalf_clf' model.
create_box_plots_for_diff_imputers_v2(
    dataset_name=DATASET_NAME,
    model_name='gandalf_clf',
    metric_name='accuracy',
    db_client=db_client,
)

In [8]:
# Overall metric: f1 box plots per imputer for the 'rf_clf' model.
create_box_plots_for_diff_imputers_v2(
    dataset_name=DATASET_NAME,
    model_name='rf_clf',
    metric_name='f1',
    db_client=db_client,
)

Prepared a plot for an MCAR train set
Prepared a plot for an MAR train set
Prepared a plot for an MNAR train set


In [9]:
# Overall metric: label_stability box plots per imputer for the 'rf_clf' model.
create_box_plots_for_diff_imputers_v2(
    dataset_name=DATASET_NAME,
    model_name='rf_clf',
    metric_name='label_stability',
    db_client=db_client,
)

Prepared a plot for an MCAR train set
Prepared a plot for an MAR train set
Prepared a plot for an MNAR train set


In [None]:
# Overall metric: aleatoric_uncertainty box plots per imputer for the 'gandalf_clf' model.
create_box_plots_for_diff_imputers_v2(
    dataset_name=DATASET_NAME,
    model_name='gandalf_clf',
    metric_name='aleatoric_uncertainty',
    db_client=db_client,
)

In [None]:
# Overall metric: std box plots per imputer for the 'gandalf_clf' model.
create_box_plots_for_diff_imputers_v2(
    dataset_name=DATASET_NAME,
    model_name='gandalf_clf',
    metric_name='std',
    db_client=db_client,
)

### Disparity Metrics

In [None]:
# Disparity metric: Equalized_Odds_TNR box plots for the baseline models,
# computed for the configured sensitive-attribute group.
create_box_plots_for_diff_baseline_models(
    dataset_name=DATASET_NAME,
    metric_name='Equalized_Odds_TNR',
    group=SENSITIVE_ATTR_FOR_DISPARITY_METRICS,
    db_client=db_client,
)

In [None]:
# Disparity metric: Selection_Rate_Difference box plots per model, with the automl null imputer.
create_box_plots_for_diff_models(
    dataset_name=DATASET_NAME,
    null_imputer_name=ErrorRepairMethod.automl.value,
    group=SENSITIVE_ATTR_FOR_DISPARITY_METRICS,
    metric_name='Selection_Rate_Difference',
    db_client=db_client,
)

In [None]:
# Disparity metric: Selection_Rate_Difference box plots per model, with the datawig null imputer.
create_box_plots_for_diff_models(
    dataset_name=DATASET_NAME,
    null_imputer_name=ErrorRepairMethod.datawig.value,
    group=SENSITIVE_ATTR_FOR_DISPARITY_METRICS,
    metric_name='Selection_Rate_Difference',
    db_client=db_client,
)

In [None]:
# Disparity metric: Selection_Rate_Difference box plots per model, with the k-means-clustering null imputer.
create_box_plots_for_diff_models(
    dataset_name=DATASET_NAME,
    null_imputer_name=ErrorRepairMethod.k_means_clustering.value,
    group=SENSITIVE_ATTR_FOR_DISPARITY_METRICS,
    metric_name='Selection_Rate_Difference',
    db_client=db_client,
)

In [None]:
# Disparity metric: Selection_Rate_Difference box plots per model, with the miss-forest null imputer.
create_box_plots_for_diff_models(
    dataset_name=DATASET_NAME,
    null_imputer_name=ErrorRepairMethod.miss_forest.value,
    group=SENSITIVE_ATTR_FOR_DISPARITY_METRICS,
    metric_name='Selection_Rate_Difference',
    db_client=db_client,
)

In [None]:
# Disparity metric: Equalized_Odds_FPR box plots per imputer for the 'gandalf_clf' model.
create_box_plots_for_diff_imputers_v2(
    dataset_name=DATASET_NAME,
    model_name='gandalf_clf',
    group=SENSITIVE_ATTR_FOR_DISPARITY_METRICS,
    metric_name='Equalized_Odds_FPR',
    db_client=db_client,
)

In [10]:
# Disparity metric: Equalized_Odds_TPR box plots per imputer for the 'rf_clf' model.
create_box_plots_for_diff_imputers_v2(
    dataset_name=DATASET_NAME,
    model_name='rf_clf',
    group=SENSITIVE_ATTR_FOR_DISPARITY_METRICS,
    metric_name='Equalized_Odds_TPR',
    db_client=db_client,
)

Prepared a plot for an MCAR train set
Prepared a plot for an MAR train set
Prepared a plot for an MNAR train set


In [None]:
# Disparity metric: Label_Stability_Difference box plots per imputer for the 'gandalf_clf' model.
create_box_plots_for_diff_imputers_v2(
    dataset_name=DATASET_NAME,
    model_name='gandalf_clf',
    group=SENSITIVE_ATTR_FOR_DISPARITY_METRICS,
    metric_name='Label_Stability_Difference',
    db_client=db_client,
)

In [None]:
# Disparity metric: Aleatoric_Uncertainty_Difference box plots per imputer for the 'gandalf_clf' model.
create_box_plots_for_diff_imputers_v2(
    dataset_name=DATASET_NAME,
    model_name='gandalf_clf',
    group=SENSITIVE_ATTR_FOR_DISPARITY_METRICS,
    metric_name='Aleatoric_Uncertainty_Difference',
    db_client=db_client,
)

In [None]:
# Disparity metric: Std_Difference box plots per imputer for the 'gandalf_clf' model.
create_box_plots_for_diff_imputers_v2(
    dataset_name=DATASET_NAME,
    model_name='gandalf_clf',
    group=SENSITIVE_ATTR_FOR_DISPARITY_METRICS,
    metric_name='Std_Difference',
    db_client=db_client,
)

In [24]:
# Close the database client that was connected near the top of the notebook;
# run this cell last, since every plotting cell above passes `db_client`.
db_client.close()