In [20]:
# !pip uninstall virny -y

In [21]:
# Install using an HTTPS link
# !pip install git+https://github.com/DataResponsibly/Virny.git@development

# Install using an SSH link
# !pip install git+ssh://git@github.com/DataResponsibly/Virny.git@development

In [22]:
# Render matplotlib figures inline in the notebook output.
%matplotlib inline
# Load the autoreload extension, then switch it to mode 2 so that imported
# modules are reloaded before code is executed (useful while editing the
# library alongside the notebook). The extension must be loaded first.
%load_ext autoreload
%autoreload 2

The autoreload extension is already loaded. To reload it, use:
  %reload_ext autoreload


In [23]:
import os
import warnings

# Silence warnings for this notebook: once through the PYTHONWARNINGS
# environment variable and once through the in-process warnings filter.
os.environ["PYTHONWARNINGS"] = "ignore"
warnings.filterwarnings(action='ignore')

## Import dependencies

In [24]:
import os
import pandas as pd
from pprint import pprint
from datetime import datetime, timezone

from sklearn.compose import ColumnTransformer
from sklearn.preprocessing import OneHotEncoder
from sklearn.preprocessing import StandardScaler

from virny.utils.custom_initializers import create_config_obj, read_model_metric_dfs, create_models_config_from_tuned_params_df
from virny.user_interfaces.metrics_computation_interfaces import compute_metrics_multiple_runs
from virny.preprocessing.basic_preprocessing import preprocess_dataset
from virny.custom_classes.metrics_visualizer import MetricsVisualizer
from virny.custom_classes.metrics_composer import MetricsComposer
from virny.utils.model_tuning_utils import tune_ML_models
from virny.datasets import CompasWithoutSensitiveAttrsDataset

from configs.constants import TEST_SET_FRACTION, EXPERIMENT_SEEDS
from configs.models_config_for_tuning import get_compas_models_params_for_tuning

## Define Input Variables

In [25]:
# Both the dataset split and the model tuning use the first experiment seed.
DATASET_SPLIT_SEED = MODELS_TUNING_SEED = EXPERIMENT_SEEDS[0]

In [26]:
# ROOT_DIR points two levels above the kernel's current working directory.
ROOT_DIR = os.path.join(
    os.getcwd(),
    "..",
    "..",
)
# ROOT_DIR = os.getcwd()

# Experiment identifiers and the directory where its results are written.
EXPERIMENT_NAME = 'error_analysis'
DB_COLLECTION_NAME = f'exp_{EXPERIMENT_NAME}'
SAVE_RESULTS_DIR_PATH = os.path.join(
    ROOT_DIR,
    'results',
    EXPERIMENT_NAME,
)

# Build the config object from the YAML file stored under
# <ROOT_DIR>/notebooks/<EXPERIMENT_NAME>/.
config_yaml_path = os.path.join(
    ROOT_DIR,
    'notebooks',
    EXPERIMENT_NAME,
    'compas_config.yaml',
)
config = create_config_obj(config_yaml_path=config_yaml_path)

In [27]:
# Fetch the per-model tuning parameters for COMPAS, passing the tuning seed
# (presumably used to seed the model instances -- confirm in
# configs/models_config_for_tuning).
models_params_for_tuning = get_compas_models_params_for_tuning(MODELS_TUNING_SEED)

## Initialize custom objects

In [28]:
# Instantiate the dataset loader and preview the first five feature columns.
data_loader = CompasWithoutSensitiveAttrsDataset()
data_loader.X_data.loc[:, data_loader.X_data.columns[:5]].head()

Unnamed: 0,juv_fel_count,juv_misd_count,juv_other_count,priors_count,age_cat_25 - 45
0,0.0,-2.340451,1.0,-15.010999,1
1,0.0,0.0,0.0,0.0,1
2,0.0,0.0,0.0,0.0,0
3,0.0,0.0,0.0,6.0,1
4,0.0,0.0,0.0,7.513697,1


In [29]:
# Build the dense one-hot encoder in a way that works across scikit-learn
# releases: the `sparse` keyword was renamed to `sparse_output` in
# scikit-learn 1.2 and removed in 1.4, so prefer the new name and fall back
# to the old one only when the installed version does not know it.
try:
    categorical_encoder = OneHotEncoder(handle_unknown='ignore', sparse_output=False)
except TypeError:
    categorical_encoder = OneHotEncoder(handle_unknown='ignore', sparse=False)

# One-hot encode the categorical columns and standardize the numerical ones.
column_transformer = ColumnTransformer(transformers=[
    ('categorical_features', categorical_encoder, data_loader.categorical_columns),
    ('numerical_features', StandardScaler(), data_loader.numerical_columns),
])

In [30]:
# Run the dataset through the column transformer using the configured test
# fraction and split seed (presumably a seeded train/test split -- see
# virny.preprocessing.basic_preprocessing for the exact behavior).
base_flow_dataset = preprocess_dataset(
    data_loader,
    column_transformer,
    TEST_SET_FRACTION,
    DATASET_SPLIT_SEED,
)

### Tune models and create a models config for metrics computation

In [31]:
# Tune every model listed in models_params_for_tuning on the preprocessed
# dataset with n_folds=3, then display the resulting parameters table.
tuned_params_df, models_config = tune_ML_models(
    models_params_for_tuning,
    base_flow_dataset,
    config.dataset_name,
    n_folds=3,
)
tuned_params_df

2023/06/11, 13:33:00: Tuning DecisionTreeClassifier...
Fitting 3 folds for each of 4 candidates, totalling 12 fits
2023/06/11, 13:33:02: Tuning for DecisionTreeClassifier is finished [F1 score = 0.6472851634267528, Accuracy = 0.6521464646464646]



Unnamed: 0,Dataset_Name,Model_Name,F1_Score,Accuracy_Score,Model_Best_Params
0,COMPAS_Without_Sensitive_Attributes,DecisionTreeClassifier,0.647285,0.652146,"{'criterion': 'entropy', 'max_depth': 20, 'max..."


In [32]:
# Timestamp the output file in UTC so repeated tuning runs never overwrite each other.
now = datetime.now(timezone.utc)
date_time_str = now.strftime("%Y%m%d__%H%M%S")
tuned_df_path = os.path.join(ROOT_DIR, 'results', 'models_tuning', f'tuning_results_{config.dataset_name}_{date_time_str}.csv')

# to_csv does not create missing parent directories; make sure the target
# folder exists so the cell also works on a fresh checkout.
os.makedirs(os.path.dirname(tuned_df_path), exist_ok=True)

# Persist all columns, with floats rounded to four decimals and without the index.
tuned_params_df.to_csv(tuned_df_path, sep=",", columns=tuned_params_df.columns, float_format="%.4f", index=False)

Recreate `models_config` from the saved `tuned_params_df` CSV file for higher reliability.

In [33]:
# Rebuild the models config from the CSV written at tuned_df_path and print it.
models_config = create_models_config_from_tuned_params_df(
    models_params_for_tuning,
    tuned_df_path,
)
pprint(models_config)

{'DecisionTreeClassifier': DecisionTreeClassifier(criterion='entropy', max_depth=20, max_features='sqrt',
                       min_samples_split=0.1, random_state=100)}


## Subgroup Metrics Computation

In [34]:
# Compute metrics for every model in models_config over multiple runs;
# SAVE_RESULTS_DIR_PATH is passed as the results directory.
multiple_run_metrics_dct = compute_metrics_multiple_runs(
    base_flow_dataset,
    config,
    models_config,
    SAVE_RESULTS_DIR_PATH,
)

Multiple runs progress:   0%|          | 0/2 [00:00<?, ?it/s]

Analyze models in one run:   0%|          | 0/1 [00:00<?, ?it/s]

Classifiers testing by bootstrap:   0%|          | 0/10 [00:00<?, ?it/s]

Analyze models in one run:   0%|          | 0/1 [00:00<?, ?it/s]

Classifiers testing by bootstrap:   0%|          | 0/10 [00:00<?, ?it/s]

Look at the first several columns in the top rows of the computed metrics:

In [35]:
# Pick the metrics of the first model in models_config and preview
# its first six columns for up to twenty rows.
sample_model_metrics_df = multiple_run_metrics_dct[list(models_config)[0]]
sample_model_metrics_df.loc[:, sample_model_metrics_df.columns[:6]].head(20)

Unnamed: 0,Metric,overall,sex_priv,sex_priv_correct,sex_priv_incorrect,sex_dis
0,Mean,0.532307,0.572984,0.582922,0.548312,0.522686
1,Std,0.065312,0.064183,0.057393,0.081042,0.065579
2,IQR,0.075384,0.07768,0.063336,0.113292,0.074842
3,Aleatoric_Uncertainty,0.873303,0.86586,0.859142,0.88254,0.875063
4,Overall_Uncertainty,0.893748,0.887829,0.875711,0.917913,0.895149
5,Jitter,0.102273,0.085589,0.052469,0.167816,0.106219
6,Per_Sample_Accuracy,0.685606,0.717822,0.958333,0.12069,0.677986
7,Label_Stability,0.862879,0.887129,0.933333,0.772414,0.857143
8,TPR,0.65,0.55,1.0,0.0,0.668182
9,TNR,0.742537,0.819672,1.0,0.0,0.719807


## Group Metrics Composition

In [36]:
# Read back the metric tables saved under SAVE_RESULTS_DIR_PATH for every
# model name present in models_config.
models_metrics_dct = read_model_metric_dfs(
    SAVE_RESULTS_DIR_PATH,
    model_names=list(models_config),
)

In [37]:
# Create the composer from the per-model metric tables read above and the
# sensitive attributes declared in the experiment config.
metrics_composer = MetricsComposer(models_metrics_dct, config.sensitive_attributes_dct)

Compute composed metrics

In [38]:
# Compose the metrics for all models; the displayed result has one row per
# metric with a column per sensitive attribute and a Model_Name column.
models_composed_metrics_df = metrics_composer.compose_metrics()

In [39]:
# Show up to the first twenty rows of the composed metrics.
models_composed_metrics_df.head(n=20)

Unnamed: 0,Metric,sex,race,sex&race,Model_Name
0,Equalized_Odds_TPR,0.152273,0.281008,0.372549,DecisionTreeClassifier
1,Equalized_Odds_FPR,0.059832,0.114977,0.116379,DecisionTreeClassifier
2,Equalized_Odds_FNR,-0.152273,-0.281008,-0.372549,DecisionTreeClassifier
3,Disparate_Impact,1.09893,1.360771,1.359477,DecisionTreeClassifier
4,Statistical_Parity_Difference,0.084091,0.267395,0.269608,DecisionTreeClassifier
5,Accuracy_Parity,0.018237,0.0564,0.089813,DecisionTreeClassifier
6,Label_Stability_Ratio,0.955201,0.967051,0.906661,DecisionTreeClassifier
7,IQR_Parity,0.011284,0.009692,0.020932,DecisionTreeClassifier
8,Std_Parity,0.008264,0.005659,0.012393,DecisionTreeClassifier
9,Std_Ratio,1.142134,1.092229,1.221141,DecisionTreeClassifier
