In [40]:
# !pip uninstall virny -y

In [41]:
# Install using an HTTPS link
# !pip install git+https://github.com/DataResponsibly/Virny.git@development

# Install using an SSH link
# !pip install git+ssh://git@github.com/DataResponsibly/Virny.git@development

In [42]:
%matplotlib inline
%load_ext autoreload
%autoreload 2

The autoreload extension is already loaded. To reload it, use:
  %reload_ext autoreload


In [43]:
import os
import warnings

# Export the same policy through the environment; presumably meant for Python
# subprocesses started from this kernel (e.g. parallel workers) -- confirm.
os.environ.update(PYTHONWARNINGS="ignore")
# Silence every warning category inside the current kernel.
warnings.filterwarnings(action='ignore')

## Import dependencies

In [44]:
import os
import pandas as pd
from pprint import pprint
from datetime import datetime, timezone

from sklearn.compose import ColumnTransformer
from sklearn.preprocessing import OneHotEncoder
from sklearn.preprocessing import StandardScaler

from virny.utils.custom_initializers import create_config_obj, read_model_metric_dfs, create_models_config_from_tuned_params_df
from virny.user_interfaces.metrics_computation_interfaces import compute_metrics_multiple_runs
from virny.preprocessing.basic_preprocessing import preprocess_dataset
from virny.custom_classes.metrics_visualizer import MetricsVisualizer
from virny.custom_classes.metrics_composer import MetricsComposer
from virny.utils.model_tuning_utils import tune_ML_models
from virny.datasets import CompasWithoutSensitiveAttrsDataset

from configs.constants import TEST_SET_FRACTION, EXPERIMENT_SEEDS
from configs.models_config_for_tuning import get_compas_models_params_for_tuning

## Define Input Variables

In [45]:
# The dataset split and the model tuning both use the first experiment seed.
DATASET_SPLIT_SEED = MODELS_TUNING_SEED = EXPERIMENT_SEEDS[0]

In [46]:
# Project root: two levels above the current working directory
# (assumes the notebook is launched from its own folder -- confirm).
ROOT_DIR = os.path.join(os.getcwd(), os.pardir, os.pardir)
# ROOT_DIR = os.getcwd()
EXPERIMENT_NAME = 'error_analysis'
DB_COLLECTION_NAME = 'exp_' + EXPERIMENT_NAME
# Directory that receives the computed metrics of this experiment.
SAVE_RESULTS_DIR_PATH = os.path.join(ROOT_DIR, 'results', EXPERIMENT_NAME)

# Load the experiment configuration from the YAML file stored next to the notebooks.
config_yaml_path = os.path.join(ROOT_DIR, 'notebooks', EXPERIMENT_NAME, 'compas_config.yaml')
config = create_config_obj(config_yaml_path=config_yaml_path)

In [47]:
# Build the per-model tuning parameters for COMPAS, passing the tuning seed
# (presumably used as the models' random_state -- confirm in configs.models_config_for_tuning).
models_params_for_tuning = get_compas_models_params_for_tuning(MODELS_TUNING_SEED)

## Initialize custom objects

In [48]:
# Load COMPAS without sensitive attributes and preview its first five feature columns.
data_loader = CompasWithoutSensitiveAttrsDataset()
preview_columns = data_loader.X_data.columns[:5]
data_loader.X_data[preview_columns].head()

Unnamed: 0,juv_fel_count,juv_misd_count,juv_other_count,priors_count,age_cat_25 - 45
0,0.0,-2.340451,1.0,-15.010999,1
1,0.0,0.0,0.0,0.0,1
2,0.0,0.0,0.0,0.0,0
3,0.0,0.0,0.0,6.0,1
4,0.0,0.0,0.0,7.513697,1


In [49]:
# scikit-learn 1.2 renamed OneHotEncoder's `sparse` argument to `sparse_output`
# and 1.4 removed the old name, so use whichever keyword this installation accepts.
dense_output_kwarg = 'sparse_output' if 'sparse_output' in OneHotEncoder().get_params() else 'sparse'
categorical_encoder = OneHotEncoder(handle_unknown='ignore', **{dense_output_kwarg: False})

# One-hot encode the categorical columns (dense output, unknown categories ignored)
# and standardize the numerical columns.
column_transformer = ColumnTransformer(transformers=[
    ('categorical_features', categorical_encoder, data_loader.categorical_columns),
    ('numerical_features', StandardScaler(), data_loader.numerical_columns),
])

In [50]:
# Preprocess the loaded data with the column transformer; the test-set fraction and
# the split seed are passed in (presumably for a train/test split -- confirm in virny).
base_flow_dataset = preprocess_dataset(data_loader, column_transformer, TEST_SET_FRACTION, DATASET_SPLIT_SEED)

### Tune models and create a models config for metrics computation

In [51]:
# Tune every model listed in models_params_for_tuning on base_flow_dataset with n_folds=3.
# Returns the tuning summary frame and a models config; the frame is displayed below.
tuned_params_df, models_config = tune_ML_models(models_params_for_tuning, base_flow_dataset, config.dataset_name, n_folds=3)
tuned_params_df

2023/06/11, 02:16:26: Tuning DecisionTreeClassifier...
Fitting 3 folds for each of 4 candidates, totalling 12 fits
2023/06/11, 02:16:27: Tuning for DecisionTreeClassifier is finished [F1 score = 0.6472851634267528, Accuracy = 0.6521464646464646]

2023/06/11, 02:16:27: Tuning LogisticRegression...
Fitting 3 folds for each of 8 candidates, totalling 24 fits
2023/06/11, 02:16:28: Tuning for LogisticRegression is finished [F1 score = 0.6569560772050841, Accuracy = 0.6628787878787878]

2023/06/11, 02:16:28: Tuning RandomForestClassifier...
Fitting 3 folds for each of 4 candidates, totalling 12 fits
2023/06/11, 02:16:29: Tuning for RandomForestClassifier is finished [F1 score = 0.6644226216232815, Accuracy = 0.6691919191919191]

2023/06/11, 02:16:29: Tuning XGBClassifier...
Fitting 3 folds for each of 4 candidates, totalling 12 fits
2023/06/11, 02:16:33: Tuning for XGBClassifier is finished [F1 score = 0.6645633664344377, Accuracy = 0.6691919191919191]



Unnamed: 0,Dataset_Name,Model_Name,F1_Score,Accuracy_Score,Model_Best_Params
0,COMPAS_Without_Sensitive_Attributes,DecisionTreeClassifier,0.647285,0.652146,"{'criterion': 'entropy', 'max_depth': 20, 'max..."
1,COMPAS_Without_Sensitive_Attributes,LogisticRegression,0.656956,0.662879,"{'C': 1, 'max_iter': 250, 'penalty': 'l2', 'so..."
2,COMPAS_Without_Sensitive_Attributes,RandomForestClassifier,0.664423,0.669192,"{'max_depth': 10, 'max_features': 0.6, 'min_sa..."
3,COMPAS_Without_Sensitive_Attributes,XGBClassifier,0.664563,0.669192,"{'lambda': 100, 'learning_rate': 0.1, 'max_dep..."


In [52]:
# Persist the tuning results under a UTC-timestamped file name (second resolution),
# so results from separate runs end up in separate files.
now = datetime.now(timezone.utc)
date_time_str = now.strftime("%Y%m%d__%H%M%S")
tuned_df_path = os.path.join(ROOT_DIR, 'results', 'models_tuning', f'tuning_results_{config.dataset_name}_{date_time_str}.csv')
# `to_csv` does not create missing parent directories, so make sure the target folder exists first.
os.makedirs(os.path.dirname(tuned_df_path), exist_ok=True)
tuned_params_df.to_csv(tuned_df_path, sep=",", columns=tuned_params_df.columns, float_format="%.4f", index=False)

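Re-create `models_config` from the saved `tuned_params_df` CSV file, so that the models used for metrics computation are built from exactly the tuned parameters persisted on disk.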

In [53]:
# Rebuild models_config from the CSV stored at tuned_df_path (this replaces the
# in-memory config returned by tune_ML_models) and pretty-print the resulting models.
models_config = create_models_config_from_tuned_params_df(models_params_for_tuning, tuned_df_path)
pprint(models_config)

{'DecisionTreeClassifier': DecisionTreeClassifier(criterion='entropy', max_depth=20, max_features='sqrt',
                       min_samples_split=0.1, random_state=100),
 'LogisticRegression': LogisticRegression(C=1, max_iter=250, random_state=100, solver='newton-cg'),
 'RandomForestClassifier': RandomForestClassifier(max_depth=10, max_features=0.6, random_state=100),
 'XGBClassifier': XGBClassifier(base_score=None, booster=None, callbacks=None,
              colsample_bylevel=None, colsample_bynode=None,
              colsample_bytree=None, early_stopping_rounds=None,
              enable_categorical=False, eval_metric=None, feature_types=None,
              gamma=None, gpu_id=None, grow_policy=None, importance_type=None,
              interaction_constraints=None, lambda=100, learning_rate=0.1,
              max_bin=None, max_cat_threshold=None, max_cat_to_onehot=None,
              max_delta_step=None, max_depth=7, max_leaves=None,
              min_child_weight=None, missing=nan, 

## Subgroup Metrics Computation

In [54]:
# Compute the metrics for every model in models_config over multiple runs.
# The returned dict is indexed by model name further below; results are presumably
# also written under SAVE_RESULTS_DIR_PATH, since they are read back from there later -- confirm.
multiple_run_metrics_dct = compute_metrics_multiple_runs(base_flow_dataset, config, models_config, SAVE_RESULTS_DIR_PATH)

Multiple runs progress:   0%|          | 0/2 [00:00<?, ?it/s]

Analyze models in one run:   0%|          | 0/4 [00:00<?, ?it/s]

Classifiers testing by bootstrap:   0%|          | 0/10 [00:00<?, ?it/s]

Classifiers testing by bootstrap:   0%|          | 0/10 [00:00<?, ?it/s]

Classifiers testing by bootstrap:   0%|          | 0/10 [00:00<?, ?it/s]

Classifiers testing by bootstrap:   0%|          | 0/10 [00:00<?, ?it/s]

Analyze models in one run:   0%|          | 0/4 [00:00<?, ?it/s]

Classifiers testing by bootstrap:   0%|          | 0/10 [00:00<?, ?it/s]

Classifiers testing by bootstrap:   0%|          | 0/10 [00:00<?, ?it/s]

Classifiers testing by bootstrap:   0%|          | 0/10 [00:00<?, ?it/s]

Classifiers testing by bootstrap:   0%|          | 0/10 [00:00<?, ?it/s]

Look at the first several columns in the top rows of the metrics computed for the first model

In [55]:
# Take the metrics frame of the first model in models_config and preview
# its first six columns (up to 20 rows).
first_model_name = list(models_config)[0]
sample_model_metrics_df = multiple_run_metrics_dct[first_model_name]
leading_columns = sample_model_metrics_df.columns[:6]
sample_model_metrics_df[leading_columns].head(20)

Unnamed: 0,Metric,overall,sex_priv,sex_priv_correct,sex_priv_incorrect,sex_dis
0,Mean,0.526494,0.567177,0.57839,0.541859,0.516871
1,Std,0.063744,0.06035,0.048851,0.086313,0.064546
2,IQR,0.074362,0.06899,0.057801,0.094254,0.075633
3,Aleatoric_Uncertainty,0.883299,0.881098,0.87274,0.89997,0.883819
4,Overall_Uncertainty,0.901517,0.899484,0.883848,0.934792,0.901998
5,Jitter,0.171759,0.163256,0.115238,0.271685,0.17377
6,Per_Sample_Accuracy,0.677462,0.70099,0.93,0.183871,0.671897
7,Label_Stability,0.777652,0.794059,0.86,0.645161,0.77377
8,TPR,0.653846,0.525,1.0,0.0,0.677273
9,TNR,0.735075,0.803279,1.0,0.0,0.714976


## Group Metrics Composition

In [56]:
# Read the saved metric frames back from the results directory, one per tuned model.
tuned_model_names = list(models_config)
models_metrics_dct = read_model_metric_dfs(SAVE_RESULTS_DIR_PATH, model_names=tuned_model_names)

In [57]:
# Set up the composer from the per-model metric frames and the sensitive attributes declared in the config.
metrics_composer = MetricsComposer(models_metrics_dct, config.sensitive_attributes_dct)

Compute composed metrics

In [58]:
# Compose the group metrics into a single frame (one row per metric and model, as shown in the preview below).
models_composed_metrics_df = metrics_composer.compose_metrics()

In [59]:
# Preview the first 20 rows of the composed metrics.
models_composed_metrics_df.head(20)

Unnamed: 0,Metric,sex,race,sex&race,Model_Name
0,Equalized_Odds_TPR,0.152273,0.281008,0.372549,DecisionTreeClassifier
1,Equalized_Odds_FPR,0.059832,0.114977,0.116379,DecisionTreeClassifier
2,Equalized_Odds_FNR,-0.152273,-0.281008,-0.372549,DecisionTreeClassifier
3,Disparate_Impact,1.09893,1.360771,1.359477,DecisionTreeClassifier
4,Statistical_Parity_Difference,0.084091,0.267395,0.269608,DecisionTreeClassifier
5,Accuracy_Parity,0.018237,0.0564,0.089813,DecisionTreeClassifier
6,Label_Stability_Ratio,0.955201,0.967051,0.906661,DecisionTreeClassifier
7,IQR_Parity,0.011284,0.009692,0.020932,DecisionTreeClassifier
8,Std_Parity,0.008264,0.005659,0.012393,DecisionTreeClassifier
9,Std_Ratio,1.142134,1.092229,1.221141,DecisionTreeClassifier
