In [27]:
%matplotlib inline
%load_ext autoreload
%autoreload 2

The autoreload extension is already loaded. To reload it, use:
  %reload_ext autoreload


In [28]:
import os
import warnings
warnings.filterwarnings('ignore')
os.environ["PYTHONWARNINGS"] = "ignore"

In [29]:
# Relative paths used later in this notebook (configs/, data/, results/) are
# resolved against the working directory, so step up one level unless we are
# already inside the "fairness-variance" folder.
# os.path.basename is used instead of splitting on '/' so the folder-name check
# does not depend on the platform's path separator.
cur_folder_name = os.path.basename(os.getcwd())
if cur_folder_name != "fairness-variance":
    os.chdir("..")

print('Current location: ', os.getcwd())

Current location:  /home/denys_herasymuk/UCU/4course_2term/Bachelor_Thesis/Code/fairness-variance


## Import dependencies

In [30]:
import os
import pandas as pd
from datetime import datetime, timezone

from sklearn.linear_model import LogisticRegression
from sklearn.tree import DecisionTreeClassifier

from source.custom_initializers import create_config_obj
from source.custom_classes.data_loaders import CompasWithoutSensitiveAttrsDataset
from source.metrics_computation_interfaces import run_metrics_computation_with_config, compute_model_metrics_with_config

## Configs

In [31]:
# Build the experiment config object from the YAML file under configs/.
config = create_config_obj(
    config_yaml_path=os.path.join('configs', 'experiment1_compas_config.yaml'),
)
# Results directory for this run: results/hypothesis_space/<dataset>_Metrics_<UTC timestamp>.
SAVE_RESULTS_DIR_PATH = os.path.join(
    'results',
    'hypothesis_space',
    f'{config.dataset_name}_Metrics_{datetime.now(timezone.utc).strftime("%Y%m%d__%H%M%S")}',
)

In [32]:
# Estimators to analyse, keyed by model name; the keys are used below to look
# up a single model and are passed along with the estimators to the metrics interfaces.
models_config = {
    'DecisionTreeClassifier': DecisionTreeClassifier(
        criterion='gini',
        max_depth=20,
        max_features=0.6,
        min_samples_split=0.1,
    ),
    'LogisticRegression': LogisticRegression(
        C=1,
        max_iter=50,
        penalty='l2',
        solver='newton-cg',
    ),
}

## Load dataset

In [33]:
# Load the COMPAS data through the project loader. The path is built with
# os.path.join, consistent with the config and results paths in this notebook.
dataset = CompasWithoutSensitiveAttrsDataset(dataset_path=os.path.join('data', 'COMPAS.csv'))
# Preview the first rows of the feature frame.
dataset.X_data.head()

Unnamed: 0,juv_fel_count,juv_misd_count,juv_other_count,priors_count,age_cat_25 - 45,age_cat_Greater than 45,age_cat_Less than 25,c_charge_degree_F,c_charge_degree_M
0,0.0,-2.340451,1.0,-15.010999,1,0,0,0,1
1,0.0,0.0,0.0,0.0,1,0,0,1,0
2,0.0,0.0,0.0,0.0,0,0,1,1,0
3,0.0,0.0,0.0,6.0,1,0,0,0,1
4,0.0,0.0,0.0,7.513697,1,0,0,1,0


## Get metrics for a base model with the `compute_model_metrics_with_config` interface, passing input arguments as a config

In [34]:
# Compute metrics for a single base model picked from models_config by name.
model_name = 'DecisionTreeClassifier'
metrics_df = compute_model_metrics_with_config(
    models_config[model_name],
    model_name,
    dataset,
    config,
    SAVE_RESULTS_DIR_PATH,
    save_results=True,
    debug_mode=True,
)
print('Subgroups statistical bias and variance metrics: ')
# Last expression of the cell: rendered as the cell's rich output.
metrics_df

Model random_state:  623
Baseline X_train shape:  (4222, 9)
Baseline X_test shape:  (1056, 9)

Protected groups splits:
sex_priv (214, 11)
sex_dis (842, 11)
race_priv (420, 11)
race_dis (636, 11)
sex&race_priv (93, 11)
sex&race_dis (515, 11)


Top rows of processed X train + validation set: 


Unnamed: 0,juv_fel_count,age_cat_25 - 45_1,juv_misd_count,juv_other_count,age_cat_Greater than 45_1,age_cat_Greater than 45_0,age_cat_Less than 25_0,c_charge_degree_F_0,c_charge_degree_F_1,age_cat_25 - 45_0,c_charge_degree_M_1,c_charge_degree_M_0,priors_count,age_cat_Less than 25_1
3600,-0.102581,1,-0.13003,-0.149275,0,1,1,0,1,0,0,1,-0.274707,0
3043,-0.102581,0,-0.13003,-0.149275,0,1,0,1,0,1,1,0,-0.660459,1
418,-0.102581,1,-0.13003,-0.149275,0,1,1,1,0,0,1,0,-0.467583,0
3874,-0.102581,1,-0.13003,-0.149275,0,1,1,0,1,0,0,1,-0.274707,0
442,-0.102581,0,-0.13003,-0.149275,0,1,0,1,0,1,1,0,-0.660459,1
4812,-0.102581,1,-0.13003,-0.149275,0,1,1,1,0,0,1,0,-0.660459,0
4487,-0.102581,1,-1.57448,-0.149275,0,1,1,1,0,0,1,0,-0.467583,0
4968,-0.102581,0,-0.13003,-0.149275,1,0,1,0,1,1,0,1,-0.660459,0
4394,-0.102581,0,-0.13003,-0.149275,1,0,1,0,1,1,0,1,-0.081831,0
3122,-0.102581,1,-0.13003,-0.149275,0,1,1,1,0,0,1,0,-0.660459,0






2023-01-27 00:30:40 abstract_overall_variance_analyzer.py INFO    : Start classifiers testing by bootstrap
Classifiers testing by bootstrap: 100%|[34m██████████[0m| 100/100 [00:00<00:00, 172.04it/s]






2023-01-27 00:30:41 abstract_overall_variance_analyzer.py INFO    : Successfully tested classifiers by bootstrap
2023-01-27 00:30:45 abstract_overall_variance_analyzer.py INFO    : Successfully computed predict proba metrics


Subgroups statistical bias and variance metrics: 


Unnamed: 0,Metric,overall,sex_priv,sex_dis,race_priv,race_dis,sex&race_priv,sex&race_dis,Model_Seed
0,General_Ensemble_Accuracy,0.667614,0.696262,0.660333,0.647619,0.680818,0.655914,0.669903,623
1,Mean,0.524041,0.558159,0.51537,0.577132,0.488981,0.594027,0.479205,623
2,Std,0.074339,0.075245,0.074109,0.069531,0.077515,0.076792,0.078327,623
3,IQR,0.0878,0.082206,0.089221,0.079059,0.093572,0.0786,0.095591,623
4,Entropy,0.192341,0.180935,0.19524,0.161033,0.213016,0.171565,0.218861,623
5,Jitter,0.120997,0.111341,0.123452,0.101976,0.133559,0.105502,0.137724,623
6,Per_Sample_Accuracy,0.656477,0.69528,0.646615,0.645976,0.663412,0.653763,0.648427,623
7,Label_Stability,0.837727,0.851682,0.834181,0.861095,0.822296,0.860645,0.81701,623
8,TPR,0.631356,0.618421,0.633838,0.47205,0.713826,0.472222,0.708487,623
9,TNR,0.696918,0.73913,0.683857,0.756757,0.649231,0.77193,0.627049,623


## Get metrics for a list of models with the `run_metrics_computation_with_config` interface, passing input arguments as a config

In [35]:
# Run the metrics computation for every model in models_config in one call.
models_metrics_dct = run_metrics_computation_with_config(
    dataset,
    config,
    models_config,
    SAVE_RESULTS_DIR_PATH,
    debug_mode=True,
)

Analyze models in one run:   0%|[31m          [0m| 0/2 [00:00<?, ?it/s]

##############################  [Model 1 / 2] Analyze DecisionTreeClassifier  ##############################
Model random_state:  491
Baseline X_train shape:  (4222, 9)
Baseline X_test shape:  (1056, 9)

Protected groups splits:
sex_priv (194, 11)
sex_dis (862, 11)
race_priv (394, 11)
race_dis (662, 11)
sex&race_priv (98, 11)
sex&race_dis (566, 11)


Top rows of processed X train + validation set: 


Unnamed: 0,juv_fel_count,age_cat_25 - 45_1,juv_misd_count,juv_other_count,age_cat_Greater than 45_1,age_cat_Greater than 45_0,age_cat_Less than 25_0,c_charge_degree_F_0,c_charge_degree_F_1,age_cat_25 - 45_0,c_charge_degree_M_1,c_charge_degree_M_0,priors_count,age_cat_Less than 25_1
2244,-0.093219,1,-0.130521,-0.15406,0,1,1,0,1,0,0,1,-0.083401,0
2266,-0.093219,1,-0.130521,-0.15406,0,1,1,0,1,0,0,1,0.862647,0
78,-0.093219,1,-0.130521,-0.15406,0,1,1,0,1,0,0,1,-0.651029,0
3323,-0.093219,0,-0.130521,-0.15406,1,0,1,0,1,1,0,1,-0.651029,0
1127,-4.406383,1,-0.130521,3.785338,0,1,1,0,1,0,0,1,-4.495524,0
4793,-0.093219,1,-0.130521,-0.15406,0,1,1,0,1,0,0,1,0.862647,0
3494,-0.093219,1,-0.130521,-0.15406,0,1,1,1,0,0,1,0,-0.083401,0
1289,-0.250909,1,1.104331,3.785338,0,1,1,0,1,0,0,1,2.085418,0
1530,-0.093219,1,-0.130521,-0.15406,0,1,1,0,1,0,0,1,0.295018,0
1636,-0.093219,0,-0.130521,-0.15406,1,0,1,0,1,1,0,1,0.314656,0






2023-01-27 00:30:55 abstract_overall_variance_analyzer.py INFO    : Start classifiers testing by bootstrap

Classifiers testing by bootstrap: 100%|[34m██████████[0m| 100/100 [00:00<00:00, 159.14it/s]






2023-01-27 00:30:55 abstract_overall_variance_analyzer.py INFO    : Successfully tested classifiers by bootstrap
2023-01-27 00:31:00 abstract_overall_variance_analyzer.py INFO    : Successfully computed predict proba metrics



[DecisionTreeClassifier] Metrics matrix:


Unnamed: 0,Metric,overall,sex_priv,sex_dis,race_priv,race_dis,sex&race_priv,sex&race_dis,Model_Seed,Model_Name
0,General_Ensemble_Accuracy,0.703598,0.670103,0.711137,0.677665,0.719033,0.642857,0.722615,491,DecisionTreeClassifier
1,Mean,0.521465,0.567385,0.511131,0.582361,0.485222,0.593591,0.475824,491,DecisionTreeClassifier
2,Std,0.073755,0.080002,0.072348,0.068526,0.076867,0.081239,0.076549,491,DecisionTreeClassifier
3,IQR,0.0808,0.08844,0.079081,0.076076,0.083612,0.083395,0.08192,491,DecisionTreeClassifier
4,Entropy,0.219793,0.233656,0.216673,0.189168,0.238021,0.206719,0.234097,491,DecisionTreeClassifier
5,Jitter,0.13963,0.148898,0.137544,0.120191,0.151199,0.129994,0.148316,491,DecisionTreeClassifier
6,Per_Sample_Accuracy,0.68142,0.68067,0.681589,0.677589,0.683701,0.690612,0.685936,491,DecisionTreeClassifier
7,Label_Stability,0.807689,0.795155,0.81051,0.83264,0.79284,0.826939,0.797951,491,DecisionTreeClassifier
8,TPR,0.679359,0.523077,0.702765,0.52349,0.745714,0.433333,0.761905,491,DecisionTreeClassifier
9,TNR,0.725314,0.744186,0.719626,0.771429,0.689103,0.735294,0.673307,491,DecisionTreeClassifier


Analyze models in one run:  50%|[31m█████     [0m| 1/2 [00:15<00:15, 15.99s/it]





##############################  [Model 2 / 2] Analyze LogisticRegression  ##############################
Model random_state:  492
Baseline X_train shape:  (4222, 9)
Baseline X_test shape:  (1056, 9)

Protected groups splits:
sex_priv (223, 11)
sex_dis (833, 11)
race_priv (402, 11)
race_dis (654, 11)
sex&race_priv (107, 11)
sex&race_dis (538, 11)


Top rows of processed X train + validation set: 


Unnamed: 0,juv_fel_count,age_cat_25 - 45_1,juv_misd_count,juv_other_count,age_cat_Greater than 45_1,age_cat_Greater than 45_0,age_cat_Less than 25_0,c_charge_degree_F_0,c_charge_degree_F_1,age_cat_25 - 45_0,c_charge_degree_M_1,c_charge_degree_M_0,priors_count,age_cat_Less than 25_1
658,-0.089367,1,-0.125836,-0.146782,0,1,1,0,1,0,0,1,-0.648738,0
4149,-0.089367,0,-0.125836,-0.146782,0,1,0,0,1,1,0,1,-0.270758,1
2851,-0.089367,1,-0.125836,-0.146782,0,1,1,0,1,0,0,1,-0.459748,0
1331,-0.089367,1,-0.125836,-0.146782,0,1,1,0,1,0,0,1,-0.648738,0
3069,-0.089367,1,-0.125836,-0.146782,0,1,1,0,1,0,0,1,-0.270758,0
4670,-0.089367,1,-0.125836,-0.146782,0,1,1,0,1,0,0,1,-0.270758,0
616,-0.089367,0,-0.125836,-0.146782,0,1,0,0,1,1,0,1,-0.270758,1
4413,-0.089367,0,-0.125836,-0.146782,0,1,0,0,1,1,0,1,-0.270758,1
1244,-0.089367,0,-0.125836,-0.146782,1,0,1,0,1,1,0,1,-0.606756,0
947,-0.089367,0,-0.125836,-0.146782,0,1,0,0,1,1,0,1,0.863181,1






2023-01-27 00:31:11 abstract_overall_variance_analyzer.py INFO    : Start classifiers testing by bootstrap

Classifiers testing by bootstrap: 100%|[34m██████████[0m| 100/100 [00:05<00:00, 19.17it/s]






2023-01-27 00:31:16 abstract_overall_variance_analyzer.py INFO    : Successfully tested classifiers by bootstrap
2023-01-27 00:31:19 abstract_overall_variance_analyzer.py INFO    : Successfully computed predict proba metrics



[LogisticRegression] Metrics matrix:


Unnamed: 0,Metric,overall,sex_priv,sex_dis,race_priv,race_dis,sex&race_priv,sex&race_dis,Model_Seed,Model_Name
0,General_Ensemble_Accuracy,0.679924,0.659193,0.685474,0.671642,0.685015,0.672897,0.693309,492,LogisticRegression
1,Mean,0.520122,0.565524,0.507968,0.581083,0.482651,0.606199,0.472871,492,LogisticRegression
2,Std,0.02152,0.019211,0.022138,0.01999,0.022461,0.019146,0.023148,492,LogisticRegression
3,IQR,0.027995,0.024942,0.028812,0.026028,0.029203,0.024786,0.030091,492,LogisticRegression
4,Entropy,0.079387,0.0,0.080575,0.077811,0.0,0.0,0.0,492,LogisticRegression
5,Jitter,0.049364,0.045588,0.050375,0.049021,0.049576,0.040521,0.049428,492,LogisticRegression
6,Per_Sample_Accuracy,0.674536,0.652108,0.68054,0.664229,0.680872,0.663364,0.689312,492,LogisticRegression
7,Label_Stability,0.935625,0.942422,0.933806,0.934527,0.9363,0.946168,0.935725,492,LogisticRegression
8,TPR,0.63286,0.455696,0.666667,0.44,0.717201,0.323529,0.741611,492,LogisticRegression
9,TNR,0.721137,0.770833,0.704057,0.809524,0.649518,0.835616,0.633333,492,LogisticRegression


Analyze models in one run: 100%|[31m██████████[0m| 2/2 [00:34<00:00, 17.23s/it]








