In [1]:
# Render matplotlib figures inline, directly below the cell that produces them.
%matplotlib inline
# Load the autoreload extension and use mode 2: all imported modules are reloaded
# before each cell executes, so edits to the project's .py files take effect
# without restarting the kernel.
%load_ext autoreload
%autoreload 2

In [2]:
import os
import warnings
# Export the setting through the environment as well; presumably this is meant for
# interpreter processes started from this kernel, which read PYTHONWARNINGS at startup.
os.environ["PYTHONWARNINGS"] = "ignore"

# Ignore every warning category in the current kernel.
# NOTE(review): this also hides solver/convergence warnings from the estimators below.
warnings.simplefilter('ignore')

In [3]:
# Later cells use paths relative to the working directory ('configs', 'data/COMPAS.csv',
# 'results'), so the kernel has to run from the "fairness-variance" folder.
# Presumably the notebook itself sits exactly one level below that folder, which is why a
# single step up is enough -- confirm if the notebook is ever moved.
#
# os.path.basename is used instead of splitting on '/' so that the folder name is also
# extracted correctly on platforms whose path separator is not '/'.
cur_folder_name = os.path.basename(os.getcwd())
if cur_folder_name != "fairness-variance":
    os.chdir("..")

print('Current location: ', os.getcwd())

Current location:  /home/denys_herasymuk/UCU/4course_2term/Bachelor_Thesis/Code/fairness-variance


## Import dependencies

In [4]:
import os
import pandas as pd
from datetime import datetime, timezone

from sklearn.linear_model import LogisticRegression
from sklearn.tree import DecisionTreeClassifier

from source.custom_initializers import create_config_obj
from source.custom_classes.data_loaders import CompasWithoutSensitiveAttrsDataset
from source.metrics_computation_interfaces import run_metrics_computation, compute_model_metrics

## Configs

In [5]:
# Build the experiment configuration object from the COMPAS experiment YAML file.
config_yaml_path = os.path.join('configs', 'experiment1_compas_config.yaml')
config = create_config_obj(config_yaml_path=config_yaml_path)

# Every run gets its own results folder: results/hypothesis_space/<dataset>_Metrics_<UTC timestamp>.
run_timestamp = datetime.now(timezone.utc).strftime("%Y%m%d__%H%M%S")
SAVE_RESULTS_DIR_PATH = os.path.join('results', 'hypothesis_space',
                                     f'{config.dataset_name}_Metrics_{run_timestamp}')

In [6]:
# Base estimators with fixed hyperparameters; they are analysed in the cells below.
tree_clf = DecisionTreeClassifier(
    criterion='gini',
    max_depth=20,
    max_features=0.6,
    min_samples_split=0.1,
)
logreg_clf = LogisticRegression(
    C=1,
    max_iter=50,
    penalty='l2',
    solver='newton-cg',
)

# Maps a model name to the estimator instance; the key is also passed on as
# `base_model_name` when a single model is analysed.
models_config = {
    'DecisionTreeClassifier': tree_clf,
    'LogisticRegression': logreg_clf,
}

## Load dataset

In [7]:
# Path is relative to the working directory set at the top of the notebook.
compas_csv_path = 'data/COMPAS.csv'
dataset = CompasWithoutSensitiveAttrsDataset(dataset_path=compas_csv_path)

# Preview the first rows of the feature frame.
dataset.X_data.head()

Unnamed: 0,juv_fel_count,juv_misd_count,juv_other_count,priors_count,age_cat_25 - 45,age_cat_Greater than 45,age_cat_Less than 25,c_charge_degree_F,c_charge_degree_M
0,0.0,-2.340451,1.0,-15.010999,1,0,0,0,1
1,0.0,0.0,0.0,0.0,1,0,0,1,0
2,0.0,0.0,0.0,0.0,0,0,1,1,0
3,0.0,0.0,0.0,6.0,1,0,0,0,1
4,0.0,0.0,0.0,7.513697,1,0,0,1,0


## Get metrics for a single base model by calling `compute_model_metrics` with explicit input arguments

In [8]:
# Analyse a single estimator picked from models_config by name.
model_name = 'DecisionTreeClassifier'
base_model = models_config[model_name]

metrics_df = compute_model_metrics(
    base_model,
    config.n_estimators,
    dataset,
    config.test_set_fraction,
    config.bootstrap_fraction,
    config.sensitive_attributes_dct,
    model_seed=101,
    dataset_name=config.dataset_name,
    base_model_name=model_name,
    save_results=True,  # results go under SAVE_RESULTS_DIR_PATH
    save_results_dir_path=SAVE_RESULTS_DIR_PATH,
    debug_mode=False,
)

print('Subgroups statistical bias and variance metrics: ')
# Keep the frame as the last expression so the notebook renders it as a table.
metrics_df

Model random_state:  101
Baseline X_train shape:  (4222, 9)
Baseline X_test shape:  (1056, 9)




2023-01-27 00:50:32 abstract_overall_variance_analyzer.py INFO    : Start classifiers testing by bootstrap
Classifiers testing by bootstrap: 100%|[34m██████████[0m| 100/100 [00:00<00:00, 176.21it/s]






2023-01-27 00:50:32 abstract_overall_variance_analyzer.py INFO    : Successfully tested classifiers by bootstrap
2023-01-27 00:50:36 abstract_overall_variance_analyzer.py INFO    : Successfully computed predict proba metrics


Subgroups statistical bias and variance metrics: 


Unnamed: 0,Metric,overall,sex_priv,sex_dis,race_priv,race_dis,sex&race_priv,sex&race_dis,Model_Seed
0,General_Ensemble_Accuracy,0.679924,0.693467,0.676779,0.669789,0.686804,0.659091,0.679537,101
1,Mean,0.525578,0.56169,0.517193,0.590746,0.481339,0.589092,0.468776,101
2,Std,0.071635,0.078384,0.070068,0.069772,0.0729,0.088442,0.073433,101
3,IQR,0.089278,0.096598,0.087578,0.090645,0.08835,0.113437,0.089443,101
4,Entropy,0.0,0.216088,0.0,0.0,0.207275,0.219363,0.205943,101
5,Jitter,0.122677,0.139908,0.118676,0.109246,0.131795,0.141933,0.130401,101
6,Per_Sample_Accuracy,0.662689,0.684724,0.657573,0.65918,0.665072,0.649773,0.654923,101
7,Label_Stability,0.830152,0.798191,0.837573,0.843794,0.82089,0.792727,0.824826,101
8,TPR,0.622177,0.557143,0.633094,0.459119,0.70122,0.44,0.713781,101
9,TNR,0.72935,0.767442,0.718182,0.794776,0.671096,0.746032,0.638298,101


## Get metrics for several models in one run by calling `run_metrics_computation` with explicit input arguments

In [9]:
# Analyse every estimator in models_config in a single call.
# NOTE(review): the logged output reports random_state 201 and 202 for the two models,
# so model_seed appears to be offset per model -- confirm in run_metrics_computation.
models_metrics_dct = run_metrics_computation(
    dataset,
    config.test_set_fraction,
    config.bootstrap_fraction,
    config.dataset_name,
    models_config,
    config.n_estimators,
    config.sensitive_attributes_dct,
    model_seed=200,
    save_results_dir_path=SAVE_RESULTS_DIR_PATH,
    save_results=True,
    debug_mode=False,
)

Analyze models in one run:   0%|[31m          [0m| 0/2 [00:00<?, ?it/s]

##############################  [Model 1 / 2] Analyze DecisionTreeClassifier  ##############################
Model random_state:  201
Baseline X_train shape:  (4222, 9)
Baseline X_test shape:  (1056, 9)




2023-01-27 00:50:44 abstract_overall_variance_analyzer.py INFO    : Start classifiers testing by bootstrap

Classifiers testing by bootstrap: 100%|[34m██████████[0m| 100/100 [00:00<00:00, 205.15it/s]






2023-01-27 00:50:44 abstract_overall_variance_analyzer.py INFO    : Successfully tested classifiers by bootstrap
2023-01-27 00:50:47 abstract_overall_variance_analyzer.py INFO    : Successfully computed predict proba metrics
Analyze models in one run:  50%|[31m█████     [0m| 1/2 [00:11<00:11, 11.13s/it]





##############################  [Model 2 / 2] Analyze LogisticRegression  ##############################
Model random_state:  202
Baseline X_train shape:  (4222, 9)
Baseline X_test shape:  (1056, 9)




2023-01-27 00:50:55 abstract_overall_variance_analyzer.py INFO    : Start classifiers testing by bootstrap

Classifiers testing by bootstrap: 100%|[34m██████████[0m| 100/100 [00:05<00:00, 19.05it/s]






2023-01-27 00:51:00 abstract_overall_variance_analyzer.py INFO    : Successfully tested classifiers by bootstrap
2023-01-27 00:51:03 abstract_overall_variance_analyzer.py INFO    : Successfully computed predict proba metrics
Analyze models in one run: 100%|[31m██████████[0m| 2/2 [00:27<00:00, 13.55s/it]








