In [21]:
%matplotlib inline
%load_ext autoreload
%autoreload 2

The autoreload extension is already loaded. To reload it, use:
  %reload_ext autoreload


In [22]:
import os
import warnings
warnings.filterwarnings('ignore')
os.environ["PYTHONWARNINGS"] = "ignore"

In [23]:
# Make sure the working directory is the repository root, so that the relative
# paths used further down ('configs', 'results', 'data') resolve correctly.
# os.path.basename is used instead of splitting on '/' so the check also works
# where the path separator is not '/'; otherwise the name would never match and
# every re-run of this cell would climb two more directories.
cur_folder_name = os.path.basename(os.getcwd())
if cur_folder_name != "fairness-variance":
    # presumably the notebook lives two levels below the repository root;
    # verify this if the notebook is moved
    os.chdir(os.path.join(os.pardir, os.pardir))

print('Current location: ', os.getcwd())

Current location:  /home/denys_herasymuk/UCU/4course_2term/Bachelor_Thesis/Code/fairness-variance


## Import dependencies

In [24]:
import os
import pandas as pd
from datetime import datetime, timezone

from configs.models_config_for_tuning import MODELS_CONFIG
from source.custom_initializers import create_config_obj, create_models_config_from_tuned_params_df
from source.custom_classes.data_loaders import CompasWithoutSensitiveAttrsDataset
from source.metrics_computation_interfaces import compute_metrics_multiple_runs

## Configs

In [25]:
# Experiment settings are read from the YAML file under configs/.
config = create_config_obj(
    config_yaml_path=os.path.join('configs', 'experiment1_compas_config.yaml'),
)

# Tuning results to reuse; the file name is pinned to one specific tuning run.
TUNED_PARAMS_FILE_PATH = os.path.join(
    'results',
    'models_tuning',
    f'tuning_results_{config.dataset_name}_20230117__012024.csv',
)

# Each execution of this cell produces a new folder name stamped with the current UTC time.
SAVE_RESULTS_DIR_PATH = os.path.join(
    'results',
    'hypothesis_space',
    f'{config.dataset_name}_Metrics_{datetime.now(timezone.utc).strftime("%Y%m%d__%H%M%S")}',
)

## Tuned model hyper-parameters

In [26]:
# Read the CSV written by the earlier tuning run and display it.
models_tuned_params_df = pd.read_csv(filepath_or_buffer=TUNED_PARAMS_FILE_PATH)
models_tuned_params_df

Unnamed: 0,Dataset_Name,Model_Name,F1_Score,Accuracy_Score,Model_Best_Params
0,COMPAS_Without_Sensitive_Attributes,LogisticRegression,0.6785,0.6837,"{'C': 1, 'max_iter': 50, 'penalty': 'l2', 'sol..."
1,COMPAS_Without_Sensitive_Attributes,DecisionTreeClassifier,0.6952,0.6989,"{'criterion': 'gini', 'max_depth': 20, 'max_fe..."
2,COMPAS_Without_Sensitive_Attributes,RandomForestClassifier,0.6962,0.7008,"{'max_depth': 4, 'max_features': 0.6, 'min_sam..."
3,COMPAS_Without_Sensitive_Attributes,XGBClassifier,0.6971,0.7008,"{'lambda': 100, 'learning_rate': 0.1, 'max_dep..."
4,COMPAS_Without_Sensitive_Attributes,KNeighborsClassifier,0.7056,0.7112,"{'metric': 'minkowski', 'n_neighbors': 25, 'we..."
5,COMPAS_Without_Sensitive_Attributes,MLPClassifier,0.6834,0.6875,"{'activation': 'relu', 'hidden_layer_sizes': (..."


In [27]:
# Combine the tuning-time models config with the tuned parameters loaded above.
# NOTE(review): the resulting keys are a subset of the models in the tuned-params
# table; presumably only models present in MODELS_CONFIG are kept. Confirm.
experiment_models_config = create_models_config_from_tuned_params_df(
    models_tuned_params_df=models_tuned_params_df,
    models_config_for_tuning=MODELS_CONFIG,
)

In [28]:
# Show which model names ended up in the experiment config.
experiment_models_config.keys()

dict_keys(['DecisionTreeClassifier', 'RandomForestClassifier'])

## Load dataset

In [29]:
# Build the dataset object from the CSV; the path is relative, so it depends on
# the current working directory.
dataset = CompasWithoutSensitiveAttrsDataset(dataset_path='data/COMPAS.csv')
# Preview the first rows of the dataset's X_data attribute.
dataset.X_data.head()

Unnamed: 0,juv_fel_count,juv_misd_count,juv_other_count,priors_count,age_cat_25 - 45,age_cat_Greater than 45,age_cat_Less than 25,c_charge_degree_F,c_charge_degree_M
0,0.0,-2.340451,1.0,-15.010999,1,0,0,0,1
1,0.0,0.0,0.0,0.0,1,0,0,1,0
2,0.0,0.0,0.0,0.0,0,0,1,1,0
3,0.0,0.0,0.0,6.0,1,0,0,0,1
4,0.0,0.0,0.0,7.513697,1,0,0,1,0


## Run experiments

In [30]:
# Run the metrics computation for the models in experiment_models_config.
# SAVE_RESULTS_DIR_PATH is passed along; presumably results are written there (confirm in
# compute_metrics_multiple_runs). This cell is slow: the logged runs took about two minutes each.
multiple_runs_metrics_dct = compute_metrics_multiple_runs(
    dataset,
    config,
    experiment_models_config,
    SAVE_RESULTS_DIR_PATH,
    debug_mode=False,
)

Analyze models in one run:   0%|          | 0/2 [00:00<?, ?it/s]

##############################  [Model 1 / 2] Analyze DecisionTreeClassifier  ##############################
Model random_state:  101
Baseline X_train shape:  (4222, 9)
Baseline X_test shape:  (1056, 9)


2023-01-24 14:02:41 abstract_overall_variance_analyzer.py INFO    : Start classifiers testing by bootstrap







Classifiers testing by bootstrap:   0%|          | 0/100 [00:00<?, ?it/s][A
Classifiers testing by bootstrap:  14%|█▍        | 14/100 [00:00<00:00, 134.67it/s][A
Classifiers testing by bootstrap:  30%|███       | 30/100 [00:00<00:00, 146.14it/s][A
Classifiers testing by bootstrap:  48%|████▊     | 48/100 [00:00<00:00, 161.09it/s][A
Classifiers testing by bootstrap:  65%|██████▌   | 65/100 [00:00<00:00, 158.49it/s][A
Classifiers testing by bootstrap:  81%|████████  | 81/100 [00:00<00:00, 155.83it/s][A
Classifiers testing by bootstrap: 100%|██████████| 100/100 [00:00<00:00, 156.38it/s][A
2023-01-24 14:02:41 abstract_overall_variance_analyzer.py INFO    : Successfully tested classifiers by bootstrap






2023-01-24 14:02:44 abstract_overall_variance_analyzer.py INFO    : Successfully computed predict proba metrics




##############################  Stability metrics  ##############################
General Ensemble Accuracy: 0.678
Mean: 0.5266
Std: 0.073
IQR: 0.0867
Entropy: 0.0
Jitter: 0.1194
Per sample accuracy: 0.663
Label stability: 0.8361




Analyze models in one run:  50%|█████     | 1/2 [00:11<00:11, 11.40s/it]2023-01-24 14:02:52 abstract_overall_variance_analyzer.py INFO    : Start classifiers testing by bootstrap






##############################  [Model 2 / 2] Analyze RandomForestClassifier  ##############################
Model random_state:  102
Baseline X_train shape:  (4222, 9)
Baseline X_test shape:  (1056, 9)





Classifiers testing by bootstrap:   0%|          | 0/100 [00:00<?, ?it/s][A
Classifiers testing by bootstrap:   1%|          | 1/100 [00:01<01:47,  1.08s/it][A
Classifiers testing by bootstrap:   2%|▏         | 2/100 [00:02<01:46,  1.09s/it][A
Classifiers testing by bootstrap:   3%|▎         | 3/100 [00:03<01:44,  1.08s/it][A
Classifiers testing by bootstrap:   4%|▍         | 4/100 [00:04<01:45,  1.10s/it][A
Classifiers testing by bootstrap:   5%|▌         | 5/100 [00:05<01:44,  1.10s/it][A
Classifiers testing by bootstrap:   6%|▌         | 6/100 [00:06<01:42,  1.09s/it][A
Classifiers testing by bootstrap:   7%|▋         | 7/100 [00:07<01:40,  1.08s/it][A
Classifiers testing by bootstrap:   8%|▊         | 8/100 [00:08<01:39,  1.08s/it][A
Classifiers testing by bootstrap:   9%|▉         | 9/100 [00:09<01:39,  1.09s/it][A
Classifiers testing by bootstrap:  10%|█         | 10/100 [00:10<01:38,  1.10s/it][A
Classifiers testing by bootstrap:  11%|█         | 11/100 [00:11<01:36,





2023-01-24 14:04:40 abstract_overall_variance_analyzer.py INFO    : Successfully computed predict proba metrics




##############################  Stability metrics  ##############################
General Ensemble Accuracy: 0.6932
Mean: 0.5365
Std: 0.0377
IQR: 0.0501
Entropy: 0.118
Jitter: 0.0721
Per sample accuracy: 0.6818
Label stability: 0.9002




Analyze models in one run: 100%|██████████| 2/2 [02:05<00:00, 62.90s/it]








Analyze models in one run:   0%|          | 0/2 [00:00<?, ?it/s]2023-01-24 14:04:46 abstract_overall_variance_analyzer.py INFO    : Start classifiers testing by bootstrap


##############################  [Model 1 / 2] Analyze DecisionTreeClassifier  ##############################
Model random_state:  201
Baseline X_train shape:  (4222, 9)
Baseline X_test shape:  (1056, 9)





Classifiers testing by bootstrap:   0%|          | 0/100 [00:00<?, ?it/s][A
Classifiers testing by bootstrap:  18%|█▊        | 18/100 [00:00<00:00, 176.86it/s][A
Classifiers testing by bootstrap:  37%|███▋      | 37/100 [00:00<00:00, 180.47it/s][A
Classifiers testing by bootstrap:  56%|█████▌    | 56/100 [00:00<00:00, 181.22it/s][A
Classifiers testing by bootstrap: 100%|██████████| 100/100 [00:00<00:00, 194.96it/s][A
2023-01-24 14:04:47 abstract_overall_variance_analyzer.py INFO    : Successfully tested classifiers by bootstrap






2023-01-24 14:04:50 abstract_overall_variance_analyzer.py INFO    : Successfully computed predict proba metrics




##############################  Stability metrics  ##############################
General Ensemble Accuracy: 0.6723
Mean: 0.5235
Std: 0.072
IQR: 0.0834
Entropy: 0.0
Jitter: 0.1109
Per sample accuracy: 0.6663
Label stability: 0.8605




Analyze models in one run:  50%|█████     | 1/2 [00:10<00:10, 10.74s/it]2023-01-24 14:04:57 abstract_overall_variance_analyzer.py INFO    : Start classifiers testing by bootstrap






##############################  [Model 2 / 2] Analyze RandomForestClassifier  ##############################
Model random_state:  202
Baseline X_train shape:  (4222, 9)
Baseline X_test shape:  (1056, 9)





Classifiers testing by bootstrap:   0%|          | 0/100 [00:00<?, ?it/s][A
Classifiers testing by bootstrap:   1%|          | 1/100 [00:00<01:36,  1.02it/s][A
Classifiers testing by bootstrap:   2%|▏         | 2/100 [00:02<01:39,  1.01s/it][A
Classifiers testing by bootstrap:   3%|▎         | 3/100 [00:03<01:38,  1.01s/it][A
Classifiers testing by bootstrap:   4%|▍         | 4/100 [00:04<01:41,  1.05s/it][A
Classifiers testing by bootstrap:   5%|▌         | 5/100 [00:05<01:36,  1.02s/it][A
Classifiers testing by bootstrap:   6%|▌         | 6/100 [00:06<01:33,  1.01it/s][A
Classifiers testing by bootstrap:   7%|▋         | 7/100 [00:06<01:30,  1.03it/s][A
Classifiers testing by bootstrap:   8%|▊         | 8/100 [00:07<01:27,  1.05it/s][A
Classifiers testing by bootstrap:   9%|▉         | 9/100 [00:08<01:26,  1.05it/s][A
Classifiers testing by bootstrap:  10%|█         | 10/100 [00:09<01:29,  1.00it/s][A
Classifiers testing by bootstrap:  11%|█         | 11/100 [00:10<01:27,





2023-01-24 14:06:39 abstract_overall_variance_analyzer.py INFO    : Successfully computed predict proba metrics




##############################  Stability metrics  ##############################
General Ensemble Accuracy: 0.6686
Mean: 0.5239
Std: 0.0386
IQR: 0.0511
Entropy: 0.0
Jitter: 0.0541
Per sample accuracy: 0.6659
Label stability: 0.9327




Analyze models in one run: 100%|██████████| 2/2 [01:59<00:00, 59.68s/it]








