In [20]:
%matplotlib inline
%load_ext autoreload
%autoreload 2

The autoreload extension is already loaded. To reload it, use:
  %reload_ext autoreload


In [21]:
import os
import warnings
warnings.filterwarnings('ignore')
os.environ["PYTHONWARNINGS"] = "ignore"

In [22]:
cur_folder_name = os.getcwd().split('/')[-1]
if cur_folder_name != "fairness-variance":
    os.chdir("../..")

print('Current location: ', os.getcwd())

Current location:  /home/denys_herasymuk/UCU/4course_2term/Bachelor_Thesis/Code/fairness-variance


## Import dependencies

In [23]:
import os
import pandas as pd
from datetime import datetime, timezone

from configs.models_config_for_tuning import MODELS_CONFIG
from source.custom_initializers import create_config_obj, create_models_config_from_tuned_params_df
from source.custom_classes.data_loaders import ACSEmploymentDataset
from source.metrics_computation_interfaces import compute_metrics_multiple_runs

## Configs

In [24]:
config = create_config_obj(config_yaml_path=os.path.join('configs', 'experiment1_folktables_GA_2018_config.yaml'))

TUNED_PARAMS_FILE_PATH = os.path.join('results', 'models_tuning', f'tuning_results_{config.dataset_name}_20230117__023240.csv')
SAVE_RESULTS_DIR_PATH = os.path.join('results', 'hypothesis_space',
                                     f'{config.dataset_name}_Metrics_{datetime.now(timezone.utc).strftime("%Y%m%d__%H%M%S")}')

## Models tuned hyper-parameters

In [25]:
models_tuned_params_df = pd.read_csv(TUNED_PARAMS_FILE_PATH)
models_tuned_params_df

Unnamed: 0.1,Unnamed: 0,Dataset_Name,Model_Name,F1_Score,Accuracy_Score,Model_Best_Params
0,0,Folktables_GA_2018,LogisticRegression,0.8195,0.8203,"{'C': 1, 'max_iter': 150, 'penalty': 'l2', 'so..."
1,1,Folktables_GA_2018,DecisionTreeClassifier,0.8304,0.8313,"{'criterion': 'gini', 'max_depth': 10, 'max_fe..."
2,2,Folktables_GA_2018,RandomForestClassifier,0.8354,0.8357,"{'max_depth': 10, 'max_features': 0.6, 'min_sa..."
3,3,Folktables_GA_2018,XGBClassifier,0.8371,0.8375,"{'lambda': 1, 'learning_rate': 0.1, 'max_depth..."
4,4,Folktables_GA_2018,KNeighborsClassifier,0.8136,0.8137,"{'metric': 'minkowski', 'n_neighbors': 25, 'we..."
5,5,Folktables_GA_2018,MLPClassifier,0.8345,0.8353,"{'activation': 'logistic', 'hidden_layer_sizes..."


In [26]:
experiment_models_config = create_models_config_from_tuned_params_df(
    models_config_for_tuning=MODELS_CONFIG,
    models_tuned_params_df=models_tuned_params_df,
)

In [27]:
experiment_models_config.keys()

dict_keys(['DecisionTreeClassifier', 'KNeighborsClassifier'])

## Load dataset

In [28]:
dataset = ACSEmploymentDataset(state=['GA'], year=2018, root_dir=os.path.join('data'), with_nulls=False, subsample=20000)
dataset.X_data.head()

Unnamed: 0,MAR,MIL,ESP,MIG,DREM,NATIVITY,DIS,DEAR,DEYE,SEX,RAC1P,RELP,CIT,ANC,SCHL,AGEP
69773,5,0,7,1,2,1,2,2,2,2,2,2,1,1,3,6
88669,3,4,0,1,2,1,2,2,2,2,1,0,1,2,22,71
1682,5,2,0,1,2,1,2,2,2,1,2,16,1,1,16,54
6242,5,4,0,1,2,1,2,2,2,2,2,11,1,4,20,46
43040,5,4,0,1,2,1,2,2,2,2,1,0,1,1,21,31


## Run experiments

In [29]:
multiple_runs_metrics_dct = compute_metrics_multiple_runs(dataset, config, experiment_models_config, SAVE_RESULTS_DIR_PATH, debug_mode=False)

Analyze models in one run:   0%|          | 0/2 [00:00<?, ?it/s]2023-01-24 01:27:46 abstract_overall_variance_analyzer.py INFO    : Start classifiers testing by bootstrap


##############################  [Model 1 / 2] Analyze DecisionTreeClassifier  ##############################
Model random_state:  101
Baseline X_train shape:  (16000, 16)
Baseline X_test shape:  (4000, 16)





Classifiers testing by bootstrap:   0%|          | 0/10 [00:00<?, ?it/s][A
Classifiers testing by bootstrap:  30%|███       | 3/10 [00:00<00:00, 26.56it/s][A
Classifiers testing by bootstrap:  60%|██████    | 6/10 [00:00<00:00, 27.10it/s][A
Classifiers testing by bootstrap: 100%|██████████| 10/10 [00:00<00:00, 27.04it/s][A
2023-01-24 01:27:46 abstract_overall_variance_analyzer.py INFO    : Successfully tested classifiers by bootstrap






2023-01-24 01:27:47 abstract_overall_variance_analyzer.py INFO    : Successfully computed predict proba metrics




##############################  Stability metrics  ##############################
General Ensemble Accuracy: 0.8082
Mean: 0.5485
Std: 0.0461
IQR: 0.0454
Entropy: 0.0328
Jitter: 0.0219
Per sample accuracy: 0.808
Label stability: 0.9761




Analyze models in one run:  50%|█████     | 1/2 [00:03<00:03,  3.57s/it]2023-01-24 01:27:49 abstract_overall_variance_analyzer.py INFO    : Start classifiers testing by bootstrap






##############################  [Model 2 / 2] Analyze KNeighborsClassifier  ##############################
Model random_state:  None
Baseline X_train shape:  (16000, 16)
Baseline X_test shape:  (4000, 16)





Classifiers testing by bootstrap:   0%|          | 0/10 [00:00<?, ?it/s][A
Classifiers testing by bootstrap:  10%|█         | 1/10 [00:00<00:03,  2.30it/s][A
Classifiers testing by bootstrap:  20%|██        | 2/10 [00:00<00:02,  3.44it/s][A
Classifiers testing by bootstrap:  30%|███       | 3/10 [00:01<00:02,  2.64it/s][A
Classifiers testing by bootstrap:  40%|████      | 4/10 [00:01<00:01,  3.25it/s][A
Classifiers testing by bootstrap:  50%|█████     | 5/10 [00:01<00:01,  3.44it/s][A
Classifiers testing by bootstrap:  60%|██████    | 6/10 [00:01<00:01,  3.88it/s][A
Classifiers testing by bootstrap:  70%|███████   | 7/10 [00:01<00:00,  4.21it/s][A
Classifiers testing by bootstrap:  80%|████████  | 8/10 [00:02<00:00,  4.47it/s][A
Classifiers testing by bootstrap:  90%|█████████ | 9/10 [00:02<00:00,  4.45it/s][A
Classifiers testing by bootstrap: 100%|██████████| 10/10 [00:03<00:00,  3.29it/s][A
2023-01-24 01:27:52 abstract_overall_variance_analyzer.py INFO    : Successfully t





2023-01-24 01:27:53 abstract_overall_variance_analyzer.py INFO    : Successfully computed predict proba metrics




##############################  Stability metrics  ##############################
General Ensemble Accuracy: 0.8005
Mean: 0.5211
Std: 0.068
IQR: 0.081
Entropy: 0.0
Jitter: 0.0902
Per sample accuracy: 0.7892
Label stability: 0.8826




Analyze models in one run: 100%|██████████| 2/2 [00:09<00:00,  4.86s/it]








Analyze models in one run:   0%|          | 0/2 [00:00<?, ?it/s]2023-01-24 01:27:55 abstract_overall_variance_analyzer.py INFO    : Start classifiers testing by bootstrap


##############################  [Model 1 / 2] Analyze DecisionTreeClassifier  ##############################
Model random_state:  201
Baseline X_train shape:  (16000, 16)
Baseline X_test shape:  (4000, 16)





Classifiers testing by bootstrap:   0%|          | 0/10 [00:00<?, ?it/s][A
Classifiers testing by bootstrap:  40%|████      | 4/10 [00:00<00:00, 34.32it/s][A
Classifiers testing by bootstrap: 100%|██████████| 10/10 [00:00<00:00, 33.00it/s][A
2023-01-24 01:27:56 abstract_overall_variance_analyzer.py INFO    : Successfully tested classifiers by bootstrap






2023-01-24 01:27:57 abstract_overall_variance_analyzer.py INFO    : Successfully computed predict proba metrics




##############################  Stability metrics  ##############################
General Ensemble Accuracy: 0.828
Mean: 0.5566
Std: 0.0517
IQR: 0.0526
Entropy: 0.0
Jitter: 0.0529
Per sample accuracy: 0.8212
Label stability: 0.9261




Analyze models in one run:  50%|█████     | 1/2 [00:03<00:03,  3.25s/it]2023-01-24 01:27:59 abstract_overall_variance_analyzer.py INFO    : Start classifiers testing by bootstrap






##############################  [Model 2 / 2] Analyze KNeighborsClassifier  ##############################
Model random_state:  None
Baseline X_train shape:  (16000, 16)
Baseline X_test shape:  (4000, 16)





Classifiers testing by bootstrap:   0%|          | 0/10 [00:00<?, ?it/s][A
Classifiers testing by bootstrap:  10%|█         | 1/10 [00:00<00:02,  4.43it/s][A
Classifiers testing by bootstrap:  20%|██        | 2/10 [00:00<00:01,  4.51it/s][A
Classifiers testing by bootstrap:  30%|███       | 3/10 [00:00<00:01,  4.68it/s][A
Classifiers testing by bootstrap:  40%|████      | 4/10 [00:01<00:01,  3.49it/s][A
Classifiers testing by bootstrap:  50%|█████     | 5/10 [00:01<00:01,  3.91it/s][A
Classifiers testing by bootstrap:  60%|██████    | 6/10 [00:01<00:00,  4.01it/s][A
Classifiers testing by bootstrap:  70%|███████   | 7/10 [00:01<00:00,  4.25it/s][A
Classifiers testing by bootstrap:  80%|████████  | 8/10 [00:01<00:00,  4.39it/s][A
Classifiers testing by bootstrap:  90%|█████████ | 9/10 [00:02<00:00,  4.27it/s][A
Classifiers testing by bootstrap: 100%|██████████| 10/10 [00:02<00:00,  4.23it/s][A
2023-01-24 01:28:01 abstract_overall_variance_analyzer.py INFO    : Successfully t





2023-01-24 01:28:02 abstract_overall_variance_analyzer.py INFO    : Successfully computed predict proba metrics




##############################  Stability metrics  ##############################
General Ensemble Accuracy: 0.7968
Mean: 0.5118
Std: 0.0676
IQR: 0.0818
Entropy: 0.0
Jitter: 0.0895
Per sample accuracy: 0.7876
Label stability: 0.8831




Analyze models in one run: 100%|██████████| 2/2 [00:08<00:00,  4.30s/it]








