In [15]:
# !pip install -r ./requirements.txt

In [2]:
# !pip uninstall virny -y

In [4]:
# Install using an HTTP link
# !pip install git+https://github.com/DataResponsibly/Virny.git@development

# Install using an SSH link
# !pip install git+ssh://git@github.com/DataResponsibly/Virny.git@development

In [5]:
# !pip install aif360

In [6]:
# !pip install BlackBoxAuditing==0.1.54

In [14]:
# Render matplotlib figures inline in the notebook output.
%matplotlib inline
# Load IPython's autoreload extension and switch it to mode 2, so edits made to the
# project modules imported below are picked up without restarting the kernel.
%load_ext autoreload
%autoreload 2

The autoreload extension is already loaded. To reload it, use:
  %reload_ext autoreload


In [15]:
import os
import warnings
warnings.filterwarnings('ignore')
os.environ["PYTHONWARNINGS"] = "ignore"

In [16]:
# Make the repository root the working directory so that the relative paths used
# later in this notebook (e.g. './configs/secrets.env') resolve.
# The folder name is taken with os.path.basename instead of splitting on '/', so the
# check also works when the platform path separator is not a forward slash.
# Assumes the notebook is started two levels below the repository root
# (ROOT_DIR/notebooks/<experiment name>/), matching how config_yaml_path is built.
cur_folder_name = os.path.basename(os.getcwd())
if cur_folder_name != "fairness-variance":
    os.chdir(os.path.join(os.pardir, os.pardir))

print('Current location: ', os.getcwd())

Current location:  /home/dh3553/projects/fairness-variance


## Import dependencies

In [17]:
import os
import copy

from virny.utils.custom_initializers import create_config_obj
from virny.datasets import RicciDataset

from configs.constants import TEST_SET_FRACTION, EXPERIMENT_SEEDS
from configs.models_config_for_tuning import get_folktables_employment_models_params_for_tuning

from source.preprocessing import get_simple_preprocessor
from source.experiment_interface import run_exp_iter_with_disparate_impact

## Define Input Variables

In [18]:
# --- Experiment identity -------------------------------------------------------------
EXPERIMENT_NAME = 'one_repair_lvl_many_models_ricci'
DB_COLLECTION_NAME = 'one_repair_lvl_many_models'
DATASET_NAME = 'RicciDataset'
# Values passed to every experiment iteration as fair_intervention_params_lst.
FAIR_INTERVENTION_PARAMS_LST = [0.0, 0.4, 0.7]

# --- Paths (relative to the current working directory, expected to be the repo root) --
ROOT_DIR = os.getcwd()
SAVE_RESULTS_DIR_PATH = os.path.join(ROOT_DIR, 'results', EXPERIMENT_NAME)
config_yaml_path = os.path.join(ROOT_DIR, 'notebooks', EXPERIMENT_NAME, 'ricci_config.yaml')

# --- Metrics configuration object built from the YAML file above ----------------------
metrics_computation_config = create_config_obj(config_yaml_path=config_yaml_path)

## Define a db writer and custom fields to insert into your database

In [19]:
import os
from dotenv import load_dotenv

# Read environment variables from the secrets file. The path is relative, so it only
# resolves when the working directory is the repository root.
# NOTE(review): presumably these variables are what connect_to_mongodb uses — confirm.
load_dotenv('./configs/secrets.env')
# Trailing expression: shows DB_NAME as the cell output (None if the variable is unset).
# NOTE(review): the displayed value is stored with the notebook; clear it before sharing
# if the database name is considered sensitive.
os.getenv("DB_NAME")

'fairness_variance'

In [20]:
from source.utils.db_functions import connect_to_mongodb

# Connect for the DB_COLLECTION_NAME collection and unpack the three returned values.
# db_writer_func is the one handed to run_exp_iter_with_disparate_impact below.
# NOTE(review): client is never closed in the visible cells — confirm whether it should be.
client, collection_obj, db_writer_func = connect_to_mongodb(DB_COLLECTION_NAME)

In [21]:
import uuid

# Extra fields forwarded to every experiment iteration as custom_table_fields_dct.
# The session id is pinned to a fixed value, so all iterations in this notebook share it;
# switch to the commented-out line below to generate a fresh id instead.
custom_table_fields_dct = {
#     'session_uuid': str(uuid.uuid4()),
    'session_uuid': 'd6a4d686-4369-4bca-95c8-7be5d0740b15',
}
print('Current session uuid: ', custom_table_fields_dct['session_uuid'])

Current session uuid:  d6a4d686-4369-4bca-95c8-7be5d0740b15


## Initialize custom objects

In [22]:
# Instantiate the Ricci dataset loader shipped with virny and preview the first rows
# of its feature frame (X_data).
data_loader = RicciDataset()
data_loader.X_data.head()

Unnamed: 0,Oral,Written,Combine,Position,Race
0,89.52,95,92.808,Captain,White
1,80.0,95,89.0,Captain,White
2,82.38,87,85.152,Captain,White
3,88.57,76,81.028,Captain,White
4,76.19,84,80.876,Captain,White


In [23]:
# Class balance of the target labels (y_data).
data_loader.y_data.value_counts()

0    62
1    56
Name: Promoted, dtype: int64

In [24]:
# (rows, columns) of the feature frame.
data_loader.X_data.shape

(118, 5)

In [25]:
# Number of rows per value of the Race column.
# NOTE(review): Race appears to be the protected attribute (the run logs report
# Race_priv / Race_dis groups) — confirm against ricci_config.yaml.
data_loader.X_data['Race'].value_counts()

White        68
Non-White    50
Name: Race, dtype: int64

## Run experiment iterations

### Experiment iteration 1

In [13]:
# tuned_params_filenames = ['tuning_results_Ricci_alpha_0.0_20230806__143119.csv']
# tuned_params_df_paths = [os.path.join(ROOT_DIR, 'results', EXPERIMENT_NAME, tuned_params_filename)
#                          for tuned_params_filename in tuned_params_filenames]

In [14]:
# Experiment iteration 1: pick the seed, tag the records, and prepare per-iteration inputs.
exp_iter_num = 1
# Iterations are numbered from 1, while EXPERIMENT_SEEDS is indexed from 0.
experiment_seed = EXPERIMENT_SEEDS[exp_iter_num - 1]
custom_table_fields_dct.update(experiment_iteration=f'Exp_iter_{exp_iter_num}')

# Give this iteration its own deep copy so the shared data_loader is left untouched.
exp_iter_data_loader = copy.deepcopy(data_loader)
# NOTE(review): the helper is named for the folktables employment task while DATASET_NAME
# is 'RicciDataset'; the logged LGBMClassifier params include 'min_data_in_leaf': 100 on a
# 118-row dataset — verify that this tuning grid is appropriate here.
models_params_for_tuning = get_folktables_employment_models_params_for_tuning(experiment_seed)

In [15]:
# Run iteration 1 with model tuning switched on.
# To reuse previously tuned parameters instead, set with_tuning=False and add a
# tuned_params_df_paths entry (as the later iterations do).
exp_iter_kwargs = dict(
    data_loader=exp_iter_data_loader,
    experiment_seed=experiment_seed,
    test_set_fraction=TEST_SET_FRACTION,
    db_writer_func=db_writer_func,
    fair_intervention_params_lst=FAIR_INTERVENTION_PARAMS_LST,
    models_params_for_tuning=models_params_for_tuning,
    metrics_computation_config=metrics_computation_config,
    custom_table_fields_dct=custom_table_fields_dct,
    with_tuning=True,
    save_results_dir_path=SAVE_RESULTS_DIR_PATH,
    verbose=True,
    dataset_name=DATASET_NAME,
)
run_exp_iter_with_disparate_impact(**exp_iter_kwargs)

2023-08-06 16:49:02 experiment_interface.py INFO    : Start an experiment iteration for the following custom params:
INFO:root:Start an experiment iteration for the following custom params:


{'dataset_split_seed': 100,
 'experiment_iteration': 'Exp_iter_1',
 'fair_intervention_params_lst': '[0.0]',
 'model_init_seed': 100,
 'session_uuid': 'd6a4d686-4369-4bca-95c8-7be5d0740b15'}




2023-08-06 16:49:02 experiment_interface.py INFO    : The dataset is preprocessed
INFO:root:The dataset is preprocessed


Top indexes of an X_test in a base flow dataset:  Int64Index([ 84,  85,  37,  45,  97,  26, 101, 112,  21,  33,  11,  46,  96,
             25,  32,  99, 108,  51,  29,  64],
           dtype='int64')
Top indexes of an y_test in a base flow dataset:  Int64Index([ 84,  85,  37,  45,  97,  26, 101, 112,  21,  33,  11,  46,  96,
             25,  32,  99, 108,  51,  29,  64],
           dtype='int64')


Multiple alphas:   0%|          | 0/1 [00:00<?, ?it/s]

intervention_param:  0.0
2023/08/06, 16:49:02: Tuning LGBMClassifier...
2023/08/06, 16:49:04: Tuning for LGBMClassifier is finished [F1 score = 0.3696969696969697, Accuracy = 0.5897435897435898]

2023/08/06, 16:49:04: Tuning LogisticRegression...
2023/08/06, 16:49:04: Tuning for LogisticRegression is finished [F1 score = 0.9474747474747475, Accuracy = 0.9487179487179488]

2023/08/06, 16:49:04: Tuning RandomForestClassifier...
2023/08/06, 16:49:39: Tuning for RandomForestClassifier is finished [F1 score = 1.0, Accuracy = 1.0]

2023/08/06, 16:49:39: Tuning MLPClassifier...


2023-08-06 16:49:40 experiment_interface.py INFO    : Models are tuned and saved to a file
INFO:root:Models are tuned and saved to a file


2023/08/06, 16:49:40: Tuning for MLPClassifier is finished [F1 score = 1.0, Accuracy = 1.0]



Multiple runs progress:   0%|          | 0/1 [00:00<?, ?it/s]

Analyze models in one run:   0%|          | 0/4 [00:00<?, ?it/s]

##############################  [Model 1 / 4] Analyze LGBMClassifier  ##############################
Model seed:  101

Protected groups splits:
Race_priv (17, 1)
Race_dis (22, 1)




2023-08-06 16:49:40 abstract_overall_variance_analyzer.py INFO    : Start classifiers testing by bootstrap
INFO:root:Start classifiers testing by bootstrap


Classifiers testing by bootstrap:   0%|          | 0/200 [00:00<?, ?it/s]







2023-08-06 16:49:41 abstract_overall_variance_analyzer.py INFO    : Successfully tested classifiers by bootstrap
INFO:root:Successfully tested classifiers by bootstrap
2023-08-06 16:49:41 abstract_overall_variance_analyzer.py INFO    : Successfully computed predict proba metrics
INFO:root:Successfully computed predict proba metrics



[LGBMClassifier] Metrics matrix:


Unnamed: 0,Metric,overall,Race_priv,Race_dis,Model_Seed,Model_Name,Model_Params
0,Mean,0.427778,0.427778,0.427778,101,LGBMClassifier,"{'boosting_type': 'gbdt', 'class_weight': None..."
1,Std,0.062055,0.062055,0.062055,101,LGBMClassifier,"{'boosting_type': 'gbdt', 'class_weight': None..."
2,IQR,0.079365,0.079365,0.079365,101,LGBMClassifier,"{'boosting_type': 'gbdt', 'class_weight': None..."
3,Aleatoric_Uncertainty,0.973509,0.973509,0.973509,101,LGBMClassifier,"{'boosting_type': 'gbdt', 'class_weight': None..."
4,Overall_Uncertainty,0.984897,0.984897,0.984897,101,LGBMClassifier,"{'boosting_type': 'gbdt', 'class_weight': None..."
5,Statistical_Bias,0.531481,0.512745,0.54596,101,LGBMClassifier,"{'boosting_type': 'gbdt', 'class_weight': None..."
6,Jitter,0.196784,0.196784,0.196784,101,LGBMClassifier,"{'boosting_type': 'gbdt', 'class_weight': None..."
7,Per_Sample_Accuracy,0.33,0.431176,0.251818,101,LGBMClassifier,"{'boosting_type': 'gbdt', 'class_weight': None..."
8,Label_Stability,0.78,0.78,0.78,101,LGBMClassifier,"{'boosting_type': 'gbdt', 'class_weight': None..."
9,TPR,1.0,1.0,1.0,101,LGBMClassifier,"{'boosting_type': 'gbdt', 'class_weight': None..."






##############################  [Model 2 / 4] Analyze LogisticRegression  ##############################
Model seed:  101

Protected groups splits:
Race_priv (17, 1)
Race_dis (22, 1)




2023-08-06 16:49:42 abstract_overall_variance_analyzer.py INFO    : Start classifiers testing by bootstrap
INFO:root:Start classifiers testing by bootstrap


Classifiers testing by bootstrap:   0%|          | 0/200 [00:00<?, ?it/s]





2023-08-06 16:49:43 abstract_overall_variance_analyzer.py INFO    : Successfully tested classifiers by bootstrap
INFO:root:Successfully tested classifiers by bootstrap
2023-08-06 16:49:44 abstract_overall_variance_analyzer.py INFO    : Successfully computed predict proba metrics
INFO:root:Successfully computed predict proba metrics



[LogisticRegression] Metrics matrix:


Unnamed: 0,Metric,overall,Race_priv,Race_dis,Model_Seed,Model_Name,Model_Params
0,Mean,0.542422,0.402698,0.65039,101,LogisticRegression,"{'C': 0.1, 'class_weight': None, 'dual': False..."
1,Std,0.045251,0.045876,0.044769,101,LogisticRegression,"{'C': 0.1, 'class_weight': None, 'dual': False..."
2,IQR,0.059114,0.061516,0.057257,101,LogisticRegression,"{'C': 0.1, 'class_weight': None, 'dual': False..."
3,Aleatoric_Uncertainty,0.752689,0.759723,0.747253,101,LogisticRegression,"{'C': 0.1, 'class_weight': None, 'dual': False..."
4,Overall_Uncertainty,0.761071,0.768173,0.755582,101,LogisticRegression,"{'C': 0.1, 'class_weight': None, 'dual': False..."
5,Statistical_Bias,0.287394,0.334435,0.251044,101,LogisticRegression,"{'C': 0.1, 'class_weight': None, 'dual': False..."
6,Jitter,0.091537,0.127694,0.063598,101,LogisticRegression,"{'C': 0.1, 'class_weight': None, 'dual': False..."
7,Per_Sample_Accuracy,0.837692,0.693235,0.949318,101,LogisticRegression,"{'C': 0.1, 'class_weight': None, 'dual': False..."
8,Label_Stability,0.865128,0.821765,0.898636,101,LogisticRegression,"{'C': 0.1, 'class_weight': None, 'dual': False..."
9,TPR,1.0,1.0,1.0,101,LogisticRegression,"{'C': 0.1, 'class_weight': None, 'dual': False..."






##############################  [Model 3 / 4] Analyze RandomForestClassifier  ##############################
Model seed:  101

Protected groups splits:
Race_priv (17, 1)
Race_dis (22, 1)




2023-08-06 16:49:44 abstract_overall_variance_analyzer.py INFO    : Start classifiers testing by bootstrap
INFO:root:Start classifiers testing by bootstrap


Classifiers testing by bootstrap:   0%|          | 0/200 [00:00<?, ?it/s]





2023-08-06 16:50:09 abstract_overall_variance_analyzer.py INFO    : Successfully tested classifiers by bootstrap
INFO:root:Successfully tested classifiers by bootstrap
2023-08-06 16:50:10 abstract_overall_variance_analyzer.py INFO    : Successfully computed predict proba metrics
INFO:root:Successfully computed predict proba metrics



[RandomForestClassifier] Metrics matrix:


Unnamed: 0,Metric,overall,Race_priv,Race_dis,Model_Seed,Model_Name,Model_Params
0,Mean,0.662933,0.483874,0.801298,101,RandomForestClassifier,"{'bootstrap': True, 'ccp_alpha': 0.0, 'class_w..."
1,Std,0.053466,0.077212,0.035116,101,RandomForestClassifier,"{'bootstrap': True, 'ccp_alpha': 0.0, 'class_w..."
2,IQR,0.068205,0.107794,0.037614,101,RandomForestClassifier,"{'bootstrap': True, 'ccp_alpha': 0.0, 'class_w..."
3,Aleatoric_Uncertainty,0.267774,0.376616,0.183669,101,RandomForestClassifier,"{'bootstrap': True, 'ccp_alpha': 0.0, 'class_w..."
4,Overall_Uncertainty,0.302109,0.425447,0.206802,101,RandomForestClassifier,"{'bootstrap': True, 'ccp_alpha': 0.0, 'class_w..."
5,Statistical_Bias,0.089915,0.152262,0.041739,101,RandomForestClassifier,"{'bootstrap': True, 'ccp_alpha': 0.0, 'class_w..."
6,Jitter,0.037619,0.085126,0.000909,101,RandomForestClassifier,"{'bootstrap': True, 'ccp_alpha': 0.0, 'class_w..."
7,Per_Sample_Accuracy,0.960513,0.91,0.999545,101,RandomForestClassifier,"{'bootstrap': True, 'ccp_alpha': 0.0, 'class_w..."
8,Label_Stability,0.940513,0.864706,0.999091,101,RandomForestClassifier,"{'bootstrap': True, 'ccp_alpha': 0.0, 'class_w..."
9,TPR,1.0,1.0,1.0,101,RandomForestClassifier,"{'bootstrap': True, 'ccp_alpha': 0.0, 'class_w..."






##############################  [Model 4 / 4] Analyze MLPClassifier  ##############################
Model seed:  101

Protected groups splits:
Race_priv (17, 1)
Race_dis (22, 1)




2023-08-06 16:50:11 abstract_overall_variance_analyzer.py INFO    : Start classifiers testing by bootstrap
INFO:root:Start classifiers testing by bootstrap


Classifiers testing by bootstrap:   0%|          | 0/200 [00:00<?, ?it/s]





2023-08-06 16:50:28 abstract_overall_variance_analyzer.py INFO    : Successfully tested classifiers by bootstrap
INFO:root:Successfully tested classifiers by bootstrap
2023-08-06 16:50:28 abstract_overall_variance_analyzer.py INFO    : Successfully computed predict proba metrics
INFO:root:Successfully computed predict proba metrics



[MLPClassifier] Metrics matrix:


Unnamed: 0,Metric,overall,Race_priv,Race_dis,Model_Seed,Model_Name,Model_Params
0,Mean,0.685829,0.499715,0.829644,101,MLPClassifier,"{'activation': 'logistic', 'alpha': 0.0001, 'b..."
1,Std,0.069973,0.139455,0.016282,101,MLPClassifier,"{'activation': 'logistic', 'alpha': 0.0001, 'b..."
2,IQR,0.082799,0.163163,0.0207,101,MLPClassifier,"{'activation': 'logistic', 'alpha': 0.0001, 'b..."
3,Aleatoric_Uncertainty,0.048079,0.087151,0.017886,101,MLPClassifier,"{'activation': 'logistic', 'alpha': 0.0001, 'b..."
4,Overall_Uncertainty,0.14876,0.290915,0.038914,101,MLPClassifier,"{'activation': 'logistic', 'alpha': 0.0001, 'b..."
5,Statistical_Bias,0.049605,0.098594,0.011749,101,MLPClassifier,"{'activation': 'logistic', 'alpha': 0.0001, 'b..."
6,Jitter,0.06428,0.126527,0.016181,101,MLPClassifier,"{'activation': 'logistic', 'alpha': 0.0001, 'b..."
7,Per_Sample_Accuracy,0.954615,0.909412,0.989545,101,MLPClassifier,"{'activation': 'logistic', 'alpha': 0.0001, 'b..."
8,Label_Stability,0.909231,0.818824,0.979091,101,MLPClassifier,"{'activation': 'logistic', 'alpha': 0.0001, 'b..."
9,TPR,1.0,1.0,1.0,101,MLPClassifier,"{'activation': 'logistic', 'alpha': 0.0001, 'b..."








2023-08-06 16:50:30 experiment_interface.py INFO    : Experiment run was successful!
INFO:root:Experiment run was successful!


### Experiment iteration 2

In [30]:
# Experiment iteration 2: pick the seed, locate the tuned-parameter files, tag the records,
# and prepare per-iteration inputs.
exp_iter_num = 2
# Iterations are numbered from 1, while EXPERIMENT_SEEDS is indexed from 0.
experiment_seed = EXPERIMENT_SEEDS[exp_iter_num - 1]

# One tuning-results CSV per intervention value (the alpha is part of each file name).
tuned_params_filenames = [
    'tuning_results_Ricci_alpha_0.0_20230806__204940.csv',
    'tuning_results_Ricci_alpha_0.4_20230806__205057.csv',
    'tuning_results_Ricci_alpha_0.7_20230806__205137.csv',
]
tuned_params_dir = os.path.join(ROOT_DIR, 'results', EXPERIMENT_NAME)
tuned_params_df_paths = [os.path.join(tuned_params_dir, filename)
                         for filename in tuned_params_filenames]
custom_table_fields_dct.update(experiment_iteration=f'Exp_iter_{exp_iter_num}')

# Give this iteration its own deep copy so the shared data_loader is left untouched.
exp_iter_data_loader = copy.deepcopy(data_loader)
# NOTE(review): the helper is named for the folktables employment task while DATASET_NAME
# is 'RicciDataset' — verify that this parameter grid is appropriate here.
models_params_for_tuning = get_folktables_employment_models_params_for_tuning(experiment_seed)

In [None]:
# Run iteration 2 without tuning: model parameters are read from tuned_params_df_paths.
# NOTE(review): the saved output for this cell stops partway through the run (during the
# RandomForestClassifier bootstrap) — re-execute before relying on it.
exp_iter_kwargs = dict(
    data_loader=exp_iter_data_loader,
    experiment_seed=experiment_seed,
    test_set_fraction=TEST_SET_FRACTION,
    db_writer_func=db_writer_func,
    fair_intervention_params_lst=FAIR_INTERVENTION_PARAMS_LST,
    models_params_for_tuning=models_params_for_tuning,
    metrics_computation_config=metrics_computation_config,
    custom_table_fields_dct=custom_table_fields_dct,
    with_tuning=False,
    tuned_params_df_paths=tuned_params_df_paths,
    save_results_dir_path=SAVE_RESULTS_DIR_PATH,
    verbose=True,
    dataset_name=DATASET_NAME,
)
run_exp_iter_with_disparate_impact(**exp_iter_kwargs)

2023-08-06 16:56:40 experiment_interface.py INFO    : Start an experiment iteration for the following custom params:
INFO:root:Start an experiment iteration for the following custom params:


{'dataset_split_seed': 200,
 'experiment_iteration': 'Exp_iter_2',
 'fair_intervention_params_lst': '[0.0, 0.4, 0.7]',
 'model_init_seed': 200,
 'session_uuid': 'd6a4d686-4369-4bca-95c8-7be5d0740b15'}




2023-08-06 16:56:40 experiment_interface.py INFO    : The dataset is preprocessed
INFO:root:The dataset is preprocessed


Top indexes of an X_test in a base flow dataset:  Int64Index([72, 66, 34, 21, 86, 9, 13, 36, 98, 2, 67, 10, 87, 110, 12, 46, 48,
            102, 104, 38],
           dtype='int64')
Top indexes of an y_test in a base flow dataset:  Int64Index([72, 66, 34, 21, 86, 9, 13, 36, 98, 2, 67, 10, 87, 110, 12, 46, 48,
            102, 104, 38],
           dtype='int64')


Multiple alphas:   0%|          | 0/3 [00:00<?, ?it/s]

2023-08-06 16:56:40 experiment_interface.py INFO    : Models config is loaded from the input file
INFO:root:Models config is loaded from the input file


intervention_param:  0.0
Path for tuned params:  /home/dh3553/projects/fairness-variance/results/one_repair_lvl_many_models_ricci/tuning_results_Ricci_alpha_0.0_20230806__204940.csv
LGBMClassifier:  {'boosting_type': 'gbdt', 'class_weight': None, 'colsample_bytree': 1.0, 'importance_type': 'split', 'learning_rate': 0.1, 'max_depth': 3, 'min_child_samples': 20, 'min_child_weight': 0.001, 'min_split_gain': 0.0, 'n_estimators': 100, 'n_jobs': -1, 'num_leaves': 20, 'objective': None, 'random_state': 200, 'reg_alpha': 0.0, 'reg_lambda': 0.0, 'silent': 'warn', 'subsample': 1.0, 'subsample_for_bin': 200000, 'subsample_freq': 0, 'min_data_in_leaf': 100}


Multiple runs progress:   0%|          | 0/1 [00:00<?, ?it/s]

Analyze models in one run:   0%|          | 0/4 [00:00<?, ?it/s]

##############################  [Model 1 / 4] Analyze LGBMClassifier  ##############################
Model seed:  201

Protected groups splits:
Race_priv (23, 1)
Race_dis (16, 1)




2023-08-06 16:56:40 abstract_overall_variance_analyzer.py INFO    : Start classifiers testing by bootstrap
INFO:root:Start classifiers testing by bootstrap


Classifiers testing by bootstrap:   0%|          | 0/200 [00:00<?, ?it/s]









2023-08-06 16:56:41 abstract_overall_variance_analyzer.py INFO    : Successfully tested classifiers by bootstrap
INFO:root:Successfully tested classifiers by bootstrap
2023-08-06 16:56:41 abstract_overall_variance_analyzer.py INFO    : Successfully computed predict proba metrics
INFO:root:Successfully computed predict proba metrics



[LGBMClassifier] Metrics matrix:


Unnamed: 0,Metric,overall,Race_priv,Race_dis,Model_Seed,Model_Name,Model_Params
0,Mean,0.542222,0.542222,0.542222,201,LGBMClassifier,"{'boosting_type': 'gbdt', 'class_weight': None..."
1,Std,0.064352,0.064352,0.064352,201,LGBMClassifier,"{'boosting_type': 'gbdt', 'class_weight': None..."
2,IQR,0.063492,0.063492,0.063492,201,LGBMClassifier,"{'boosting_type': 'gbdt', 'class_weight': None..."
3,Aleatoric_Uncertainty,0.982781,0.982781,0.982781,201,LGBMClassifier,"{'boosting_type': 'gbdt', 'class_weight': None..."
4,Overall_Uncertainty,0.99485,0.99485,0.99485,201,LGBMClassifier,"{'boosting_type': 'gbdt', 'class_weight': None..."
5,Statistical_Bias,0.501083,0.516522,0.478889,201,LGBMClassifier,"{'boosting_type': 'gbdt', 'class_weight': None..."
6,Jitter,0.361357,0.361357,0.361357,201,LGBMClassifier,"{'boosting_type': 'gbdt', 'class_weight': None..."
7,Per_Sample_Accuracy,0.493205,0.396304,0.6325,201,LGBMClassifier,"{'boosting_type': 'gbdt', 'class_weight': None..."
8,Label_Stability,0.53,0.53,0.53,201,LGBMClassifier,"{'boosting_type': 'gbdt', 'class_weight': None..."
9,TPR,0.0,0.0,0.0,201,LGBMClassifier,"{'boosting_type': 'gbdt', 'class_weight': None..."






##############################  [Model 2 / 4] Analyze LogisticRegression  ##############################
Model seed:  201

Protected groups splits:
Race_priv (23, 1)
Race_dis (16, 1)




2023-08-06 16:56:42 abstract_overall_variance_analyzer.py INFO    : Start classifiers testing by bootstrap
INFO:root:Start classifiers testing by bootstrap


Classifiers testing by bootstrap:   0%|          | 0/200 [00:00<?, ?it/s]





2023-08-06 16:56:43 abstract_overall_variance_analyzer.py INFO    : Successfully tested classifiers by bootstrap
INFO:root:Successfully tested classifiers by bootstrap
2023-08-06 16:56:43 abstract_overall_variance_analyzer.py INFO    : Successfully computed predict proba metrics
INFO:root:Successfully computed predict proba metrics



[LogisticRegression] Metrics matrix:


Unnamed: 0,Metric,overall,Race_priv,Race_dis,Model_Seed,Model_Name,Model_Params
0,Mean,0.543455,0.401808,0.747074,201,LogisticRegression,"{'C': 0.1, 'class_weight': None, 'dual': False..."
1,Std,0.039544,0.0414,0.036876,201,LogisticRegression,"{'C': 0.1, 'class_weight': None, 'dual': False..."
2,IQR,0.053544,0.056775,0.048898,201,LogisticRegression,"{'C': 0.1, 'class_weight': None, 'dual': False..."
3,Aleatoric_Uncertainty,0.675437,0.70154,0.637913,201,LogisticRegression,"{'C': 0.1, 'class_weight': None, 'dual': False..."
4,Overall_Uncertainty,0.682847,0.709222,0.644935,201,LogisticRegression,"{'C': 0.1, 'class_weight': None, 'dual': False..."
5,Statistical_Bias,0.229713,0.242473,0.211371,201,LogisticRegression,"{'C': 0.1, 'class_weight': None, 'dual': False..."
6,Jitter,0.066672,0.063684,0.070967,201,LogisticRegression,"{'C': 0.1, 'class_weight': None, 'dual': False..."
7,Per_Sample_Accuracy,0.945385,0.95913,0.925625,201,LogisticRegression,"{'C': 0.1, 'class_weight': None, 'dual': False..."
8,Label_Stability,0.902051,0.918261,0.87875,201,LogisticRegression,"{'C': 0.1, 'class_weight': None, 'dual': False..."
9,TPR,0.9,1.0,0.5,201,LogisticRegression,"{'C': 0.1, 'class_weight': None, 'dual': False..."






##############################  [Model 3 / 4] Analyze RandomForestClassifier  ##############################
Model seed:  201

Protected groups splits:
Race_priv (23, 1)
Race_dis (16, 1)




2023-08-06 16:56:44 abstract_overall_variance_analyzer.py INFO    : Start classifiers testing by bootstrap
INFO:root:Start classifiers testing by bootstrap


Classifiers testing by bootstrap:   0%|          | 0/200 [00:00<?, ?it/s]

### Experiment iteration 3

In [81]:
# Experiment iteration 3: pick the seed, locate the tuned-parameter files, tag the records,
# and prepare per-iteration inputs.
exp_iter_num = 3
# Iterations are numbered from 1, while EXPERIMENT_SEEDS is indexed from 0.
experiment_seed = EXPERIMENT_SEEDS[exp_iter_num - 1]

# One tuning-results CSV per intervention value (the alpha is part of each file name).
tuned_params_filenames = [
    'tuning_results_Ricci_alpha_0.0_20230806__204940.csv',
    'tuning_results_Ricci_alpha_0.4_20230806__205057.csv',
    'tuning_results_Ricci_alpha_0.7_20230806__205137.csv',
]
tuned_params_dir = os.path.join(ROOT_DIR, 'results', EXPERIMENT_NAME)
tuned_params_df_paths = [os.path.join(tuned_params_dir, filename)
                         for filename in tuned_params_filenames]
custom_table_fields_dct.update(experiment_iteration=f'Exp_iter_{exp_iter_num}')

# Give this iteration its own deep copy so the shared data_loader is left untouched.
exp_iter_data_loader = copy.deepcopy(data_loader)
# NOTE(review): the helper is named for the folktables employment task while DATASET_NAME
# is 'RicciDataset' — verify that this parameter grid is appropriate here.
models_params_for_tuning = get_folktables_employment_models_params_for_tuning(experiment_seed)

In [82]:
# Run iteration 3 without tuning: model parameters are read from tuned_params_df_paths.
# NOTE(review): the saved output for this cell reports a different session_uuid, eleven
# intervention values, and tuned-params paths under results/mult_repair_levels_ricci, so it
# appears to come from a different run — re-execute before relying on it.
exp_iter_kwargs = dict(
    data_loader=exp_iter_data_loader,
    experiment_seed=experiment_seed,
    test_set_fraction=TEST_SET_FRACTION,
    db_writer_func=db_writer_func,
    fair_intervention_params_lst=FAIR_INTERVENTION_PARAMS_LST,
    models_params_for_tuning=models_params_for_tuning,
    metrics_computation_config=metrics_computation_config,
    custom_table_fields_dct=custom_table_fields_dct,
    with_tuning=False,
    tuned_params_df_paths=tuned_params_df_paths,
    save_results_dir_path=SAVE_RESULTS_DIR_PATH,
    verbose=True,
    dataset_name=DATASET_NAME,
)
run_exp_iter_with_disparate_impact(**exp_iter_kwargs)

2023-08-06 13:59:42 experiment_interface.py INFO    : Start an experiment iteration for the following custom params:
INFO:root:Start an experiment iteration for the following custom params:


{'dataset_split_seed': 300,
 'experiment_iteration': 'Exp_iter_3',
 'fair_intervention_params_lst': '[0.0, 0.1, 0.2, 0.3, 0.4, 0.5, 0.6, 0.7, '
                                 '0.8, 0.9, 1.0]',
 'intervention_param': 1.0,
 'model_init_seed': 300,
 'session_uuid': '6d916473-4d0f-4247-9638-b4f140ec4b28'}




2023-08-06 13:59:42 experiment_interface.py INFO    : The dataset is preprocessed
INFO:root:The dataset is preprocessed


Top indexes of an X_test in a base flow dataset:  Int64Index([ 69,  17,  62,  67,   7,  46,  14, 111,  18,  48,  29, 102,  39,
             52,  58,  83,  11,  56, 116, 113],
           dtype='int64')
Top indexes of an y_test in a base flow dataset:  Int64Index([ 69,  17,  62,  67,   7,  46,  14, 111,  18,  48,  29, 102,  39,
             52,  58,  83,  11,  56, 116, 113],
           dtype='int64')


Multiple alphas:   0%|          | 0/11 [00:00<?, ?it/s]

2023-08-06 13:59:42 experiment_interface.py INFO    : Models config is loaded from the input file
INFO:root:Models config is loaded from the input file


intervention_param:  0.0
Path for tuned params:  /home/dh3553/projects/fairness-variance/results/mult_repair_levels_ricci/tuning_results_Ricci_alpha_0.0_20230806__173738.csv
RandomForestClassifier:  {'bootstrap': True, 'ccp_alpha': 0.0, 'class_weight': None, 'criterion': 'gini', 'max_depth': 10, 'max_features': 'sqrt', 'max_leaf_nodes': None, 'max_samples': None, 'min_impurity_decrease': 0.0, 'min_samples_leaf': 1, 'min_samples_split': 2, 'min_weight_fraction_leaf': 0.0, 'n_estimators': 100, 'n_jobs': None, 'oob_score': False, 'random_state': 300, 'verbose': 0, 'warm_start': False}


Multiple runs progress:   0%|          | 0/1 [00:00<?, ?it/s]

Analyze models in one run:   0%|          | 0/1 [00:00<?, ?it/s]

Classifiers testing by bootstrap:   0%|          | 0/200 [00:00<?, ?it/s]

intervention_param:  0.1
Path for tuned params:  /home/dh3553/projects/fairness-variance/results/mult_repair_levels_ricci/tuning_results_Ricci_alpha_0.1_20230806__173842.csv
RandomForestClassifier:  {'bootstrap': True, 'ccp_alpha': 0.0, 'class_weight': None, 'criterion': 'gini', 'max_depth': 10, 'max_features': 'sqrt', 'max_leaf_nodes': None, 'max_samples': None, 'min_impurity_decrease': 0.0, 'min_samples_leaf': 1, 'min_samples_split': 2, 'min_weight_fraction_leaf': 0.0, 'n_estimators': 100, 'n_jobs': None, 'oob_score': False, 'random_state': 301, 'verbose': 0, 'warm_start': False}


Multiple runs progress:   0%|          | 0/1 [00:00<?, ?it/s]

Analyze models in one run:   0%|          | 0/1 [00:00<?, ?it/s]

Classifiers testing by bootstrap:   0%|          | 0/200 [00:00<?, ?it/s]

intervention_param:  0.2
Path for tuned params:  /home/dh3553/projects/fairness-variance/results/mult_repair_levels_ricci/tuning_results_Ricci_alpha_0.2_20230806__173944.csv
RandomForestClassifier:  {'bootstrap': True, 'ccp_alpha': 0.0, 'class_weight': None, 'criterion': 'gini', 'max_depth': 10, 'max_features': 'sqrt', 'max_leaf_nodes': None, 'max_samples': None, 'min_impurity_decrease': 0.0, 'min_samples_leaf': 1, 'min_samples_split': 5, 'min_weight_fraction_leaf': 0.0, 'n_estimators': 100, 'n_jobs': None, 'oob_score': False, 'random_state': 301, 'verbose': 0, 'warm_start': False}


Multiple runs progress:   0%|          | 0/1 [00:00<?, ?it/s]

Analyze models in one run:   0%|          | 0/1 [00:00<?, ?it/s]

Classifiers testing by bootstrap:   0%|          | 0/200 [00:00<?, ?it/s]

intervention_param:  0.3
Path for tuned params:  /home/dh3553/projects/fairness-variance/results/mult_repair_levels_ricci/tuning_results_Ricci_alpha_0.3_20230806__174046.csv
RandomForestClassifier:  {'bootstrap': True, 'ccp_alpha': 0.0, 'class_weight': None, 'criterion': 'gini', 'max_depth': 10, 'max_features': 'sqrt', 'max_leaf_nodes': None, 'max_samples': None, 'min_impurity_decrease': 0.0, 'min_samples_leaf': 1, 'min_samples_split': 2, 'min_weight_fraction_leaf': 0.0, 'n_estimators': 100, 'n_jobs': None, 'oob_score': False, 'random_state': 301, 'verbose': 0, 'warm_start': False}


Multiple runs progress:   0%|          | 0/1 [00:00<?, ?it/s]

Analyze models in one run:   0%|          | 0/1 [00:00<?, ?it/s]

Classifiers testing by bootstrap:   0%|          | 0/200 [00:00<?, ?it/s]

intervention_param:  0.4
Path for tuned params:  /home/dh3553/projects/fairness-variance/results/mult_repair_levels_ricci/tuning_results_Ricci_alpha_0.4_20230806__174149.csv
RandomForestClassifier:  {'bootstrap': True, 'ccp_alpha': 0.0, 'class_weight': None, 'criterion': 'gini', 'max_depth': 10, 'max_features': 'sqrt', 'max_leaf_nodes': None, 'max_samples': None, 'min_impurity_decrease': 0.0, 'min_samples_leaf': 1, 'min_samples_split': 2, 'min_weight_fraction_leaf': 0.0, 'n_estimators': 100, 'n_jobs': None, 'oob_score': False, 'random_state': 301, 'verbose': 0, 'warm_start': False}


Multiple runs progress:   0%|          | 0/1 [00:00<?, ?it/s]

Analyze models in one run:   0%|          | 0/1 [00:00<?, ?it/s]

Classifiers testing by bootstrap:   0%|          | 0/200 [00:00<?, ?it/s]

intervention_param:  0.5
Path for tuned params:  /home/dh3553/projects/fairness-variance/results/mult_repair_levels_ricci/tuning_results_Ricci_alpha_0.5_20230806__174251.csv
RandomForestClassifier:  {'bootstrap': True, 'ccp_alpha': 0.0, 'class_weight': None, 'criterion': 'gini', 'max_depth': 10, 'max_features': 'sqrt', 'max_leaf_nodes': None, 'max_samples': None, 'min_impurity_decrease': 0.0, 'min_samples_leaf': 1, 'min_samples_split': 5, 'min_weight_fraction_leaf': 0.0, 'n_estimators': 100, 'n_jobs': None, 'oob_score': False, 'random_state': 301, 'verbose': 0, 'warm_start': False}


Multiple runs progress:   0%|          | 0/1 [00:00<?, ?it/s]

Analyze models in one run:   0%|          | 0/1 [00:00<?, ?it/s]

Classifiers testing by bootstrap:   0%|          | 0/200 [00:00<?, ?it/s]

intervention_param:  0.6
Path for tuned params:  /home/dh3553/projects/fairness-variance/results/mult_repair_levels_ricci/tuning_results_Ricci_alpha_0.6_20230806__174353.csv
RandomForestClassifier:  {'bootstrap': True, 'ccp_alpha': 0.0, 'class_weight': None, 'criterion': 'gini', 'max_depth': 10, 'max_features': 'sqrt', 'max_leaf_nodes': None, 'max_samples': None, 'min_impurity_decrease': 0.0, 'min_samples_leaf': 1, 'min_samples_split': 2, 'min_weight_fraction_leaf': 0.0, 'n_estimators': 100, 'n_jobs': None, 'oob_score': False, 'random_state': 301, 'verbose': 0, 'warm_start': False}


Multiple runs progress:   0%|          | 0/1 [00:00<?, ?it/s]

Analyze models in one run:   0%|          | 0/1 [00:00<?, ?it/s]

Classifiers testing by bootstrap:   0%|          | 0/200 [00:00<?, ?it/s]

intervention_param:  0.7
Path for tuned params:  /home/dh3553/projects/fairness-variance/results/mult_repair_levels_ricci/tuning_results_Ricci_alpha_0.7_20230806__174456.csv
RandomForestClassifier:  {'bootstrap': False, 'ccp_alpha': 0.0, 'class_weight': None, 'criterion': 'gini', 'max_depth': 10, 'max_features': 'sqrt', 'max_leaf_nodes': None, 'max_samples': None, 'min_impurity_decrease': 0.0, 'min_samples_leaf': 1, 'min_samples_split': 10, 'min_weight_fraction_leaf': 0.0, 'n_estimators': 100, 'n_jobs': None, 'oob_score': False, 'random_state': 301, 'verbose': 0, 'warm_start': False}


Multiple runs progress:   0%|          | 0/1 [00:00<?, ?it/s]

Analyze models in one run:   0%|          | 0/1 [00:00<?, ?it/s]

Classifiers testing by bootstrap:   0%|          | 0/200 [00:00<?, ?it/s]

intervention_param:  0.8
Path for tuned params:  /home/dh3553/projects/fairness-variance/results/mult_repair_levels_ricci/tuning_results_Ricci_alpha_0.8_20230806__174554.csv
RandomForestClassifier:  {'bootstrap': True, 'ccp_alpha': 0.0, 'class_weight': None, 'criterion': 'gini', 'max_depth': 10, 'max_features': 'sqrt', 'max_leaf_nodes': None, 'max_samples': None, 'min_impurity_decrease': 0.0, 'min_samples_leaf': 1, 'min_samples_split': 5, 'min_weight_fraction_leaf': 0.0, 'n_estimators': 100, 'n_jobs': None, 'oob_score': False, 'random_state': 301, 'verbose': 0, 'warm_start': False}


Multiple runs progress:   0%|          | 0/1 [00:00<?, ?it/s]

Analyze models in one run:   0%|          | 0/1 [00:00<?, ?it/s]

Classifiers testing by bootstrap:   0%|          | 0/200 [00:00<?, ?it/s]

intervention_param:  0.9
Path for tuned params:  /home/dh3553/projects/fairness-variance/results/mult_repair_levels_ricci/tuning_results_Ricci_alpha_0.9_20230806__174655.csv
RandomForestClassifier:  {'bootstrap': True, 'ccp_alpha': 0.0, 'class_weight': None, 'criterion': 'gini', 'max_depth': 10, 'max_features': 'sqrt', 'max_leaf_nodes': None, 'max_samples': None, 'min_impurity_decrease': 0.0, 'min_samples_leaf': 1, 'min_samples_split': 2, 'min_weight_fraction_leaf': 0.0, 'n_estimators': 200, 'n_jobs': None, 'oob_score': False, 'random_state': 301, 'verbose': 0, 'warm_start': False}


Multiple runs progress:   0%|          | 0/1 [00:00<?, ?it/s]

Analyze models in one run:   0%|          | 0/1 [00:00<?, ?it/s]

Classifiers testing by bootstrap:   0%|          | 0/200 [00:00<?, ?it/s]

intervention_param:  1.0
Path for tuned params:  /home/dh3553/projects/fairness-variance/results/mult_repair_levels_ricci/tuning_results_Ricci_alpha_1.0_20230806__174821.csv
RandomForestClassifier:  {'bootstrap': True, 'ccp_alpha': 0.0, 'class_weight': None, 'criterion': 'gini', 'max_depth': 10, 'max_features': 'sqrt', 'max_leaf_nodes': None, 'max_samples': None, 'min_impurity_decrease': 0.0, 'min_samples_leaf': 1, 'min_samples_split': 5, 'min_weight_fraction_leaf': 0.0, 'n_estimators': 100, 'n_jobs': None, 'oob_score': False, 'random_state': 301, 'verbose': 0, 'warm_start': False}


Multiple runs progress:   0%|          | 0/1 [00:00<?, ?it/s]

Analyze models in one run:   0%|          | 0/1 [00:00<?, ?it/s]

Classifiers testing by bootstrap:   0%|          | 0/200 [00:00<?, ?it/s]

### Experiment iteration 4

In [83]:
# Settings for experiment iteration 4.
# Iterations are numbered from one, while EXPERIMENT_SEEDS is indexed from zero.
exp_iter_num = 4
experiment_seed = EXPERIMENT_SEEDS[exp_iter_num - 1]

# Work on a private copy of the loader so that this iteration
# cannot leak data into (or from) any other iteration.
exp_iter_data_loader = copy.deepcopy(data_loader)
models_params_for_tuning = get_folktables_employment_models_params_for_tuning(experiment_seed)

# CSV files with previously tuned model parameters (alpha 0.0, 0.4 and 0.7),
# all located in the results directory of this experiment.
tuned_params_dir = os.path.join(ROOT_DIR, 'results', EXPERIMENT_NAME)
tuned_params_filenames = [
    'tuning_results_Ricci_alpha_0.0_20230806__204940.csv',
    'tuning_results_Ricci_alpha_0.4_20230806__205057.csv',
    'tuning_results_Ricci_alpha_0.7_20230806__205137.csv',
]
tuned_params_df_paths = [os.path.join(tuned_params_dir, filename)
                         for filename in tuned_params_filenames]

# Tag every record produced by this iteration with its label.
custom_table_fields_dct['experiment_iteration'] = f'Exp_iter_{exp_iter_num}'

In [84]:
# Run experiment iteration 4 with the configuration prepared in the previous cell.
# with_tuning=False makes the run use the parameter files listed in
# tuned_params_df_paths; pass with_tuning=True instead to tune the models again.
run_exp_iter_with_disparate_impact(
    data_loader=exp_iter_data_loader,
    experiment_seed=experiment_seed,
    test_set_fraction=TEST_SET_FRACTION,
    db_writer_func=db_writer_func,
    fair_intervention_params_lst=FAIR_INTERVENTION_PARAMS_LST,
    models_params_for_tuning=models_params_for_tuning,
    metrics_computation_config=metrics_computation_config,
    custom_table_fields_dct=custom_table_fields_dct,
    with_tuning=False,
    tuned_params_df_paths=tuned_params_df_paths,
    save_results_dir_path=SAVE_RESULTS_DIR_PATH,
    verbose=True,
    dataset_name=DATASET_NAME,
)

2023-08-06 14:04:55 experiment_interface.py INFO    : Start an experiment iteration for the following custom params:
INFO:root:Start an experiment iteration for the following custom params:


{'dataset_split_seed': 400,
 'experiment_iteration': 'Exp_iter_4',
 'fair_intervention_params_lst': '[0.0, 0.1, 0.2, 0.3, 0.4, 0.5, 0.6, 0.7, '
                                 '0.8, 0.9, 1.0]',
 'intervention_param': 1.0,
 'model_init_seed': 400,
 'session_uuid': '6d916473-4d0f-4247-9638-b4f140ec4b28'}




2023-08-06 14:04:55 experiment_interface.py INFO    : The dataset is preprocessed
INFO:root:The dataset is preprocessed


Top indexes of an X_test in a base flow dataset:  Int64Index([101,  24,  35,  10,  93,  89,  83, 108,  48,  47,  59,  58,  85,
             80,  11,  77,  71, 106,  66,  99],
           dtype='int64')
Top indexes of an y_test in a base flow dataset:  Int64Index([101,  24,  35,  10,  93,  89,  83, 108,  48,  47,  59,  58,  85,
             80,  11,  77,  71, 106,  66,  99],
           dtype='int64')


Multiple alphas:   0%|          | 0/11 [00:00<?, ?it/s]

2023-08-06 14:04:55 experiment_interface.py INFO    : Models config is loaded from the input file
INFO:root:Models config is loaded from the input file


intervention_param:  0.0
Path for tuned params:  /home/dh3553/projects/fairness-variance/results/mult_repair_levels_ricci/tuning_results_Ricci_alpha_0.0_20230806__173738.csv
RandomForestClassifier:  {'bootstrap': True, 'ccp_alpha': 0.0, 'class_weight': None, 'criterion': 'gini', 'max_depth': 10, 'max_features': 'sqrt', 'max_leaf_nodes': None, 'max_samples': None, 'min_impurity_decrease': 0.0, 'min_samples_leaf': 1, 'min_samples_split': 2, 'min_weight_fraction_leaf': 0.0, 'n_estimators': 100, 'n_jobs': None, 'oob_score': False, 'random_state': 400, 'verbose': 0, 'warm_start': False}


Multiple runs progress:   0%|          | 0/1 [00:00<?, ?it/s]

Analyze models in one run:   0%|          | 0/1 [00:00<?, ?it/s]

Classifiers testing by bootstrap:   0%|          | 0/200 [00:00<?, ?it/s]

intervention_param:  0.1
Path for tuned params:  /home/dh3553/projects/fairness-variance/results/mult_repair_levels_ricci/tuning_results_Ricci_alpha_0.1_20230806__173842.csv
RandomForestClassifier:  {'bootstrap': True, 'ccp_alpha': 0.0, 'class_weight': None, 'criterion': 'gini', 'max_depth': 10, 'max_features': 'sqrt', 'max_leaf_nodes': None, 'max_samples': None, 'min_impurity_decrease': 0.0, 'min_samples_leaf': 1, 'min_samples_split': 2, 'min_weight_fraction_leaf': 0.0, 'n_estimators': 100, 'n_jobs': None, 'oob_score': False, 'random_state': 401, 'verbose': 0, 'warm_start': False}


Multiple runs progress:   0%|          | 0/1 [00:00<?, ?it/s]

Analyze models in one run:   0%|          | 0/1 [00:00<?, ?it/s]

Classifiers testing by bootstrap:   0%|          | 0/200 [00:00<?, ?it/s]

intervention_param:  0.2
Path for tuned params:  /home/dh3553/projects/fairness-variance/results/mult_repair_levels_ricci/tuning_results_Ricci_alpha_0.2_20230806__173944.csv
RandomForestClassifier:  {'bootstrap': True, 'ccp_alpha': 0.0, 'class_weight': None, 'criterion': 'gini', 'max_depth': 10, 'max_features': 'sqrt', 'max_leaf_nodes': None, 'max_samples': None, 'min_impurity_decrease': 0.0, 'min_samples_leaf': 1, 'min_samples_split': 5, 'min_weight_fraction_leaf': 0.0, 'n_estimators': 100, 'n_jobs': None, 'oob_score': False, 'random_state': 401, 'verbose': 0, 'warm_start': False}


Multiple runs progress:   0%|          | 0/1 [00:00<?, ?it/s]

Analyze models in one run:   0%|          | 0/1 [00:00<?, ?it/s]

Classifiers testing by bootstrap:   0%|          | 0/200 [00:00<?, ?it/s]

intervention_param:  0.3
Path for tuned params:  /home/dh3553/projects/fairness-variance/results/mult_repair_levels_ricci/tuning_results_Ricci_alpha_0.3_20230806__174046.csv
RandomForestClassifier:  {'bootstrap': True, 'ccp_alpha': 0.0, 'class_weight': None, 'criterion': 'gini', 'max_depth': 10, 'max_features': 'sqrt', 'max_leaf_nodes': None, 'max_samples': None, 'min_impurity_decrease': 0.0, 'min_samples_leaf': 1, 'min_samples_split': 2, 'min_weight_fraction_leaf': 0.0, 'n_estimators': 100, 'n_jobs': None, 'oob_score': False, 'random_state': 401, 'verbose': 0, 'warm_start': False}


Multiple runs progress:   0%|          | 0/1 [00:00<?, ?it/s]

Analyze models in one run:   0%|          | 0/1 [00:00<?, ?it/s]

Classifiers testing by bootstrap:   0%|          | 0/200 [00:00<?, ?it/s]

intervention_param:  0.4
Path for tuned params:  /home/dh3553/projects/fairness-variance/results/mult_repair_levels_ricci/tuning_results_Ricci_alpha_0.4_20230806__174149.csv
RandomForestClassifier:  {'bootstrap': True, 'ccp_alpha': 0.0, 'class_weight': None, 'criterion': 'gini', 'max_depth': 10, 'max_features': 'sqrt', 'max_leaf_nodes': None, 'max_samples': None, 'min_impurity_decrease': 0.0, 'min_samples_leaf': 1, 'min_samples_split': 2, 'min_weight_fraction_leaf': 0.0, 'n_estimators': 100, 'n_jobs': None, 'oob_score': False, 'random_state': 401, 'verbose': 0, 'warm_start': False}


Multiple runs progress:   0%|          | 0/1 [00:00<?, ?it/s]

Analyze models in one run:   0%|          | 0/1 [00:00<?, ?it/s]

Classifiers testing by bootstrap:   0%|          | 0/200 [00:00<?, ?it/s]

intervention_param:  0.5
Path for tuned params:  /home/dh3553/projects/fairness-variance/results/mult_repair_levels_ricci/tuning_results_Ricci_alpha_0.5_20230806__174251.csv
RandomForestClassifier:  {'bootstrap': True, 'ccp_alpha': 0.0, 'class_weight': None, 'criterion': 'gini', 'max_depth': 10, 'max_features': 'sqrt', 'max_leaf_nodes': None, 'max_samples': None, 'min_impurity_decrease': 0.0, 'min_samples_leaf': 1, 'min_samples_split': 5, 'min_weight_fraction_leaf': 0.0, 'n_estimators': 100, 'n_jobs': None, 'oob_score': False, 'random_state': 401, 'verbose': 0, 'warm_start': False}


Multiple runs progress:   0%|          | 0/1 [00:00<?, ?it/s]

Analyze models in one run:   0%|          | 0/1 [00:00<?, ?it/s]

Classifiers testing by bootstrap:   0%|          | 0/200 [00:00<?, ?it/s]

intervention_param:  0.6
Path for tuned params:  /home/dh3553/projects/fairness-variance/results/mult_repair_levels_ricci/tuning_results_Ricci_alpha_0.6_20230806__174353.csv
RandomForestClassifier:  {'bootstrap': True, 'ccp_alpha': 0.0, 'class_weight': None, 'criterion': 'gini', 'max_depth': 10, 'max_features': 'sqrt', 'max_leaf_nodes': None, 'max_samples': None, 'min_impurity_decrease': 0.0, 'min_samples_leaf': 1, 'min_samples_split': 2, 'min_weight_fraction_leaf': 0.0, 'n_estimators': 100, 'n_jobs': None, 'oob_score': False, 'random_state': 401, 'verbose': 0, 'warm_start': False}


Multiple runs progress:   0%|          | 0/1 [00:00<?, ?it/s]

Analyze models in one run:   0%|          | 0/1 [00:00<?, ?it/s]

Classifiers testing by bootstrap:   0%|          | 0/200 [00:00<?, ?it/s]

intervention_param:  0.7
Path for tuned params:  /home/dh3553/projects/fairness-variance/results/mult_repair_levels_ricci/tuning_results_Ricci_alpha_0.7_20230806__174456.csv
RandomForestClassifier:  {'bootstrap': False, 'ccp_alpha': 0.0, 'class_weight': None, 'criterion': 'gini', 'max_depth': 10, 'max_features': 'sqrt', 'max_leaf_nodes': None, 'max_samples': None, 'min_impurity_decrease': 0.0, 'min_samples_leaf': 1, 'min_samples_split': 10, 'min_weight_fraction_leaf': 0.0, 'n_estimators': 100, 'n_jobs': None, 'oob_score': False, 'random_state': 401, 'verbose': 0, 'warm_start': False}


Multiple runs progress:   0%|          | 0/1 [00:00<?, ?it/s]

Analyze models in one run:   0%|          | 0/1 [00:00<?, ?it/s]

Classifiers testing by bootstrap:   0%|          | 0/200 [00:00<?, ?it/s]

intervention_param:  0.8
Path for tuned params:  /home/dh3553/projects/fairness-variance/results/mult_repair_levels_ricci/tuning_results_Ricci_alpha_0.8_20230806__174554.csv
RandomForestClassifier:  {'bootstrap': True, 'ccp_alpha': 0.0, 'class_weight': None, 'criterion': 'gini', 'max_depth': 10, 'max_features': 'sqrt', 'max_leaf_nodes': None, 'max_samples': None, 'min_impurity_decrease': 0.0, 'min_samples_leaf': 1, 'min_samples_split': 5, 'min_weight_fraction_leaf': 0.0, 'n_estimators': 100, 'n_jobs': None, 'oob_score': False, 'random_state': 401, 'verbose': 0, 'warm_start': False}


Multiple runs progress:   0%|          | 0/1 [00:00<?, ?it/s]

Analyze models in one run:   0%|          | 0/1 [00:00<?, ?it/s]

Classifiers testing by bootstrap:   0%|          | 0/200 [00:00<?, ?it/s]

intervention_param:  0.9
Path for tuned params:  /home/dh3553/projects/fairness-variance/results/mult_repair_levels_ricci/tuning_results_Ricci_alpha_0.9_20230806__174655.csv
RandomForestClassifier:  {'bootstrap': True, 'ccp_alpha': 0.0, 'class_weight': None, 'criterion': 'gini', 'max_depth': 10, 'max_features': 'sqrt', 'max_leaf_nodes': None, 'max_samples': None, 'min_impurity_decrease': 0.0, 'min_samples_leaf': 1, 'min_samples_split': 2, 'min_weight_fraction_leaf': 0.0, 'n_estimators': 200, 'n_jobs': None, 'oob_score': False, 'random_state': 401, 'verbose': 0, 'warm_start': False}


Multiple runs progress:   0%|          | 0/1 [00:00<?, ?it/s]

Analyze models in one run:   0%|          | 0/1 [00:00<?, ?it/s]

Classifiers testing by bootstrap:   0%|          | 0/200 [00:00<?, ?it/s]

intervention_param:  1.0
Path for tuned params:  /home/dh3553/projects/fairness-variance/results/mult_repair_levels_ricci/tuning_results_Ricci_alpha_1.0_20230806__174821.csv
RandomForestClassifier:  {'bootstrap': True, 'ccp_alpha': 0.0, 'class_weight': None, 'criterion': 'gini', 'max_depth': 10, 'max_features': 'sqrt', 'max_leaf_nodes': None, 'max_samples': None, 'min_impurity_decrease': 0.0, 'min_samples_leaf': 1, 'min_samples_split': 5, 'min_weight_fraction_leaf': 0.0, 'n_estimators': 100, 'n_jobs': None, 'oob_score': False, 'random_state': 401, 'verbose': 0, 'warm_start': False}


Multiple runs progress:   0%|          | 0/1 [00:00<?, ?it/s]

Analyze models in one run:   0%|          | 0/1 [00:00<?, ?it/s]

Classifiers testing by bootstrap:   0%|          | 0/200 [00:00<?, ?it/s]

### Experiment iteration 5

In [26]:
# Settings for experiment iteration 5.
# Iterations are numbered from one, while EXPERIMENT_SEEDS is indexed from zero.
exp_iter_num = 5
experiment_seed = EXPERIMENT_SEEDS[exp_iter_num - 1]

# Work on a private copy of the loader so that this iteration
# cannot leak data into (or from) any other iteration.
exp_iter_data_loader = copy.deepcopy(data_loader)
models_params_for_tuning = get_folktables_employment_models_params_for_tuning(experiment_seed)

# CSV files with previously tuned model parameters (alpha 0.0, 0.4 and 0.7),
# all located in the results directory of this experiment.
tuned_params_dir = os.path.join(ROOT_DIR, 'results', EXPERIMENT_NAME)
tuned_params_filenames = [
    'tuning_results_Ricci_alpha_0.0_20230806__204940.csv',
    'tuning_results_Ricci_alpha_0.4_20230806__205057.csv',
    'tuning_results_Ricci_alpha_0.7_20230806__205137.csv',
]
tuned_params_df_paths = [os.path.join(tuned_params_dir, filename)
                         for filename in tuned_params_filenames]

# Tag every record produced by this iteration with its label.
custom_table_fields_dct['experiment_iteration'] = f'Exp_iter_{exp_iter_num}'

In [27]:
# Run experiment iteration 5 with the configuration prepared in the previous cell.
# with_tuning=False makes the run use the parameter files listed in
# tuned_params_df_paths; pass with_tuning=True instead to tune the models again.
run_exp_iter_with_disparate_impact(
    data_loader=exp_iter_data_loader,
    experiment_seed=experiment_seed,
    test_set_fraction=TEST_SET_FRACTION,
    db_writer_func=db_writer_func,
    fair_intervention_params_lst=FAIR_INTERVENTION_PARAMS_LST,
    models_params_for_tuning=models_params_for_tuning,
    metrics_computation_config=metrics_computation_config,
    custom_table_fields_dct=custom_table_fields_dct,
    with_tuning=False,
    tuned_params_df_paths=tuned_params_df_paths,
    save_results_dir_path=SAVE_RESULTS_DIR_PATH,
    verbose=True,
    dataset_name=DATASET_NAME,
)

2023-08-06 16:58:34 experiment_interface.py INFO    : Start an experiment iteration for the following custom params:
INFO:root:Start an experiment iteration for the following custom params:


{'dataset_split_seed': 500,
 'experiment_iteration': 'Exp_iter_5',
 'fair_intervention_params_lst': '[0.0, 0.4, 0.7]',
 'model_init_seed': 500,
 'session_uuid': 'd6a4d686-4369-4bca-95c8-7be5d0740b15'}




2023-08-06 16:58:34 experiment_interface.py INFO    : The dataset is preprocessed
INFO:root:The dataset is preprocessed


Top indexes of an X_test in a base flow dataset:  Int64Index([74, 92, 16, 54, 50, 81, 66, 95, 7, 84, 83, 113, 115, 1, 9, 5, 98,
            109, 26, 103],
           dtype='int64')
Top indexes of an y_test in a base flow dataset:  Int64Index([74, 92, 16, 54, 50, 81, 66, 95, 7, 84, 83, 113, 115, 1, 9, 5, 98,
            109, 26, 103],
           dtype='int64')


Multiple alphas:   0%|          | 0/3 [00:00<?, ?it/s]

2023-08-06 16:58:34 experiment_interface.py INFO    : Models config is loaded from the input file
INFO:root:Models config is loaded from the input file


intervention_param:  0.0
Path for tuned params:  /home/dh3553/projects/fairness-variance/results/one_repair_lvl_many_models_ricci/tuning_results_Ricci_alpha_0.0_20230806__204940.csv
LGBMClassifier:  {'boosting_type': 'gbdt', 'class_weight': None, 'colsample_bytree': 1.0, 'importance_type': 'split', 'learning_rate': 0.1, 'max_depth': 3, 'min_child_samples': 20, 'min_child_weight': 0.001, 'min_split_gain': 0.0, 'n_estimators': 100, 'n_jobs': -1, 'num_leaves': 20, 'objective': None, 'random_state': 500, 'reg_alpha': 0.0, 'reg_lambda': 0.0, 'silent': 'warn', 'subsample': 1.0, 'subsample_for_bin': 200000, 'subsample_freq': 0, 'min_data_in_leaf': 100}


Multiple runs progress:   0%|          | 0/1 [00:00<?, ?it/s]

Analyze models in one run:   0%|          | 0/4 [00:00<?, ?it/s]

##############################  [Model 1 / 4] Analyze LGBMClassifier  ##############################
Model seed:  501

Protected groups splits:
Race_priv (19, 1)
Race_dis (20, 1)




2023-08-06 16:58:34 abstract_overall_variance_analyzer.py INFO    : Start classifiers testing by bootstrap
INFO:root:Start classifiers testing by bootstrap


Classifiers testing by bootstrap:   0%|          | 0/200 [00:00<?, ?it/s]









2023-08-06 16:58:35 abstract_overall_variance_analyzer.py INFO    : Successfully tested classifiers by bootstrap
INFO:root:Successfully tested classifiers by bootstrap
2023-08-06 16:58:35 abstract_overall_variance_analyzer.py INFO    : Successfully computed predict proba metrics
INFO:root:Successfully computed predict proba metrics



[LGBMClassifier] Metrics matrix:


Unnamed: 0,Metric,overall,Race_priv,Race_dis,Model_Seed,Model_Name,Model_Params
0,Mean,0.479603,0.479603,0.479603,501,LGBMClassifier,"{'boosting_type': 'gbdt', 'class_weight': None..."
1,Std,0.063768,0.063768,0.063768,501,LGBMClassifier,"{'boosting_type': 'gbdt', 'class_weight': None..."
2,IQR,0.079365,0.079365,0.079365,501,LGBMClassifier,"{'boosting_type': 'gbdt', 'class_weight': None..."
3,Aleatoric_Uncertainty,0.987004,0.987004,0.987004,501,LGBMClassifier,"{'boosting_type': 'gbdt', 'class_weight': None..."
4,Overall_Uncertainty,0.998799,0.998799,0.998799,501,LGBMClassifier,"{'boosting_type': 'gbdt', 'class_weight': None..."
5,Statistical_Bias,0.504707,0.498926,0.510198,501,LGBMClassifier,"{'boosting_type': 'gbdt', 'class_weight': None..."
6,Jitter,0.460251,0.460251,0.460251,501,LGBMClassifier,"{'boosting_type': 'gbdt', 'class_weight': None..."
7,Per_Sample_Accuracy,0.466538,0.507632,0.4275,501,LGBMClassifier,"{'boosting_type': 'gbdt', 'class_weight': None..."
8,Label_Stability,0.29,0.29,0.29,501,LGBMClassifier,"{'boosting_type': 'gbdt', 'class_weight': None..."
9,TPR,1.0,1.0,1.0,501,LGBMClassifier,"{'boosting_type': 'gbdt', 'class_weight': None..."






##############################  [Model 2 / 4] Analyze LogisticRegression  ##############################
Model seed:  501

Protected groups splits:
Race_priv (19, 1)
Race_dis (20, 1)




2023-08-06 16:58:36 abstract_overall_variance_analyzer.py INFO    : Start classifiers testing by bootstrap
INFO:root:Start classifiers testing by bootstrap


Classifiers testing by bootstrap:   0%|          | 0/200 [00:00<?, ?it/s]





2023-08-06 16:58:37 abstract_overall_variance_analyzer.py INFO    : Successfully tested classifiers by bootstrap
INFO:root:Successfully tested classifiers by bootstrap
2023-08-06 16:58:38 abstract_overall_variance_analyzer.py INFO    : Successfully computed predict proba metrics
INFO:root:Successfully computed predict proba metrics



[LogisticRegression] Metrics matrix:


Unnamed: 0,Metric,overall,Race_priv,Race_dis,Model_Seed,Model_Name,Model_Params
0,Mean,0.571232,0.444764,0.691376,501,LogisticRegression,"{'C': 0.1, 'class_weight': None, 'dual': False..."
1,Std,0.040976,0.045676,0.03651,501,LogisticRegression,"{'C': 0.1, 'class_weight': None, 'dual': False..."
2,IQR,0.056258,0.063761,0.04913,501,LogisticRegression,"{'C': 0.1, 'class_weight': None, 'dual': False..."
3,Aleatoric_Uncertainty,0.676298,0.727189,0.627952,501,LogisticRegression,"{'C': 0.1, 'class_weight': None, 'dual': False..."
4,Overall_Uncertainty,0.684204,0.736129,0.634874,501,LogisticRegression,"{'C': 0.1, 'class_weight': None, 'dual': False..."
5,Statistical_Bias,0.217211,0.249184,0.186837,501,LogisticRegression,"{'C': 0.1, 'class_weight': None, 'dual': False..."
6,Jitter,0.058458,0.076596,0.041226,501,LogisticRegression,"{'C': 0.1, 'class_weight': None, 'dual': False..."
7,Per_Sample_Accuracy,0.96141,0.952368,0.97,501,LogisticRegression,"{'C': 0.1, 'class_weight': None, 'dual': False..."
8,Label_Stability,0.922821,0.904737,0.94,501,LogisticRegression,"{'C': 0.1, 'class_weight': None, 'dual': False..."
9,TPR,1.0,1.0,1.0,501,LogisticRegression,"{'C': 0.1, 'class_weight': None, 'dual': False..."






##############################  [Model 3 / 4] Analyze RandomForestClassifier  ##############################
Model seed:  501

Protected groups splits:
Race_priv (19, 1)
Race_dis (20, 1)




2023-08-06 16:58:38 abstract_overall_variance_analyzer.py INFO    : Start classifiers testing by bootstrap
INFO:root:Start classifiers testing by bootstrap


Classifiers testing by bootstrap:   0%|          | 0/200 [00:00<?, ?it/s]





2023-08-06 16:59:04 abstract_overall_variance_analyzer.py INFO    : Successfully tested classifiers by bootstrap
INFO:root:Successfully tested classifiers by bootstrap
2023-08-06 16:59:04 abstract_overall_variance_analyzer.py INFO    : Successfully computed predict proba metrics
INFO:root:Successfully computed predict proba metrics



[RandomForestClassifier] Metrics matrix:


Unnamed: 0,Metric,overall,Race_priv,Race_dis,Model_Seed,Model_Name,Model_Params
0,Mean,0.620668,0.477292,0.756875,501,RandomForestClassifier,"{'bootstrap': True, 'ccp_alpha': 0.0, 'class_w..."
1,Std,0.033775,0.045377,0.022754,501,RandomForestClassifier,"{'bootstrap': True, 'ccp_alpha': 0.0, 'class_w..."
2,IQR,0.037885,0.055526,0.021125,501,RandomForestClassifier,"{'bootstrap': True, 'ccp_alpha': 0.0, 'class_w..."
3,Aleatoric_Uncertainty,0.160549,0.212317,0.11137,501,RandomForestClassifier,"{'bootstrap': True, 'ccp_alpha': 0.0, 'class_w..."
4,Overall_Uncertainty,0.184202,0.243674,0.127704,501,RandomForestClassifier,"{'bootstrap': True, 'ccp_alpha': 0.0, 'class_w..."
5,Statistical_Bias,0.046919,0.067787,0.027095,501,RandomForestClassifier,"{'bootstrap': True, 'ccp_alpha': 0.0, 'class_w..."
6,Jitter,0.014624,0.02452,0.005224,501,RandomForestClassifier,"{'bootstrap': True, 'ccp_alpha': 0.0, 'class_w..."
7,Per_Sample_Accuracy,0.982308,0.966579,0.99725,501,RandomForestClassifier,"{'bootstrap': True, 'ccp_alpha': 0.0, 'class_w..."
8,Label_Stability,0.978462,0.961579,0.9945,501,RandomForestClassifier,"{'bootstrap': True, 'ccp_alpha': 0.0, 'class_w..."
9,TPR,0.933333,0.9,1.0,501,RandomForestClassifier,"{'bootstrap': True, 'ccp_alpha': 0.0, 'class_w..."






##############################  [Model 4 / 4] Analyze MLPClassifier  ##############################
Model seed:  501

Protected groups splits:
Race_priv (19, 1)
Race_dis (20, 1)




2023-08-06 16:59:05 abstract_overall_variance_analyzer.py INFO    : Start classifiers testing by bootstrap
INFO:root:Start classifiers testing by bootstrap


Classifiers testing by bootstrap:   0%|          | 0/200 [00:00<?, ?it/s]





2023-08-06 16:59:26 abstract_overall_variance_analyzer.py INFO    : Successfully tested classifiers by bootstrap
INFO:root:Successfully tested classifiers by bootstrap
2023-08-06 16:59:27 abstract_overall_variance_analyzer.py INFO    : Successfully computed predict proba metrics
INFO:root:Successfully computed predict proba metrics



[MLPClassifier] Metrics matrix:


Unnamed: 0,Metric,overall,Race_priv,Race_dis,Model_Seed,Model_Name,Model_Params
0,Mean,0.635526,0.483158,0.780276,501,MLPClassifier,"{'activation': 'logistic', 'alpha': 0.0001, 'b..."
1,Std,0.022251,0.022616,0.021905,501,MLPClassifier,"{'activation': 'logistic', 'alpha': 0.0001, 'b..."
2,IQR,0.02184,0.00904,0.034,501,MLPClassifier,"{'activation': 'logistic', 'alpha': 0.0001, 'b..."
3,Aleatoric_Uncertainty,0.021578,0.019213,0.023824,501,MLPClassifier,"{'activation': 'logistic', 'alpha': 0.0001, 'b..."
4,Overall_Uncertainty,0.046884,0.042503,0.051046,501,MLPClassifier,"{'activation': 'logistic', 'alpha': 0.0001, 'b..."
5,Statistical_Bias,0.021076,0.010693,0.030939,501,MLPClassifier,"{'activation': 'logistic', 'alpha': 0.0001, 'b..."
6,Jitter,0.018906,0.014541,0.023053,501,MLPClassifier,"{'activation': 'logistic', 'alpha': 0.0001, 'b..."
7,Per_Sample_Accuracy,0.978846,0.991579,0.96675,501,MLPClassifier,"{'activation': 'logistic', 'alpha': 0.0001, 'b..."
8,Label_Stability,0.974103,0.983158,0.9655,501,MLPClassifier,"{'activation': 'logistic', 'alpha': 0.0001, 'b..."
9,TPR,0.933333,1.0,0.8,501,MLPClassifier,"{'activation': 'logistic', 'alpha': 0.0001, 'b..."








2023-08-06 16:59:28 experiment_interface.py INFO    : Models config is loaded from the input file
INFO:root:Models config is loaded from the input file


intervention_param:  0.4
Path for tuned params:  /home/dh3553/projects/fairness-variance/results/one_repair_lvl_many_models_ricci/tuning_results_Ricci_alpha_0.4_20230806__205057.csv
LGBMClassifier:  {'boosting_type': 'gbdt', 'class_weight': None, 'colsample_bytree': 1.0, 'importance_type': 'split', 'learning_rate': 0.1, 'max_depth': 3, 'min_child_samples': 20, 'min_child_weight': 0.001, 'min_split_gain': 0.0, 'n_estimators': 100, 'n_jobs': -1, 'num_leaves': 20, 'objective': None, 'random_state': 501, 'reg_alpha': 0.0, 'reg_lambda': 0.0, 'silent': 'warn', 'subsample': 1.0, 'subsample_for_bin': 200000, 'subsample_freq': 0, 'min_data_in_leaf': 100}


Multiple runs progress:   0%|          | 0/1 [00:00<?, ?it/s]

Analyze models in one run:   0%|          | 0/4 [00:00<?, ?it/s]

##############################  [Model 1 / 4] Analyze LGBMClassifier  ##############################
Model seed:  501

Protected groups splits:
Race_priv (19, 1)
Race_dis (20, 1)




2023-08-06 16:59:28 abstract_overall_variance_analyzer.py INFO    : Start classifiers testing by bootstrap
INFO:root:Start classifiers testing by bootstrap


Classifiers testing by bootstrap:   0%|          | 0/200 [00:00<?, ?it/s]







2023-08-06 16:59:29 abstract_overall_variance_analyzer.py INFO    : Successfully tested classifiers by bootstrap
INFO:root:Successfully tested classifiers by bootstrap
2023-08-06 16:59:29 abstract_overall_variance_analyzer.py INFO    : Successfully computed predict proba metrics
INFO:root:Successfully computed predict proba metrics



[LGBMClassifier] Metrics matrix:


Unnamed: 0,Metric,overall,Race_priv,Race_dis,Model_Seed,Model_Name,Model_Params
0,Mean,0.474603,0.474603,0.474603,501,LGBMClassifier,"{'boosting_type': 'gbdt', 'class_weight': None..."
1,Std,0.061978,0.061978,0.061978,501,LGBMClassifier,"{'boosting_type': 'gbdt', 'class_weight': None..."
2,IQR,0.095238,0.095238,0.095238,501,LGBMClassifier,"{'boosting_type': 'gbdt', 'class_weight': None..."
3,Aleatoric_Uncertainty,0.98701,0.98701,0.98701,501,LGBMClassifier,"{'boosting_type': 'gbdt', 'class_weight': None..."
4,Overall_Uncertainty,0.998138,0.998138,0.998138,501,LGBMClassifier,"{'boosting_type': 'gbdt', 'class_weight': None..."
5,Statistical_Bias,0.505861,0.498663,0.512698,501,LGBMClassifier,"{'boosting_type': 'gbdt', 'class_weight': None..."
6,Jitter,0.451055,0.451055,0.451055,501,LGBMClassifier,"{'boosting_type': 'gbdt', 'class_weight': None..."
7,Per_Sample_Accuracy,0.463077,0.508421,0.42,501,LGBMClassifier,"{'boosting_type': 'gbdt', 'class_weight': None..."
8,Label_Stability,0.32,0.32,0.32,501,LGBMClassifier,"{'boosting_type': 'gbdt', 'class_weight': None..."
9,TPR,1.0,1.0,1.0,501,LGBMClassifier,"{'boosting_type': 'gbdt', 'class_weight': None..."






##############################  [Model 2 / 4] Analyze LogisticRegression  ##############################
Model seed:  501

Protected groups splits:
Race_priv (19, 1)
Race_dis (20, 1)




2023-08-06 16:59:30 abstract_overall_variance_analyzer.py INFO    : Start classifiers testing by bootstrap
INFO:root:Start classifiers testing by bootstrap


Classifiers testing by bootstrap:   0%|          | 0/200 [00:00<?, ?it/s]





2023-08-06 16:59:31 abstract_overall_variance_analyzer.py INFO    : Successfully tested classifiers by bootstrap
INFO:root:Successfully tested classifiers by bootstrap
2023-08-06 16:59:31 abstract_overall_variance_analyzer.py INFO    : Successfully computed predict proba metrics
INFO:root:Successfully computed predict proba metrics



[LogisticRegression] Metrics matrix:


Unnamed: 0,Metric,overall,Race_priv,Race_dis,Model_Seed,Model_Name,Model_Params
0,Mean,0.646263,0.566469,0.722068,501,LogisticRegression,"{'C': 1, 'class_weight': None, 'dual': False, ..."
1,Std,0.014746,0.019457,0.01027,501,LogisticRegression,"{'C': 1, 'class_weight': None, 'dual': False, ..."
2,IQR,0.01916,0.025438,0.013196,501,LogisticRegression,"{'C': 1, 'class_weight': None, 'dual': False, ..."
3,Aleatoric_Uncertainty,0.171825,0.21743,0.1285,501,LogisticRegression,"{'C': 1, 'class_weight': None, 'dual': False, ..."
4,Overall_Uncertainty,0.175991,0.223062,0.131273,501,LogisticRegression,"{'C': 1, 'class_weight': None, 'dual': False, ..."
5,Statistical_Bias,0.07376,0.099801,0.04902,501,LogisticRegression,"{'C': 1, 'class_weight': None, 'dual': False, ..."
6,Jitter,0.016828,0.028112,0.006108,501,LogisticRegression,"{'C': 1, 'class_weight': None, 'dual': False, ..."
7,Per_Sample_Accuracy,0.931538,0.908684,0.95325,501,LogisticRegression,"{'C': 1, 'class_weight': None, 'dual': False, ..."
8,Label_Stability,0.978462,0.962632,0.9935,501,LogisticRegression,"{'C': 1, 'class_weight': None, 'dual': False, ..."
9,TPR,0.866667,0.8,1.0,501,LogisticRegression,"{'C': 1, 'class_weight': None, 'dual': False, ..."






##############################  [Model 3 / 4] Analyze RandomForestClassifier  ##############################
Model seed:  501

Protected groups splits:
Race_priv (19, 1)
Race_dis (20, 1)




2023-08-06 16:59:32 abstract_overall_variance_analyzer.py INFO    : Start classifiers testing by bootstrap
INFO:root:Start classifiers testing by bootstrap


Classifiers testing by bootstrap:   0%|          | 0/200 [00:00<?, ?it/s]





2023-08-06 16:59:58 abstract_overall_variance_analyzer.py INFO    : Successfully tested classifiers by bootstrap
INFO:root:Successfully tested classifiers by bootstrap
2023-08-06 16:59:58 abstract_overall_variance_analyzer.py INFO    : Successfully computed predict proba metrics
INFO:root:Successfully computed predict proba metrics



[RandomForestClassifier] Metrics matrix:


Unnamed: 0,Metric,overall,Race_priv,Race_dis,Model_Seed,Model_Name,Model_Params
0,Mean,0.619694,0.534624,0.70051,501,RandomForestClassifier,"{'bootstrap': True, 'ccp_alpha': 0.0, 'class_w..."
1,Std,0.037746,0.050258,0.02586,501,RandomForestClassifier,"{'bootstrap': True, 'ccp_alpha': 0.0, 'class_w..."
2,IQR,0.044167,0.062632,0.026625,501,RandomForestClassifier,"{'bootstrap': True, 'ccp_alpha': 0.0, 'class_w..."
3,Aleatoric_Uncertainty,0.184981,0.250901,0.122357,501,RandomForestClassifier,"{'bootstrap': True, 'ccp_alpha': 0.0, 'class_w..."
4,Overall_Uncertainty,0.21097,0.284515,0.141101,501,RandomForestClassifier,"{'bootstrap': True, 'ccp_alpha': 0.0, 'class_w..."
5,Statistical_Bias,0.09104,0.124961,0.058815,501,RandomForestClassifier,"{'bootstrap': True, 'ccp_alpha': 0.0, 'class_w..."
6,Jitter,0.011006,0.020013,0.00245,501,RandomForestClassifier,"{'bootstrap': True, 'ccp_alpha': 0.0, 'class_w..."
7,Per_Sample_Accuracy,0.927436,0.902368,0.95125,501,RandomForestClassifier,"{'bootstrap': True, 'ccp_alpha': 0.0, 'class_w..."
8,Label_Stability,0.987179,0.976316,0.9975,501,RandomForestClassifier,"{'bootstrap': True, 'ccp_alpha': 0.0, 'class_w..."
9,TPR,0.866667,0.8,1.0,501,RandomForestClassifier,"{'bootstrap': True, 'ccp_alpha': 0.0, 'class_w..."






##############################  [Model 4 / 4] Analyze MLPClassifier  ##############################
Model seed:  501

Protected groups splits:
Race_priv (19, 1)
Race_dis (20, 1)




2023-08-06 16:59:59 abstract_overall_variance_analyzer.py INFO    : Start classifiers testing by bootstrap
INFO:root:Start classifiers testing by bootstrap


Classifiers testing by bootstrap:   0%|          | 0/200 [00:00<?, ?it/s]





2023-08-06 17:00:19 abstract_overall_variance_analyzer.py INFO    : Successfully tested classifiers by bootstrap
INFO:root:Successfully tested classifiers by bootstrap
2023-08-06 17:00:19 abstract_overall_variance_analyzer.py INFO    : Successfully computed predict proba metrics
INFO:root:Successfully computed predict proba metrics



[MLPClassifier] Metrics matrix:


Unnamed: 0,Metric,overall,Race_priv,Race_dis,Model_Seed,Model_Name,Model_Params
0,Mean,0.663101,0.593897,0.728845,501,MLPClassifier,"{'activation': 'relu', 'alpha': 0.0001, 'batch..."
1,Std,0.065951,0.090169,0.042943,501,MLPClassifier,"{'activation': 'relu', 'alpha': 0.0001, 'batch..."
2,IQR,0.102661,0.157894,0.050189,501,MLPClassifier,"{'activation': 'relu', 'alpha': 0.0001, 'batch..."
3,Aleatoric_Uncertainty,0.006752,0.004936,0.008477,501,MLPClassifier,"{'activation': 'relu', 'alpha': 0.0001, 'batch..."
4,Overall_Uncertainty,0.126591,0.171942,0.083507,501,MLPClassifier,"{'activation': 'relu', 'alpha': 0.0001, 'batch..."
5,Statistical_Bias,0.078496,0.120214,0.038863,501,MLPClassifier,"{'activation': 'relu', 'alpha': 0.0001, 'batch..."
6,Jitter,0.059324,0.081804,0.037967,501,MLPClassifier,"{'activation': 'relu', 'alpha': 0.0001, 'batch..."
7,Per_Sample_Accuracy,0.921667,0.879474,0.96175,501,MLPClassifier,"{'activation': 'relu', 'alpha': 0.0001, 'batch..."
8,Label_Stability,0.904872,0.864211,0.9435,501,MLPClassifier,"{'activation': 'relu', 'alpha': 0.0001, 'batch..."
9,TPR,0.866667,0.8,1.0,501,MLPClassifier,"{'activation': 'relu', 'alpha': 0.0001, 'batch..."








2023-08-06 17:00:20 experiment_interface.py INFO    : Models config is loaded from the input file
INFO:root:Models config is loaded from the input file


intervention_param:  0.7
Path for tuned params:  /home/dh3553/projects/fairness-variance/results/one_repair_lvl_many_models_ricci/tuning_results_Ricci_alpha_0.7_20230806__205137.csv
LGBMClassifier:  {'boosting_type': 'gbdt', 'class_weight': None, 'colsample_bytree': 1.0, 'importance_type': 'split', 'learning_rate': 0.1, 'max_depth': 3, 'min_child_samples': 20, 'min_child_weight': 0.001, 'min_split_gain': 0.0, 'n_estimators': 100, 'n_jobs': -1, 'num_leaves': 20, 'objective': None, 'random_state': 501, 'reg_alpha': 0.0, 'reg_lambda': 0.0, 'silent': 'warn', 'subsample': 1.0, 'subsample_for_bin': 200000, 'subsample_freq': 0, 'min_data_in_leaf': 100}


Multiple runs progress:   0%|          | 0/1 [00:00<?, ?it/s]

Analyze models in one run:   0%|          | 0/4 [00:00<?, ?it/s]

##############################  [Model 1 / 4] Analyze LGBMClassifier  ##############################
Model seed:  501

Protected groups splits:
Race_priv (19, 1)
Race_dis (20, 1)




2023-08-06 17:00:20 abstract_overall_variance_analyzer.py INFO    : Start classifiers testing by bootstrap
INFO:root:Start classifiers testing by bootstrap


Classifiers testing by bootstrap:   0%|          | 0/200 [00:00<?, ?it/s]







2023-08-06 17:00:21 abstract_overall_variance_analyzer.py INFO    : Successfully tested classifiers by bootstrap
INFO:root:Successfully tested classifiers by bootstrap
2023-08-06 17:00:21 abstract_overall_variance_analyzer.py INFO    : Successfully computed predict proba metrics
INFO:root:Successfully computed predict proba metrics



[LGBMClassifier] Metrics matrix:


Unnamed: 0,Metric,overall,Race_priv,Race_dis,Model_Seed,Model_Name,Model_Params
0,Mean,0.482381,0.482381,0.482381,501,LGBMClassifier,"{'boosting_type': 'gbdt', 'class_weight': None..."
1,Std,0.05996,0.05996,0.05996,501,LGBMClassifier,"{'boosting_type': 'gbdt', 'class_weight': None..."
2,IQR,0.079365,0.079365,0.079365,501,LGBMClassifier,"{'boosting_type': 'gbdt', 'class_weight': None..."
3,Aleatoric_Uncertainty,0.9887,0.9887,0.9887,501,LGBMClassifier,"{'boosting_type': 'gbdt', 'class_weight': None..."
4,Overall_Uncertainty,0.999104,0.999104,0.999104,501,LGBMClassifier,"{'boosting_type': 'gbdt', 'class_weight': None..."
5,Statistical_Bias,0.504066,0.499073,0.50881,501,LGBMClassifier,"{'boosting_type': 'gbdt', 'class_weight': None..."
6,Jitter,0.478191,0.478191,0.478191,501,LGBMClassifier,"{'boosting_type': 'gbdt', 'class_weight': None..."
7,Per_Sample_Accuracy,0.474615,0.505789,0.445,501,LGBMClassifier,"{'boosting_type': 'gbdt', 'class_weight': None..."
8,Label_Stability,0.22,0.22,0.22,501,LGBMClassifier,"{'boosting_type': 'gbdt', 'class_weight': None..."
9,TPR,1.0,1.0,1.0,501,LGBMClassifier,"{'boosting_type': 'gbdt', 'class_weight': None..."






##############################  [Model 2 / 4] Analyze LogisticRegression  ##############################
Model seed:  501

Protected groups splits:
Race_priv (19, 1)
Race_dis (20, 1)




2023-08-06 17:00:22 abstract_overall_variance_analyzer.py INFO    : Start classifiers testing by bootstrap
INFO:root:Start classifiers testing by bootstrap


Classifiers testing by bootstrap:   0%|          | 0/200 [00:00<?, ?it/s]





2023-08-06 17:00:23 abstract_overall_variance_analyzer.py INFO    : Successfully tested classifiers by bootstrap
INFO:root:Successfully tested classifiers by bootstrap
2023-08-06 17:00:23 abstract_overall_variance_analyzer.py INFO    : Successfully computed predict proba metrics
INFO:root:Successfully computed predict proba metrics



[LogisticRegression] Metrics matrix:


Unnamed: 0,Metric,overall,Race_priv,Race_dis,Model_Seed,Model_Name,Model_Params
0,Mean,0.646219,0.59667,0.693291,501,LogisticRegression,"{'C': 1, 'class_weight': None, 'dual': False, ..."
1,Std,0.021119,0.025179,0.017263,501,LogisticRegression,"{'C': 1, 'class_weight': None, 'dual': False, ..."
2,IQR,0.027117,0.032228,0.022262,501,LogisticRegression,"{'C': 1, 'class_weight': None, 'dual': False, ..."
3,Aleatoric_Uncertainty,0.206511,0.238778,0.175856,501,LogisticRegression,"{'C': 1, 'class_weight': None, 'dual': False, ..."
4,Overall_Uncertainty,0.213861,0.247638,0.181772,501,LogisticRegression,"{'C': 1, 'class_weight': None, 'dual': False, ..."
5,Statistical_Bias,0.102211,0.135609,0.070482,501,LogisticRegression,"{'C': 1, 'class_weight': None, 'dual': False, ..."
6,Jitter,0.018242,0.031947,0.005224,501,LogisticRegression,"{'C': 1, 'class_weight': None, 'dual': False, ..."
7,Per_Sample_Accuracy,0.910256,0.871316,0.94725,501,LogisticRegression,"{'C': 1, 'class_weight': None, 'dual': False, ..."
8,Label_Stability,0.968718,0.941579,0.9945,501,LogisticRegression,"{'C': 1, 'class_weight': None, 'dual': False, ..."
9,TPR,0.866667,0.8,1.0,501,LogisticRegression,"{'C': 1, 'class_weight': None, 'dual': False, ..."






##############################  [Model 3 / 4] Analyze RandomForestClassifier  ##############################
Model seed:  501

Protected groups splits:
Race_priv (19, 1)
Race_dis (20, 1)




2023-08-06 17:00:24 abstract_overall_variance_analyzer.py INFO    : Start classifiers testing by bootstrap
INFO:root:Start classifiers testing by bootstrap


Classifiers testing by bootstrap:   0%|          | 0/200 [00:00<?, ?it/s]





2023-08-06 17:00:50 abstract_overall_variance_analyzer.py INFO    : Successfully tested classifiers by bootstrap
INFO:root:Successfully tested classifiers by bootstrap
2023-08-06 17:00:50 abstract_overall_variance_analyzer.py INFO    : Successfully computed predict proba metrics
INFO:root:Successfully computed predict proba metrics



[RandomForestClassifier] Metrics matrix:


Unnamed: 0,Metric,overall,Race_priv,Race_dis,Model_Seed,Model_Name,Model_Params
0,Mean,0.608658,0.54419,0.669903,501,RandomForestClassifier,"{'bootstrap': True, 'ccp_alpha': 0.0, 'class_w..."
1,Std,0.054616,0.064467,0.045256,501,RandomForestClassifier,"{'bootstrap': True, 'ccp_alpha': 0.0, 'class_w..."
2,IQR,0.069301,0.0853,0.054103,501,RandomForestClassifier,"{'bootstrap': True, 'ccp_alpha': 0.0, 'class_w..."
3,Aleatoric_Uncertainty,0.291652,0.357922,0.228696,501,RandomForestClassifier,"{'bootstrap': True, 'ccp_alpha': 0.0, 'class_w..."
4,Overall_Uncertainty,0.325578,0.396115,0.258568,501,RandomForestClassifier,"{'bootstrap': True, 'ccp_alpha': 0.0, 'class_w..."
5,Statistical_Bias,0.124454,0.156506,0.094004,501,RandomForestClassifier,"{'bootstrap': True, 'ccp_alpha': 0.0, 'class_w..."
6,Jitter,0.027735,0.038429,0.017575,501,RandomForestClassifier,"{'bootstrap': True, 'ccp_alpha': 0.0, 'class_w..."
7,Per_Sample_Accuracy,0.916795,0.892105,0.94025,501,RandomForestClassifier,"{'bootstrap': True, 'ccp_alpha': 0.0, 'class_w..."
8,Label_Stability,0.965897,0.952632,0.9785,501,RandomForestClassifier,"{'bootstrap': True, 'ccp_alpha': 0.0, 'class_w..."
9,TPR,0.866667,0.8,1.0,501,RandomForestClassifier,"{'bootstrap': True, 'ccp_alpha': 0.0, 'class_w..."






##############################  [Model 4 / 4] Analyze MLPClassifier  ##############################
Model seed:  501

Protected groups splits:
Race_priv (19, 1)
Race_dis (20, 1)




2023-08-06 17:00:51 abstract_overall_variance_analyzer.py INFO    : Start classifiers testing by bootstrap
INFO:root:Start classifiers testing by bootstrap


Classifiers testing by bootstrap:   0%|          | 0/200 [00:00<?, ?it/s]





2023-08-06 17:03:06 abstract_overall_variance_analyzer.py INFO    : Successfully tested classifiers by bootstrap
INFO:root:Successfully tested classifiers by bootstrap
2023-08-06 17:03:07 abstract_overall_variance_analyzer.py INFO    : Successfully computed predict proba metrics
INFO:root:Successfully computed predict proba metrics



[MLPClassifier] Metrics matrix:


Unnamed: 0,Metric,overall,Race_priv,Race_dis,Model_Seed,Model_Name,Model_Params
0,Mean,0.700991,0.685886,0.715341,501,MLPClassifier,"{'activation': 'logistic', 'alpha': 0.0001, 'b..."
1,Std,0.125832,0.14893,0.103889,501,MLPClassifier,"{'activation': 'logistic', 'alpha': 0.0001, 'b..."
2,IQR,0.074662,0.114826,0.036506,501,MLPClassifier,"{'activation': 'logistic', 'alpha': 0.0001, 'b..."
3,Aleatoric_Uncertainty,0.096567,0.089058,0.103699,501,MLPClassifier,"{'activation': 'logistic', 'alpha': 0.0001, 'b..."
4,Overall_Uncertainty,0.247508,0.285696,0.21123,501,MLPClassifier,"{'activation': 'logistic', 'alpha': 0.0001, 'b..."
5,Statistical_Bias,0.140328,0.22759,0.057429,501,MLPClassifier,"{'activation': 'logistic', 'alpha': 0.0001, 'b..."
6,Jitter,0.073375,0.09169,0.055975,501,MLPClassifier,"{'activation': 'logistic', 'alpha': 0.0001, 'b..."
7,Per_Sample_Accuracy,0.865897,0.772105,0.955,501,MLPClassifier,"{'activation': 'logistic', 'alpha': 0.0001, 'b..."
8,Label_Stability,0.905128,0.886316,0.923,501,MLPClassifier,"{'activation': 'logistic', 'alpha': 0.0001, 'b..."
9,TPR,0.666667,0.5,1.0,501,MLPClassifier,"{'activation': 'logistic', 'alpha': 0.0001, 'b..."








2023-08-06 17:03:08 experiment_interface.py INFO    : Experiment run was successful!
INFO:root:Experiment run was successful!


### Experiment iteration 6

In [87]:
# Settings for experiment iteration 6.
# Iterations are numbered from 1, so iteration N takes the seed stored at index N - 1.
exp_iter_num = 6
experiment_seed = EXPERIMENT_SEEDS[exp_iter_num - 1]

# Work on a private deep copy of the shared loader to avoid data leakage
# between experiment iterations.
exp_iter_data_loader = copy.deepcopy(data_loader)
models_params_for_tuning = get_folktables_employment_models_params_for_tuning(experiment_seed)

# Tag every record written for this run with the iteration it belongs to.
custom_table_fields_dct['experiment_iteration'] = f'Exp_iter_{exp_iter_num}'

# Previously saved tuning results: one CSV per alpha in FAIR_INTERVENTION_PARAMS_LST
# (0.0, 0.4 and 0.7), stored under <ROOT_DIR>/results/<EXPERIMENT_NAME>/.
tuned_params_filenames = [
    'tuning_results_Ricci_alpha_0.0_20230806__204940.csv',
    'tuning_results_Ricci_alpha_0.4_20230806__205057.csv',
    'tuning_results_Ricci_alpha_0.7_20230806__205137.csv',
]
tuned_params_df_paths = [
    os.path.join(ROOT_DIR, 'results', EXPERIMENT_NAME, csv_name)
    for csv_name in tuned_params_filenames
]

In [88]:
# Launch experiment iteration 6 with the configuration prepared in the previous cell.
# Tuning is switched off here (with_tuning=False) and the paths of the saved tuning
# results are passed via tuned_params_df_paths; `with_tuning=True` is the alternative
# setting that was previously kept commented out in this call.
run_exp_iter_with_disparate_impact(
    data_loader=exp_iter_data_loader,
    experiment_seed=experiment_seed,
    test_set_fraction=TEST_SET_FRACTION,
    db_writer_func=db_writer_func,
    fair_intervention_params_lst=FAIR_INTERVENTION_PARAMS_LST,
    models_params_for_tuning=models_params_for_tuning,
    metrics_computation_config=metrics_computation_config,
    custom_table_fields_dct=custom_table_fields_dct,
    with_tuning=False,
    tuned_params_df_paths=tuned_params_df_paths,
    save_results_dir_path=SAVE_RESULTS_DIR_PATH,
    verbose=True,
    dataset_name=DATASET_NAME,
)

2023-08-06 14:15:23 experiment_interface.py INFO    : Start an experiment iteration for the following custom params:
INFO:root:Start an experiment iteration for the following custom params:


{'dataset_split_seed': 600,
 'experiment_iteration': 'Exp_iter_6',
 'fair_intervention_params_lst': '[0.0, 0.1, 0.2, 0.3, 0.4, 0.5, 0.6, 0.7, '
                                 '0.8, 0.9, 1.0]',
 'intervention_param': 1.0,
 'model_init_seed': 600,
 'session_uuid': '6d916473-4d0f-4247-9638-b4f140ec4b28'}




2023-08-06 14:15:23 experiment_interface.py INFO    : The dataset is preprocessed
INFO:root:The dataset is preprocessed


Top indexes of an X_test in a base flow dataset:  Int64Index([102, 112, 115,  24,  57, 109,  61,  56,  80,   3, 104, 103, 111,
             45,  14,  23, 113,   4,  83,  17],
           dtype='int64')
Top indexes of an y_test in a base flow dataset:  Int64Index([102, 112, 115,  24,  57, 109,  61,  56,  80,   3, 104, 103, 111,
             45,  14,  23, 113,   4,  83,  17],
           dtype='int64')


Multiple alphas:   0%|          | 0/11 [00:00<?, ?it/s]

2023-08-06 14:15:23 experiment_interface.py INFO    : Models config is loaded from the input file
INFO:root:Models config is loaded from the input file


intervention_param:  0.0
Path for tuned params:  /home/dh3553/projects/fairness-variance/results/mult_repair_levels_ricci/tuning_results_Ricci_alpha_0.0_20230806__173738.csv
RandomForestClassifier:  {'bootstrap': True, 'ccp_alpha': 0.0, 'class_weight': None, 'criterion': 'gini', 'max_depth': 10, 'max_features': 'sqrt', 'max_leaf_nodes': None, 'max_samples': None, 'min_impurity_decrease': 0.0, 'min_samples_leaf': 1, 'min_samples_split': 2, 'min_weight_fraction_leaf': 0.0, 'n_estimators': 100, 'n_jobs': None, 'oob_score': False, 'random_state': 600, 'verbose': 0, 'warm_start': False}


Multiple runs progress:   0%|          | 0/1 [00:00<?, ?it/s]

Analyze models in one run:   0%|          | 0/1 [00:00<?, ?it/s]

Classifiers testing by bootstrap:   0%|          | 0/200 [00:00<?, ?it/s]

intervention_param:  0.1
Path for tuned params:  /home/dh3553/projects/fairness-variance/results/mult_repair_levels_ricci/tuning_results_Ricci_alpha_0.1_20230806__173842.csv
RandomForestClassifier:  {'bootstrap': True, 'ccp_alpha': 0.0, 'class_weight': None, 'criterion': 'gini', 'max_depth': 10, 'max_features': 'sqrt', 'max_leaf_nodes': None, 'max_samples': None, 'min_impurity_decrease': 0.0, 'min_samples_leaf': 1, 'min_samples_split': 2, 'min_weight_fraction_leaf': 0.0, 'n_estimators': 100, 'n_jobs': None, 'oob_score': False, 'random_state': 601, 'verbose': 0, 'warm_start': False}


Multiple runs progress:   0%|          | 0/1 [00:00<?, ?it/s]

Analyze models in one run:   0%|          | 0/1 [00:00<?, ?it/s]

Classifiers testing by bootstrap:   0%|          | 0/200 [00:00<?, ?it/s]

intervention_param:  0.2
Path for tuned params:  /home/dh3553/projects/fairness-variance/results/mult_repair_levels_ricci/tuning_results_Ricci_alpha_0.2_20230806__173944.csv
RandomForestClassifier:  {'bootstrap': True, 'ccp_alpha': 0.0, 'class_weight': None, 'criterion': 'gini', 'max_depth': 10, 'max_features': 'sqrt', 'max_leaf_nodes': None, 'max_samples': None, 'min_impurity_decrease': 0.0, 'min_samples_leaf': 1, 'min_samples_split': 5, 'min_weight_fraction_leaf': 0.0, 'n_estimators': 100, 'n_jobs': None, 'oob_score': False, 'random_state': 601, 'verbose': 0, 'warm_start': False}


Multiple runs progress:   0%|          | 0/1 [00:00<?, ?it/s]

Analyze models in one run:   0%|          | 0/1 [00:00<?, ?it/s]

Classifiers testing by bootstrap:   0%|          | 0/200 [00:00<?, ?it/s]

intervention_param:  0.3
Path for tuned params:  /home/dh3553/projects/fairness-variance/results/mult_repair_levels_ricci/tuning_results_Ricci_alpha_0.3_20230806__174046.csv
RandomForestClassifier:  {'bootstrap': True, 'ccp_alpha': 0.0, 'class_weight': None, 'criterion': 'gini', 'max_depth': 10, 'max_features': 'sqrt', 'max_leaf_nodes': None, 'max_samples': None, 'min_impurity_decrease': 0.0, 'min_samples_leaf': 1, 'min_samples_split': 2, 'min_weight_fraction_leaf': 0.0, 'n_estimators': 100, 'n_jobs': None, 'oob_score': False, 'random_state': 601, 'verbose': 0, 'warm_start': False}


Multiple runs progress:   0%|          | 0/1 [00:00<?, ?it/s]

Analyze models in one run:   0%|          | 0/1 [00:00<?, ?it/s]

Classifiers testing by bootstrap:   0%|          | 0/200 [00:00<?, ?it/s]

intervention_param:  0.4
Path for tuned params:  /home/dh3553/projects/fairness-variance/results/mult_repair_levels_ricci/tuning_results_Ricci_alpha_0.4_20230806__174149.csv
RandomForestClassifier:  {'bootstrap': True, 'ccp_alpha': 0.0, 'class_weight': None, 'criterion': 'gini', 'max_depth': 10, 'max_features': 'sqrt', 'max_leaf_nodes': None, 'max_samples': None, 'min_impurity_decrease': 0.0, 'min_samples_leaf': 1, 'min_samples_split': 2, 'min_weight_fraction_leaf': 0.0, 'n_estimators': 100, 'n_jobs': None, 'oob_score': False, 'random_state': 601, 'verbose': 0, 'warm_start': False}


Multiple runs progress:   0%|          | 0/1 [00:00<?, ?it/s]

Analyze models in one run:   0%|          | 0/1 [00:00<?, ?it/s]

Classifiers testing by bootstrap:   0%|          | 0/200 [00:00<?, ?it/s]

intervention_param:  0.5
Path for tuned params:  /home/dh3553/projects/fairness-variance/results/mult_repair_levels_ricci/tuning_results_Ricci_alpha_0.5_20230806__174251.csv
RandomForestClassifier:  {'bootstrap': True, 'ccp_alpha': 0.0, 'class_weight': None, 'criterion': 'gini', 'max_depth': 10, 'max_features': 'sqrt', 'max_leaf_nodes': None, 'max_samples': None, 'min_impurity_decrease': 0.0, 'min_samples_leaf': 1, 'min_samples_split': 5, 'min_weight_fraction_leaf': 0.0, 'n_estimators': 100, 'n_jobs': None, 'oob_score': False, 'random_state': 601, 'verbose': 0, 'warm_start': False}


Multiple runs progress:   0%|          | 0/1 [00:00<?, ?it/s]

Analyze models in one run:   0%|          | 0/1 [00:00<?, ?it/s]

Classifiers testing by bootstrap:   0%|          | 0/200 [00:00<?, ?it/s]

intervention_param:  0.6
Path for tuned params:  /home/dh3553/projects/fairness-variance/results/mult_repair_levels_ricci/tuning_results_Ricci_alpha_0.6_20230806__174353.csv
RandomForestClassifier:  {'bootstrap': True, 'ccp_alpha': 0.0, 'class_weight': None, 'criterion': 'gini', 'max_depth': 10, 'max_features': 'sqrt', 'max_leaf_nodes': None, 'max_samples': None, 'min_impurity_decrease': 0.0, 'min_samples_leaf': 1, 'min_samples_split': 2, 'min_weight_fraction_leaf': 0.0, 'n_estimators': 100, 'n_jobs': None, 'oob_score': False, 'random_state': 601, 'verbose': 0, 'warm_start': False}


Multiple runs progress:   0%|          | 0/1 [00:00<?, ?it/s]

Analyze models in one run:   0%|          | 0/1 [00:00<?, ?it/s]

Classifiers testing by bootstrap:   0%|          | 0/200 [00:00<?, ?it/s]

intervention_param:  0.7
Path for tuned params:  /home/dh3553/projects/fairness-variance/results/mult_repair_levels_ricci/tuning_results_Ricci_alpha_0.7_20230806__174456.csv
RandomForestClassifier:  {'bootstrap': False, 'ccp_alpha': 0.0, 'class_weight': None, 'criterion': 'gini', 'max_depth': 10, 'max_features': 'sqrt', 'max_leaf_nodes': None, 'max_samples': None, 'min_impurity_decrease': 0.0, 'min_samples_leaf': 1, 'min_samples_split': 10, 'min_weight_fraction_leaf': 0.0, 'n_estimators': 100, 'n_jobs': None, 'oob_score': False, 'random_state': 601, 'verbose': 0, 'warm_start': False}


Multiple runs progress:   0%|          | 0/1 [00:00<?, ?it/s]

Analyze models in one run:   0%|          | 0/1 [00:00<?, ?it/s]

Classifiers testing by bootstrap:   0%|          | 0/200 [00:00<?, ?it/s]

intervention_param:  0.8
Path for tuned params:  /home/dh3553/projects/fairness-variance/results/mult_repair_levels_ricci/tuning_results_Ricci_alpha_0.8_20230806__174554.csv
RandomForestClassifier:  {'bootstrap': True, 'ccp_alpha': 0.0, 'class_weight': None, 'criterion': 'gini', 'max_depth': 10, 'max_features': 'sqrt', 'max_leaf_nodes': None, 'max_samples': None, 'min_impurity_decrease': 0.0, 'min_samples_leaf': 1, 'min_samples_split': 5, 'min_weight_fraction_leaf': 0.0, 'n_estimators': 100, 'n_jobs': None, 'oob_score': False, 'random_state': 601, 'verbose': 0, 'warm_start': False}


Multiple runs progress:   0%|          | 0/1 [00:00<?, ?it/s]

Analyze models in one run:   0%|          | 0/1 [00:00<?, ?it/s]

Classifiers testing by bootstrap:   0%|          | 0/200 [00:00<?, ?it/s]

intervention_param:  0.9
Path for tuned params:  /home/dh3553/projects/fairness-variance/results/mult_repair_levels_ricci/tuning_results_Ricci_alpha_0.9_20230806__174655.csv
RandomForestClassifier:  {'bootstrap': True, 'ccp_alpha': 0.0, 'class_weight': None, 'criterion': 'gini', 'max_depth': 10, 'max_features': 'sqrt', 'max_leaf_nodes': None, 'max_samples': None, 'min_impurity_decrease': 0.0, 'min_samples_leaf': 1, 'min_samples_split': 2, 'min_weight_fraction_leaf': 0.0, 'n_estimators': 200, 'n_jobs': None, 'oob_score': False, 'random_state': 601, 'verbose': 0, 'warm_start': False}


Multiple runs progress:   0%|          | 0/1 [00:00<?, ?it/s]

Analyze models in one run:   0%|          | 0/1 [00:00<?, ?it/s]

Classifiers testing by bootstrap:   0%|          | 0/200 [00:00<?, ?it/s]

intervention_param:  1.0
Path for tuned params:  /home/dh3553/projects/fairness-variance/results/mult_repair_levels_ricci/tuning_results_Ricci_alpha_1.0_20230806__174821.csv
RandomForestClassifier:  {'bootstrap': True, 'ccp_alpha': 0.0, 'class_weight': None, 'criterion': 'gini', 'max_depth': 10, 'max_features': 'sqrt', 'max_leaf_nodes': None, 'max_samples': None, 'min_impurity_decrease': 0.0, 'min_samples_leaf': 1, 'min_samples_split': 5, 'min_weight_fraction_leaf': 0.0, 'n_estimators': 100, 'n_jobs': None, 'oob_score': False, 'random_state': 601, 'verbose': 0, 'warm_start': False}


Multiple runs progress:   0%|          | 0/1 [00:00<?, ?it/s]

Analyze models in one run:   0%|          | 0/1 [00:00<?, ?it/s]

Classifiers testing by bootstrap:   0%|          | 0/200 [00:00<?, ?it/s]