In [15]:
# !pip install -r ./requirements.txt

In [6]:
# !pip uninstall virny -y

In [8]:
# Install using an HTTP link
# !pip install git+https://github.com/DataResponsibly/Virny.git@development

# Install using an SSH link
# !pip install git+ssh://git@github.com/DataResponsibly/Virny.git@development

In [9]:
# !pip install aif360

In [10]:
# !pip install BlackBoxAuditing==0.1.54

In [11]:
%matplotlib inline
%load_ext autoreload
%autoreload 2

The autoreload extension is already loaded. To reload it, use:
  %reload_ext autoreload


In [12]:
import os
import warnings
warnings.filterwarnings('ignore')
os.environ["PYTHONWARNINGS"] = "ignore"

In [13]:
cur_folder_name = os.getcwd().split('/')[-1]
if cur_folder_name != "fairness-variance":
    os.chdir("../..")

print('Current location: ', os.getcwd())

Current location:  /home/dh3553/projects/fairness-variance


## Import dependencies

In [14]:
import os
import copy

from virny.utils.custom_initializers import create_config_obj
from virny.datasets import LawSchoolDataset

from configs.constants import TEST_SET_FRACTION, EXPERIMENT_SEEDS
from configs.models_config_for_tuning import get_model_params_for_mult_repair_levels

from source.preprocessing import get_simple_preprocessor
from source.experiment_interface import run_exp_iter_with_disparate_impact

pip install 'aif360[LawSchoolGPA]'
pip install 'aif360[AdversarialDebiasing]'
pip install 'aif360[AdversarialDebiasing]'


## Define Input Variables

In [15]:
# ROOT_DIR = os.path.join(os.getcwd(), "..", "..")
ROOT_DIR = os.getcwd()
EXPERIMENT_NAME = 'mult_repair_levels_law_school'
DB_COLLECTION_NAME = 'exp_mult_repair_levels'
SAVE_RESULTS_DIR_PATH = os.path.join(ROOT_DIR, 'results', EXPERIMENT_NAME)
# FAIR_INTERVENTION_PARAMS_LST = [0.0, 0.1, 0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 0.8, 0.9, 1.0]
FAIR_INTERVENTION_PARAMS_LST = [0.0, 0.2, 0.4]

config_yaml_path = os.path.join(ROOT_DIR, 'notebooks', EXPERIMENT_NAME, 'law_school_2018_config.yaml')
metrics_computation_config = create_config_obj(config_yaml_path=config_yaml_path)

## Define a db writer and custom fields to insert into your database

In [16]:
import os
from dotenv import load_dotenv

load_dotenv('./configs/secrets.env')
os.getenv("DB_NAME")

'fairness_variance'

In [17]:
from source.utils.db_functions import connect_to_mongodb

client, collection_obj, db_writer_func = connect_to_mongodb(DB_COLLECTION_NAME)

In [19]:
import uuid

custom_table_fields_dct = {
#     'session_uuid': str(uuid.uuid4()),
    'session_uuid': '9430484b-bec6-42a0-ac65-11c53446a61f',
}
print('Current session uuid: ', custom_table_fields_dct['session_uuid'])

Current session uuid:  9430484b-bec6-42a0-ac65-11c53446a61f


## Initialize custom objects

In [20]:
data_loader = LawSchoolDataset()
data_loader.X_data.head()

Unnamed: 0,decile1b,decile3,lsat,ugpa,zfygpa,zgpa,fulltime,fam_inc,male,tier,race
0,10.0,10.0,44.0,3.5,1.33,1.88,1.0,5.0,0.0,4.0,White
1,5.0,4.0,29.0,3.5,-0.11,-0.57,1.0,4.0,0.0,2.0,White
2,8.0,7.0,37.0,3.4,0.63,0.37,1.0,3.0,1.0,4.0,White
3,8.0,7.0,43.0,3.3,0.67,0.34,1.0,4.0,0.0,4.0,White
4,3.0,2.0,41.0,3.3,-0.67,-1.3,1.0,4.0,0.0,5.0,White


In [21]:
data_loader.X_data.shape

(20798, 11)

In [22]:
data_loader.X_data.dtypes

decile1b    float64
decile3     float64
lsat        float64
ugpa        float64
zfygpa      float64
zgpa        float64
fulltime     object
fam_inc      object
male         object
tier         object
race         object
dtype: object

In [23]:
data_loader.y_data.value_counts()

1.0    18505
0.0     2293
Name: pass_bar, dtype: int64

In [24]:
data_loader.X_data['race'].value_counts()

White        17491
Non-White     3307
Name: race, dtype: int64

In [25]:
data_loader.X_data['male'].value_counts()

1.0    11675
0.0     9123
Name: male, dtype: int64

## Run experiment iterations

### Experiment iteration 1

In [26]:
# tuned_params_filenames = ['tuning_results_Folktables_NY_2018_Employment_alpha_0.8_20230706__115508.csv']
# tuned_params_df_paths = [os.path.join(ROOT_DIR, 'results', EXPERIMENT_NAME, tuned_params_filename)
#                          for tuned_params_filename in tuned_params_filenames]

In [29]:
# Configs for an experiment iteration
exp_iter_num = 1
experiment_seed = EXPERIMENT_SEEDS[exp_iter_num - 1]
custom_table_fields_dct['experiment_iteration'] = f'Exp_iter_{exp_iter_num}'

exp_iter_data_loader = copy.deepcopy(data_loader)  # Add deepcopy to avoid data leakage
models_params_for_tuning = get_model_params_for_mult_repair_levels(experiment_seed)

In [30]:
run_exp_iter_with_disparate_impact(data_loader=exp_iter_data_loader,
                                   experiment_seed=experiment_seed,
                                   test_set_fraction=TEST_SET_FRACTION,
                                   db_writer_func=db_writer_func,
                                   fair_intervention_params_lst=FAIR_INTERVENTION_PARAMS_LST,
                                   models_params_for_tuning=models_params_for_tuning,
                                   metrics_computation_config=metrics_computation_config,
                                   custom_table_fields_dct=custom_table_fields_dct,
                                   with_tuning=True,
                                   # with_tuning=False,
                                   # tuned_params_df_paths=tuned_params_df_paths,
                                   save_results_dir_path=SAVE_RESULTS_DIR_PATH,
                                   verbose=True)

2023-08-06 11:39:50 experiment_interface.py INFO    : Start an experiment iteration for the following custom params:
INFO:root:Start an experiment iteration for the following custom params:


{'dataset_split_seed': 100,
 'experiment_iteration': 'Exp_iter_1',
 'fair_intervention_params_lst': '[0.0, 0.2, 0.4]',
 'model_init_seed': 100,
 'session_uuid': '9430484b-bec6-42a0-ac65-11c53446a61f'}




2023-08-06 11:39:50 experiment_interface.py INFO    : The dataset is preprocessed
INFO:root:The dataset is preprocessed


Top indexes of an X_test in a base flow dataset:  Int64Index([ 7102,   593, 18841,  5078, 14172,  8064, 13554, 13401, 17015,
            18446,  6938,  3450,  9375, 19994, 16100,  4401,   142, 15143,
             2188,  4332],
           dtype='int64')
Top indexes of an y_test in a base flow dataset:  Int64Index([ 7102,   593, 18841,  5078, 14172,  8064, 13554, 13401, 17015,
            18446,  6938,  3450,  9375, 19994, 16100,  4401,   142, 15143,
             2188,  4332],
           dtype='int64')


Multiple alphas:   0%|          | 0/3 [00:00<?, ?it/s]

intervention_param:  0.0
2023/08/06, 11:39:51: Tuning RandomForestClassifier...


2023-08-06 11:46:02 experiment_interface.py INFO    : Models are tuned and saved to a file
INFO:root:Models are tuned and saved to a file


2023/08/06, 11:46:02: Tuning for RandomForestClassifier is finished [F1 score = 0.6416001167408139, Accuracy = 0.8899038461538461]



Multiple runs progress:   0%|          | 0/1 [00:00<?, ?it/s]

Analyze models in one run:   0%|          | 0/1 [00:00<?, ?it/s]

##############################  [Model 1 / 1] Analyze RandomForestClassifier  ##############################
Model seed:  101

Protected groups splits:
male_priv (1150, 2)
male_dis (930, 2)
race_priv (1732, 2)
race_dis (348, 2)
male&race_priv (1908, 2)
male&race_dis (172, 2)




2023-08-06 11:46:02 abstract_overall_variance_analyzer.py INFO    : Start classifiers testing by bootstrap
INFO:root:Start classifiers testing by bootstrap


Classifiers testing by bootstrap:   0%|          | 0/200 [00:00<?, ?it/s]





2023-08-06 12:09:46 abstract_overall_variance_analyzer.py INFO    : Successfully tested classifiers by bootstrap
INFO:root:Successfully tested classifiers by bootstrap
2023-08-06 12:10:04 abstract_overall_variance_analyzer.py INFO    : Successfully computed predict proba metrics
INFO:root:Successfully computed predict proba metrics



[RandomForestClassifier] Metrics matrix:


Unnamed: 0,Metric,overall,male_priv,male_priv_correct,male_priv_incorrect,male_dis,male_dis_correct,male_dis_incorrect,race_priv,race_priv_correct,...,race_dis_incorrect,male&race_priv,male&race_priv_correct,male&race_priv_incorrect,male&race_dis,male&race_dis_correct,male&race_dis_incorrect,Model_Seed,Model_Name,Model_Params
0,Mean,0.115963,0.117311,0.095373,0.33338,0.114296,0.08851,0.314744,0.081031,0.067146,...,0.447365,0.101421,0.081745,0.298756,0.277275,0.230639,0.436318,101,RandomForestClassifier,"{'bootstrap': False, 'ccp_alpha': 0.0, 'class_..."
1,Std,0.042495,0.042904,0.037832,0.092865,0.04199,0.036191,0.087061,0.036058,0.032085,...,0.099447,0.039746,0.034921,0.088137,0.072993,0.065641,0.098065,101,RandomForestClassifier,"{'bootstrap': False, 'ccp_alpha': 0.0, 'class_..."
2,IQR,0.060659,0.061464,0.05421,0.13291,0.059663,0.051208,0.125396,0.051484,0.045652,...,0.140669,0.056801,0.04978,0.127217,0.103459,0.093406,0.137744,101,RandomForestClassifier,"{'bootstrap': False, 'ccp_alpha': 0.0, 'class_..."
3,Aleatoric_Uncertainty,0.322009,0.324111,0.282911,0.729894,0.319408,0.269842,0.704718,0.266334,0.235372,...,0.842176,0.297128,0.258307,0.686458,0.598012,0.522906,0.854143,101,RandomForestClassifier,"{'bootstrap': False, 'ccp_alpha': 0.0, 'class_..."
4,Overall_Uncertainty,0.34226,0.34449,0.301563,0.767279,0.339504,0.288075,0.739288,0.284496,0.252056,...,0.877983,0.316555,0.276038,0.722889,0.627413,0.550968,0.888107,101,RandomForestClassifier,"{'bootstrap': False, 'ccp_alpha': 0.0, 'class_..."
5,Statistical_Bias,0.145848,0.141053,0.081952,0.723144,0.151777,0.077746,0.727264,0.116868,0.063443,...,0.660466,0.133043,0.072356,0.741673,0.287892,0.18108,0.652148,101,RandomForestClassifier,"{'bootstrap': False, 'ccp_alpha': 0.0, 'class_..."
6,Jitter,0.037718,0.036225,0.02382,0.1584,0.039565,0.02377,0.162351,0.023626,0.015181,...,0.217109,0.030641,0.01922,0.145187,0.116222,0.083519,0.227748,101,RandomForestClassifier,"{'bootstrap': False, 'ccp_alpha': 0.0, 'class_..."
7,Per_Sample_Accuracy,0.895099,0.903117,0.983051,0.115849,0.885183,0.98321,0.12316,0.921409,0.989494,...,0.158062,0.906761,0.986363,0.108439,0.765727,0.940827,0.16859,101,RandomForestClassifier,"{'bootstrap': False, 'ccp_alpha': 0.0, 'class_..."
8,Label_Stability,0.945562,0.948096,0.966159,0.770189,0.94243,0.96642,0.755943,0.966005,0.978987,...,0.687875,0.955723,0.972761,0.784855,0.832849,0.881654,0.66641,101,RandomForestClassifier,"{'bootstrap': False, 'ccp_alpha': 0.0, 'class_..."
9,TPR,0.973442,0.973812,1.0,0.0,0.972973,1.0,0.0,0.989355,1.0,...,0.0,0.980256,1.0,0.0,0.878049,1.0,0.0,101,RandomForestClassifier,"{'bootstrap': False, 'ccp_alpha': 0.0, 'class_..."






intervention_param:  0.2
2023/08/06, 12:11:53: Tuning RandomForestClassifier...


2023-08-06 12:18:05 experiment_interface.py INFO    : Models are tuned and saved to a file
INFO:root:Models are tuned and saved to a file


2023/08/06, 12:18:05: Tuning for RandomForestClassifier is finished [F1 score = 0.648160123968979, Accuracy = 0.8950320512820514]



Multiple runs progress:   0%|          | 0/1 [00:00<?, ?it/s]

Analyze models in one run:   0%|          | 0/1 [00:00<?, ?it/s]

##############################  [Model 1 / 1] Analyze RandomForestClassifier  ##############################
Model seed:  101

Protected groups splits:
male_priv (1150, 2)
male_dis (930, 2)
race_priv (1732, 2)
race_dis (348, 2)
male&race_priv (1908, 2)
male&race_dis (172, 2)




2023-08-06 12:18:05 abstract_overall_variance_analyzer.py INFO    : Start classifiers testing by bootstrap
INFO:root:Start classifiers testing by bootstrap


Classifiers testing by bootstrap:   0%|          | 0/200 [00:00<?, ?it/s]





2023-08-06 12:27:25 abstract_overall_variance_analyzer.py INFO    : Successfully tested classifiers by bootstrap
INFO:root:Successfully tested classifiers by bootstrap
2023-08-06 12:27:42 abstract_overall_variance_analyzer.py INFO    : Successfully computed predict proba metrics
INFO:root:Successfully computed predict proba metrics



[RandomForestClassifier] Metrics matrix:


Unnamed: 0,Metric,overall,male_priv,male_priv_correct,male_priv_incorrect,male_dis,male_dis_correct,male_dis_incorrect,race_priv,race_priv_correct,...,race_dis_incorrect,male&race_priv,male&race_priv_correct,male&race_priv_incorrect,male&race_dis,male&race_dis_correct,male&race_dis_incorrect,Model_Seed,Model_Name,Model_Params
0,Mean,0.119666,0.121389,0.099858,0.335672,0.117534,0.091147,0.31837,0.086278,0.072108,...,0.434641,0.105937,0.086698,0.301365,0.271953,0.220615,0.430857,101,RandomForestClassifier,"{'bootstrap': False, 'ccp_alpha': 0.0, 'class_..."
1,Std,0.040937,0.041152,0.036611,0.086345,0.040672,0.034999,0.083849,0.034896,0.031277,...,0.094608,0.038192,0.033893,0.08187,0.071386,0.06274,0.098148,101,RandomForestClassifier,"{'bootstrap': False, 'ccp_alpha': 0.0, 'class_..."
2,IQR,0.05721,0.057385,0.051111,0.119819,0.056995,0.048762,0.119652,0.048559,0.043386,...,0.132445,0.053241,0.047183,0.114778,0.101243,0.088749,0.139915,101,RandomForestClassifier,"{'bootstrap': False, 'ccp_alpha': 0.0, 'class_..."
3,Aleatoric_Uncertainty,0.343515,0.347945,0.307089,0.754565,0.338037,0.288239,0.717055,0.289581,0.259033,...,0.851579,0.319931,0.282012,0.705108,0.605139,0.52297,0.859471,101,RandomForestClassifier,"{'bootstrap': False, 'ccp_alpha': 0.0, 'class_..."
4,Overall_Uncertainty,0.361007,0.365328,0.323138,0.785214,0.355664,0.304083,0.748258,0.305155,0.273401,...,0.883501,0.336509,0.297257,0.735227,0.632761,0.548457,0.893703,101,RandomForestClassifier,"{'bootstrap': False, 'ccp_alpha': 0.0, 'class_..."
5,Statistical_Bias,0.149317,0.144295,0.087174,0.712784,0.155527,0.080876,0.723702,0.120475,0.068412,...,0.656926,0.136512,0.077584,0.735102,0.291357,0.17549,0.649991,101,RandomForestClassifier,"{'bootstrap': False, 'ccp_alpha': 0.0, 'class_..."
6,Jitter,0.035388,0.0337,0.02158,0.154325,0.037474,0.020466,0.166924,0.021548,0.012982,...,0.215802,0.028795,0.017582,0.142697,0.108522,0.067963,0.234063,101,RandomForestClassifier,"{'bootstrap': False, 'ccp_alpha': 0.0, 'class_..."
7,Per_Sample_Accuracy,0.896034,0.904648,0.984498,0.109952,0.885382,0.985456,0.123704,0.923118,0.990943,...,0.153232,0.908281,0.987447,0.104123,0.760174,0.951154,0.169048,101,RandomForestClassifier,"{'bootstrap': False, 'ccp_alpha': 0.0, 'class_..."
8,Label_Stability,0.949202,0.952078,0.969206,0.781619,0.945645,0.970985,0.752778,0.969111,0.981886,...,0.693537,0.95869,0.97502,0.792807,0.843953,0.902769,0.661905,101,RandomForestClassifier,"{'bootstrap': False, 'ccp_alpha': 0.0, 'class_..."
9,TPR,0.975068,0.974782,1.0,0.0,0.97543,1.0,0.0,0.989355,1.0,...,0.0,0.980836,1.0,0.0,0.894309,1.0,0.0,101,RandomForestClassifier,"{'bootstrap': False, 'ccp_alpha': 0.0, 'class_..."






intervention_param:  0.4
2023/08/06, 12:29:32: Tuning RandomForestClassifier...


2023-08-06 12:35:50 experiment_interface.py INFO    : Models are tuned and saved to a file
INFO:root:Models are tuned and saved to a file


2023/08/06, 12:35:50: Tuning for RandomForestClassifier is finished [F1 score = 0.6447705517995004, Accuracy = 0.8907051282051283]



Multiple runs progress:   0%|          | 0/1 [00:00<?, ?it/s]

Analyze models in one run:   0%|          | 0/1 [00:00<?, ?it/s]

##############################  [Model 1 / 1] Analyze RandomForestClassifier  ##############################
Model seed:  101

Protected groups splits:
male_priv (1150, 2)
male_dis (930, 2)
race_priv (1732, 2)
race_dis (348, 2)
male&race_priv (1908, 2)
male&race_dis (172, 2)




2023-08-06 12:35:50 abstract_overall_variance_analyzer.py INFO    : Start classifiers testing by bootstrap
INFO:root:Start classifiers testing by bootstrap


Classifiers testing by bootstrap:   0%|          | 0/200 [00:00<?, ?it/s]





2023-08-06 12:59:43 abstract_overall_variance_analyzer.py INFO    : Successfully tested classifiers by bootstrap
INFO:root:Successfully tested classifiers by bootstrap
2023-08-06 13:00:01 abstract_overall_variance_analyzer.py INFO    : Successfully computed predict proba metrics
INFO:root:Successfully computed predict proba metrics



[RandomForestClassifier] Metrics matrix:


Unnamed: 0,Metric,overall,male_priv,male_priv_correct,male_priv_incorrect,male_dis,male_dis_correct,male_dis_incorrect,race_priv,race_priv_correct,...,race_dis_incorrect,male&race_priv,male&race_priv_correct,male&race_priv_incorrect,male&race_dis,male&race_dis_correct,male&race_dis_incorrect,Model_Seed,Model_Name,Model_Params
0,Mean,0.122132,0.125349,0.101837,0.347642,0.118153,0.09383,0.307234,0.090533,0.076418,...,0.427644,0.108988,0.089704,0.30362,0.267932,0.21484,0.42238,101,RandomForestClassifier,"{'bootstrap': False, 'ccp_alpha': 0.0, 'class_..."
1,Std,0.04298,0.0435,0.03837,0.092,0.042337,0.036715,0.086043,0.037277,0.033471,...,0.097224,0.040341,0.035827,0.0859,0.072256,0.062205,0.101495,101,RandomForestClassifier,"{'bootstrap': False, 'ccp_alpha': 0.0, 'class_..."
2,IQR,0.061592,0.062478,0.055115,0.132098,0.060496,0.052355,0.12378,0.053394,0.047803,...,0.137717,0.057809,0.051266,0.12385,0.103556,0.089549,0.144303,101,RandomForestClassifier,"{'bootstrap': False, 'ccp_alpha': 0.0, 'class_..."
3,Aleatoric_Uncertainty,0.351636,0.358458,0.314899,0.770281,0.3432,0.296463,0.706517,0.299726,0.269759,...,0.851524,0.328722,0.291075,0.70869,0.605824,0.519333,0.857434,101,RandomForestClassifier,"{'bootstrap': False, 'ccp_alpha': 0.0, 'class_..."
4,Overall_Uncertainty,0.370651,0.37738,0.332156,0.804955,0.36233,0.313643,0.740799,0.316995,0.285591,...,0.885462,0.346851,0.30765,0.742508,0.634661,0.545339,0.894509,101,RandomForestClassifier,"{'bootstrap': False, 'ccp_alpha': 0.0, 'class_..."
5,Statistical_Bias,0.152726,0.148286,0.089734,0.701871,0.158217,0.084396,0.732068,0.124463,0.072752,...,0.655407,0.140007,0.081068,0.734879,0.293821,0.172902,0.645585,101,RandomForestClassifier,"{'bootstrap': False, 'ccp_alpha': 0.0, 'class_..."
6,Jitter,0.038247,0.038091,0.023868,0.172566,0.038438,0.023861,0.151761,0.025175,0.016794,...,0.215918,0.031652,0.020758,0.141605,0.111396,0.065994,0.243475,101,RandomForestClassifier,"{'bootstrap': False, 'ccp_alpha': 0.0, 'class_..."
7,Per_Sample_Accuracy,0.893149,0.900883,0.982668,0.127636,0.883586,0.982888,0.111651,0.920465,0.988082,...,0.159412,0.905529,0.984983,0.103605,0.755814,0.952695,0.183068,101,RandomForestClassifier,"{'bootstrap': False, 'ccp_alpha': 0.0, 'class_..."
8,Label_Stability,0.944471,0.944443,0.965529,0.745091,0.944505,0.965947,0.77783,0.963863,0.97629,...,0.682824,0.95413,0.970092,0.793023,0.837326,0.906328,0.636591,101,RandomForestClassifier,"{'bootstrap': False, 'ccp_alpha': 0.0, 'class_..."
9,TPR,0.97561,0.973812,1.0,0.0,0.977887,1.0,0.0,0.988103,1.0,...,0.0,0.980836,1.0,0.0,0.902439,1.0,0.0,101,RandomForestClassifier,"{'bootstrap': False, 'ccp_alpha': 0.0, 'class_..."








2023-08-06 13:01:50 experiment_interface.py INFO    : Experiment run was successful!
INFO:root:Experiment run was successful!


### Experiment iteration 2

In [44]:
# Configs for an experiment iteration
exp_iter_num = 2
experiment_seed = EXPERIMENT_SEEDS[exp_iter_num - 1]
tuned_params_filenames = [
    'tuning_results_Folktables_CA_2018_Public_Coverage_alpha_0.0_20230731__173053.csv',
    'tuning_results_Folktables_CA_2018_Public_Coverage_alpha_0.1_20230731__174151.csv',
    'tuning_results_Folktables_CA_2018_Public_Coverage_alpha_0.2_20230731__173153.csv',
    'tuning_results_Folktables_CA_2018_Public_Coverage_alpha_0.3_20230731__174153.csv',
    'tuning_results_Folktables_CA_2018_Public_Coverage_alpha_0.4_20230731__173254.csv',
]
tuned_params_df_paths = [os.path.join(ROOT_DIR, 'results', EXPERIMENT_NAME, tuned_params_filename)
                         for tuned_params_filename in tuned_params_filenames]
custom_table_fields_dct['experiment_iteration'] = f'Exp_iter_{exp_iter_num}'

exp_iter_data_loader = copy.deepcopy(data_loader)  # Add deepcopy to avoid data leakage
models_params_for_tuning = get_model_params_for_mult_repair_levels(experiment_seed)

In [45]:
run_exp_iter_with_disparate_impact(data_loader=exp_iter_data_loader,
                                   experiment_seed=experiment_seed,
                                   test_set_fraction=TEST_SET_FRACTION,
                                   db_writer_func=db_writer_func,
                                   fair_intervention_params_lst=FAIR_INTERVENTION_PARAMS_LST,
                                   models_params_for_tuning=models_params_for_tuning,
                                   metrics_computation_config=metrics_computation_config,
                                   custom_table_fields_dct=custom_table_fields_dct,
#                                    with_tuning=True,
                                   with_tuning=False,
                                   tuned_params_df_paths=tuned_params_df_paths,
                                   save_results_dir_path=SAVE_RESULTS_DIR_PATH,
                                   verbose=True)

2023-07-31 14:43:04 experiment_interface.py INFO    : Start an experiment iteration for the following custom params:
INFO:root:Start an experiment iteration for the following custom params:


{'dataset_split_seed': 200,
 'experiment_iteration': 'Exp_iter_2',
 'fair_intervention_params_lst': '[0.0, 0.1, 0.2, 0.3, 0.4]',
 'model_init_seed': 200,
 'session_uuid': '116104d6-7d63-4564-ae57-82b19b260fa5'}




2023-07-31 14:43:04 experiment_interface.py INFO    : The dataset is preprocessed
INFO:root:The dataset is preprocessed


Top indexes of an X_test in a base flow dataset:  Int64Index([ 6043,  3745,  5159,  7241,  7820,  3695, 11501, 11432,  1163,
             8994,  7972,  2554,  9884,  2008,  6884, 11995,  5200,  4649,
            10244, 13775],
           dtype='int64')
Top indexes of an y_test in a base flow dataset:  Int64Index([ 6043,  3745,  5159,  7241,  7820,  3695, 11501, 11432,  1163,
             8994,  7972,  2554,  9884,  2008,  6884, 11995,  5200,  4649,
            10244, 13775],
           dtype='int64')


Multiple alphas:   0%|          | 0/5 [00:00<?, ?it/s]

intervention_param:  0.0


2023-07-31 14:43:07 experiment_interface.py INFO    : Models config is loaded from the input file
INFO:root:Models config is loaded from the input file


Path for tuned params:  /home/dh3553/projects/fairness-variance/results/mult_repair_levels_pubcov_CA/tuning_results_Folktables_CA_2018_Public_Coverage_alpha_0.0_20230731__173053.csv
RandomForestClassifier:  {'bootstrap': True, 'ccp_alpha': 0.0, 'class_weight': None, 'criterion': 'gini', 'max_depth': 30, 'max_features': 'sqrt', 'max_leaf_nodes': None, 'max_samples': None, 'min_impurity_decrease': 0.0, 'min_samples_leaf': 2, 'min_samples_split': 2, 'min_weight_fraction_leaf': 0.0, 'n_estimators': 100, 'n_jobs': None, 'oob_score': False, 'random_state': 200, 'verbose': 0, 'warm_start': False}


Multiple runs progress:   0%|          | 0/1 [00:00<?, ?it/s]

Analyze models in one run:   0%|          | 0/1 [00:00<?, ?it/s]

Classifiers testing by bootstrap:   0%|          | 0/200 [00:00<?, ?it/s]

intervention_param:  0.1
Path for tuned params:  /home/dh3553/projects/fairness-variance/results/mult_repair_levels_pubcov_CA/tuning_results_Folktables_CA_2018_Public_Coverage_alpha_0.1_20230731__174151.csv
RandomForestClassifier:  {'bootstrap': False, 'ccp_alpha': 0.0, 'class_weight': None, 'criterion': 'gini', 'max_depth': 40, 'max_features': 'sqrt', 'max_leaf_nodes': None, 'max_samples': None, 'min_impurity_decrease': 0.0, 'min_samples_leaf': 2, 'min_samples_split': 10, 'min_weight_fraction_leaf': 0.0, 'n_estimators': 100, 'n_jobs': None, 'oob_score': False, 'random_state': 201, 'verbose': 0, 'warm_start': False}


Multiple runs progress:   0%|          | 0/1 [00:00<?, ?it/s]

Analyze models in one run:   0%|          | 0/1 [00:00<?, ?it/s]

Classifiers testing by bootstrap:   0%|          | 0/200 [00:00<?, ?it/s]

intervention_param:  0.2
Path for tuned params:  /home/dh3553/projects/fairness-variance/results/mult_repair_levels_pubcov_CA/tuning_results_Folktables_CA_2018_Public_Coverage_alpha_0.2_20230731__173153.csv
RandomForestClassifier:  {'bootstrap': True, 'ccp_alpha': 0.0, 'class_weight': None, 'criterion': 'gini', 'max_depth': 30, 'max_features': 'sqrt', 'max_leaf_nodes': None, 'max_samples': None, 'min_impurity_decrease': 0.0, 'min_samples_leaf': 1, 'min_samples_split': 10, 'min_weight_fraction_leaf': 0.0, 'n_estimators': 100, 'n_jobs': None, 'oob_score': False, 'random_state': 201, 'verbose': 0, 'warm_start': False}


Multiple runs progress:   0%|          | 0/1 [00:00<?, ?it/s]

Analyze models in one run:   0%|          | 0/1 [00:00<?, ?it/s]

Classifiers testing by bootstrap:   0%|          | 0/200 [00:00<?, ?it/s]

intervention_param:  0.3
Path for tuned params:  /home/dh3553/projects/fairness-variance/results/mult_repair_levels_pubcov_CA/tuning_results_Folktables_CA_2018_Public_Coverage_alpha_0.3_20230731__174153.csv
RandomForestClassifier:  {'bootstrap': True, 'ccp_alpha': 0.0, 'class_weight': None, 'criterion': 'gini', 'max_depth': 30, 'max_features': 'sqrt', 'max_leaf_nodes': None, 'max_samples': None, 'min_impurity_decrease': 0.0, 'min_samples_leaf': 2, 'min_samples_split': 5, 'min_weight_fraction_leaf': 0.0, 'n_estimators': 100, 'n_jobs': None, 'oob_score': False, 'random_state': 201, 'verbose': 0, 'warm_start': False}


Multiple runs progress:   0%|          | 0/1 [00:00<?, ?it/s]

Analyze models in one run:   0%|          | 0/1 [00:00<?, ?it/s]

Classifiers testing by bootstrap:   0%|          | 0/200 [00:00<?, ?it/s]

intervention_param:  0.4
Path for tuned params:  /home/dh3553/projects/fairness-variance/results/mult_repair_levels_pubcov_CA/tuning_results_Folktables_CA_2018_Public_Coverage_alpha_0.4_20230731__173254.csv
RandomForestClassifier:  {'bootstrap': True, 'ccp_alpha': 0.0, 'class_weight': None, 'criterion': 'gini', 'max_depth': 40, 'max_features': 'sqrt', 'max_leaf_nodes': None, 'max_samples': None, 'min_impurity_decrease': 0.0, 'min_samples_leaf': 1, 'min_samples_split': 10, 'min_weight_fraction_leaf': 0.0, 'n_estimators': 100, 'n_jobs': None, 'oob_score': False, 'random_state': 201, 'verbose': 0, 'warm_start': False}


Multiple runs progress:   0%|          | 0/1 [00:00<?, ?it/s]

Analyze models in one run:   0%|          | 0/1 [00:00<?, ?it/s]

Classifiers testing by bootstrap:   0%|          | 0/200 [00:00<?, ?it/s]

### Experiment iteration 3

In [None]:
# Configs for an experiment iteration
exp_iter_num = 3
experiment_seed = EXPERIMENT_SEEDS[exp_iter_num - 1]
tuned_params_filenames = [
    'tuning_results_Folktables_CA_2018_Public_Coverage_alpha_0.0_20230731__173053.csv',
    'tuning_results_Folktables_CA_2018_Public_Coverage_alpha_0.1_20230731__174151.csv',
    'tuning_results_Folktables_CA_2018_Public_Coverage_alpha_0.2_20230731__173153.csv',
    'tuning_results_Folktables_CA_2018_Public_Coverage_alpha_0.3_20230731__174153.csv',
    'tuning_results_Folktables_CA_2018_Public_Coverage_alpha_0.4_20230731__173254.csv',
]
tuned_params_df_paths = [os.path.join(ROOT_DIR, 'results', EXPERIMENT_NAME, tuned_params_filename)
                         for tuned_params_filename in tuned_params_filenames]
custom_table_fields_dct['experiment_iteration'] = f'Exp_iter_{exp_iter_num}'

exp_iter_data_loader = copy.deepcopy(data_loader)  # Add deepcopy to avoid data leakage
models_params_for_tuning = get_model_params_for_mult_repair_levels(experiment_seed)

In [None]:
run_exp_iter_with_disparate_impact(data_loader=exp_iter_data_loader,
                                   experiment_seed=experiment_seed,
                                   test_set_fraction=TEST_SET_FRACTION,
                                   db_writer_func=db_writer_func,
                                   fair_intervention_params_lst=FAIR_INTERVENTION_PARAMS_LST,
                                   models_params_for_tuning=models_params_for_tuning,
                                   metrics_computation_config=metrics_computation_config,
                                   custom_table_fields_dct=custom_table_fields_dct,
#                                    with_tuning=True,
                                   with_tuning=False,
                                   tuned_params_df_paths=tuned_params_df_paths,
                                   save_results_dir_path=SAVE_RESULTS_DIR_PATH,
                                   verbose=True)

### Experiment iteration 4

In [None]:
# Configs for an experiment iteration
exp_iter_num = 4
experiment_seed = EXPERIMENT_SEEDS[exp_iter_num - 1]
tuned_params_filenames = [
    'tuning_results_Folktables_CA_2018_Public_Coverage_alpha_0.0_20230731__173053.csv',
    'tuning_results_Folktables_CA_2018_Public_Coverage_alpha_0.1_20230731__174151.csv',
    'tuning_results_Folktables_CA_2018_Public_Coverage_alpha_0.2_20230731__173153.csv',
    'tuning_results_Folktables_CA_2018_Public_Coverage_alpha_0.3_20230731__174153.csv',
    'tuning_results_Folktables_CA_2018_Public_Coverage_alpha_0.4_20230731__173254.csv',
]
tuned_params_df_paths = [os.path.join(ROOT_DIR, 'results', EXPERIMENT_NAME, tuned_params_filename)
                         for tuned_params_filename in tuned_params_filenames]
custom_table_fields_dct['experiment_iteration'] = f'Exp_iter_{exp_iter_num}'

exp_iter_data_loader = copy.deepcopy(data_loader)  # Add deepcopy to avoid data leakage
models_params_for_tuning = get_model_params_for_mult_repair_levels(experiment_seed)

In [46]:
run_exp_iter_with_disparate_impact(data_loader=exp_iter_data_loader,
                                   experiment_seed=experiment_seed,
                                   test_set_fraction=TEST_SET_FRACTION,
                                   db_writer_func=db_writer_func,
                                   fair_intervention_params_lst=FAIR_INTERVENTION_PARAMS_LST,
                                   models_params_for_tuning=models_params_for_tuning,
                                   metrics_computation_config=metrics_computation_config,
                                   custom_table_fields_dct=custom_table_fields_dct,
#                                    with_tuning=True,
                                   with_tuning=False,
                                   tuned_params_df_paths=tuned_params_df_paths,
                                   save_results_dir_path=SAVE_RESULTS_DIR_PATH,
                                   verbose=True)

### Experiment iteration 5

In [None]:
# Configs for an experiment iteration
exp_iter_num = 5
experiment_seed = EXPERIMENT_SEEDS[exp_iter_num - 1]
tuned_params_filenames = [
    'tuning_results_Folktables_CA_2018_Public_Coverage_alpha_0.0_20230731__173053.csv',
    'tuning_results_Folktables_CA_2018_Public_Coverage_alpha_0.1_20230731__174151.csv',
    'tuning_results_Folktables_CA_2018_Public_Coverage_alpha_0.2_20230731__173153.csv',
    'tuning_results_Folktables_CA_2018_Public_Coverage_alpha_0.3_20230731__174153.csv',
    'tuning_results_Folktables_CA_2018_Public_Coverage_alpha_0.4_20230731__173254.csv',
]
tuned_params_df_paths = [os.path.join(ROOT_DIR, 'results', EXPERIMENT_NAME, tuned_params_filename)
                         for tuned_params_filename in tuned_params_filenames]
custom_table_fields_dct['experiment_iteration'] = f'Exp_iter_{exp_iter_num}'

exp_iter_data_loader = copy.deepcopy(data_loader)  # Add deepcopy to avoid data leakage
models_params_for_tuning = get_model_params_for_mult_repair_levels(experiment_seed)

In [None]:
run_exp_iter_with_disparate_impact(data_loader=exp_iter_data_loader,
                                   experiment_seed=experiment_seed,
                                   test_set_fraction=TEST_SET_FRACTION,
                                   db_writer_func=db_writer_func,
                                   fair_intervention_params_lst=FAIR_INTERVENTION_PARAMS_LST,
                                   models_params_for_tuning=models_params_for_tuning,
                                   metrics_computation_config=metrics_computation_config,
                                   custom_table_fields_dct=custom_table_fields_dct,
#                                    with_tuning=True,
                                   with_tuning=False,
                                   tuned_params_df_paths=tuned_params_df_paths,
                                   save_results_dir_path=SAVE_RESULTS_DIR_PATH,
                                   verbose=True)

### Experiment iteration 6

In [None]:
# Configs for an experiment iteration
exp_iter_num = 6
experiment_seed = EXPERIMENT_SEEDS[exp_iter_num - 1]
tuned_params_filenames = [
    'tuning_results_Folktables_CA_2018_Public_Coverage_alpha_0.0_20230731__173053.csv',
    'tuning_results_Folktables_CA_2018_Public_Coverage_alpha_0.1_20230731__174151.csv',
    'tuning_results_Folktables_CA_2018_Public_Coverage_alpha_0.2_20230731__173153.csv',
    'tuning_results_Folktables_CA_2018_Public_Coverage_alpha_0.3_20230731__174153.csv',
    'tuning_results_Folktables_CA_2018_Public_Coverage_alpha_0.4_20230731__173254.csv',
]
tuned_params_df_paths = [os.path.join(ROOT_DIR, 'results', EXPERIMENT_NAME, tuned_params_filename)
                         for tuned_params_filename in tuned_params_filenames]
custom_table_fields_dct['experiment_iteration'] = f'Exp_iter_{exp_iter_num}'

exp_iter_data_loader = copy.deepcopy(data_loader)  # Add deepcopy to avoid data leakage
models_params_for_tuning = get_model_params_for_mult_repair_levels(experiment_seed)

In [None]:
run_exp_iter_with_disparate_impact(data_loader=exp_iter_data_loader,
                                   experiment_seed=experiment_seed,
                                   test_set_fraction=TEST_SET_FRACTION,
                                   db_writer_func=db_writer_func,
                                   fair_intervention_params_lst=FAIR_INTERVENTION_PARAMS_LST,
                                   models_params_for_tuning=models_params_for_tuning,
                                   metrics_computation_config=metrics_computation_config,
                                   custom_table_fields_dct=custom_table_fields_dct,
#                                    with_tuning=True,
                                   with_tuning=False,
                                   tuned_params_df_paths=tuned_params_df_paths,
                                   save_results_dir_path=SAVE_RESULTS_DIR_PATH,
                                   verbose=True)