In [1]:
# !pip install -r ./requirements.txt

In [2]:
# !pip uninstall virny -y

In [3]:
# Install using an HTTP link
# !pip install git+https://github.com/DataResponsibly/Virny.git@feature/add_meta_learner_computation_mode

# Install using an SSH link
# !pip install git+ssh://git@github.com/DataResponsibly/Virny.git@feature/add_meta_learner_computation_mode

In [4]:
%matplotlib inline
%load_ext autoreload
%autoreload 2

In [5]:
import os
import warnings
warnings.filterwarnings('ignore')
os.environ["PYTHONWARNINGS"] = "ignore"

In [6]:
cur_folder_name = os.getcwd().split('/')[-1]
if cur_folder_name != "fairness-variance":
    os.chdir("../../..")

print('Current location: ', os.getcwd())

Current location:  /Users/denys_herasymuk/UCU/4course_2term/Bachelor_Thesis/Code/fairness-variance


## Import dependencies

In [7]:
import copy

from virny.utils.custom_initializers import create_config_obj
from virny.datasets import LawSchoolDataset

from configs.constants import TEST_SET_FRACTION, EXPERIMENT_SEEDS
from configs.models_config_for_tuning import get_folktables_employment_models_params_for_tuning

from source.experiment_interface import run_exp_iter_with_disparate_impact

pip install 'aif360[LawSchoolGPA]'


## Define Input Variables

In [8]:
ROOT_DIR = os.getcwd()
EXPERIMENT_NAME = 'law_school'
DB_COLLECTION_NAME = 'one_repair_lvl_many_models'
FAIR_INTERVENTION_PARAMS_LST = [0.0]
SAVE_RESULTS_DIR_PATH = os.path.join(ROOT_DIR, 'results', 'test_meta_learner', EXPERIMENT_NAME)

config_yaml_path = os.path.join(ROOT_DIR, 'notebooks', 'test_meta_learner', EXPERIMENT_NAME, 'law_school_config.yaml')
metrics_computation_config = create_config_obj(config_yaml_path=config_yaml_path)

## Define a db writer and custom fields to insert into your database

In [9]:
import os
from dotenv import load_dotenv

load_dotenv('./configs/secrets.env')
os.getenv("DB_NAME")

'fairness_variance'

In [10]:
from source.utils.db_functions import connect_to_mongodb

client, collection_obj, db_writer_func = connect_to_mongodb(DB_COLLECTION_NAME)

In [11]:
import uuid

custom_table_fields_dct = {
    # 'session_uuid': str(uuid.uuid4()),
    'session_uuid': '9c3f16a8-fc31-4bb1-b6a8-c535326d57d4',
}
print('Current session uuid: ', custom_table_fields_dct['session_uuid'])

Current session uuid:  9c3f16a8-fc31-4bb1-b6a8-c535326d57d4


## Initialize custom objects

In [12]:
data_loader = LawSchoolDataset()
data_loader.X_data.head()

Unnamed: 0,decile1b,decile3,lsat,ugpa,zfygpa,zgpa,fulltime,fam_inc,male,tier,race
0,10.0,10.0,44.0,3.5,1.33,1.88,1.0,5.0,0.0,4.0,White
1,5.0,4.0,29.0,3.5,-0.11,-0.57,1.0,4.0,0.0,2.0,White
2,8.0,7.0,37.0,3.4,0.63,0.37,1.0,3.0,1.0,4.0,White
3,8.0,7.0,43.0,3.3,0.67,0.34,1.0,4.0,0.0,4.0,White
4,3.0,2.0,41.0,3.3,-0.67,-1.3,1.0,4.0,0.0,5.0,White


In [13]:
data_loader.X_data.shape

(20798, 11)

## Run experiment iterations

### Experiment iteration 1

In [14]:
tuned_params_filenames = [
    'tuning_results_Law_School_alpha_0.0.csv'
]
tuned_params_df_paths = [os.path.join(ROOT_DIR, 'results', 'test_meta_learner', EXPERIMENT_NAME, tuned_params_filename)
                         for tuned_params_filename in tuned_params_filenames]

In [15]:
# Configs for an experiment iteration
exp_iter_num = 1
experiment_seed = EXPERIMENT_SEEDS[exp_iter_num - 1]
custom_table_fields_dct['experiment_iteration'] = f'Exp_iter_{exp_iter_num}'

exp_iter_data_loader = copy.deepcopy(data_loader)  # Add deepcopy to avoid data leakage
models_params_for_tuning = get_folktables_employment_models_params_for_tuning(experiment_seed)
models_params_for_tuning = {
    'LogisticRegression': models_params_for_tuning['LogisticRegression']
}

In [16]:
run_exp_iter_with_disparate_impact(data_loader=exp_iter_data_loader,
                                   experiment_seed=experiment_seed,
                                   test_set_fraction=TEST_SET_FRACTION,
                                   db_writer_func=db_writer_func,
                                   fair_intervention_params_lst=FAIR_INTERVENTION_PARAMS_LST,
                                   models_params_for_tuning=models_params_for_tuning,
                                   metrics_computation_config=metrics_computation_config,
                                   custom_table_fields_dct=custom_table_fields_dct,
                                   # with_tuning=True,
                                   with_tuning=False,
                                   tuned_params_df_paths=tuned_params_df_paths,
                                   save_results_dir_path=SAVE_RESULTS_DIR_PATH,
                                   dataset_name='LawSchoolDataset',
                                   verbose=True)

2024-03-05 00:47:08 experiment_interface.py INFO    : Start an experiment iteration for the following custom params:


{'dataset_split_seed': 100,
 'experiment_iteration': 'Exp_iter_1',
 'fair_intervention_params_lst': '[0.0]',
 'model_init_seed': 100,
 'session_uuid': '9c3f16a8-fc31-4bb1-b6a8-c535326d57d4'}


Multiple alphas:   0%|          | 0/1 [00:00<?, ?it/s]

2024-03-05 00:47:08 experiment_interface.py INFO    : The dataset is preprocessed
2024-03-05 00:47:08 experiment_interface.py INFO    : Models config is loaded from the input file


intervention_param:  0.0
Skip preprocessing
cur_base_flow_dataset.X_train_val.columns:  Index(['cat__fulltime_1.0', 'cat__fulltime_2.0', 'cat__fam_inc_1.0',
       'cat__fam_inc_2.0', 'cat__fam_inc_3.0', 'cat__fam_inc_4.0',
       'cat__fam_inc_5.0', 'cat__tier_1.0', 'cat__tier_2.0', 'cat__tier_3.0',
       'cat__tier_4.0', 'cat__tier_5.0', 'cat__tier_6.0', 'num__decile1b',
       'num__decile3', 'num__lsat', 'num__ugpa', 'num__zfygpa', 'num__zgpa'],
      dtype='object')
Top indexes of an X_test in the current base flow dataset:  Int64Index([ 7102,   593, 18841,  5078, 14172,  8064, 13554, 13401, 17015,
            18446,  6938,  3450,  9375, 19994, 16100,  4401,   142, 15143,
             2188,  4332],
           dtype='int64')
Top indexes of an y_test in the current base flow dataset:  Int64Index([ 7102,   593, 18841,  5078, 14172,  8064, 13554, 13401, 17015,
            18446,  6938,  3450,  9375, 19994, 16100,  4401,   142, 15143,
             2188,  4332],
           dtype='int64

Analyze multiple models:   0%|          | 0/1 [00:00<?, ?it/s]

Enabled a 'meta_learner' mode
self.y_pred_test.index[:10] --  Int64Index([7102, 593, 18841, 5078, 14172, 8064, 13554, 13401, 17015, 18446], dtype='int64')
self.error_test.index[:10] --  Int64Index([7102, 593, 18841, 5078, 14172, 8064, 13554, 13401, 17015, 18446], dtype='int64')
[Voting] ................... (1 of 10) Processing gbt-1, total=  46.0s
[Voting] ................... (2 of 10) Processing gbt-2, total=  42.0s
[Voting] ................... (3 of 10) Processing gbt-3, total=  41.4s
[Voting] ................... (4 of 10) Processing gbt-4, total= 1.0min
[Voting] ................... (5 of 10) Processing gbt-5, total= 1.2min
[Voting] ................... (6 of 10) Processing gbt-6, total= 1.1min
[Voting] ................... (7 of 10) Processing gbt-7, total= 1.2min
[Voting] ................... (8 of 10) Processing gbt-8, total= 1.3min
[Voting] ................... (9 of 10) Processing gbt-9, total= 1.1min
[Voting] ................. (10 of 10) Processing gbt-10, total= 1.2min


Best para

### Experiment iteration 2

In [17]:
# Configs for an experiment iteration
exp_iter_num = 2
experiment_seed = EXPERIMENT_SEEDS[exp_iter_num - 1]
tuned_params_filenames = [
    'tuning_results_Law_School_alpha_0.0.csv'
]
tuned_params_df_paths = [os.path.join(ROOT_DIR, 'results', 'test_meta_learner', EXPERIMENT_NAME, tuned_params_filename)
                         for tuned_params_filename in tuned_params_filenames]
custom_table_fields_dct['experiment_iteration'] = f'Exp_iter_{exp_iter_num}'

exp_iter_data_loader = copy.deepcopy(data_loader)  # Add deepcopy to avoid data leakage
models_params_for_tuning = get_folktables_employment_models_params_for_tuning(experiment_seed)
models_params_for_tuning = {
    'LogisticRegression': models_params_for_tuning['LogisticRegression']
}

In [18]:
run_exp_iter_with_disparate_impact(data_loader=exp_iter_data_loader,
                                   experiment_seed=experiment_seed,
                                   test_set_fraction=TEST_SET_FRACTION,
                                   db_writer_func=db_writer_func,
                                   fair_intervention_params_lst=FAIR_INTERVENTION_PARAMS_LST,
                                   models_params_for_tuning=models_params_for_tuning,
                                   metrics_computation_config=metrics_computation_config,
                                   custom_table_fields_dct=custom_table_fields_dct,
                                   # with_tuning=True,
                                   with_tuning=False,
                                   tuned_params_df_paths=tuned_params_df_paths,
                                   save_results_dir_path=SAVE_RESULTS_DIR_PATH,
                                   dataset_name='LawSchoolDataset',
                                   verbose=True)

2024-03-05 00:57:31 experiment_interface.py INFO    : Start an experiment iteration for the following custom params:


{'dataset_split_seed': 200,
 'experiment_iteration': 'Exp_iter_2',
 'fair_intervention_params_lst': '[0.0]',
 'intervention_param': 0.0,
 'model_init_seed': 200,
 'run_start_date_time': datetime.datetime(2024, 3, 4, 22, 47, 8, 700069, tzinfo=datetime.timezone.utc),
 'session_uuid': '9c3f16a8-fc31-4bb1-b6a8-c535326d57d4'}


Multiple alphas:   0%|          | 0/1 [00:00<?, ?it/s]

2024-03-05 00:57:31 experiment_interface.py INFO    : The dataset is preprocessed
2024-03-05 00:57:31 experiment_interface.py INFO    : Models config is loaded from the input file


intervention_param:  0.0
Skip preprocessing
cur_base_flow_dataset.X_train_val.columns:  Index(['cat__fulltime_1.0', 'cat__fulltime_2.0', 'cat__fam_inc_1.0',
       'cat__fam_inc_2.0', 'cat__fam_inc_3.0', 'cat__fam_inc_4.0',
       'cat__fam_inc_5.0', 'cat__tier_1.0', 'cat__tier_2.0', 'cat__tier_3.0',
       'cat__tier_4.0', 'cat__tier_5.0', 'cat__tier_6.0', 'num__decile1b',
       'num__decile3', 'num__lsat', 'num__ugpa', 'num__zfygpa', 'num__zgpa'],
      dtype='object')
Top indexes of an X_test in the current base flow dataset:  Int64Index([11154,  2918,  4165, 16989, 11116, 18464,  5262,  2614, 11058,
            14769, 11087, 10125, 19318, 10215, 10253,  7587,  2716,   577,
             2257,  1011],
           dtype='int64')
Top indexes of an y_test in the current base flow dataset:  Int64Index([11154,  2918,  4165, 16989, 11116, 18464,  5262,  2614, 11058,
            14769, 11087, 10125, 19318, 10215, 10253,  7587,  2716,   577,
             2257,  1011],
           dtype='int64

Analyze multiple models:   0%|          | 0/1 [00:00<?, ?it/s]

Enabled a 'meta_learner' mode
self.y_pred_test.index[:10] --  Int64Index([11154, 2918, 4165, 16989, 11116, 18464, 5262, 2614, 11058, 14769], dtype='int64')
self.error_test.index[:10] --  Int64Index([11154, 2918, 4165, 16989, 11116, 18464, 5262, 2614, 11058, 14769], dtype='int64')
[Voting] ................... (1 of 10) Processing gbt-1, total= 1.1min
[Voting] ................... (2 of 10) Processing gbt-2, total= 1.1min
[Voting] ................... (3 of 10) Processing gbt-3, total= 1.2min
[Voting] ................... (4 of 10) Processing gbt-4, total= 1.1min
[Voting] ................... (5 of 10) Processing gbt-5, total=  44.4s
[Voting] ................... (6 of 10) Processing gbt-6, total=  43.1s
[Voting] ................... (7 of 10) Processing gbt-7, total=  43.4s
[Voting] ................... (8 of 10) Processing gbt-8, total=  43.3s
[Voting] ................... (9 of 10) Processing gbt-9, total=  44.7s
[Voting] ................. (10 of 10) Processing gbt-10, total=  43.2s


Best pa

### Experiment iteration 3

In [19]:
# Configs for an experiment iteration
exp_iter_num = 3
experiment_seed = EXPERIMENT_SEEDS[exp_iter_num - 1]
tuned_params_filenames = [
    'tuning_results_Law_School_alpha_0.0.csv'
]
tuned_params_df_paths = [os.path.join(ROOT_DIR, 'results', 'test_meta_learner', EXPERIMENT_NAME, tuned_params_filename)
                         for tuned_params_filename in tuned_params_filenames]
custom_table_fields_dct['experiment_iteration'] = f'Exp_iter_{exp_iter_num}'

exp_iter_data_loader = copy.deepcopy(data_loader)  # Add deepcopy to avoid data leakage
models_params_for_tuning = get_folktables_employment_models_params_for_tuning(experiment_seed)
models_params_for_tuning = {
    'LogisticRegression': models_params_for_tuning['LogisticRegression']
}

In [20]:
run_exp_iter_with_disparate_impact(data_loader=exp_iter_data_loader,
                                   experiment_seed=experiment_seed,
                                   test_set_fraction=TEST_SET_FRACTION,
                                   db_writer_func=db_writer_func,
                                   fair_intervention_params_lst=FAIR_INTERVENTION_PARAMS_LST,
                                   models_params_for_tuning=models_params_for_tuning,
                                   metrics_computation_config=metrics_computation_config,
                                   custom_table_fields_dct=custom_table_fields_dct,
                                   # with_tuning=True,
                                   with_tuning=False,
                                   tuned_params_df_paths=tuned_params_df_paths,
                                   save_results_dir_path=SAVE_RESULTS_DIR_PATH,
                                   dataset_name='LawSchoolDataset',
                                   verbose=True)

2024-03-05 01:06:27 experiment_interface.py INFO    : Start an experiment iteration for the following custom params:


{'dataset_split_seed': 300,
 'experiment_iteration': 'Exp_iter_3',
 'fair_intervention_params_lst': '[0.0]',
 'intervention_param': 0.0,
 'model_init_seed': 300,
 'run_start_date_time': datetime.datetime(2024, 3, 4, 22, 57, 31, 449113, tzinfo=datetime.timezone.utc),
 'session_uuid': '9c3f16a8-fc31-4bb1-b6a8-c535326d57d4'}


Multiple alphas:   0%|          | 0/1 [00:00<?, ?it/s]

2024-03-05 01:06:27 experiment_interface.py INFO    : The dataset is preprocessed
2024-03-05 01:06:27 experiment_interface.py INFO    : Models config is loaded from the input file


intervention_param:  0.0
Skip preprocessing
cur_base_flow_dataset.X_train_val.columns:  Index(['cat__fulltime_1.0', 'cat__fulltime_2.0', 'cat__fam_inc_1.0',
       'cat__fam_inc_2.0', 'cat__fam_inc_3.0', 'cat__fam_inc_4.0',
       'cat__fam_inc_5.0', 'cat__tier_1.0', 'cat__tier_2.0', 'cat__tier_3.0',
       'cat__tier_4.0', 'cat__tier_5.0', 'cat__tier_6.0', 'num__decile1b',
       'num__decile3', 'num__lsat', 'num__ugpa', 'num__zfygpa', 'num__zgpa'],
      dtype='object')
Top indexes of an X_test in the current base flow dataset:  Int64Index([19898, 14980, 13028,  5574, 17599,  3210, 11359, 19768, 19647,
             5966, 12835,  9832,  4098,  6607, 11751,   861, 11502, 17873,
            10882, 17470],
           dtype='int64')
Top indexes of an y_test in the current base flow dataset:  Int64Index([19898, 14980, 13028,  5574, 17599,  3210, 11359, 19768, 19647,
             5966, 12835,  9832,  4098,  6607, 11751,   861, 11502, 17873,
            10882, 17470],
           dtype='int64

Analyze multiple models:   0%|          | 0/1 [00:00<?, ?it/s]

Enabled a 'meta_learner' mode
self.y_pred_test.index[:10] --  Int64Index([19898, 14980, 13028, 5574, 17599, 3210, 11359, 19768, 19647, 5966], dtype='int64')
self.error_test.index[:10] --  Int64Index([19898, 14980, 13028, 5574, 17599, 3210, 11359, 19768, 19647, 5966], dtype='int64')
[Voting] ................... (1 of 10) Processing gbt-1, total=  57.0s
[Voting] ................... (2 of 10) Processing gbt-2, total= 1.5min
[Voting] ................... (3 of 10) Processing gbt-3, total= 1.1min
[Voting] ................... (4 of 10) Processing gbt-4, total= 1.5min
[Voting] ................... (5 of 10) Processing gbt-5, total= 1.5min
[Voting] ................... (6 of 10) Processing gbt-6, total= 1.3min
[Voting] ................... (7 of 10) Processing gbt-7, total=  56.6s
[Voting] ................... (8 of 10) Processing gbt-8, total=  56.4s
[Voting] ................... (9 of 10) Processing gbt-9, total= 1.5min
[Voting] ................. (10 of 10) Processing gbt-10, total= 1.5min


Best 

### Experiment iteration 4

In [21]:
# Configs for an experiment iteration
exp_iter_num = 4
experiment_seed = EXPERIMENT_SEEDS[exp_iter_num - 1]
tuned_params_filenames = [
    'tuning_results_Law_School_alpha_0.0.csv'
]
tuned_params_df_paths = [os.path.join(ROOT_DIR, 'results', 'test_meta_learner', EXPERIMENT_NAME, tuned_params_filename)
                         for tuned_params_filename in tuned_params_filenames]
custom_table_fields_dct['experiment_iteration'] = f'Exp_iter_{exp_iter_num}'

exp_iter_data_loader = copy.deepcopy(data_loader)  # Add deepcopy to avoid data leakage
models_params_for_tuning = get_folktables_employment_models_params_for_tuning(experiment_seed)
models_params_for_tuning = {
    'LogisticRegression': models_params_for_tuning['LogisticRegression']
}

In [22]:
run_exp_iter_with_disparate_impact(data_loader=exp_iter_data_loader,
                                   experiment_seed=experiment_seed,
                                   test_set_fraction=TEST_SET_FRACTION,
                                   db_writer_func=db_writer_func,
                                   fair_intervention_params_lst=FAIR_INTERVENTION_PARAMS_LST,
                                   models_params_for_tuning=models_params_for_tuning,
                                   metrics_computation_config=metrics_computation_config,
                                   custom_table_fields_dct=custom_table_fields_dct,
                                   # with_tuning=True,
                                   with_tuning=False,
                                   tuned_params_df_paths=tuned_params_df_paths,
                                   save_results_dir_path=SAVE_RESULTS_DIR_PATH,
                                   dataset_name='LawSchoolDataset',
                                   verbose=True)

2024-03-05 01:19:07 experiment_interface.py INFO    : Start an experiment iteration for the following custom params:


{'dataset_split_seed': 400,
 'experiment_iteration': 'Exp_iter_4',
 'fair_intervention_params_lst': '[0.0]',
 'intervention_param': 0.0,
 'model_init_seed': 400,
 'run_start_date_time': datetime.datetime(2024, 3, 4, 23, 6, 27, 790660, tzinfo=datetime.timezone.utc),
 'session_uuid': '9c3f16a8-fc31-4bb1-b6a8-c535326d57d4'}


Multiple alphas:   0%|          | 0/1 [00:00<?, ?it/s]

2024-03-05 01:19:07 experiment_interface.py INFO    : The dataset is preprocessed
2024-03-05 01:19:08 experiment_interface.py INFO    : Models config is loaded from the input file


intervention_param:  0.0
Skip preprocessing
cur_base_flow_dataset.X_train_val.columns:  Index(['cat__fulltime_1.0', 'cat__fulltime_2.0', 'cat__fam_inc_1.0',
       'cat__fam_inc_2.0', 'cat__fam_inc_3.0', 'cat__fam_inc_4.0',
       'cat__fam_inc_5.0', 'cat__tier_1.0', 'cat__tier_2.0', 'cat__tier_3.0',
       'cat__tier_4.0', 'cat__tier_5.0', 'cat__tier_6.0', 'num__decile1b',
       'num__decile3', 'num__lsat', 'num__ugpa', 'num__zfygpa', 'num__zgpa'],
      dtype='object')
Top indexes of an X_test in the current base flow dataset:  Int64Index([ 5178,  6668, 11759,   242, 15137, 15320, 16824, 15370, 10348,
            18018,   684, 12169,   822,  4052,  5211,  8436,   126,  9549,
            13980,   403],
           dtype='int64')
Top indexes of an y_test in the current base flow dataset:  Int64Index([ 5178,  6668, 11759,   242, 15137, 15320, 16824, 15370, 10348,
            18018,   684, 12169,   822,  4052,  5211,  8436,   126,  9549,
            13980,   403],
           dtype='int64

Analyze multiple models:   0%|          | 0/1 [00:00<?, ?it/s]

Enabled a 'meta_learner' mode
self.y_pred_test.index[:10] --  Int64Index([5178, 6668, 11759, 242, 15137, 15320, 16824, 15370, 10348, 18018], dtype='int64')
self.error_test.index[:10] --  Int64Index([5178, 6668, 11759, 242, 15137, 15320, 16824, 15370, 10348, 18018], dtype='int64')
[Voting] ................... (1 of 10) Processing gbt-1, total= 1.3min
[Voting] ................... (2 of 10) Processing gbt-2, total=  58.9s
[Voting] ................... (3 of 10) Processing gbt-3, total=  49.4s
[Voting] ................... (4 of 10) Processing gbt-4, total=  46.2s
[Voting] ................... (5 of 10) Processing gbt-5, total=  46.1s
[Voting] ................... (6 of 10) Processing gbt-6, total= 1.0min
[Voting] ................... (7 of 10) Processing gbt-7, total= 1.5min
[Voting] ................... (8 of 10) Processing gbt-8, total= 1.5min
[Voting] ................... (9 of 10) Processing gbt-9, total= 1.3min
[Voting] ................. (10 of 10) Processing gbt-10, total=  56.8s


Best pa

### Experiment iteration 5

In [23]:
# Configs for an experiment iteration
exp_iter_num = 5
experiment_seed = EXPERIMENT_SEEDS[exp_iter_num - 1]
tuned_params_filenames = [
    'tuning_results_Law_School_alpha_0.0.csv'
]
tuned_params_df_paths = [os.path.join(ROOT_DIR, 'results', 'test_meta_learner', EXPERIMENT_NAME, tuned_params_filename)
                         for tuned_params_filename in tuned_params_filenames]
custom_table_fields_dct['experiment_iteration'] = f'Exp_iter_{exp_iter_num}'

exp_iter_data_loader = copy.deepcopy(data_loader)  # Add deepcopy to avoid data leakage
models_params_for_tuning = get_folktables_employment_models_params_for_tuning(experiment_seed)
models_params_for_tuning = {
    'LogisticRegression': models_params_for_tuning['LogisticRegression']
}

In [24]:
run_exp_iter_with_disparate_impact(data_loader=exp_iter_data_loader,
                                   experiment_seed=experiment_seed,
                                   test_set_fraction=TEST_SET_FRACTION,
                                   db_writer_func=db_writer_func,
                                   fair_intervention_params_lst=FAIR_INTERVENTION_PARAMS_LST,
                                   models_params_for_tuning=models_params_for_tuning,
                                   metrics_computation_config=metrics_computation_config,
                                   custom_table_fields_dct=custom_table_fields_dct,
                                   # with_tuning=True,
                                   with_tuning=False,
                                   tuned_params_df_paths=tuned_params_df_paths,
                                   save_results_dir_path=SAVE_RESULTS_DIR_PATH,
                                   dataset_name='LawSchoolDataset',
                                   verbose=True)

2024-03-05 01:30:01 experiment_interface.py INFO    : Start an experiment iteration for the following custom params:


{'dataset_split_seed': 500,
 'experiment_iteration': 'Exp_iter_5',
 'fair_intervention_params_lst': '[0.0]',
 'intervention_param': 0.0,
 'model_init_seed': 500,
 'run_start_date_time': datetime.datetime(2024, 3, 4, 23, 19, 8, 5657, tzinfo=datetime.timezone.utc),
 'session_uuid': '9c3f16a8-fc31-4bb1-b6a8-c535326d57d4'}


Multiple alphas:   0%|          | 0/1 [00:00<?, ?it/s]

2024-03-05 01:30:01 experiment_interface.py INFO    : The dataset is preprocessed
2024-03-05 01:30:01 experiment_interface.py INFO    : Models config is loaded from the input file


intervention_param:  0.0
Skip preprocessing
cur_base_flow_dataset.X_train_val.columns:  Index(['cat__fulltime_1.0', 'cat__fulltime_2.0', 'cat__fam_inc_1.0',
       'cat__fam_inc_2.0', 'cat__fam_inc_3.0', 'cat__fam_inc_4.0',
       'cat__fam_inc_5.0', 'cat__tier_1.0', 'cat__tier_2.0', 'cat__tier_3.0',
       'cat__tier_4.0', 'cat__tier_5.0', 'cat__tier_6.0', 'num__decile1b',
       'num__decile3', 'num__lsat', 'num__ugpa', 'num__zfygpa', 'num__zgpa'],
      dtype='object')
Top indexes of an X_test in the current base flow dataset:  Int64Index([12269, 18551, 11422,  4842,  2061, 12046, 16727, 11861, 14580,
            19701, 12917,  7538, 17610, 19120,  5581, 18072, 13209, 20500,
            14741, 11309],
           dtype='int64')
Top indexes of an y_test in the current base flow dataset:  Int64Index([12269, 18551, 11422,  4842,  2061, 12046, 16727, 11861, 14580,
            19701, 12917,  7538, 17610, 19120,  5581, 18072, 13209, 20500,
            14741, 11309],
           dtype='int64

Analyze multiple models:   0%|          | 0/1 [00:00<?, ?it/s]

Enabled a 'meta_learner' mode
self.y_pred_test.index[:10] --  Int64Index([12269, 18551, 11422, 4842, 2061, 12046, 16727, 11861, 14580,
            19701],
           dtype='int64')
self.error_test.index[:10] --  Int64Index([12269, 18551, 11422, 4842, 2061, 12046, 16727, 11861, 14580,
            19701],
           dtype='int64')
[Voting] ................... (1 of 10) Processing gbt-1, total=  49.9s
[Voting] ................... (2 of 10) Processing gbt-2, total=  46.4s
[Voting] ................... (3 of 10) Processing gbt-3, total=  50.4s
[Voting] ................... (4 of 10) Processing gbt-4, total= 1.5min
[Voting] ................... (5 of 10) Processing gbt-5, total= 1.5min
[Voting] ................... (6 of 10) Processing gbt-6, total= 1.3min
[Voting] ................... (7 of 10) Processing gbt-7, total= 1.1min
[Voting] ................... (8 of 10) Processing gbt-8, total=  51.7s
[Voting] ................... (9 of 10) Processing gbt-9, total=  46.3s
[Voting] ................. (10

### Experiment iteration 6

In [25]:
# Configs for an experiment iteration
exp_iter_num = 6
experiment_seed = EXPERIMENT_SEEDS[exp_iter_num - 1]
tuned_params_filenames = [
    'tuning_results_Law_School_alpha_0.0.csv'
]
tuned_params_df_paths = [os.path.join(ROOT_DIR, 'results', 'test_meta_learner', EXPERIMENT_NAME, tuned_params_filename)
                         for tuned_params_filename in tuned_params_filenames]
custom_table_fields_dct['experiment_iteration'] = f'Exp_iter_{exp_iter_num}'

exp_iter_data_loader = copy.deepcopy(data_loader)  # Add deepcopy to avoid data leakage
models_params_for_tuning = get_folktables_employment_models_params_for_tuning(experiment_seed)
models_params_for_tuning = {
    'LogisticRegression': models_params_for_tuning['LogisticRegression']
}

In [26]:
run_exp_iter_with_disparate_impact(data_loader=exp_iter_data_loader,
                                   experiment_seed=experiment_seed,
                                   test_set_fraction=TEST_SET_FRACTION,
                                   db_writer_func=db_writer_func,
                                   fair_intervention_params_lst=FAIR_INTERVENTION_PARAMS_LST,
                                   models_params_for_tuning=models_params_for_tuning,
                                   metrics_computation_config=metrics_computation_config,
                                   custom_table_fields_dct=custom_table_fields_dct,
                                   # with_tuning=True,
                                   with_tuning=False,
                                   tuned_params_df_paths=tuned_params_df_paths,
                                   save_results_dir_path=SAVE_RESULTS_DIR_PATH,
                                   dataset_name='LawSchoolDataset',
                                   verbose=True)

2024-03-05 01:40:16 experiment_interface.py INFO    : Start an experiment iteration for the following custom params:


{'dataset_split_seed': 600,
 'experiment_iteration': 'Exp_iter_6',
 'fair_intervention_params_lst': '[0.0]',
 'intervention_param': 0.0,
 'model_init_seed': 600,
 'run_start_date_time': datetime.datetime(2024, 3, 4, 23, 30, 1, 690647, tzinfo=datetime.timezone.utc),
 'session_uuid': '9c3f16a8-fc31-4bb1-b6a8-c535326d57d4'}


Multiple alphas:   0%|          | 0/1 [00:00<?, ?it/s]

2024-03-05 01:40:16 experiment_interface.py INFO    : The dataset is preprocessed
2024-03-05 01:40:16 experiment_interface.py INFO    : Models config is loaded from the input file


intervention_param:  0.0
Skip preprocessing
cur_base_flow_dataset.X_train_val.columns:  Index(['cat__fulltime_1.0', 'cat__fulltime_2.0', 'cat__fam_inc_1.0',
       'cat__fam_inc_2.0', 'cat__fam_inc_3.0', 'cat__fam_inc_4.0',
       'cat__fam_inc_5.0', 'cat__tier_1.0', 'cat__tier_2.0', 'cat__tier_3.0',
       'cat__tier_4.0', 'cat__tier_5.0', 'cat__tier_6.0', 'num__decile1b',
       'num__decile3', 'num__lsat', 'num__ugpa', 'num__zfygpa', 'num__zgpa'],
      dtype='object')
Top indexes of an X_test in the current base flow dataset:  Int64Index([ 9347, 19474, 17223,  1292, 13480,  5501,  2413, 10309,  6657,
            14802,  4565,  5107,  5436, 20504,  5959,  3443, 20524,  4751,
            18806,   198],
           dtype='int64')
Top indexes of an y_test in the current base flow dataset:  Int64Index([ 9347, 19474, 17223,  1292, 13480,  5501,  2413, 10309,  6657,
            14802,  4565,  5107,  5436, 20504,  5959,  3443, 20524,  4751,
            18806,   198],
           dtype='int64

Analyze multiple models:   0%|          | 0/1 [00:00<?, ?it/s]

Enabled a 'meta_learner' mode
self.y_pred_test.index[:10] --  Int64Index([9347, 19474, 17223, 1292, 13480, 5501, 2413, 10309, 6657, 14802], dtype='int64')
self.error_test.index[:10] --  Int64Index([9347, 19474, 17223, 1292, 13480, 5501, 2413, 10309, 6657, 14802], dtype='int64')
[Voting] ................... (1 of 10) Processing gbt-1, total= 1.3min
[Voting] ................... (2 of 10) Processing gbt-2, total= 1.5min
[Voting] ................... (3 of 10) Processing gbt-3, total= 1.4min
[Voting] ................... (4 of 10) Processing gbt-4, total= 1.2min
[Voting] ................... (5 of 10) Processing gbt-5, total=  56.1s
[Voting] ................... (6 of 10) Processing gbt-6, total= 1.3min
[Voting] ................... (7 of 10) Processing gbt-7, total= 1.5min
[Voting] ................... (8 of 10) Processing gbt-8, total= 1.4min
[Voting] ................... (9 of 10) Processing gbt-9, total= 1.2min
[Voting] ................. (10 of 10) Processing gbt-10, total=  53.3s


Best para