In [15]:
# !pip install -r ./requirements.txt

In [2]:
# !pip uninstall virny -y

In [4]:
# Install using an HTTPS link
# !pip install git+https://github.com/DataResponsibly/Virny.git@feature/prepare_for_uncertainty_experiments

# Install using an SSH link
# !pip install git+ssh://git@github.com/DataResponsibly/Virny.git@feature/prepare_for_uncertainty_experiments

In [1]:
# Render matplotlib figures inline in the notebook output.
%matplotlib inline
# Load the autoreload extension; mode 2 reloads all imported modules before each
# cell is executed, so edits to project modules are picked up without a kernel restart.
%load_ext autoreload
%autoreload 2

In [2]:
import os
import warnings
# Suppress every warning category for the rest of this kernel session.
warnings.filterwarnings('ignore')
# Same setting exported through the environment; presumably intended for
# child/worker processes started later — TODO confirm.
os.environ["PYTHONWARNINGS"] = "ignore"

In [3]:
# Make the repository root the working directory so that relative paths used
# later in the notebook (e.g. './configs/secrets.env') resolve correctly.
# os.path.basename is used instead of splitting on '/' so the folder check also
# works when the platform uses a different path separator (e.g. Windows).
cur_folder_name = os.path.basename(os.getcwd())
if cur_folder_name != "fairness-variance":
    # NOTE(review): assumes the notebook is started four directory levels below
    # the repository root — confirm if the notebook is moved.
    os.chdir("../../../..")

print('Current location: ', os.getcwd())

Current location:  /Users/denys_herasymuk/UCU/4course_2term/Bachelor_Thesis/Code/fairness-variance


## Import dependencies

In [4]:
import copy

from virny.utils.custom_initializers import create_config_obj
from virny.datasets import StudentPerformancePortugueseDataset

from configs.constants import TEST_SET_FRACTION, EXPERIMENT_SEEDS
from configs.models_config_for_tuning import get_folktables_employment_models_params_for_tuning

from source.experiment_interface import run_exp_iter_with_LFR

pip install 'aif360[LawSchoolGPA]'
pip install 'aif360[AdversarialDebiasing]'
pip install 'aif360[AdversarialDebiasing]'


## Define Input Variables

In [5]:
# Notebook-wide constants for this experiment.
ROOT_DIR = os.getcwd()
EXPERIMENT_NAME = 'EqOdds_student_performance'
DB_COLLECTION_NAME = 'one_repair_lvl_many_models'
FAIR_INTERVENTION_PARAMS_LST = [True]

# The results directory and the notebook directory share the same trailing
# path components, so they are spelled out once and reused for both.
experiment_subpath = ('diff_fairness_interventions_exp', 'EqOdds_student_performance', EXPERIMENT_NAME)
SAVE_RESULTS_DIR_PATH = os.path.join(ROOT_DIR, 'results', *experiment_subpath)

# YAML config stored next to this experiment's notebooks; parsed into the
# config object that is later passed to the experiment runner.
config_yaml_path = os.path.join(ROOT_DIR, 'notebooks', *experiment_subpath,
                                'student_performance_por_config.yaml')
metrics_computation_config = create_config_obj(config_yaml_path=config_yaml_path)

## Define a db writer and custom fields to insert into your database

In [6]:
import os  # already imported earlier in the notebook; repeated here
from dotenv import load_dotenv

# Load environment variables from the secrets file (path is relative to the working directory).
load_dotenv('./configs/secrets.env')
# Displayed as the cell output.
# NOTE(review): this writes the database name into the saved notebook output.
os.getenv("DB_NAME")

'fairness_variance'

In [7]:
from source.utils.db_functions import connect_to_mongodb

# Returns three objects; db_writer_func is later handed to the experiment runner.
# NOTE(review): presumably relies on the connection settings loaded from secrets.env — confirm.
client, collection_obj, db_writer_func = connect_to_mongodb(DB_COLLECTION_NAME)

In [9]:
import uuid

# A fresh random (version 4) identifier labels this notebook session; it is
# stored under the 'session_uuid' key of the custom fields dict.
# To reuse a fixed id instead, presumably assign that string here — confirm.
session_uuid = str(uuid.uuid4())
custom_table_fields_dct = {'session_uuid': session_uuid}
print('Current session uuid: ', session_uuid)

Current session uuid:  58b1d4c7-e5b0-4f2a-892d-b825009d18bf


## Initialize custom objects

In [10]:
# Instantiate the dataset wrapper imported from virny.datasets and preview the
# first rows of its feature table.
data_loader = StudentPerformancePortugueseDataset()
data_loader.X_data.head()

Unnamed: 0,age,Medu,Fedu,traveltime,studytime,failures,famrel,freetime,goout,Dalc,...,reason,guardian,schoolsup,famsup,paid,activities,nursery,higher,internet,romantic
0,18,4,4,2,2,0,4,3,4,1,...,course,mother,yes,no,no,no,yes,yes,no,no
1,17,1,1,1,2,0,5,3,3,1,...,course,father,no,yes,no,no,no,yes,yes,no
2,15,1,1,1,2,0,4,3,2,2,...,other,mother,yes,no,no,no,yes,yes,yes,no
3,15,4,2,1,3,0,3,2,2,1,...,home,mother,no,yes,no,yes,yes,yes,yes,yes
4,16,3,3,1,2,0,4,3,2,1,...,home,father,no,yes,no,no,yes,yes,no,no


In [11]:
# Dimensions of the feature table, shown as the cell output.
data_loader.X_data.shape

(649, 32)

## Run experiment iterations

### Experiment iteration 1

In [12]:
# tuned_params_filenames = ['tuning_results_Folktables_NY_2018_Employment_alpha_0.8_20230706__115508.csv']
# tuned_params_df_paths = [os.path.join(ROOT_DIR, 'results', EXPERIMENT_NAME, tuned_params_filename)
#                          for tuned_params_filename in tuned_params_filenames]

In [13]:
# Configs for an experiment iteration
exp_iter_num = 1
experiment_seed = EXPERIMENT_SEEDS[exp_iter_num - 1]
custom_table_fields_dct['experiment_iteration'] = f'Exp_iter_{exp_iter_num}'

exp_iter_data_loader = copy.deepcopy(data_loader)  # Add deepcopy to avoid data leakage
models_params_for_tuning = get_folktables_employment_models_params_for_tuning(experiment_seed)

In [14]:
# Iteration 1 runs with tuning enabled.
# Alternative setup: with_tuning=False together with tuned_params_df_paths=tuned_params_df_paths.
# NOTE(review): the runner is named `..._with_LFR` while EXPERIMENT_NAME refers to
# EqOdds — confirm this is the intended entry point.
run_exp_iter_with_LFR(
    data_loader=exp_iter_data_loader,
    experiment_seed=experiment_seed,
    test_set_fraction=TEST_SET_FRACTION,
    db_writer_func=db_writer_func,
    fair_intervention_params_lst=FAIR_INTERVENTION_PARAMS_LST,
    models_params_for_tuning=models_params_for_tuning,
    metrics_computation_config=metrics_computation_config,
    custom_table_fields_dct=custom_table_fields_dct,
    with_tuning=True,
    save_results_dir_path=SAVE_RESULTS_DIR_PATH,
    dataset_name='StudentPerformancePortugueseDataset',
    verbose=True,
)

2024-01-04 00:22:11 experiment_interface.py INFO    : Start an experiment iteration for the following custom params:


{'dataset_split_seed': 100,
 'experiment_iteration': 'Exp_iter_1',
 'fair_intervention_params_lst': '[True]',
 'model_init_seed': 100,
 'session_uuid': '58b1d4c7-e5b0-4f2a-892d-b825009d18bf'}


Multiple alphas:   0%|          | 0/1 [00:00<?, ?it/s]

2024-01-04 00:22:11 experiment_interface.py INFO    : The dataset is preprocessed


intervention_options:  True
cur_base_flow_dataset.X_train_val.columns:  Index(['cat__school_GP', 'cat__school_MS', 'cat__address_R', 'cat__address_U',
       'cat__famsize_GT3', 'cat__famsize_LE3', 'cat__Pstatus_A',
       'cat__Pstatus_T', 'cat__Mjob_at_home', 'cat__Mjob_health',
       'cat__Mjob_other', 'cat__Mjob_services', 'cat__Mjob_teacher',
       'cat__Fjob_at_home', 'cat__Fjob_health', 'cat__Fjob_other',
       'cat__Fjob_services', 'cat__Fjob_teacher', 'cat__reason_course',
       'cat__reason_home', 'cat__reason_other', 'cat__reason_reputation',
       'cat__guardian_father', 'cat__guardian_mother', 'cat__guardian_other',
       'cat__schoolsup_no', 'cat__schoolsup_yes', 'cat__famsup_no',
       'cat__famsup_yes', 'cat__paid_no', 'cat__paid_yes',
       'cat__activities_no', 'cat__activities_yes', 'cat__nursery_no',
       'cat__nursery_yes', 'cat__higher_no', 'cat__higher_yes',
       'cat__internet_no', 'cat__internet_yes', 'cat__romantic_no',
       'cat__romantic_yes', 

2024-01-04 00:22:12 experiment_interface.py INFO    : Models are tuned and saved to a file


Analyze multiple models:   0%|          | 0/1 [00:00<?, ?it/s]

Classifiers testing by bootstrap:   0%|          | 0/200 [00:00<?, ?it/s]

### Experiment iteration 2

In [21]:
# --- Settings for experiment iteration 2 ---
exp_iter_num = 2
# Iterations are numbered from 1, EXPERIMENT_SEEDS is indexed from 0.
experiment_seed = EXPERIMENT_SEEDS[exp_iter_num - 1]

# Tuning results file(s) handed to the runner for this iteration.
tuned_params_filenames = ['tuning_results_Student_Performance_Por_20240102__190652.csv']
tuned_params_dir = os.path.join(ROOT_DIR, 'results', 'diff_fairness_interventions_exp',
                                'EqOdds_student_performance', EXPERIMENT_NAME)
tuned_params_df_paths = [os.path.join(tuned_params_dir, filename)
                         for filename in tuned_params_filenames]

# Independent copy of the dataset so this iteration does not touch the shared `data_loader`.
exp_iter_data_loader = copy.deepcopy(data_loader)
models_params_for_tuning = get_folktables_employment_models_params_for_tuning(experiment_seed)

# Record which iteration the custom fields belong to.
custom_table_fields_dct['experiment_iteration'] = f'Exp_iter_{exp_iter_num}'

In [1]:
# Iteration 2 runs with tuning disabled and passes the tuning results file paths instead.
run_exp_iter_with_LFR(
    data_loader=exp_iter_data_loader,
    experiment_seed=experiment_seed,
    test_set_fraction=TEST_SET_FRACTION,
    db_writer_func=db_writer_func,
    fair_intervention_params_lst=FAIR_INTERVENTION_PARAMS_LST,
    models_params_for_tuning=models_params_for_tuning,
    metrics_computation_config=metrics_computation_config,
    custom_table_fields_dct=custom_table_fields_dct,
    with_tuning=False,
    tuned_params_df_paths=tuned_params_df_paths,
    save_results_dir_path=SAVE_RESULTS_DIR_PATH,
    dataset_name='StudentPerformancePortugueseDataset',
    verbose=True,
)

### Experiment iteration 3

In [23]:
# --- Settings for experiment iteration 3 ---
exp_iter_num = 3
# Iterations are numbered from 1, EXPERIMENT_SEEDS is indexed from 0.
experiment_seed = EXPERIMENT_SEEDS[exp_iter_num - 1]

# Tuning results file(s) handed to the runner for this iteration.
tuned_params_filenames = ['tuning_results_Student_Performance_Por_20240102__190652.csv']
tuned_params_dir = os.path.join(ROOT_DIR, 'results', 'diff_fairness_interventions_exp',
                                'EqOdds_student_performance', EXPERIMENT_NAME)
tuned_params_df_paths = [os.path.join(tuned_params_dir, filename)
                         for filename in tuned_params_filenames]

# Independent copy of the dataset so this iteration does not touch the shared `data_loader`.
exp_iter_data_loader = copy.deepcopy(data_loader)
models_params_for_tuning = get_folktables_employment_models_params_for_tuning(experiment_seed)

# Record which iteration the custom fields belong to.
custom_table_fields_dct['experiment_iteration'] = f'Exp_iter_{exp_iter_num}'

In [2]:
# Iteration 3 runs with tuning disabled and passes the tuning results file paths instead.
run_exp_iter_with_LFR(
    data_loader=exp_iter_data_loader,
    experiment_seed=experiment_seed,
    test_set_fraction=TEST_SET_FRACTION,
    db_writer_func=db_writer_func,
    fair_intervention_params_lst=FAIR_INTERVENTION_PARAMS_LST,
    models_params_for_tuning=models_params_for_tuning,
    metrics_computation_config=metrics_computation_config,
    custom_table_fields_dct=custom_table_fields_dct,
    with_tuning=False,
    tuned_params_df_paths=tuned_params_df_paths,
    save_results_dir_path=SAVE_RESULTS_DIR_PATH,
    dataset_name='StudentPerformancePortugueseDataset',
    verbose=True,
)

### Experiment iteration 4

In [25]:
# --- Settings for experiment iteration 4 ---
exp_iter_num = 4
# Iterations are numbered from 1, EXPERIMENT_SEEDS is indexed from 0.
experiment_seed = EXPERIMENT_SEEDS[exp_iter_num - 1]

# Tuning results file(s) handed to the runner for this iteration.
tuned_params_filenames = ['tuning_results_Student_Performance_Por_20240102__190652.csv']
tuned_params_dir = os.path.join(ROOT_DIR, 'results', 'diff_fairness_interventions_exp',
                                'EqOdds_student_performance', EXPERIMENT_NAME)
tuned_params_df_paths = [os.path.join(tuned_params_dir, filename)
                         for filename in tuned_params_filenames]

# Independent copy of the dataset so this iteration does not touch the shared `data_loader`.
exp_iter_data_loader = copy.deepcopy(data_loader)
models_params_for_tuning = get_folktables_employment_models_params_for_tuning(experiment_seed)

# Record which iteration the custom fields belong to.
custom_table_fields_dct['experiment_iteration'] = f'Exp_iter_{exp_iter_num}'

In [3]:
# Iteration 4 runs with tuning disabled and passes the tuning results file paths instead.
run_exp_iter_with_LFR(
    data_loader=exp_iter_data_loader,
    experiment_seed=experiment_seed,
    test_set_fraction=TEST_SET_FRACTION,
    db_writer_func=db_writer_func,
    fair_intervention_params_lst=FAIR_INTERVENTION_PARAMS_LST,
    models_params_for_tuning=models_params_for_tuning,
    metrics_computation_config=metrics_computation_config,
    custom_table_fields_dct=custom_table_fields_dct,
    with_tuning=False,
    tuned_params_df_paths=tuned_params_df_paths,
    save_results_dir_path=SAVE_RESULTS_DIR_PATH,
    dataset_name='StudentPerformancePortugueseDataset',
    verbose=True,
)

### Experiment iteration 5

In [27]:
# --- Settings for experiment iteration 5 ---
exp_iter_num = 5
# Iterations are numbered from 1, EXPERIMENT_SEEDS is indexed from 0.
experiment_seed = EXPERIMENT_SEEDS[exp_iter_num - 1]

# Tuning results file(s) handed to the runner for this iteration.
tuned_params_filenames = ['tuning_results_Student_Performance_Por_20240102__190652.csv']
tuned_params_dir = os.path.join(ROOT_DIR, 'results', 'diff_fairness_interventions_exp',
                                'EqOdds_student_performance', EXPERIMENT_NAME)
tuned_params_df_paths = [os.path.join(tuned_params_dir, filename)
                         for filename in tuned_params_filenames]

# Independent copy of the dataset so this iteration does not touch the shared `data_loader`.
exp_iter_data_loader = copy.deepcopy(data_loader)
models_params_for_tuning = get_folktables_employment_models_params_for_tuning(experiment_seed)

# Record which iteration the custom fields belong to.
custom_table_fields_dct['experiment_iteration'] = f'Exp_iter_{exp_iter_num}'

In [4]:
# Iteration 5 runs with tuning disabled and passes the tuning results file paths instead.
run_exp_iter_with_LFR(
    data_loader=exp_iter_data_loader,
    experiment_seed=experiment_seed,
    test_set_fraction=TEST_SET_FRACTION,
    db_writer_func=db_writer_func,
    fair_intervention_params_lst=FAIR_INTERVENTION_PARAMS_LST,
    models_params_for_tuning=models_params_for_tuning,
    metrics_computation_config=metrics_computation_config,
    custom_table_fields_dct=custom_table_fields_dct,
    with_tuning=False,
    tuned_params_df_paths=tuned_params_df_paths,
    save_results_dir_path=SAVE_RESULTS_DIR_PATH,
    dataset_name='StudentPerformancePortugueseDataset',
    verbose=True,
)

### Experiment iteration 6

In [29]:
# --- Settings for experiment iteration 6 ---
exp_iter_num = 6
# Iterations are numbered from 1, EXPERIMENT_SEEDS is indexed from 0.
experiment_seed = EXPERIMENT_SEEDS[exp_iter_num - 1]

# Tuning results file(s) handed to the runner for this iteration.
tuned_params_filenames = ['tuning_results_Student_Performance_Por_20240102__190652.csv']
tuned_params_dir = os.path.join(ROOT_DIR, 'results', 'diff_fairness_interventions_exp',
                                'EqOdds_student_performance', EXPERIMENT_NAME)
tuned_params_df_paths = [os.path.join(tuned_params_dir, filename)
                         for filename in tuned_params_filenames]

# Independent copy of the dataset so this iteration does not touch the shared `data_loader`.
exp_iter_data_loader = copy.deepcopy(data_loader)
models_params_for_tuning = get_folktables_employment_models_params_for_tuning(experiment_seed)

# Record which iteration the custom fields belong to.
custom_table_fields_dct['experiment_iteration'] = f'Exp_iter_{exp_iter_num}'

In [5]:
# Iteration 6 runs with tuning disabled and passes the tuning results file paths instead.
run_exp_iter_with_LFR(
    data_loader=exp_iter_data_loader,
    experiment_seed=experiment_seed,
    test_set_fraction=TEST_SET_FRACTION,
    db_writer_func=db_writer_func,
    fair_intervention_params_lst=FAIR_INTERVENTION_PARAMS_LST,
    models_params_for_tuning=models_params_for_tuning,
    metrics_computation_config=metrics_computation_config,
    custom_table_fields_dct=custom_table_fields_dct,
    with_tuning=False,
    tuned_params_df_paths=tuned_params_df_paths,
    save_results_dir_path=SAVE_RESULTS_DIR_PATH,
    dataset_name='StudentPerformancePortugueseDataset',
    verbose=True,
)