In [None]:
# !pip install -r ./requirements.txt

In [None]:
# !pip uninstall virny -y

In [None]:
# Install using an HTTPS link
# !pip install git+https://github.com/DataResponsibly/Virny.git@feature/prepare_for_uncertainty_experiments

# Install using an SSH link
# !pip install git+ssh://git@github.com/DataResponsibly/Virny.git@feature/prepare_for_uncertainty_experiments

In [None]:
# Render matplotlib figures inline in the notebook output.
%matplotlib inline
# Load the autoreload extension and enable mode 2, which re-imports modules
# before each cell execution so edits to project code are picked up without
# restarting the kernel.
%load_ext autoreload
%autoreload 2

In [None]:
import os
import warnings

# Silence warnings for this notebook run: the environment variable is set so
# that Python processes inheriting this environment start with the same
# setting, and the filter covers the current kernel.
os.environ["PYTHONWARNINGS"] = "ignore"
warnings.filterwarnings(action='ignore')

In [None]:
# The notebook uses repo-relative paths (e.g. ./configs/..., results/...), so the
# working directory must be the "fairness-variance" repository root. When the
# kernel was started elsewhere (presumably in the notebook's own folder, four
# levels below the root), move up to the root.
# os.path.basename is used instead of splitting on '/' so the folder-name check
# also works with platform-specific path separators (e.g. '\\' on Windows).
cur_folder_name = os.path.basename(os.getcwd())
if cur_folder_name != "fairness-variance":
    os.chdir("../../../..")

print('Current location: ', os.getcwd())

## Import dependencies

In [None]:
import copy

from virny.utils.custom_initializers import create_config_obj
from virny.datasets import ACSIncomeDataset

from configs.constants import TEST_SET_FRACTION, EXPERIMENT_SEEDS
from configs.models_config_for_tuning import get_folktables_employment_models_params_for_tuning

from source.experiment_interface import run_exp_iter_with_eq_odds

## Define Input Variables

In [None]:
# Experiment-wide constants. ROOT_DIR is taken from the current working
# directory, so this cell assumes the kernel is already at the repository root.
ROOT_DIR = os.getcwd()
DATASET_NAME = 'ACSIncomeDataset'
EXPERIMENT_NAME = 'EqOdds_acs_income'
DB_COLLECTION_NAME = 'one_repair_lvl_many_models'
FAIR_INTERVENTION_PARAMS_LST = [True]

# The results directory and the notebook/config directory share the same
# experiment-specific sub-path under 'results' and 'notebooks' respectively.
_EXPERIMENT_SUBPATH = ('diff_fairness_interventions_exp', 'EqOddsPostprocessing', EXPERIMENT_NAME)
SAVE_RESULTS_DIR_PATH = os.path.join(ROOT_DIR, 'results', *_EXPERIMENT_SUBPATH)

# Build the metrics computation config object from the experiment's YAML file.
config_yaml_path = os.path.join(ROOT_DIR, 'notebooks', *_EXPERIMENT_SUBPATH, 'folk_GA_2018_config.yaml')
metrics_computation_config = create_config_obj(config_yaml_path=config_yaml_path)

## Define a db writer and custom fields to insert into your database

In [None]:
import os
from dotenv import load_dotenv

# Load the variables defined in ./configs/secrets.env into the environment,
# then show DB_NAME as the cell output as a quick check of what was loaded.
# NOTE(review): the displayed value is stored with the notebook output --
# clear the output before sharing if the database name is sensitive.
load_dotenv(dotenv_path='./configs/secrets.env')
os.environ.get("DB_NAME")

In [None]:
from source.utils.db_functions import connect_to_mongodb

# connect_to_mongodb is called with the collection name and yields three
# objects; db_writer_func is the one later passed to run_exp_iter_with_eq_odds.
# presumably `client` and `collection_obj` are the MongoDB client and
# collection handles -- TODO confirm against source.utils.db_functions.
mongo_handles = connect_to_mongodb(DB_COLLECTION_NAME)
client, collection_obj, db_writer_func = mongo_handles

In [None]:
import uuid

# Extra fields attached to the experiment run via custom_table_fields_dct.
# The session UUID is pinned to a fixed literal rather than generated;
# presumably so that re-runs are recorded under the same session -- TODO confirm.
# To start a new session, replace the literal with str(uuid.uuid4()).
custom_table_fields_dct = dict(session_uuid='3976c1bf-99fa-49f4-97ec-44b13cb64ef4')
print('Current session uuid: ', custom_table_fields_dct['session_uuid'])

## Initialize custom objects

In [None]:
# Build the base data loader once: Georgia, 2018, no nulls, and a fixed-seed
# subsample of 15,000. Each experiment iteration works on a deep copy of this
# object.
acs_income_loader_kwargs = dict(
    state=['GA'],
    year=2018,
    with_nulls=False,
    subsample_size=15_000,
    subsample_seed=42,
)
data_loader = ACSIncomeDataset(**acs_income_loader_kwargs)
data_loader.X_data.head()

In [None]:
# Display the shape of the loaded feature data as a sanity check on the
# subsample (presumably X_data is a pandas DataFrame -- TODO confirm).
data_loader.X_data.shape

## Run experiment iterations

### Experiment iteration 1

In [None]:
# Tuning-results CSV file(s) for experiment iteration 1, resolved to full paths
# inside this experiment's results directory.
tuned_params_filenames = ['tuning_results_Folktables_GA_2018_Income_20240104__100612.csv']
tuned_params_dir = os.path.join(ROOT_DIR, 'results', 'diff_fairness_interventions_exp',
                                'EqOddsPostprocessing', EXPERIMENT_NAME)
tuned_params_df_paths = [os.path.join(tuned_params_dir, filename)
                         for filename in tuned_params_filenames]

In [None]:
# Configuration for experiment iteration 1.
exp_iter_num = 1
# Iterations are numbered from 1, EXPERIMENT_SEEDS is indexed from 0.
experiment_seed = EXPERIMENT_SEEDS[exp_iter_num - 1]
# Tag the custom table fields with the iteration label.
custom_table_fields_dct.update(experiment_iteration=f'Exp_iter_{exp_iter_num}')

# Work on a deep copy so this iteration cannot modify the shared base loader
# (avoids data leakage between iterations).
exp_iter_data_loader = copy.deepcopy(data_loader)
# NOTE(review): the helper name says "employment" while the dataset here is
# ACSIncomeDataset -- confirm the same model configuration is intended.
models_params_for_tuning = get_folktables_employment_models_params_for_tuning(experiment_seed)

In [None]:
# Launch the run for the iteration configured in the preceding cell.
# with_tuning is False here, and tuned_params_df_paths is passed alongside it;
# presumably the tuned hyper-parameters are read from those files instead of
# being re-tuned -- TODO confirm against run_exp_iter_with_eq_odds.
# Set with_tuning=True to tune instead.
exp_iter_run_kwargs = dict(
    data_loader=exp_iter_data_loader,
    experiment_seed=experiment_seed,
    test_set_fraction=TEST_SET_FRACTION,
    db_writer_func=db_writer_func,
    fair_intervention_params_lst=FAIR_INTERVENTION_PARAMS_LST,
    models_params_for_tuning=models_params_for_tuning,
    metrics_computation_config=metrics_computation_config,
    custom_table_fields_dct=custom_table_fields_dct,
    with_tuning=False,
    tuned_params_df_paths=tuned_params_df_paths,
    save_results_dir_path=SAVE_RESULTS_DIR_PATH,
    dataset_name=DATASET_NAME,
    verbose=True,
)
run_exp_iter_with_eq_odds(**exp_iter_run_kwargs)

### Experiment iteration 2

In [None]:
# Configuration for experiment iteration 2.
exp_iter_num = 2
# Iterations are numbered from 1, EXPERIMENT_SEEDS is indexed from 0.
experiment_seed = EXPERIMENT_SEEDS[exp_iter_num - 1]

# Tuning-results CSV file(s), resolved inside this experiment's results directory.
tuned_params_filenames = ['tuning_results_Folktables_GA_2018_Income_20240104__100612.csv']
tuned_params_dir = os.path.join(ROOT_DIR, 'results', 'diff_fairness_interventions_exp',
                                'EqOddsPostprocessing', EXPERIMENT_NAME)
tuned_params_df_paths = [os.path.join(tuned_params_dir, filename)
                         for filename in tuned_params_filenames]
# Tag the custom table fields with the iteration label.
custom_table_fields_dct.update(experiment_iteration=f'Exp_iter_{exp_iter_num}')

# Work on a deep copy so this iteration cannot modify the shared base loader
# (avoids data leakage between iterations).
exp_iter_data_loader = copy.deepcopy(data_loader)
# This iteration is restricted to the LGBMClassifier entry of the full model set.
all_models_params = get_folktables_employment_models_params_for_tuning(experiment_seed)
models_params_for_tuning = {model_name: all_models_params[model_name]
                            for model_name in ('LGBMClassifier',)}

In [None]:
# Launch the run for the iteration configured in the preceding cell.
# with_tuning is False here, and tuned_params_df_paths is passed alongside it;
# presumably the tuned hyper-parameters are read from those files instead of
# being re-tuned -- TODO confirm against run_exp_iter_with_eq_odds.
# Set with_tuning=True to tune instead.
exp_iter_run_kwargs = dict(
    data_loader=exp_iter_data_loader,
    experiment_seed=experiment_seed,
    test_set_fraction=TEST_SET_FRACTION,
    db_writer_func=db_writer_func,
    fair_intervention_params_lst=FAIR_INTERVENTION_PARAMS_LST,
    models_params_for_tuning=models_params_for_tuning,
    metrics_computation_config=metrics_computation_config,
    custom_table_fields_dct=custom_table_fields_dct,
    with_tuning=False,
    tuned_params_df_paths=tuned_params_df_paths,
    save_results_dir_path=SAVE_RESULTS_DIR_PATH,
    dataset_name=DATASET_NAME,
    verbose=True,
)
run_exp_iter_with_eq_odds(**exp_iter_run_kwargs)

### Experiment iteration 3

In [None]:
# Configuration for experiment iteration 3.
exp_iter_num = 3
# Iterations are numbered from 1, EXPERIMENT_SEEDS is indexed from 0.
experiment_seed = EXPERIMENT_SEEDS[exp_iter_num - 1]

# Tuning-results CSV file(s), resolved inside this experiment's results directory.
tuned_params_filenames = ['tuning_results_Folktables_GA_2018_Income_20240104__100612.csv']
tuned_params_dir = os.path.join(ROOT_DIR, 'results', 'diff_fairness_interventions_exp',
                                'EqOddsPostprocessing', EXPERIMENT_NAME)
tuned_params_df_paths = [os.path.join(tuned_params_dir, filename)
                         for filename in tuned_params_filenames]
# Tag the custom table fields with the iteration label.
custom_table_fields_dct.update(experiment_iteration=f'Exp_iter_{exp_iter_num}')

# Work on a deep copy so this iteration cannot modify the shared base loader
# (avoids data leakage between iterations).
exp_iter_data_loader = copy.deepcopy(data_loader)
# This iteration is restricted to the LGBMClassifier entry of the full model set.
all_models_params = get_folktables_employment_models_params_for_tuning(experiment_seed)
models_params_for_tuning = {model_name: all_models_params[model_name]
                            for model_name in ('LGBMClassifier',)}

In [None]:
# Launch the run for the iteration configured in the preceding cell.
# with_tuning is False here, and tuned_params_df_paths is passed alongside it;
# presumably the tuned hyper-parameters are read from those files instead of
# being re-tuned -- TODO confirm against run_exp_iter_with_eq_odds.
# Set with_tuning=True to tune instead.
exp_iter_run_kwargs = dict(
    data_loader=exp_iter_data_loader,
    experiment_seed=experiment_seed,
    test_set_fraction=TEST_SET_FRACTION,
    db_writer_func=db_writer_func,
    fair_intervention_params_lst=FAIR_INTERVENTION_PARAMS_LST,
    models_params_for_tuning=models_params_for_tuning,
    metrics_computation_config=metrics_computation_config,
    custom_table_fields_dct=custom_table_fields_dct,
    with_tuning=False,
    tuned_params_df_paths=tuned_params_df_paths,
    save_results_dir_path=SAVE_RESULTS_DIR_PATH,
    dataset_name=DATASET_NAME,
    verbose=True,
)
run_exp_iter_with_eq_odds(**exp_iter_run_kwargs)

### Experiment iteration 4

In [None]:
# Configuration for experiment iteration 4.
exp_iter_num = 4
# Iterations are numbered from 1, EXPERIMENT_SEEDS is indexed from 0.
experiment_seed = EXPERIMENT_SEEDS[exp_iter_num - 1]

# NOTE(review): no tuning-results file is listed for this iteration, so
# tuned_params_df_paths ends up empty, while the run cell that follows passes
# with_tuning=False. Confirm run_exp_iter_with_eq_odds supports that
# combination, or list the tuning-results CSV here as other iterations do.
tuned_params_filenames = []
tuned_params_dir = os.path.join(ROOT_DIR, 'results', 'diff_fairness_interventions_exp',
                                'EqOddsPostprocessing', EXPERIMENT_NAME)
tuned_params_df_paths = [os.path.join(tuned_params_dir, filename)
                         for filename in tuned_params_filenames]
# Tag the custom table fields with the iteration label.
custom_table_fields_dct.update(experiment_iteration=f'Exp_iter_{exp_iter_num}')

# Work on a deep copy so this iteration cannot modify the shared base loader
# (avoids data leakage between iterations).
exp_iter_data_loader = copy.deepcopy(data_loader)
# The full model set is used for this iteration (no per-model filtering).
models_params_for_tuning = get_folktables_employment_models_params_for_tuning(experiment_seed)

In [None]:
# Launch the run for the iteration configured in the preceding cell.
# with_tuning is False here, and tuned_params_df_paths is passed alongside it;
# presumably the tuned hyper-parameters are read from those files instead of
# being re-tuned -- TODO confirm against run_exp_iter_with_eq_odds.
# Set with_tuning=True to tune instead.
exp_iter_run_kwargs = dict(
    data_loader=exp_iter_data_loader,
    experiment_seed=experiment_seed,
    test_set_fraction=TEST_SET_FRACTION,
    db_writer_func=db_writer_func,
    fair_intervention_params_lst=FAIR_INTERVENTION_PARAMS_LST,
    models_params_for_tuning=models_params_for_tuning,
    metrics_computation_config=metrics_computation_config,
    custom_table_fields_dct=custom_table_fields_dct,
    with_tuning=False,
    tuned_params_df_paths=tuned_params_df_paths,
    save_results_dir_path=SAVE_RESULTS_DIR_PATH,
    dataset_name=DATASET_NAME,
    verbose=True,
)
run_exp_iter_with_eq_odds(**exp_iter_run_kwargs)

### Experiment iteration 5

In [None]:
# Configuration for experiment iteration 5.
exp_iter_num = 5
# Iterations are numbered from 1, EXPERIMENT_SEEDS is indexed from 0.
experiment_seed = EXPERIMENT_SEEDS[exp_iter_num - 1]

# NOTE(review): no tuning-results file is listed for this iteration, so
# tuned_params_df_paths ends up empty, while the run cell that follows passes
# with_tuning=False. Confirm run_exp_iter_with_eq_odds supports that
# combination, or list the tuning-results CSV here as other iterations do.
tuned_params_filenames = []
tuned_params_dir = os.path.join(ROOT_DIR, 'results', 'diff_fairness_interventions_exp',
                                'EqOddsPostprocessing', EXPERIMENT_NAME)
tuned_params_df_paths = [os.path.join(tuned_params_dir, filename)
                         for filename in tuned_params_filenames]
# Tag the custom table fields with the iteration label.
custom_table_fields_dct.update(experiment_iteration=f'Exp_iter_{exp_iter_num}')

# Work on a deep copy so this iteration cannot modify the shared base loader
# (avoids data leakage between iterations).
exp_iter_data_loader = copy.deepcopy(data_loader)
# The full model set is used for this iteration (no per-model filtering).
models_params_for_tuning = get_folktables_employment_models_params_for_tuning(experiment_seed)

In [None]:
# Launch the run for the iteration configured in the preceding cell.
# with_tuning is False here, and tuned_params_df_paths is passed alongside it;
# presumably the tuned hyper-parameters are read from those files instead of
# being re-tuned -- TODO confirm against run_exp_iter_with_eq_odds.
# Set with_tuning=True to tune instead.
exp_iter_run_kwargs = dict(
    data_loader=exp_iter_data_loader,
    experiment_seed=experiment_seed,
    test_set_fraction=TEST_SET_FRACTION,
    db_writer_func=db_writer_func,
    fair_intervention_params_lst=FAIR_INTERVENTION_PARAMS_LST,
    models_params_for_tuning=models_params_for_tuning,
    metrics_computation_config=metrics_computation_config,
    custom_table_fields_dct=custom_table_fields_dct,
    with_tuning=False,
    tuned_params_df_paths=tuned_params_df_paths,
    save_results_dir_path=SAVE_RESULTS_DIR_PATH,
    dataset_name=DATASET_NAME,
    verbose=True,
)
run_exp_iter_with_eq_odds(**exp_iter_run_kwargs)

### Experiment iteration 6

In [None]:
# Configuration for experiment iteration 6.
exp_iter_num = 6
# Iterations are numbered from 1, EXPERIMENT_SEEDS is indexed from 0.
experiment_seed = EXPERIMENT_SEEDS[exp_iter_num - 1]

# Tuning-results CSV file(s), resolved inside this experiment's results directory.
tuned_params_filenames = ['tuning_results_Folktables_GA_2018_Income_20240104__100612.csv']
tuned_params_dir = os.path.join(ROOT_DIR, 'results', 'diff_fairness_interventions_exp',
                                'EqOddsPostprocessing', EXPERIMENT_NAME)
tuned_params_df_paths = [os.path.join(tuned_params_dir, filename)
                         for filename in tuned_params_filenames]
# Tag the custom table fields with the iteration label.
custom_table_fields_dct.update(experiment_iteration=f'Exp_iter_{exp_iter_num}')

# Work on a deep copy so this iteration cannot modify the shared base loader
# (avoids data leakage between iterations).
exp_iter_data_loader = copy.deepcopy(data_loader)
# This iteration is restricted to the LGBMClassifier entry of the full model set.
all_models_params = get_folktables_employment_models_params_for_tuning(experiment_seed)
models_params_for_tuning = {model_name: all_models_params[model_name]
                            for model_name in ('LGBMClassifier',)}

In [None]:
# Launch the run for the iteration configured in the preceding cell.
# with_tuning is False here, and tuned_params_df_paths is passed alongside it;
# presumably the tuned hyper-parameters are read from those files instead of
# being re-tuned -- TODO confirm against run_exp_iter_with_eq_odds.
# Set with_tuning=True to tune instead.
exp_iter_run_kwargs = dict(
    data_loader=exp_iter_data_loader,
    experiment_seed=experiment_seed,
    test_set_fraction=TEST_SET_FRACTION,
    db_writer_func=db_writer_func,
    fair_intervention_params_lst=FAIR_INTERVENTION_PARAMS_LST,
    models_params_for_tuning=models_params_for_tuning,
    metrics_computation_config=metrics_computation_config,
    custom_table_fields_dct=custom_table_fields_dct,
    with_tuning=False,
    tuned_params_df_paths=tuned_params_df_paths,
    save_results_dir_path=SAVE_RESULTS_DIR_PATH,
    dataset_name=DATASET_NAME,
    verbose=True,
)
run_exp_iter_with_eq_odds(**exp_iter_run_kwargs)