In [21]:
# !pip uninstall virny -y

In [22]:
# Install using an HTTPS link
# !pip install git+https://github.com/DataResponsibly/Virny.git@development

# Install using an SSH link
# !pip install git+ssh://git@github.com/DataResponsibly/Virny.git@development

In [3]:
# Render matplotlib figures inline, directly below the cell that produces them.
%matplotlib inline
# Load IPython's autoreload extension and switch it to mode 2, so that imported
# modules are re-imported before each cell runs and edits to project code are
# picked up without restarting the kernel.
%load_ext autoreload
%autoreload 2

The autoreload extension is already loaded. To reload it, use:
  %reload_ext autoreload


In [4]:
import os
import warnings
warnings.filterwarnings('ignore')
os.environ["PYTHONWARNINGS"] = "ignore"

In [None]:
# Make sure the kernel's working directory is the folder named
# "fairness-variance" (presumably the repository root, so that the
# project-local `configs` / `source` packages are importable -- confirm).
# os.path.basename is used instead of splitting on '/' so the check also works
# on platforms whose path separator is not a forward slash.
cur_folder_name = os.path.basename(os.getcwd())
if cur_folder_name != "fairness-variance":
    # Assumes the notebook lives two levels below that folder.
    os.chdir("../..")

print('Current location: ', os.getcwd())

## Import dependencies

In [36]:
import os
import copy

from virny.utils.custom_initializers import create_config_obj
from virny.datasets import ACSEmploymentDataset

from configs.constants import TEST_SET_FRACTION, EXPERIMENT_SEEDS
from configs.models_config_for_tuning import get_folktables_employment_models_params_for_tuning

from source.preprocessing import get_simple_preprocessor
from source.experiment_interface import run_exp_iter_with_preprocessing_intervention

## Define Input Variables

In [10]:
# Resolve the project root from the current working directory.
# If the kernel is already located in the "fairness-variance" folder (e.g. after
# the working-directory cell above has been run), that folder is the root;
# otherwise the notebook is assumed to sit two levels below it.
_cwd = os.getcwd()
if os.path.basename(_cwd) == "fairness-variance":
    ROOT_DIR = _cwd
else:
    ROOT_DIR = os.path.join(_cwd, "..", "..")

# Experiment identity; the collection name and the results folder derive from it.
EXPERIMENT_NAME = 'mult_repair_levels'
DB_COLLECTION_NAME = f'exp_{EXPERIMENT_NAME}'
SAVE_RESULTS_DIR_PATH = os.path.join(ROOT_DIR, 'results', EXPERIMENT_NAME)

# Intervention parameter grid: 0.0, 0.1, ..., 1.0 (11 values).
# Built with i / 10 rather than 0.1 * i so that the values are exactly the
# floats written as 0.3, 0.6, 0.7, ... instead of 0.30000000000000004 etc.
FAIR_INTERVENTION_PARAMS_LST = [i / 10 for i in range(11)]

# The YAML config for this experiment is kept next to the notebook, under
# <ROOT_DIR>/notebooks/<EXPERIMENT_NAME>/.
config_yaml_path = os.path.join(
    os.path.join(ROOT_DIR, 'notebooks', EXPERIMENT_NAME),
    'folk_ny_2018_config.yaml',
)
# Build the config object from that file; it is later passed to the experiment
# run as `metrics_computation_config`.
metrics_computation_config = create_config_obj(config_yaml_path=config_yaml_path)

## Define a db writer and custom fields to insert into your database

In [12]:
from source.utils.db_functions import connect_to_mongodb

# Connect to the collection for this experiment. The helper returns three
# values; presumably the client, the collection handle and a writer callback
# (verify against source.utils.db_functions).
(client,
 collection_obj,
 db_writer_func) = connect_to_mongodb(DB_COLLECTION_NAME)

In [13]:
import uuid

# Extra fields that accompany this session's results: a fresh random (version 4)
# session identifier plus a marker for the preprocessing techniques in use.
custom_table_fields_dct = dict(
    session_uuid=str(uuid.uuid4()),
    preprocessing_techniques='No',
)
# Echo the identifier so this session can be told apart from others later.
print('Current session uuid: ', custom_table_fields_dct['session_uuid'])

Current session uuid:  b2d237db-0a18-4f3e-8dba-5b36646bc540


## Initialize custom objects

In [14]:
# ACS Employment data for New York, 2018, without nulls, subsampled to
# 20,000 rows with a fixed subsample seed.
data_loader = ACSEmploymentDataset(
    state=['NY'],
    year=2018,
    with_nulls=False,
    subsample_size=20_000,
    subsample_seed=42,
)
# Preview the first rows of the feature frame.
data_loader.X_data.head()

Downloading data for 2018 1-Year person survey for NY...


Unnamed: 0,MAR,MIL,ESP,MIG,DREM,NATIVITY,DIS,DEAR,DEYE,SEX,RAC1P,RELP,CIT,ANC,SCHL,AGEP
0,4,4,0,1,2,2,2,2,2,2,2,0,5,4,20,37
1,5,4,0,1,2,1,2,2,2,2,2,17,1,1,14,36
2,5,4,0,1,2,1,2,2,2,2,1,0,1,2,22,44
3,1,4,0,1,2,2,2,2,2,2,1,1,4,1,19,68
4,5,4,0,1,2,2,2,2,2,2,8,0,4,1,19,34


In [22]:
# Sanity check: the row count should equal the requested subsample_size.
data_loader.X_data.shape

(20000, 16)

## Run experiment iterations

### Experiment iteration 1

In [42]:
# Settings for this experiment iteration.
exp_iter_num = 1
# Iterations are numbered from 1, while EXPERIMENT_SEEDS is indexed from 0.
experiment_seed = EXPERIMENT_SEEDS[exp_iter_num - 1]
# Tag everything written during this iteration with its iteration label.
custom_table_fields_dct['experiment_iteration'] = f'Exp_iter_{exp_iter_num}'

# Path to a CSV of previously tuned hyper-parameters, for runs that skip tuning.
# NOTE(review): the filename mentions "Income_GA" while this notebook loads
# ACSEmployment for NY -- confirm it is the intended file before using it.
tuned_params_filename = 'tuning_results_Folktables_Income_GA_2018_exp_iter_1_20230425__235953.csv'
tuned_params_df_path = os.path.join(
    ROOT_DIR, 'results', EXPERIMENT_NAME, tuned_params_filename,
)

# Work on a deep copy so that whatever this iteration does to the loader
# cannot leak into `data_loader`.
exp_iter_data_loader = copy.deepcopy(data_loader)
models_params_for_tuning = get_folktables_employment_models_params_for_tuning(experiment_seed)
preprocessor = get_simple_preprocessor(exp_iter_data_loader)

In [None]:
# Run experiment iteration 1 over the whole intervention parameter grid.
run_exp_iter_with_preprocessing_intervention(
    # Data, seed and train/test split
    data_loader=exp_iter_data_loader,
    experiment_seed=experiment_seed,
    test_set_fraction=TEST_SET_FRACTION,
    # Preprocessing and intervention grid
    column_transformer=preprocessor,
    fair_intervention_params_lst=FAIR_INTERVENTION_PARAMS_LST,
    # Models: tuning is enabled for this run, so no tuned-params file is passed
    models_params_for_tuning=models_params_for_tuning,
    with_tuning=True,
    tuned_params_df_path=None,
    # Alternative: reuse previously tuned hyper-parameters instead
    #   with_tuning=False,
    #   tuned_params_df_path=tuned_params_df_path,
    # Metrics configuration and result destinations
    metrics_computation_config=metrics_computation_config,
    custom_table_fields_dct=custom_table_fields_dct,
    db_writer_func=db_writer_func,
    save_results_dir_path=SAVE_RESULTS_DIR_PATH,
    verbose=True,
)