In [105]:
%matplotlib inline
%load_ext autoreload
%autoreload 2

The autoreload extension is already loaded. To reload it, use:
  %reload_ext autoreload


In [106]:
import os
import warnings
warnings.filterwarnings('ignore')
os.environ["PYTHONWARNINGS"] = "ignore"

## Import dependencies

In [107]:
import os
import pandas as pd
from IPython.display import display
from tqdm import tqdm

from configs import config
from configs.constants import ModelSetting
from utils.analyzers.stability_fairness_analyzer import StabilityFairnessAnalyzer
from utils.common_helpers import create_tuned_base_model, save_metrics_to_file
from utils.custom_classes.data_loader import ACSEmploymentDataset
from utils.custom_classes.generic_pipeline import GenericPipeline
from utils.analyzers.bias_analyzer import BiasAnalyzer

## Configs

In [108]:
# Dataset identity is read from the shared project config.
STATE = config.DATASET_CONFIG['state']
YEAR = config.DATASET_CONFIG['year']
DATASET_NAME = f"Folktables_{STATE}_{YEAR}"
EXPERIMENT_NAME = 'Hypothesis_Space'

# Protected attributes and, position by position, the value treated as privileged for each.
PROTECTED_GROUPS = ['SEX','RAC1P']
SEX_priv = '1'
RACE_priv = '1'
PRIV_VALUES = [SEX_priv, RACE_priv]

# Forwarded to StabilityFairnessAnalyzer as n_estimators (see get_model_metrics).
# NOTE(review): the saved log output further down reports "classifier k / 200", so it appears
# to come from an earlier run with N_ESTIMATORS = 200 -- re-run before relying on those numbers.
N_ESTIMATORS = 100

# NOTE(review): the file name is pinned to GA / 2018 while STATE and YEAR come from config --
# confirm they agree before running with another dataset configuration.
TUNED_PARAMS_FILE_PATH = os.path.join(
    '..', '..', 'results', 'models_tuning',
    'tuning_results_Folktables_GA_2018_20221215__105658.csv',
)

## Models tuned hyper-parameters

In [109]:
# Load the tuning results; this frame is later passed to create_tuned_base_model in run_experiment.
# NOTE(review): the rendered table shows an "Unnamed: 0" column, which suggests the CSV was
# written together with its index -- confirm whether that column is needed.
models_tuned_params_df = pd.read_csv(TUNED_PARAMS_FILE_PATH)
# Bare last expression so the notebook renders the frame.
models_tuned_params_df

Unnamed: 0.1,Unnamed: 0,Dataset_Name,Model_Name,F1_Score,Accuracy_Score,Model_Best_Params
0,0,Folktables_GA_2018,LogisticRegression,0.8117,0.8122,"{'max_iter': 50, 'penalty': 'l2', 'solver': 'l..."
1,1,Folktables_GA_2018,DecisionTreeClassifier,0.8228,0.823,"{'criterion': 'entropy', 'max_depth': 10, 'max..."
2,2,Folktables_GA_2018,RandomForestClassifier,0.8292,0.8295,"{'max_depth': 10, 'max_features': 0.6, 'min_sa..."
3,3,Folktables_GA_2018,XGBClassifier,0.8313,0.8318,"{'learning_rate': 0.1, 'max_depth': 5, 'n_esti..."
4,4,Folktables_GA_2018,KNeighborsClassifier,0.8063,0.8068,"{'metric': 'manhattan', 'n_neighbors': 15, 'we..."
5,5,Folktables_GA_2018,MLPClassifier_1L_100,,,{}
6,6,Folktables_GA_2018,MLPClassifier_3L_100_50_100,,,{}
7,7,Folktables_GA_2018,SVC,0.8247,0.825,"{'C': 1000, 'gamma': 0.001, 'kernel': 'rbf'}"


## Preprocess dataset

In [110]:
# Build the dataset object for the configured state and year, with ../../data as root_dir.
# NOTE(review): with_nulls=False presumably requests data without missing values -- confirm in the loader.
dataset = ACSEmploymentDataset(state=[STATE], year=YEAR, root_dir=os.path.join('..', '..', 'data'), with_nulls=False)
# Preview the first rows of dataset.X_data.
dataset.X_data.head()

Unnamed: 0,MAR,MIL,ESP,MIG,DREM,NATIVITY,DIS,DEAR,DEYE,SEX,RAC1P,RELP,CIT,ANC,SCHL,AGEP
0,5,4,0,3,2,1,2,2,2,1,2,16,1,1,13,51
1,3,4,0,1,2,1,1,2,1,2,1,16,1,4,16,56
2,5,4,0,1,1,1,1,2,2,2,2,17,1,4,20,23
3,1,4,0,1,2,1,2,2,2,1,2,16,1,1,17,43
4,5,4,0,1,2,1,2,2,2,2,1,16,1,1,19,20


## Run experiments

In [111]:
def create_base_pipeline(dataset, protected_groups, priv_values, model_seed):
    """
    Build a GenericPipeline for ``dataset`` and create its train/test split.

    The split is requested with ``config.TEST_SET_FRACTION`` and ``model_seed`` as the seed.
    As a side effect, the name and shape of every entry in the pipeline's
    ``test_groups`` are printed.

    :param dataset: dataset object handed to GenericPipeline and to the split method
    :param protected_groups: forwarded unchanged to GenericPipeline
    :param priv_values: forwarded unchanged to GenericPipeline
    :param model_seed: seed forwarded to ``create_preprocessed_train_test_split``
    :return: the pipeline object after the split has been created
    """
    pipeline = GenericPipeline(dataset, protected_groups, priv_values)
    # The return value of the split call is not used here; later code reads attributes of the pipeline.
    pipeline.create_preprocessed_train_test_split(dataset, config.TEST_SET_FRACTION, seed=model_seed)

    print('\nProtected groups splits:')
    for group_name, group_rows in pipeline.test_groups.items():
        print(group_name, group_rows.shape)

    return pipeline


def get_model_metrics(base_model, n_estimators, dataset, protected_groups, priv_values, model_seed,
                      dataset_name, base_model_name, exp_num=1):
    """
    Compute stability and subgroup bias metrics for one base model and save them to a file.

    Steps:
      1. build a pipeline with a train/test split seeded by ``model_seed``;
      2. run StabilityFairnessAnalyzer.compute_metrics (no saving, no plots) to obtain
         predictions and the variance metrics frame;
      3. run BiasAnalyzer.compute_subgroups_metrics on those predictions;
      4. stack both metric frames with ``pd.concat`` and write the result via
         ``save_metrics_to_file`` under ``../../results/hypothesis_space``.

    :param base_model: model object handed to StabilityFairnessAnalyzer
    :param n_estimators: forwarded to StabilityFairnessAnalyzer
    :param dataset: dataset object used to build the pipeline
    :param protected_groups: forwarded to the pipeline
    :param priv_values: forwarded to the pipeline
    :param model_seed: seed for the train/test split
    :param dataset_name: used by the analyzer and in the result file name
    :param base_model_name: used by the analyzer and in the result file name
    :param exp_num: experiment number embedded in the result file name
    :return: the concatenated metrics frame (variance metrics first, then bias metrics)
    """
    pipeline = create_base_pipeline(dataset, protected_groups, priv_values, model_seed)

    analyzer = StabilityFairnessAnalyzer(
        ModelSetting.BATCH, n_estimators, base_model, base_model_name,
        pipeline.X_train_val, pipeline.y_train_val,
        pipeline.X_test, pipeline.y_test,
        pipeline.protected_groups, pipeline.priv_values, pipeline.test_groups,
        pipeline.target, dataset_name,
    )
    # Nothing is persisted by the analyzer itself; the combined frame is saved once at the end.
    y_preds, variance_metrics_df = analyzer.compute_metrics(
        save_results=False,
        result_filename=None,
        save_dir_path=None,
        make_plots=False,
    )

    subgroup_metrics = BiasAnalyzer(
        pipeline.X_test, pipeline.y_test,
        pipeline.protected_groups, pipeline.priv_values,
        pipeline.test_groups,
    ).compute_subgroups_metrics(
        y_preds,
        save_results=False,
        result_filename=None,
        save_dir_path=None,
    )
    bias_metrics_df = pd.DataFrame(subgroup_metrics)

    metrics_df = pd.concat([variance_metrics_df, bias_metrics_df])
    save_metrics_to_file(
        metrics_df,
        f'{EXPERIMENT_NAME}_Metrics_{dataset_name}_Experiment_{exp_num}_{base_model_name}',
        os.path.join('..', '..', 'results', 'hypothesis_space'),
    )

    return metrics_df


In [112]:
def run_experiment(exp_num, model_seed):
    """
    Run one experiment: analyze every model listed in ``config.MODELS_CONFIG``.

    Each model gets its own seed: the first model uses ``model_seed + 1``, the second
    ``model_seed + 2`` and so on. A failure while building or analyzing a model is
    printed and the loop continues with the next model.

    :param exp_num: experiment number used in printed messages and in result file names
    :param model_seed: base seed; the seed actually used is offset per model as described above
    :return: None; metrics are displayed and saved by ``get_model_metrics``
    """
    banner = '#' * 30
    for model_idx in tqdm(range(len(config.MODELS_CONFIG))):
        model_config = config.MODELS_CONFIG[model_idx]
        model_name = model_config["model_name"]
        print(banner, f' [Experiment {exp_num}] Analyze {model_name} ', banner)

        # The seed advances for every model, including models that later fail.
        current_seed = model_seed + model_idx + 1
        try:
            tuned_model = create_tuned_base_model(model_config['model'],
                                                  model_name,
                                                  models_tuned_params_df)
            model_metrics_df = get_model_metrics(tuned_model, N_ESTIMATORS, dataset, PROTECTED_GROUPS, PRIV_VALUES,
                                                 model_seed=current_seed,
                                                 dataset_name=DATASET_NAME,
                                                 base_model_name=model_name,
                                                 exp_num=exp_num)
            print(f'\n[Experiment {exp_num}] Metrics confusion matrix:')
            display(model_metrics_df)
        except Exception as err:
            # Best effort: report the problem and keep going with the remaining models.
            print(f'ERROR with {model_name}: ', err)

        print('\n\n\n')


### Experiment 1

In [None]:
# TODO: add dataset as a parameter
# model_seed is the base seed; run_experiment increments it before each model.
run_experiment(exp_num=1, model_seed=100)

  0%|          | 0/7 [00:00<?, ?it/s]

##############################  [Experiment 1] Analyze LogisticRegression  ##############################
Baseline X_train shape:  (80684, 16)
Baseline X_test shape:  (20171, 16)


2022-12-16 04:39:21 abstract_stability_analyzer.py INFO    : Start testing of classifier 1 / 200



Protected groups splits:
SEX_RAC1P_priv (6609, 16)
SEX_RAC1P_dis (3662, 16)
SEX_priv (9901, 16)
SEX_dis (10270, 16)
RAC1P_priv (13217, 16)
RAC1P_dis (6954, 16)


2022-12-16 04:39:21 abstract_stability_analyzer.py INFO    : Classifier 1 / 200 was tested
2022-12-16 04:39:21 abstract_stability_analyzer.py INFO    : Start testing of classifier 2 / 200
2022-12-16 04:39:22 abstract_stability_analyzer.py INFO    : Classifier 2 / 200 was tested
2022-12-16 04:39:22 abstract_stability_analyzer.py INFO    : Start testing of classifier 3 / 200
2022-12-16 04:39:23 abstract_stability_analyzer.py INFO    : Classifier 3 / 200 was tested
2022-12-16 04:39:23 abstract_stability_analyzer.py INFO    : Start testing of classifier 4 / 200
2022-12-16 04:39:24 abstract_stability_analyzer.py INFO    : Classifier 4 / 200 was tested
2022-12-16 04:39:24 abstract_stability_analyzer.py INFO    : Start testing of classifier 5 / 200
2022-12-16 04:39:25 abstract_stability_analyzer.py INFO    : Classifier 5 / 200 was tested
2022-12-16 04:39:25 abstract_stability_analyzer.py INFO    : Start testing of classifier 6 / 200
2022-12-16 04:39:26 abstract_stability_analyzer.py INFO    :



##############################  Stability metrics  ##############################
General Ensemble Accuracy: 0.8069
Mean: 0.5529
Std: 0.0108
IQR: 0.0144
Entropy: 0.0
Jitter: 0.0142
Per sample accuracy: 0.8061
Label stability: 0.9801



[Experiment 1] Metrics confusion matrix:


Unnamed: 0,overall,SEX_RAC1P_priv,SEX_RAC1P_dis,SEX_priv,SEX_dis,RAC1P_priv,RAC1P_dis
General_Ensemble_Accuracy,0.8069,0.8348,0.7843,0.8349,0.7798,0.8061,0.8083
Mean,0.5529,0.4962,0.5931,0.5196,0.585,0.5383,0.5806
Std,0.0108,0.0103,0.0123,0.0107,0.0109,0.0102,0.0119
IQR,0.0144,0.0138,0.0164,0.0143,0.0145,0.0136,0.0159
Entropy,0.0,0.0,0.0,0.0,0.0,0.0,0.0
Jitter,0.0142,0.013,0.0144,0.0136,0.0147,0.014,0.0146
Per_Sample_Accuracy,0.8061,0.834,0.7836,0.8342,0.779,0.8052,0.8077
Label_Stability,0.9801,0.9818,0.98,0.9809,0.9793,0.9803,0.9796
TPR,0.831983,0.886848,0.778497,0.883288,0.773995,0.835698,0.824347
TNR,0.786559,0.78039,0.78848,0.789648,0.78394,0.780992,0.796504


 14%|█▍        | 1/7 [15:13<1:31:20, 913.41s/it]





##############################  [Experiment 1] Analyze DecisionTreeClassifier  ##############################
Baseline X_train shape:  (80684, 16)
Baseline X_test shape:  (20171, 16)


2022-12-16 04:54:34 abstract_stability_analyzer.py INFO    : Start testing of classifier 1 / 200



Protected groups splits:
SEX_RAC1P_priv (6582, 16)
SEX_RAC1P_dis (3543, 16)
SEX_priv (9817, 16)
SEX_dis (10354, 16)
RAC1P_priv (13393, 16)
RAC1P_dis (6778, 16)


2022-12-16 04:54:34 abstract_stability_analyzer.py INFO    : Classifier 1 / 200 was tested
2022-12-16 04:54:34 abstract_stability_analyzer.py INFO    : Start testing of classifier 2 / 200
2022-12-16 04:54:34 abstract_stability_analyzer.py INFO    : Classifier 2 / 200 was tested
2022-12-16 04:54:34 abstract_stability_analyzer.py INFO    : Start testing of classifier 3 / 200
2022-12-16 04:54:35 abstract_stability_analyzer.py INFO    : Classifier 3 / 200 was tested
2022-12-16 04:54:35 abstract_stability_analyzer.py INFO    : Start testing of classifier 4 / 200
2022-12-16 04:54:35 abstract_stability_analyzer.py INFO    : Classifier 4 / 200 was tested
2022-12-16 04:54:35 abstract_stability_analyzer.py INFO    : Start testing of classifier 5 / 200
2022-12-16 04:54:35 abstract_stability_analyzer.py INFO    : Classifier 5 / 200 was tested
2022-12-16 04:54:35 abstract_stability_analyzer.py INFO    : Start testing of classifier 6 / 200
2022-12-16 04:54:35 abstract_stability_analyzer.py INFO    :



##############################  Stability metrics  ##############################
General Ensemble Accuracy: 0.8254
Mean: 0.5537
Std: 0.0546
IQR: 0.0587
Entropy: 0.0784
Jitter: 0.0475
Per sample accuracy: 0.8195
Label stability: 0.9338



[Experiment 1] Metrics confusion matrix:


Unnamed: 0,overall,SEX_RAC1P_priv,SEX_RAC1P_dis,SEX_priv,SEX_dis,RAC1P_priv,RAC1P_dis
General_Ensemble_Accuracy,0.8254,0.8617,0.8112,0.8597,0.7928,0.8218,0.8324
Mean,0.5537,0.5078,0.5707,0.5293,0.5769,0.5446,0.5718
Std,0.0546,0.0537,0.0594,0.0542,0.0549,0.0532,0.0574
IQR,0.0587,0.0572,0.0644,0.0571,0.0601,0.0575,0.0609
Entropy,0.0784,0.067,0.1027,0.0657,0.0,0.0756,0.0838
Jitter,0.0475,0.0399,0.0637,0.0387,0.0558,0.0459,0.0506
Per_Sample_Accuracy,0.8195,0.8565,0.8017,0.8545,0.7862,0.8166,0.825
Label_Stability,0.9338,0.9463,0.9074,0.9484,0.9199,0.9362,0.9289
TPR,0.864128,0.869368,0.872921,0.87712,0.850164,0.854982,0.884037
TNR,0.794954,0.854046,0.765686,0.844415,0.752469,0.794283,0.796185


 29%|██▊       | 2/7 [28:38<1:10:47, 849.47s/it]





##############################  [Experiment 1] Analyze RandomForestClassifier  ##############################
Baseline X_train shape:  (80684, 16)
Baseline X_test shape:  (20171, 16)


2022-12-16 05:07:59 abstract_stability_analyzer.py INFO    : Start testing of classifier 1 / 200



Protected groups splits:
SEX_RAC1P_priv (6642, 16)
SEX_RAC1P_dis (3614, 16)
SEX_priv (9811, 16)
SEX_dis (10360, 16)
RAC1P_priv (13388, 16)
RAC1P_dis (6783, 16)


2022-12-16 05:08:10 abstract_stability_analyzer.py INFO    : Classifier 1 / 200 was tested
2022-12-16 05:08:10 abstract_stability_analyzer.py INFO    : Start testing of classifier 2 / 200
2022-12-16 05:08:21 abstract_stability_analyzer.py INFO    : Classifier 2 / 200 was tested
2022-12-16 05:08:21 abstract_stability_analyzer.py INFO    : Start testing of classifier 3 / 200
2022-12-16 05:08:32 abstract_stability_analyzer.py INFO    : Classifier 3 / 200 was tested
2022-12-16 05:08:32 abstract_stability_analyzer.py INFO    : Start testing of classifier 4 / 200
2022-12-16 05:08:44 abstract_stability_analyzer.py INFO    : Classifier 4 / 200 was tested
2022-12-16 05:08:44 abstract_stability_analyzer.py INFO    : Start testing of classifier 5 / 200
2022-12-16 05:09:00 abstract_stability_analyzer.py INFO    : Classifier 5 / 200 was tested
2022-12-16 05:09:00 abstract_stability_analyzer.py INFO    : Start testing of classifier 6 / 200
2022-12-16 05:09:13 abstract_stability_analyzer.py INFO    :

### Experiment 2

In [None]:
# Second run with a different base seed; run_experiment increments it before each model.
run_experiment(exp_num=2, model_seed=200)