In [5]:
%matplotlib inline
%load_ext autoreload
%autoreload 2

The autoreload extension is already loaded. To reload it, use:
  %reload_ext autoreload


In [6]:
import os
import warnings
# Silence every warning raised inside this kernel.
warnings.filterwarnings(action='ignore')
# Export the same setting through the environment
# (presumably so that child processes started later inherit it).
os.environ.update({"PYTHONWARNINGS": "ignore"})

In [7]:
# Make the project root the working directory so that the relative paths used
# below ('results/...', 'data/...') resolve regardless of where the kernel started.
# os.path.basename handles the platform's path separator, unlike splitting on '/'.
cur_folder_name = os.path.basename(os.getcwd())
if cur_folder_name != "fairness-variance":
    # The notebook is expected to live two levels below the project root.
    os.chdir(os.path.join(os.pardir, os.pardir))

print('Current location: ', os.getcwd())

Current location:  /home/denys_herasymuk/UCU/4course_2term/Bachelor_Thesis/Code/fairness-variance


## Import dependencies

In [8]:
import os
import pandas as pd
from IPython.display import display
from tqdm import tqdm

from configs import config
from configs.constants import ModelSetting
from utils.analyzers.subgroups_variance_analyzer import SubgroupsVarianceAnalyzer
from utils.common_helpers import create_tuned_base_model, save_metrics_to_file
from utils.custom_classes.data_loader import CompasWithoutSensitiveAttrsDataset
from utils.custom_classes.generic_pipeline import GenericPipeline
from utils.analyzers.subgroups_statistical_bias_analyzer import SubgroupsStatisticalBiasAnalyzer

## Configs

In [9]:
# Names used to label the run and to build result file names.
EXPERIMENT_NAME = "Hypothesis_Space"
DATASET_NAME = "COMPAS_Without_Sensitive_Attributes"

# Candidate seeds (not referenced by the cells shown in this part of the notebook).
seed_lst = [
    101, 111, 201, 221, 311, 333,
    410, 555, 666, 851, 999, 1010,
]

# Value passed as n_estimators to the variance analyzer.
N_ESTIMATORS = 20

# Sensitive attributes and their privileged values; the two lists are aligned by position.
SENSITIVE_ATTRIBUTES = ["sex", "race"]
SEX_priv = 0
RACE_priv = "Caucasian"
PRIV_VALUES = [SEX_priv, RACE_priv]

# CSV with the tuned hyper-parameters, relative to the working directory.
TUNED_PARAMS_FILE_PATH = os.path.join(
    "results",
    "models_tuning",
    "tuning_results_{}_20230117__012024.csv".format(DATASET_NAME),
)

## Models tuned hyper-parameters

In [10]:
# Read the tuned hyper-parameters table; run_experiment later passes this frame
# to create_tuned_base_model for every model.
models_tuned_params_df = pd.read_csv(filepath_or_buffer=TUNED_PARAMS_FILE_PATH)
# Bare last expression so the notebook renders the frame as a table.
models_tuned_params_df

Unnamed: 0,Dataset_Name,Model_Name,F1_Score,Accuracy_Score,Model_Best_Params
0,COMPAS_Without_Sensitive_Attributes,LogisticRegression,0.6785,0.6837,"{'C': 1, 'max_iter': 50, 'penalty': 'l2', 'sol..."
1,COMPAS_Without_Sensitive_Attributes,DecisionTreeClassifier,0.6952,0.6989,"{'criterion': 'gini', 'max_depth': 20, 'max_fe..."
2,COMPAS_Without_Sensitive_Attributes,RandomForestClassifier,0.6962,0.7008,"{'max_depth': 4, 'max_features': 0.6, 'min_sam..."
3,COMPAS_Without_Sensitive_Attributes,XGBClassifier,0.6971,0.7008,"{'lambda': 100, 'learning_rate': 0.1, 'max_dep..."
4,COMPAS_Without_Sensitive_Attributes,KNeighborsClassifier,0.7056,0.7112,"{'metric': 'minkowski', 'n_neighbors': 25, 'we..."
5,COMPAS_Without_Sensitive_Attributes,MLPClassifier,0.6834,0.6875,"{'activation': 'relu', 'hidden_layer_sizes': (..."


## Load dataset

In [11]:
# Build the dataset object from the COMPAS CSV (path is relative to the working directory).
dataset = CompasWithoutSensitiveAttrsDataset(dataset_path='data/COMPAS.csv')
# Preview the first five rows of the feature frame.
dataset.X_data.head(n=5)

Unnamed: 0,juv_fel_count,juv_misd_count,juv_other_count,priors_count,age_cat_25 - 45,age_cat_Greater than 45,age_cat_Less than 25,c_charge_degree_F,c_charge_degree_M
0,0.0,-2.340451,1.0,-15.010999,1,0,0,0,1
1,0.0,0.0,0.0,0.0,1,0,0,1,0
2,0.0,0.0,0.0,0.0,0,0,1,1,0
3,0.0,0.0,0.0,6.0,1,0,0,0,1
4,0.0,0.0,0.0,7.513697,1,0,0,1,0


## Run experiments

In [12]:
def create_base_pipeline(dataset, sensitive_attributes, priv_values, model_seed):
    """
    Build a GenericPipeline for the dataset and run its train/test split.

    The shape of every protected-group test subset is printed for inspection.

    :param dataset: dataset object passed both to GenericPipeline and to its split method
    :param sensitive_attributes: names of the sensitive attributes
    :param priv_values: privileged values for the sensitive attributes
    :param model_seed: seed forwarded to the split
    :return: the GenericPipeline instance after the split call
    """
    pipeline = GenericPipeline(dataset, sensitive_attributes, priv_values)
    # The return value is not needed here; callers read the split through the pipeline's attributes.
    pipeline.create_train_test_split_without_sensitive_attrs(dataset, config.TEST_SET_FRACTION, seed=model_seed)

    print('\nProtected groups splits:')
    for group_name, group_data in pipeline.test_groups.items():
        print(group_name, group_data.shape)

    return pipeline


def get_model_metrics(base_model, n_estimators, dataset, sensitive_attributes, priv_values, model_seed,
                      dataset_name, base_model_name, exp_num=1):
    """
    Compute variance and bias metrics of one model for all protected groups and save them to a file.

    :param base_model: model instance handed to SubgroupsVarianceAnalyzer
    :param n_estimators: value passed as n_estimators to SubgroupsVarianceAnalyzer
    :param dataset: dataset object used to build the pipeline
    :param sensitive_attributes: names of the sensitive attributes
    :param priv_values: privileged values for the sensitive attributes
    :param model_seed: seed for the train/test split
    :param dataset_name: dataset name; passed to the analyzer and used in the result file name
    :param base_model_name: model name; passed to the analyzer and used in the result file name
    :param exp_num: the number of experiment; used in the result file name
    :return: dataframe with the variance metrics followed by the bias metrics
    """
    base_pipeline = create_base_pipeline(dataset, sensitive_attributes, priv_values, model_seed)
    print('\n\nX train and validation set: ')
    display(base_pipeline.X_train_val.head(10))

    # Compute variance metrics for subgroups
    stability_fairness_analyzer = SubgroupsVarianceAnalyzer(ModelSetting.BATCH, n_estimators, base_model, base_model_name,
                                                            base_pipeline.X_train_val, base_pipeline.y_train_val,
                                                            base_pipeline.X_test, base_pipeline.y_test,
                                                            base_pipeline.sensitive_attributes, base_pipeline.priv_values, base_pipeline.test_groups,
                                                            base_pipeline.target, dataset_name)

    # The analyzer does not save anything itself; the combined metrics are saved once below.
    y_preds, variance_metrics_df = stability_fairness_analyzer.compute_metrics(save_results=False,
                                                                               result_filename=None,
                                                                               save_dir_path=None,
                                                                               make_plots=False)

    # Compute bias metrics for subgroups
    bias_analyzer = SubgroupsStatisticalBiasAnalyzer(base_pipeline.X_test, base_pipeline.y_test,
                                                     base_pipeline.sensitive_attributes, base_pipeline.priv_values,
                                                     base_pipeline.test_groups)
    bias_metrics = bias_analyzer.compute_subgroups_metrics(y_preds,
                                                           save_results=False,
                                                           result_filename=None,
                                                           save_dir_path=None)
    bias_metrics_df = pd.DataFrame(bias_metrics)

    # Save metrics
    metrics_df = pd.concat([variance_metrics_df, bias_metrics_df])
    result_filename = f'{EXPERIMENT_NAME}_Metrics_{dataset_name}_Experiment_{exp_num}_{base_model_name}'
    # The notebook changes the working directory to the project root at start-up, so the
    # results folder is addressed from the root, like the tuning results and the dataset path.
    save_dir_path = os.path.join('results', 'hypothesis_space')
    save_metrics_to_file(metrics_df, result_filename, save_dir_path)

    return metrics_df


In [13]:
def run_experiment(dataset, exp_num: int, model_seed: int):
    """
    Find variance and bias metrics for each model in config.MODELS_CONFIG.

    For every configured model a tuned base model is created from models_tuned_params_df and
    evaluated with get_model_metrics, which also saves the metrics to a file.
    A failure of one model is reported together with its traceback and does not stop the remaining models.

    :param dataset: dataset object forwarded to get_model_metrics
    :param exp_num: the number of experiment; is used to name the result file with metrics
    :param model_seed: base seed; it is incremented by one before each model, so the first model uses model_seed + 1
    """
    import traceback  # local import: only needed for error reporting

    for model_idx in tqdm(range(len(config.MODELS_CONFIG))):
        model_config = config.MODELS_CONFIG[model_idx]
        model_name = model_config['model_name']
        print('#' * 30, f' [Experiment {exp_num}] Analyze {model_name} ', '#' * 30)
        model_seed += 1
        try:
            base_model = create_tuned_base_model(model_config['model'],
                                                 model_name,
                                                 models_tuned_params_df)
            results_df = get_model_metrics(base_model, N_ESTIMATORS, dataset, SENSITIVE_ATTRIBUTES, PRIV_VALUES,
                                           model_seed=model_seed,
                                           dataset_name=DATASET_NAME,
                                           base_model_name=model_name,
                                           exp_num=exp_num)
            print(f'\n[Experiment {exp_num}] Metrics confusion matrix:')
            display(results_df)
        except Exception as err:
            # Best effort: keep going with the next model, but keep the traceback
            # so the failure can be diagnosed from the cell output.
            print(f'ERROR with {model_name}: ', err)
            traceback.print_exc()

        print('\n\n\n')

### Experiment 1

In [14]:
# Experiment 1: evaluate every model in config.MODELS_CONFIG on `dataset`.
# The dataset is passed explicitly; run_experiment increments the seed before
# each model, so the first model is evaluated with seed 101.
run_experiment(dataset, exp_num=1, model_seed=100)

  0%|          | 0/1 [00:00<?, ?it/s]

##############################  [Experiment 1] Analyze DecisionTreeClassifier  ##############################
Baseline X_train shape:  (4222, 9)
Baseline X_test shape:  (1056, 9)

Protected groups splits:
sex_race_priv (88, 11)
sex_race_dis (518, 11)
sex_priv (199, 11)
sex_dis (857, 11)
race_priv (427, 11)
race_dis (629, 11)


X train and validation set: 


Unnamed: 0,age_cat_25 - 45_0,c_charge_degree_M_1,juv_other_count,age_cat_Less than 25_0,c_charge_degree_F_0,c_charge_degree_F_1,age_cat_Less than 25_1,age_cat_25 - 45_1,juv_fel_count,juv_misd_count,priors_count,c_charge_degree_M_0,age_cat_Greater than 45_0,age_cat_Greater than 45_1
470,0,0,-0.148179,1,0,1,0,1,-0.098697,-0.12724,0.106154,1,1,0
1328,0,0,-0.148179,1,0,1,0,1,-0.098697,0.348324,2.409132,1,1,0
3551,0,1,-0.148179,1,1,0,0,1,-0.098697,-0.12724,0.620185,0,1,0
1628,0,1,-0.148179,1,1,0,0,1,-0.098697,-0.12724,-0.085761,0,1,0
4138,0,0,1.111816,1,0,1,0,1,-0.098697,-0.12724,-0.280851,1,1,0
5030,0,0,-0.148179,1,0,1,0,1,-0.098697,-0.12724,0.693041,1,1,0
4510,0,1,-0.148179,1,1,0,0,1,-0.098697,-0.12724,-0.085761,0,1,0
1676,1,0,-0.148179,0,0,1,1,0,-0.098697,-0.12724,-0.661506,1,1,0
5047,0,1,-0.148179,1,1,0,0,1,-0.098697,-0.12724,-0.469591,0,1,0
1554,1,1,-0.148179,1,1,0,0,0,-0.098697,-0.12724,-0.668317,0,0,1


2023-01-21 22:32:33 abstract_overall_variance_analyzer.py INFO    : Start testing of classifier 1 / 200
2023-01-21 22:32:33 abstract_overall_variance_analyzer.py INFO    : Classifier 1 / 200 was tested
2023-01-21 22:32:33 abstract_overall_variance_analyzer.py INFO    : Start testing of classifier 2 / 200
2023-01-21 22:32:33 abstract_overall_variance_analyzer.py INFO    : Classifier 2 / 200 was tested
2023-01-21 22:32:33 abstract_overall_variance_analyzer.py INFO    : Start testing of classifier 3 / 200
2023-01-21 22:32:33 abstract_overall_variance_analyzer.py INFO    : Classifier 3 / 200 was tested
2023-01-21 22:32:33 abstract_overall_variance_analyzer.py INFO    : Start testing of classifier 4 / 200
2023-01-21 22:32:33 abstract_overall_variance_analyzer.py INFO    : Classifier 4 / 200 was tested
2023-01-21 22:32:33 abstract_overall_variance_analyzer.py INFO    : Start testing of classifier 5 / 200
2023-01-21 22:32:33 abstract_overall_variance_analyzer.py INFO    : Classifier 5 / 200 w



##############################  Stability metrics  ##############################
General Ensemble Accuracy: 0.6809
Mean: 0.5252
Std: 0.0722
IQR: 0.0882
Entropy: 0.0
Jitter: 0.1238
Per sample accuracy: 0.6621
Label stability: 0.831



[Experiment 1] Metrics confusion matrix:


Unnamed: 0,overall,sex_race_priv,sex_race_dis,sex_priv,sex_dis,race_priv,race_dis
General_Ensemble_Accuracy,0.680871,0.647727,0.679537,0.688442,0.679113,0.672131,0.686804
Mean,0.5252,0.589072,0.468731,0.560918,0.516906,0.590219,0.481061
Std,0.072228,0.087174,0.074123,0.078152,0.070852,0.070249,0.073572
IQR,0.088245,0.11028,0.088685,0.096666,0.08629,0.08833,0.088188
Entropy,0.0,0.217598,0.210717,0.221136,0.0,0.0,0.213051
Jitter,0.123833,0.137687,0.130049,0.138903,0.120333,0.112123,0.131782
Per_Sample_Accuracy,0.662088,0.64392,0.654508,0.682789,0.657281,0.657892,0.664936
Label_Stability,0.830975,0.801023,0.827973,0.803568,0.83734,0.841218,0.824022
TPR,0.632444,0.44,0.717314,0.571429,0.642686,0.477987,0.707317
TNR,0.72232,0.730159,0.634043,0.751938,0.713636,0.787313,0.664452


100%|██████████| 1/1 [00:36<00:00, 36.15s/it]








