In [21]:
import warnings
warnings.filterwarnings("ignore")

import pandas as pd

from helper.helper_functions import load_dataset, save_model, get_features_and_target, encode_all_features, get_train_test_with_excluded_columns
from helper.clfmodel_functions import tune_model, forward_feat_selection_hypertuning
from helper.fairness_functions import print_male_female_metrics, get_male_female_data

from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score, classification_report
from sklearn.ensemble import RandomForestClassifier
from sklearn.ensemble import AdaBoostClassifier

### Loading the cleaned dataset

In [22]:
# Read the cleaned income workbook through the project's load_dataset helper.
DATASET_PATH = '../data/assignment2_income_cleaned.xlsx'
data = load_dataset(DATASET_PATH)

### Feature Engineering (encoding) & Train-Test Split

In [23]:
# Separate the predictors (X) from the 'income' target column (y).
X, y = get_features_and_target(data, 'income')

# Sex-specific views of the un-encoded predictors; they are passed to
# print_male_female_metrics further down.
# NOTE(review): the meaning of the `False` flag is defined in the helper — confirm there.
X_male, X_female = get_male_female_data(X, False)

# Encode predictors and target. The last argument is an empty list, so
# (presumably) no columns are excluded at this stage.
X_encoded, y_encoded = encode_all_features(X, y, [])

# Hold out 20% of the rows for testing; the fixed seed keeps the split repeatable.
X_train, X_test, y_train, y_test = train_test_split(
    X_encoded,
    y_encoded,
    test_size=0.2,
    random_state=42,
)

In [4]:
# Display the encoded training features (the notebook renders the cell's last expression).
X_train

Unnamed: 0,age,education,workinghours,ability to speak english,marital status_Divorced,marital status_Husband,marital status_Never married,marital status_Separated,marital status_Widowed,marital status_Wife,...,occupation_Service/Hospitality,occupation_Transport,gave birth this year_No,gave birth this year_Yes,sex_Female,sex_Male,workclass_governmental,workclass_no paid work,workclass_private,workclass_self employed
6317,22,16,36,0,0,1,0,0,0,0,...,0,0,1,0,0,1,0,0,1,0
740,61,22,40,1,1,0,0,0,0,0,...,0,0,1,0,0,1,0,0,0,1
3781,48,16,40,0,1,0,0,0,0,0,...,0,0,1,0,1,0,0,0,1,0
7850,62,18,65,0,0,1,0,0,0,0,...,0,0,1,0,0,1,0,0,1,0
2963,53,19,44,0,1,0,0,0,0,0,...,0,0,1,0,1,0,1,0,0,0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
5734,22,19,25,0,0,0,1,0,0,0,...,1,0,1,0,1,0,0,0,1,0
5191,24,16,28,0,0,0,1,0,0,0,...,0,0,1,0,1,0,0,0,1,0
5390,35,16,40,0,0,0,1,0,0,0,...,0,0,1,0,0,1,0,0,1,0
860,23,20,40,0,0,0,1,0,0,0,...,1,0,1,0,0,1,0,0,1,0


### Model

Here, we quickly train and evaluate a baseline Random Forest model with scikit-learn's default hyperparameters (only `random_state` is fixed) for demonstration.

In [5]:
# Baseline Random Forest: default hyperparameters, with a fixed seed so the
# fitted forest is repeatable between runs.
rf_model = RandomForestClassifier(random_state=42)
rf_model.fit(X_train, y_train)

# Score the baseline on the held-out test split.
rf_preds = rf_model.predict(X_test)
rf_accuracy = accuracy_score(y_test, rf_preds)

# Per-class precision/recall/F1, followed by overall accuracy.
print(classification_report(y_test, rf_preds))
# The estimator is a Random Forest, so the metric is labelled accordingly.
print("Random Forest Accuracy:", rf_accuracy)

              precision    recall  f1-score   support

           0       0.81      0.83      0.82      1175
           1       0.66      0.64      0.65       625

    accuracy                           0.76      1800
   macro avg       0.74      0.73      0.73      1800
weighted avg       0.76      0.76      0.76      1800

Decision Tree Accuracy: 0.7622222222222222


### Feature Importance using the model itself

In [6]:
# Pull the importances from the fitted forest and pair each one with its
# training column name.
feature_importances = rf_model.feature_importances_

# Build the table and order it from most to least important in one chain;
# the original positional index is kept so rows can be traced back to columns.
feature_importance_df = (
    pd.DataFrame(
        {
            'Feature': X_train.columns,
            'Importance': feature_importances,
        }
    )
    .sort_values(by='Importance', ascending=False)
)

In [7]:
# Display the importance table, already sorted in descending order by the previous cell.
feature_importance_df

Unnamed: 0,Feature,Importance
0,age,0.30889
1,education,0.173548
2,workinghours,0.153756
5,marital status_Husband,0.050358
6,marital status_Never married,0.033286
18,occupation_Management/Business,0.028194
25,"occupation_Science, Engineering, Technology",0.022273
26,occupation_Service/Hospitality,0.020277
34,workclass_private,0.014773
31,sex_Male,0.014666


### Hyperparameter tuning

Here, we define the base parameter grid for our hyperparameter tuning function.

In [8]:
# Search space handed to tune_model for the Random Forest.
# 3 * 3 * 3 * 3 * 2 * 3 * 1 = 486 parameter combinations.
# NOTE(review): scikit-learn documents 'entropy' and 'log_loss' as the same
# Shannon information-gain criterion, so one of the two is likely redundant —
# confirm before trimming the grid.
param_grid = dict(
    n_estimators=[100, 200, 500],
    max_depth=[10, 20, 30],
    min_samples_split=[2, 5, 10],
    min_samples_leaf=[1, 2, 4],
    bootstrap=[True, False],
    criterion=['gini', 'entropy', 'log_loss'],
    # Single value: pins the seed for every candidate.
    random_state=[42],
)

Below, we tune the hyperparameters of the Random Forest model using the defined parameter grid and using all features.

In [9]:
# Tune the Random Forest over `param_grid` using every encoded feature.
best_params, best_model, best_accuracy = tune_model(
    RandomForestClassifier(),
    X_encoded,
    X_train,
    y_train,
    X_test,
    y_test,
    param_grid,
)

# Report the selected configuration and the accuracy returned by tune_model.
for label, value in (
    ("\nBest Hyperparameters:", best_params),
    ("Best Model:", best_model),
    ("Best Model Accuracy:", best_accuracy),
):
    print(label, value)

# Per-class precision/recall/F1 on the test split.
rf_preds = best_model.predict(X_test)
print(classification_report(y_test, rf_preds))

# Print the male/female fairness metrics, then hand the model to save_model.
print_male_female_metrics(best_model, X, X_male, X_female, X_test, y_test)
save_model(best_model, '../output/saved_models/rf_model_sexincl_noffs_tuned_unfair.joblib')

Fitting 5 folds for each of 486 candidates, totalling 2430 fits

Best Hyperparameters: {'bootstrap': True, 'criterion': 'log_loss', 'max_depth': 20, 'min_samples_leaf': 1, 'min_samples_split': 10, 'n_estimators': 200, 'random_state': 42}
Best Model: RandomForestClassifier(criterion='log_loss', max_depth=20, min_samples_split=10,
                       n_estimators=200, random_state=42)
Best Model Accuracy: 0.7883333333333333
              precision    recall  f1-score   support

           0       0.83      0.85      0.84      1175
           1       0.71      0.67      0.69       625

    accuracy                           0.79      1800
   macro avg       0.77      0.76      0.76      1800
weighted avg       0.79      0.79      0.79      1800
Fairness Metrics:
Male FPR: 0.19040902679830748
Male TPR: 0.7334669338677354
Female FPR: 0.0815450643776824
Female TPR: 0.40476190476190477

Disparate Impact (DI): 0.362
Discrimination Score (DS): -0.264
Equal Opportunity Difference (EO): 0.329


Now the same but with fairness constraints.

In [10]:
# Same tuning run as the unconstrained one, but with the helper's
# `ensure_fairness` flag switched on.
best_params, best_model, best_accuracy = tune_model(
    RandomForestClassifier(),
    X_encoded,
    X_train,
    y_train,
    X_test,
    y_test,
    param_grid,
    ensure_fairness=True,
)

# Report the selected configuration and the accuracy returned by tune_model.
for label, value in (
    ("\nBest Hyperparameters:", best_params),
    ("Best Model:", best_model),
    ("Best Model Accuracy:", best_accuracy),
):
    print(label, value)

# Per-class precision/recall/F1 on the test split.
rf_preds = best_model.predict(X_test)
print(classification_report(y_test, rf_preds))

# Print the male/female fairness metrics, then hand the model to save_model.
print_male_female_metrics(best_model, X, X_male, X_female, X_test, y_test)
save_model(best_model, '../output/saved_models/rf_model_sexincl_noffs_tuned_fair.joblib')

Fitting 5 folds for each of 486 candidates, totalling 2430 fits

Best Hyperparameters: {'bootstrap': True, 'criterion': 'gini', 'max_depth': 30, 'min_samples_leaf': 4, 'min_samples_split': 10, 'n_estimators': 200, 'random_state': 42}
Best Model: RandomForestClassifier(max_depth=30, min_samples_leaf=4, min_samples_split=10,
                       n_estimators=200, random_state=42)
Best Model Accuracy: 0.7855555555555556
              precision    recall  f1-score   support

           0       0.82      0.86      0.84      1175
           1       0.71      0.64      0.67       625

    accuracy                           0.79      1800
   macro avg       0.77      0.75      0.76      1800
weighted avg       0.78      0.79      0.78      1800
Fairness Metrics:
Male FPR: 0.18053596614950634
Male TPR: 0.6933867735470942
Female FPR: 0.07081545064377683
Female TPR: 0.42857142857142855

Disparate Impact (DI): 0.375
Discrimination Score (DS): -0.245
Equal Opportunity Difference (EO): 0.265
Equal

Below, we tune the hyperparameters of the Random Forest model using the defined parameter grid and using a **subset of features**. We exclude the columns 'age', 'ability to speak english', and 'workclass'.

In [12]:
# Rebuild the train/test split without the listed columns.
# NOTE(review): this rebinds the notebook-level X_male / X_female that the
# all-features cells also read — confirm both versions are interchangeable.
columns_to_exclude = ['age', 'ability to speak english', 'workclass']
(
    X_train_,
    X_test_,
    y_train_,
    y_test_,
    X_male,
    X_female,
) = get_train_test_with_excluded_columns(data, columns_to_exclude)

# Tune on the reduced split.
# NOTE(review): X_encoded is still the matrix encoded earlier, not the reduced
# one — confirm that tune_model expects it.
best_params, best_model, best_accuracy = tune_model(
    RandomForestClassifier(),
    X_encoded,
    X_train_,
    y_train_,
    X_test_,
    y_test_,
    param_grid,
)

# Report the selected configuration and the accuracy returned by tune_model.
for label, value in (
    ("\nBest Hyperparameters:", best_params),
    ("Best Model:", best_model),
    ("Best Model Accuracy:", best_accuracy),
):
    print(label, value)

# Per-class precision/recall/F1 on the reduced test split.
rf_preds = best_model.predict(X_test_)
print(classification_report(y_test_, rf_preds))

# Print the male/female fairness metrics, then hand the model to save_model.
print_male_female_metrics(best_model, X, X_male, X_female, X_test_, y_test_)
save_model(best_model, '../output/saved_models/rf_model_sexincl_noffs_tuned_unfair_aawexcl.joblib')

Fitting 5 folds for each of 486 candidates, totalling 2430 fits

Best Hyperparameters: {'bootstrap': True, 'criterion': 'gini', 'max_depth': 20, 'min_samples_leaf': 4, 'min_samples_split': 10, 'n_estimators': 500, 'random_state': 42}
Best Model: RandomForestClassifier(max_depth=20, min_samples_leaf=4, min_samples_split=10,
                       n_estimators=500, random_state=42)
Best Model Accuracy: 0.7794444444444445
              precision    recall  f1-score   support

           0       0.81      0.86      0.84      1175
           1       0.71      0.62      0.66       625

    accuracy                           0.78      1800
   macro avg       0.76      0.74      0.75      1800
weighted avg       0.77      0.78      0.78      1800
Fairness Metrics:
Male FPR: 0.18758815232722145
Male TPR: 0.6773547094188377
Female FPR: 0.055793991416309016
Female TPR: 0.3888888888888889

Disparate Impact (DI): 0.325
Discrimination Score (DS): -0.263
Equal Opportunity Difference (EO): 0.288
Equal

Now the same but with fairness constraints.

In [13]:
# Rebuild the train/test split without the listed columns so this cell can
# run on its own.
# NOTE(review): this rebinds the notebook-level X_male / X_female that the
# all-features cells also read — confirm both versions are interchangeable.
columns_to_exclude = ['age', 'ability to speak english', 'workclass']
(
    X_train_,
    X_test_,
    y_train_,
    y_test_,
    X_male,
    X_female,
) = get_train_test_with_excluded_columns(data, columns_to_exclude)

# Tune on the reduced split with the helper's `ensure_fairness` flag switched on.
# NOTE(review): X_encoded is still the matrix encoded earlier, not the reduced
# one — confirm that tune_model expects it.
best_params, best_model, best_accuracy = tune_model(
    RandomForestClassifier(),
    X_encoded,
    X_train_,
    y_train_,
    X_test_,
    y_test_,
    param_grid,
    ensure_fairness=True,
)

# Report the selected configuration and the accuracy returned by tune_model.
for label, value in (
    ("\nBest Hyperparameters:", best_params),
    ("Best Model:", best_model),
    ("Best Model Accuracy:", best_accuracy),
):
    print(label, value)

# Per-class precision/recall/F1 on the reduced test split.
rf_preds = best_model.predict(X_test_)
print(classification_report(y_test_, rf_preds))

# Print the male/female fairness metrics, then hand the model to save_model.
print_male_female_metrics(best_model, X, X_male, X_female, X_test_, y_test_)
save_model(best_model, '../output/saved_models/rf_model_sexincl_noffs_tuned_fair_aawexcl.joblib')

Fitting 5 folds for each of 486 candidates, totalling 2430 fits

Best Hyperparameters: {'bootstrap': True, 'criterion': 'gini', 'max_depth': 20, 'min_samples_leaf': 4, 'min_samples_split': 10, 'n_estimators': 100, 'random_state': 42}
Best Model: RandomForestClassifier(max_depth=20, min_samples_leaf=4, min_samples_split=10,
                       random_state=42)
Best Model Accuracy: 0.7794444444444445
              precision    recall  f1-score   support

           0       0.81      0.87      0.84      1175
           1       0.71      0.62      0.66       625

    accuracy                           0.78      1800
   macro avg       0.76      0.74      0.75      1800
weighted avg       0.77      0.78      0.78      1800

Fairness Metrics:
Male FPR: 0.1847672778561354
Male TPR: 0.6733466933867736
Female FPR: 0.05793991416309013
Female TPR: 0.3968253968253968

Disparate Impact (DI): 0.336
Discrimination Score (DS): -0.257
Equal Opportunity Difference (EO): 0.277
Equalized Odds (EOdds): 

#### Forward Feature Selection with Hyperparameter Tuning

Because hyperparameter tuning results can vary depending on the chosen subset of features, we will use forward feature selection with hyperparameter tuning to find the best subset of features and hyperparameters for the model.

Everything here is commented out because it takes a long time to run, owing to the size of `param_grid`.

In [None]:
# # Forward feature selection with hyperparameter tuning
# best_subset, best_params, best_score = forward_feat_selection_hypertuning(RandomForestClassifier(), param_grid, X_encoded, X_train, y_train, X_test, y_test)
# 
# print("Best subset of features:", best_subset)
# print("Best hyperparameters:", best_params)
# print("Best model accuracy:", best_score)
# 
# # Use the best subset and best hyperparameters for final model
# final_model = RandomForestClassifier(**best_params)
# final_model.fit(X_train[best_subset], y_train)
# final_model_preds = final_model.predict(X_test[best_subset])
# final_model_accuracy = accuracy_score(y_test, final_model_preds)
# 
# print(classification_report(y_test, final_model_preds))
# 
# print_male_female_metrics(final_model, X, X_male, X_female, X_test[best_subset], y_test)
# save_model(final_model, '../output/saved_models/rf_model_sexincl_ffs_tuned_unfair.joblib')

Now the same but with fairness constraints.

In [ ]:
# # Forward feature selection with hyperparameter tuning
# best_subset, best_params, best_score = forward_feat_selection_hypertuning(RandomForestClassifier(), param_grid, X_encoded, X_train, y_train, X_test, y_test, ensure_fairness=True)
# 
# print("Best subset of features:", best_subset)
# print("Best hyperparameters:", best_params)
# print("Best model accuracy:", best_score)
# 
# # Use the best subset and best hyperparameters for final model
# final_model = RandomForestClassifier(**best_params)
# final_model.fit(X_train[best_subset], y_train)
# final_model_preds = final_model.predict(X_test[best_subset])
# final_model_accuracy = accuracy_score(y_test, final_model_preds)
# 
# print(classification_report(y_test, final_model_preds))
# 
# print_male_female_metrics(final_model, X, X_male, X_female, X_test[best_subset], y_test)
# save_model(final_model, '../output/saved_models/rf_model_sexincl_ffs_tuned_fair.joblib')

#### Removing sex-related columns

Below, we run forward feature selection with hyperparameter tuning, but exclude all sex-related columns (`sex`, `gave birth this year`, and `marital status`).

In [ ]:
# # Forward feature selection with hyperparameter tuning
# columns_to_exclude = ['sex', 'gave birth this year', 'marital status']
# X_train_, X_test_, y_train_, y_test_, X_male, X_female = get_train_test_with_excluded_columns(data, columns_to_exclude)
# 
# best_subset, best_params, best_score = forward_feat_selection_hypertuning(RandomForestClassifier(), param_grid, X_encoded, X_train_, y_train_, X_test_, y_test_)
# 
# print("Best subset of features:", best_subset)
# print("Best hyperparameters:", best_params)
# print("Best model accuracy:", best_score)
# 
# # Use the best subset and best hyperparameters for final model
# final_model_sexexcl_ffs = RandomForestClassifier(**best_params)
# final_model_sexexcl_ffs.fit(X_train_[best_subset], y_train_)
# final_model_preds = final_model_sexexcl_ffs.predict(X_test_[best_subset])
# final_model_accuracy = accuracy_score(y_test_, final_model_preds)
# 
# print(classification_report(y_test_, final_model_preds))
# 
# print_male_female_metrics(final_model_sexexcl_ffs, X, X_male, X_female, X_test_[best_subset], y_test_)
# save_model(final_model_sexexcl_ffs, '../output/saved_models/rf_model_sexexcl_ffs_tuned_unfair.joblib')

Now the same but with fairness constraints.

In [ ]:
# # Forward feature selection with hyperparameter tuning
# columns_to_exclude = ['sex', 'gave birth this year', 'marital status']
# X_train_, X_test_, y_train_, y_test_, X_male, X_female = get_train_test_with_excluded_columns(data, columns_to_exclude)
# 
# best_subset, best_params, best_score = forward_feat_selection_hypertuning(RandomForestClassifier(), param_grid, X_encoded, X_train_, y_train_, X_test_, y_test_, ensure_fairness=True)
# 
# print("Best subset of features:", best_subset)
# print("Best hyperparameters:", best_params)
# print("Best model accuracy:", best_score)
# 
# # Use the best subset and best hyperparameters for final model
# final_model_sexexcl_ffs = RandomForestClassifier(**best_params)
# final_model_sexexcl_ffs.fit(X_train_[best_subset], y_train_)
# final_model_preds = final_model_sexexcl_ffs.predict(X_test_[best_subset])
# final_model_accuracy = accuracy_score(y_test_, final_model_preds)
# 
# print(classification_report(y_test_, final_model_preds))
# 
# print_male_female_metrics(final_model_sexexcl_ffs, X, X_male, X_female, X_test_[best_subset], y_test_)
# save_model(final_model_sexexcl_ffs, '../output/saved_models/rf_model_sexexcl_ffs_tuned_fair.joblib')

### AdaBoost Classifier

In [24]:
# Search space handed to tune_model for AdaBoost (2 * 4 * 3 = 24 combinations).
# This rebinds `param_grid`, replacing the Random Forest grid defined earlier;
# re-run that earlier cell before re-running any Random Forest tuning cell.
# NOTE(review): recent scikit-learn releases deprecate the 'SAMME.R' option —
# confirm against the installed version.
param_grid = dict(
    algorithm=['SAMME', 'SAMME.R'],
    n_estimators=[50, 75, 100, 150],
    learning_rate=[0.1, 0.5, 1.0],
    # Base learner for boosting: a Random Forest with default settings.
    estimator=[RandomForestClassifier()],
    # Single value: pins the seed for every candidate.
    random_state=[42],
)

Below, we tune the hyperparameters of the AdaBoost model using the defined parameter grid and using all features.

In [16]:
# Tune AdaBoost over the current `param_grid` using every encoded feature.
best_params, best_model, best_accuracy = tune_model(
    AdaBoostClassifier(),
    X_encoded,
    X_train,
    y_train,
    X_test,
    y_test,
    param_grid,
)

# Report the selected configuration and the accuracy returned by tune_model.
for label, value in (
    ("\nBest Hyperparameters:", best_params),
    ("Best Model:", best_model),
    ("Best Model Accuracy:", best_accuracy),
):
    print(label, value)

# Per-class precision/recall/F1 on the test split.
preds = best_model.predict(X_test)
print(classification_report(y_test, preds))

# Print the male/female fairness metrics, then hand the model to save_model.
print_male_female_metrics(best_model, X, X_male, X_female, X_test, y_test)
save_model(best_model, '../output/saved_models/rf_model_sexincl_noffs_tuned_unfair_ada.joblib')

Fitting 5 folds for each of 24 candidates, totalling 120 fits

Best Hyperparameters: {'algorithm': 'SAMME', 'estimator': RandomForestClassifier(), 'learning_rate': 1.0, 'n_estimators': 150, 'random_state': 42}
Best Model: AdaBoostClassifier(algorithm='SAMME', estimator=RandomForestClassifier(),
                   n_estimators=150, random_state=42)
Best Model Accuracy: 0.7644444444444445
              precision    recall  f1-score   support

           0       0.81      0.84      0.82      1175
           1       0.67      0.63      0.65       625

    accuracy                           0.76      1800
   macro avg       0.74      0.73      0.74      1800
weighted avg       0.76      0.76      0.76      1800
Fairness Metrics:
Male FPR: 0.2002820874471086
Male TPR: 0.6813627254509018
Female FPR: 0.10515021459227468
Female TPR: 0.4126984126984127

Disparate Impact (DI): 0.428
Discrimination Score (DS): -0.228
Equal Opportunity Difference (EO): 0.269
Equalized Odds (EOdds): 0.095


Now the same but with fairness constraints.

In [None]:
# Same AdaBoost tuning run as the unconstrained one, but with the helper's
# `ensure_fairness` flag switched on.
best_params, best_model, best_accuracy = tune_model(
    AdaBoostClassifier(),
    X_encoded,
    X_train,
    y_train,
    X_test,
    y_test,
    param_grid,
    ensure_fairness=True,
)

# Report the selected configuration and the accuracy returned by tune_model.
for label, value in (
    ("\nBest Hyperparameters:", best_params),
    ("Best Model:", best_model),
    ("Best Model Accuracy:", best_accuracy),
):
    print(label, value)

# Per-class precision/recall/F1 on the test split.
preds = best_model.predict(X_test)
print(classification_report(y_test, preds))

# Print the male/female fairness metrics, then hand the model to save_model.
print_male_female_metrics(best_model, X, X_male, X_female, X_test, y_test)
save_model(best_model, '../output/saved_models/rf_model_sexincl_noffs_tuned_fair_ada.joblib')

Fitting 5 folds for each of 24 candidates, totalling 120 fits


Below, we tune the hyperparameters of the AdaBoost model using the defined parameter grid and using a **subset of features**. We exclude the sex-related columns.

In [18]:
# Rebuild the train/test split without the sex-related columns.
# NOTE(review): this rebinds the notebook-level X_male / X_female that the
# all-features cells also read — confirm both versions are interchangeable.
columns_to_exclude = ['sex', 'gave birth this year', 'marital status']
(
    X_train_,
    X_test_,
    y_train_,
    y_test_,
    X_male,
    X_female,
) = get_train_test_with_excluded_columns(data, columns_to_exclude)

# Tune AdaBoost on the reduced split.
# NOTE(review): X_encoded is still the matrix encoded earlier, not the reduced
# one — confirm that tune_model expects it.
best_params, best_model, best_accuracy = tune_model(
    AdaBoostClassifier(),
    X_encoded,
    X_train_,
    y_train_,
    X_test_,
    y_test_,
    param_grid,
)

# Report the selected configuration and the accuracy returned by tune_model.
for label, value in (
    ("\nBest Hyperparameters:", best_params),
    ("Best Model:", best_model),
    ("Best Model Accuracy:", best_accuracy),
):
    print(label, value)

# Per-class precision/recall/F1 on the reduced test split.
preds = best_model.predict(X_test_)
print(classification_report(y_test_, preds))

# Print the male/female fairness metrics, then hand the model to save_model.
print_male_female_metrics(best_model, X, X_male, X_female, X_test_, y_test_)
save_model(best_model, '../output/saved_models/rf_model_sexexcl_noffs_tuned_unfair_ada.joblib')

Fitting 5 folds for each of 24 candidates, totalling 120 fits

Best Hyperparameters: {'algorithm': 'SAMME.R', 'estimator': RandomForestClassifier(), 'learning_rate': 0.1, 'n_estimators': 50, 'random_state': 42}
Best Model: AdaBoostClassifier(estimator=RandomForestClassifier(), learning_rate=0.1,
                   random_state=42)
Best Model Accuracy: 0.7288888888888889
              precision    recall  f1-score   support

           0       0.78      0.82      0.80      1175
           1       0.62      0.55      0.59       625

    accuracy                           0.73      1800
   macro avg       0.70      0.69      0.69      1800
weighted avg       0.72      0.73      0.72      1800
Fairness Metrics:
Male FPR: 0.1777150916784203
Male TPR: 0.5651302605210421
Female FPR: 0.1759656652360515
Female TPR: 0.5

Disparate Impact (DI): 0.725
Discrimination Score (DS): -0.093
Equal Opportunity Difference (EO): 0.065
Equalized Odds (EOdds): 0.002


Now the same but with fairness constraints.

In [None]:
# Rebuild the train/test split without the sex-related columns so this cell
# can run on its own.
# NOTE(review): this rebinds the notebook-level X_male / X_female that the
# all-features cells also read — confirm both versions are interchangeable.
columns_to_exclude = ['sex', 'gave birth this year', 'marital status']
(
    X_train_,
    X_test_,
    y_train_,
    y_test_,
    X_male,
    X_female,
) = get_train_test_with_excluded_columns(data, columns_to_exclude)

# Tune AdaBoost on the reduced split with the helper's `ensure_fairness` flag
# switched on.
# NOTE(review): X_encoded is still the matrix encoded earlier, not the reduced
# one — confirm that tune_model expects it.
best_params, best_model, best_accuracy = tune_model(
    AdaBoostClassifier(),
    X_encoded,
    X_train_,
    y_train_,
    X_test_,
    y_test_,
    param_grid,
    ensure_fairness=True,
)

# Report the selected configuration and the accuracy returned by tune_model.
for label, value in (
    ("\nBest Hyperparameters:", best_params),
    ("Best Model:", best_model),
    ("Best Model Accuracy:", best_accuracy),
):
    print(label, value)

# Per-class precision/recall/F1 on the reduced test split.
preds = best_model.predict(X_test_)
print(classification_report(y_test_, preds))

# Print the male/female fairness metrics, then hand the model to save_model.
print_male_female_metrics(best_model, X, X_male, X_female, X_test_, y_test_)
save_model(best_model, '../output/saved_models/rf_model_sexexcl_noffs_tuned_fair_ada.joblib')