In [None]:
#! pip install fairlearn
#! pip install lightgbm
#! pip install optuna

In [4]:
from sklearn.metrics import f1_score, confusion_matrix, make_scorer
from sklearn.model_selection import cross_val_score, train_test_split, cross_validate
from fairlearn.metrics import (
    count,
    selection_rate,
    equalized_odds_difference,
    false_positive_rate,
    false_negative_rate,
    demographic_parity_difference
)

from fairlearn.datasets import fetch_adult
from sklearn.pipeline import Pipeline
from sklearn.preprocessing import OneHotEncoder, StandardScaler
from sklearn.compose import ColumnTransformer
from sklearn.compose import make_column_selector as selector
from sklearn.impute import SimpleImputer
from sklearn.ensemble import RandomForestClassifier
from lightgbm import LGBMClassifier

import numpy as np
import optuna
import tqdm as notebook_tqdm

In [5]:
# Fetch the Adult dataset (as_frame=True) and pull out features, target and
# the sensitive attribute.
data = fetch_adult(as_frame=True)
X_raw = data.data
# Binary target: 1 where the label equals ">50K", 0 otherwise.
y = (data.target == ">50K") * 1
# The "sex" column is the sensitive attribute; it is split alongside X and y.
A = X_raw["sex"]

# 70/30 split, stratified on the target with a fixed seed. Each resulting
# piece gets a fresh 0..n-1 index so positional and label indexing agree.
X_train, X_test, y_train, y_test, A_train, A_test = [
    part.reset_index(drop=True)
    for part in train_test_split(
        X_raw, y, A, test_size=0.3, random_state=12345, stratify=y
    )
]

# Non-category columns: impute (SimpleImputer defaults), then standardise.
numeric_transformer = Pipeline(
    steps=[("impute", SimpleImputer()), ("scaler", StandardScaler())]
)

# Category-dtype columns: fill gaps with the most frequent value, then one-hot
# encode; categories unseen during fit are ignored at transform time.
categorical_transformer = Pipeline(
    steps=[
        ("impute", SimpleImputer(strategy="most_frequent")),
        ("ohe", OneHotEncoder(handle_unknown="ignore")),
    ]
)

# Route each column to a transformer based on its dtype.
preprocessor = ColumnTransformer(
    transformers=[
        ("num", numeric_transformer, selector(dtype_exclude="category")),
        ("cat", categorical_transformer, selector(dtype_include="category")),
    ]
)

# Full model: preprocessing followed by a LightGBM classifier. The step name
# "classifier" is what later code uses to reach the estimator.
pipeline = Pipeline(
    steps=[
        ("preprocessor", preprocessor),
        ("classifier", LGBMClassifier(n_jobs=-1)),
    ]
)

  warn(


In [6]:
def metric_scorer(clf, X, y):
    """Score a fitted classifier on predictive quality and on fairness.

    Uses the ``(estimator, X, y)`` callable signature so it can be passed as
    ``scoring=`` to scikit-learn's cross-validation helpers.

    Parameters
    ----------
    clf : object
        Fitted estimator exposing ``predict(X)``.
    X : mapping-like
        Features to predict on. Must support ``X['sex']``, which is used as
        the sensitive feature (presumably a pandas DataFrame -- confirm
        against the caller).
    y : array-like
        True labels aligned with ``X``.

    Returns
    -------
    dict
        ``'f1_score'``: F1 of the predictions against ``y``.
        ``'eod'``: absolute value of the equalized-odds difference between
        the groups defined by ``X['sex']``.
    """
    predicted = clf.predict(X)
    return {
        'f1_score': f1_score(y, predicted),
        'eod': np.abs(
            equalized_odds_difference(y, predicted, sensitive_features=X['sex'])
        ),
    }


In [None]:
def objective(trial):
    """Optuna objective: cross-validated fairness and F1 for one LightGBM config.

    Samples a set of LightGBM hyper-parameters from ``trial``, applies them to
    the ``"classifier"`` step of the module-level ``pipeline``, and evaluates
    the pipeline with 5-fold cross-validation on ``X_train`` / ``y_train``
    using ``metric_scorer``.

    Note: the classifier step of the shared ``pipeline`` object is updated in
    place, so after a call it holds this trial's parameters.

    Parameters
    ----------
    trial : optuna.trial.Trial
        Trial used to sample the hyper-parameters.

    Returns
    -------
    tuple
        ``(fair_metric, model_metric)``: the mean test-fold equalized-odds
        difference and the mean test-fold F1 score, in that order.
    """
    # Parameter names passed to `trial` are kept stable so existing studies
    # and result tables remain comparable.
    params = {
        'n_estimators': trial.suggest_int("lgbm_n_estimators", 20, 10000),
        'num_leaves': trial.suggest_int("lgbm_num_leaves", 10, 1000),
        'max_depth': trial.suggest_int("lgbm_max_depth", 2, 20),
        'min_child_samples': trial.suggest_int("lgbm_min_child_samples", 5, 300),
        'learning_rate': trial.suggest_float("lgbm_learning_rate", .02, .5),
        'boosting_type': trial.suggest_categorical(
            "lgbm_boosting_type", ['goss', 'gbdt']
        ),
    }
    pipeline['classifier'].set_params(**params)

    # Only the test-fold scores are consumed below, so train scores are not
    # requested: scoring the training folds would add a full extra prediction
    # pass per fold for no benefit.
    scores = cross_validate(
        pipeline,
        X_train,
        y_train,
        cv=5,
        scoring=metric_scorer,
    )

    fair_metric = scores['test_eod'].mean()
    model_metric = scores['test_f1_score'].mean()

    return fair_metric, model_metric

In [None]:
# Two-objective search. Directions follow the order of the values returned by
# `objective`: minimise the equalized-odds difference, maximise F1.
#
# No pruner is configured: `objective` never reports intermediate values, and
# Optuna does not support pruning for multi-objective studies, so a pruner
# here would have no effect.
#
# The sampler is the one Optuna uses by default for multi-objective studies,
# made explicit so it can be seeded and the search is reproducible on re-run.
study = optuna.create_study(
    directions=["minimize", "maximize"],
    sampler=optuna.samplers.NSGAIISampler(seed=12345),
)
study.optimize(objective, n_trials=50)

print("Number of finished trials: ", len(study.trials))

In [None]:
# Axis labels follow the order of the values returned by `objective`:
# first the equalized-odds difference (minimised), then the F1 score (maximised).
optuna.visualization.plot_pareto_front(
    study, target_names=["equalized odds difference", "F1 score"]
)