In [2]:
import numpy as np
import pandas as pd
import xgboost as xgb
from sklearn.model_selection import KFold
from sklearn.metrics import accuracy_score
import optuna
import plotly.express as px
import random

# Load and prepare data
DATA_PATH = 'german_credit_data_cleaned.csv'
df = pd.read_csv(DATA_PATH)
y = df['Risk_good'].values
X_full = df.drop(columns=['Risk_good'])
sensitive = df['Sex_male'].values



  from .autonotebook import tqdm as notebook_tqdm


In [3]:
GLOBAL_SEED = 873
random.seed(GLOBAL_SEED)
np.random.seed(GLOBAL_SEED)

In [4]:
# Define the custom objective function factory
def make_custom_obj(sens_array, alpha=0.5):
    def custom_obj(preds, dmatrix):
        labels = dmatrix.get_label()
        preds_prob = 1.0 / (1.0 + np.exp(-preds))
        grad_log = preds_prob - labels
        hess_log = preds_prob * (1.0 - preds_prob)
        mask_p = (sens_array == 1)
        mask_u = (sens_array == 0)
        n_p, n_u = mask_p.sum(), mask_u.sum()
        mean_p = preds_prob[mask_p].mean() if n_p > 0 else 0
        mean_u = preds_prob[mask_u].mean() if n_u > 0 else 0
        diff = mean_p - mean_u
        sign = np.sign(diff)
        sigmoid_deriv = preds_prob * (1.0 - preds_prob)
        grad_fair = np.zeros_like(preds)
        grad_fair[mask_p] = sign * (1.0 / n_p) * sigmoid_deriv[mask_p] if n_p > 0 else 0
        grad_fair[mask_u] = -sign * (1.0 / n_u) * sigmoid_deriv[mask_u] if n_u > 0 else 0
        hess_fair = np.zeros_like(preds)
        grad = alpha * grad_log + (1 - alpha) * grad_fair
        hess = alpha * hess_log + (1 - alpha) * hess_fair
        return grad, hess
    return custom_obj

# Define the objective function for Optuna

def create_objective_function(alpha,seed=GLOBAL_SEED):
    def objective(trial):
        # Suggest hyperparameters
        # alpha = trial.suggest_float('alpha', 0.0, 1.0)
        max_depth = trial.suggest_int('max_depth', 3, 7)
        eta = trial.suggest_float('eta', 0.01, 0.3)
        subsample = trial.suggest_float('subsample', 0.6, 1.0)
        colsample_bytree = trial.suggest_float('colsample_bytree', 0.6, 1.0)

        # Initialize cross-validation
        kf = KFold(n_splits=3, shuffle=True, random_state=seed)
        acc_scores = []
        dpd_scores = []

        # Perform 3-fold cross-validation
        for train_idx, val_idx in kf.split(X_full):
            # Split data
            X_train = X_full.iloc[train_idx].drop(columns=['Sex_male']).values
            y_train = y[train_idx]
            sens_train = sensitive[train_idx]
            X_val = X_full.iloc[val_idx].drop(columns=['Sex_male']).values
            y_val = y[val_idx]
            sens_val = sensitive[val_idx]

            # Create custom objective
            train_obj = make_custom_obj(sens_train, alpha=alpha)

            # Set up DMatrix
            dtrain = xgb.DMatrix(X_train, label=y_train)
            dval = xgb.DMatrix(X_val, label=y_val)

            # Train the model
            params = {
                'max_depth': max_depth,
                'eta': eta,
                'subsample': subsample,
                'colsample_bytree': colsample_bytree,
                'verbosity': 0,
                'seed': seed + trial.number
            }
            bst = xgb.train(
                params,
                dtrain,
                num_boost_round=100,
                obj=train_obj,
                evals=[(dtrain, 'train'), (dval, 'val')],
                early_stopping_rounds=10,
                verbose_eval=False
            )

            # Predict on validation set
            pred_prob = bst.predict(dval)
            pred = (pred_prob > 0.5).astype(int)

            # Compute accuracy and DPD
            acc = accuracy_score(y_val, pred)
            dpd_val = abs(pred[sens_val == 1].mean() - pred[sens_val == 0].mean())
            acc_scores.append(acc)
            dpd_scores.append(dpd_val)

        # Compute mean metrics across folds
        mean_acc = np.mean(acc_scores)
        mean_dpd = np.mean(dpd_scores)

        # Store metrics for analysis
        trial.set_user_attr('accuracy', mean_acc)
        trial.set_user_attr('dpd', mean_dpd)
        trial.set_user_attr('params', {
            'alpha': alpha,
            'max_depth': max_depth,
            'eta': eta,
            'subsample': subsample,
            'colsample_bytree': colsample_bytree
        })

        return mean_acc, mean_dpd
    return objective


In [10]:
# Create and run Optuna study for multi-objective optimization

def test_loss(a,dpd,accuracy):
    return (1-a)*(1-dpd) + a*accuracy

def is_pareto_efficient(acc, dpd):
    n_points = len(acc)
    is_efficient = [True] * n_points
    for i in range(n_points):
        for j in range(n_points):
            if (acc[j] >= acc[i] and dpd[j] <= dpd[i]) and (acc[j] > acc[i] or dpd[j] < dpd[i]):
                is_efficient[i] = False
                break
    return is_efficient

def normalise(value, min_val, max_val):
    return (value - min_val) / (max_val - min_val)
#adaptive weights based on distance

results = []
count_alpha = 20
trials_per_alpha = 100
sampler = optuna.samplers.NSGAIISampler(seed=GLOBAL_SEED)
study = optuna.create_study(directions=['maximize', 'minimize'], sampler=sampler)
objective = create_objective_function(alpha=0, seed=GLOBAL_SEED)  
study.optimize(objective, n_trials=trials_per_alpha)
print(f"Completed study for alpha=0")
max_val = 0
for trial in study.best_trials:
    val = test_loss(0,trial.values[1],trial.values[0])
    if val > max_val:
        max_val = val
        best_trial = trial
results.append({
    'accuracy': best_trial.values[0],
    'dpd': best_trial.values[1],
    'alpha': 0,
    'params': best_trial.user_attrs['params']
    })
study = optuna.create_study(directions=['maximize', 'minimize'], sampler=sampler)
objective = create_objective_function(alpha=1, seed=GLOBAL_SEED)  
study.optimize(objective, n_trials=trials_per_alpha)
print(f"Completed study for alpha=1")
max_val = 0
for trial in study.best_trials:
    val = test_loss(1,trial.values[1],trial.values[0])
    if val > max_val:
        max_val = val
        best_trial = trial
results.append({
    'accuracy': best_trial.values[0],
    'dpd': best_trial.values[1],
    'alpha': 1,
    'params': best_trial.user_attrs['params']
})
print(results)

max_acc = max([res['accuracy'] for res in results])
max_dpd = max([res['dpd'] for res in results])
min_acc = min([res['accuracy'] for res in results])
min_dpd = min([res['dpd'] for res in results])




for j in range(count_alpha):
    print(j)
    
    results.sort(key=lambda x: x['alpha'])
    max_dist = 0
    print(f"max_acc: {max_acc}, max_dpd: {max_dpd}, min_acc: {min_acc}, min_dpd: {min_dpd}")
    for i in range(len(results)-1):
        acc_dist = (normalise(results[i]['accuracy'],min_acc,max_acc) - normalise(results[i+1]['accuracy'],min_acc,max_acc))**2
        dpd_dist = (normalise(results[i]['dpd'],min_dpd,max_dpd) - normalise(results[i+1]['dpd'],min_dpd,max_dpd))**2

        dist = acc_dist + dpd_dist
        if dist > max_dist:
            max_dist = dist
            print(f"max dist: {max_dist}, acc_dist: {acc_dist}, dpd_dist: {dpd_dist}")
            alpha = (results[i]['alpha']+results[i+1]['alpha'])/2
    study = optuna.create_study(directions=['maximize', 'minimize'], sampler=sampler)
    objective = create_objective_function(alpha=alpha, seed=GLOBAL_SEED)  
    study.optimize(objective, n_trials=trials_per_alpha, )
    # print(f"Completed study for alpha={alpha}")
    max_val = 0
    for trial in study.best_trials:
        val = test_loss(alpha,trial.values[1],trial.values[0])
        if val > max_val:
            max_val = val
            best_trial = trial
        
    results.append({
        'accuracy': best_trial.values[0],
        'dpd': best_trial.values[1],
        'alpha': alpha,
        'params': best_trial.user_attrs['params']
    })
    max_acc = max(max_acc, best_trial.values[0])
    max_dpd = max(max_dpd, best_trial.values[1])
    min_acc = min(min_acc, best_trial.values[0])
    min_dpd = min(min_dpd, best_trial.values[1])
acc = [res['accuracy'] for res in results]
dpd = [res['dpd'] for res in results]
is_efficient = is_pareto_efficient(acc, dpd)
# Filter results to only include Pareto-optimal solutions
results = [res for res, efficient in zip(results, is_efficient) if efficient]
    

# Print summary of Pareto-optimal results
print("Pareto-optimal solutions:")
for res in results:
    print(f"Params: {res['params']}, Accuracy: {res['accuracy']:.4f}, DPD: {res['dpd']:.4f}")

# Prepare data for plotting
plot_df = pd.DataFrame({
    'DPD': [res['dpd'] for res in results],
    'Accuracy': [res['accuracy'] for res in results],
    'alpha': [res['alpha'] for res in results]
})



[I 2025-05-16 19:43:45,179] A new study created in memory with name: no-name-b0d58415-a8cf-4a5d-8246-347a294d8385
[I 2025-05-16 19:43:45,423] Trial 0 finished with values: [0.5000091902473095, 0.0] and parameters: {'max_depth': 4, 'eta': 0.23427942936231477, 'subsample': 0.6945965931961444, 'colsample_bytree': 0.6527247741457783}.
[I 2025-05-16 19:43:45,576] Trial 1 finished with values: [0.5000091902473095, 0.0] and parameters: {'max_depth': 3, 'eta': 0.14502880540829954, 'subsample': 0.6831438346551075, 'colsample_bytree': 0.6650056059966459}.
[I 2025-05-16 19:43:45,700] Trial 2 finished with values: [0.5000091902473095, 0.0] and parameters: {'max_depth': 7, 'eta': 0.2189983990110477, 'subsample': 0.7281971459251811, 'colsample_bytree': 0.958988210314949}.
[I 2025-05-16 19:43:45,817] Trial 3 finished with values: [0.5000091902473095, 0.0] and parameters: {'max_depth': 3, 'eta': 0.0380102539744466, 'subsample': 0.7932399343106844, 'colsample_bytree': 0.7689704204658687}.
[I 2025-05-16

Completed study for alpha=0


[I 2025-05-16 19:44:01,817] Trial 1 finished with values: [0.7071481743573719, 0.06816872313093723] and parameters: {'max_depth': 5, 'eta': 0.26804446437070234, 'subsample': 0.7768440223325587, 'colsample_bytree': 0.9826416456778452}.
[I 2025-05-16 19:44:01,988] Trial 2 finished with values: [0.7192793008059848, 0.08476099675351052] and parameters: {'max_depth': 6, 'eta': 0.24795257897021458, 'subsample': 0.8804583954508851, 'colsample_bytree': 0.8762311758971264}.
[I 2025-05-16 19:44:02,155] Trial 3 finished with values: [0.7014303088229438, 0.06482682424145518] and parameters: {'max_depth': 5, 'eta': 0.27301840257577104, 'subsample': 0.9272235602328082, 'colsample_bytree': 0.7767344519858717}.
[I 2025-05-16 19:44:02,332] Trial 4 finished with values: [0.7257063470911335, 0.06518022546804354] and parameters: {'max_depth': 7, 'eta': 0.20836801881162217, 'subsample': 0.6775262468224649, 'colsample_bytree': 0.8672560411849988}.
[I 2025-05-16 19:44:02,481] Trial 5 finished with values: [0

Completed study for alpha=1
[{'accuracy': 0.5000091902473095, 'dpd': 0.0, 'alpha': 0, 'params': {'alpha': 0, 'max_depth': 4, 'eta': 0.23427942936231477, 'subsample': 0.6945965931961444, 'colsample_bytree': 0.6527247741457783}}, {'accuracy': 0.7385818835718202, 'dpd': 0.09117831450646376, 'alpha': 1, 'params': {'alpha': 1, 'max_depth': 7, 'eta': 0.24152465711376142, 'subsample': 0.7910317238008687, 'colsample_bytree': 0.9963228652716941}}]
0
max_acc: 0.7385818835718202, max_dpd: 0.09117831450646376, min_acc: 0.5000091902473095, min_dpd: 0.0
max dist: 2.0, acc_dist: 1.0, dpd_dist: 1.0


[I 2025-05-16 19:44:24,626] Trial 0 finished with values: [0.6942848915397647, 0.05097018711350069] and parameters: {'max_depth': 5, 'eta': 0.2325644450902913, 'subsample': 0.9584475342564492, 'colsample_bytree': 0.8941830785195612}.
[I 2025-05-16 19:44:24,904] Trial 1 finished with values: [0.7064343984829965, 0.0800585520683066] and parameters: {'max_depth': 6, 'eta': 0.24433363493605106, 'subsample': 0.7121431009479138, 'colsample_bytree': 0.8930527737865122}.
[I 2025-05-16 19:44:25,214] Trial 2 finished with values: [0.7064236765278021, 0.07365540081371169] and parameters: {'max_depth': 7, 'eta': 0.1420219505231513, 'subsample': 0.952352654957318, 'colsample_bytree': 0.9055682237318756}.
[I 2025-05-16 19:44:25,568] Trial 3 finished with values: [0.6692874188577748, 0.033315882913593796] and parameters: {'max_depth': 3, 'eta': 0.142339334176878, 'subsample': 0.8016583335849767, 'colsample_bytree': 0.651029598561798}.
[I 2025-05-16 19:44:25,797] Trial 4 finished with values: [0.66213

1
max_acc: 0.7385818835718202, max_dpd: 0.09117831450646376, min_acc: 0.5000091902473095, min_dpd: 0.0
max dist: 1.2968173511471432, acc_dist: 0.9351504396182893, dpd_dist: 0.3616669115288539


[I 2025-05-16 19:44:43,013] Trial 1 finished with values: [0.6592914932007486, 0.024270475022529863] and parameters: {'max_depth': 3, 'eta': 0.040199489733183276, 'subsample': 0.6051850728699393, 'colsample_bytree': 0.9241754095503961}.
[I 2025-05-16 19:44:43,173] Trial 2 finished with values: [0.6678721207721033, 0.06650741794190586] and parameters: {'max_depth': 5, 'eta': 0.11574601306423779, 'subsample': 0.786422464708761, 'colsample_bytree': 0.6892177677728888}.
[I 2025-05-16 19:44:43,306] Trial 3 finished with values: [0.6714348733124408, 0.04887055509652596] and parameters: {'max_depth': 6, 'eta': 0.20595693482012256, 'subsample': 0.7739120103554484, 'colsample_bytree': 0.758744731263732}.
[I 2025-05-16 19:44:43,420] Trial 4 finished with values: [0.6571501655776224, 0.05671827550122067] and parameters: {'max_depth': 3, 'eta': 0.13032846333617543, 'subsample': 0.8736242559719034, 'colsample_bytree': 0.6223203599338488}.
[I 2025-05-16 19:44:43,561] Trial 5 finished with values: [0

2
max_acc: 0.7385818835718202, max_dpd: 0.09117831450646376, min_acc: 0.5000091902473095, min_dpd: 0.0
max dist: 0.5679790844884961, acc_dist: 0.5249447007342952, dpd_dist: 0.04303438375420092


[I 2025-05-16 19:44:58,708] Trial 1 finished with values: [0.6735823277671069, 0.021840646992350554] and parameters: {'max_depth': 4, 'eta': 0.11706172269452549, 'subsample': 0.768518722753871, 'colsample_bytree': 0.659430620721757}.
[I 2025-05-16 19:44:58,863] Trial 2 finished with values: [0.6728670201848465, 0.03963709548096137] and parameters: {'max_depth': 5, 'eta': 0.21367377316108155, 'subsample': 0.9910279155360668, 'colsample_bytree': 0.9425934072993867}.
[I 2025-05-16 19:44:59,017] Trial 3 finished with values: [0.6614389476554148, 0.03594239964235075] and parameters: {'max_depth': 4, 'eta': 0.216225451118672, 'subsample': 0.9945000886176694, 'colsample_bytree': 0.805266612811347}.
[I 2025-05-16 19:44:59,151] Trial 4 finished with values: [0.6557256772446413, 0.05755840097730381] and parameters: {'max_depth': 3, 'eta': 0.2577445397187438, 'subsample': 0.8714386479242018, 'colsample_bytree': 0.6404226092488476}.
[I 2025-05-16 19:44:59,297] Trial 5 finished with values: [0.6764

3
max_acc: 0.7385818835718202, max_dpd: 0.09117831450646376, min_acc: 0.5000091902473095, min_dpd: 0.0
max dist: 0.5008404776321352, acc_dist: 0.4659949517262321, dpd_dist: 0.03484552590590307


[I 2025-05-16 19:45:21,914] Trial 0 finished with values: [0.6578715999914224, 0.03235523273662307] and parameters: {'max_depth': 5, 'eta': 0.010349791356126863, 'subsample': 0.6049503714581467, 'colsample_bytree': 0.6575686565893083}.
[I 2025-05-16 19:45:22,166] Trial 1 finished with values: [0.6707241608538351, 0.03197319672323209] and parameters: {'max_depth': 7, 'eta': 0.2830156035682387, 'subsample': 0.8942343740793945, 'colsample_bytree': 0.7655197303092837}.
[I 2025-05-16 19:45:22,454] Trial 2 finished with values: [0.6592976200322884, 0.038123466784988326] and parameters: {'max_depth': 4, 'eta': 0.1337702739903033, 'subsample': 0.898089706536626, 'colsample_bytree': 0.8794059738573755}.
[I 2025-05-16 19:45:22,706] Trial 3 finished with values: [0.6678613988169088, 0.024615535943817424] and parameters: {'max_depth': 3, 'eta': 0.11060296233427967, 'subsample': 0.9320280841856892, 'colsample_bytree': 0.7006165223791941}.
[I 2025-05-16 19:45:22,964] Trial 4 finished with values: [0

4
max_acc: 0.7385818835718202, max_dpd: 0.09117831450646376, min_acc: 0.5000091902473095, min_dpd: 0.0
max dist: 0.500969525320793, acc_dist: 0.47011502975908526, dpd_dist: 0.030854495561707673


[I 2025-05-16 19:45:39,069] Trial 1 finished with values: [0.6707195657301805, 0.02661388780052924] and parameters: {'max_depth': 6, 'eta': 0.14325449015587724, 'subsample': 0.8015152961785454, 'colsample_bytree': 0.8868664896303637}.
[I 2025-05-16 19:45:39,187] Trial 2 finished with values: [0.6614404793632996, 0.039160973616569235] and parameters: {'max_depth': 6, 'eta': 0.07344488043204586, 'subsample': 0.7919694864962906, 'colsample_bytree': 0.7207428665085304}.
[I 2025-05-16 19:45:39,295] Trial 3 finished with values: [0.6707195657301805, 0.02661388780052924] and parameters: {'max_depth': 7, 'eta': 0.17508913492872258, 'subsample': 0.7303776225535195, 'colsample_bytree': 0.9572364007534981}.
[I 2025-05-16 19:45:39,387] Trial 4 finished with values: [0.6700057898558051, 0.03298331455212159] and parameters: {'max_depth': 3, 'eta': 0.23764777468906625, 'subsample': 0.7694789127620378, 'colsample_bytree': 0.8218350394057735}.
[I 2025-05-16 19:45:39,494] Trial 5 finished with values: [

5
max_acc: 0.7385818835718202, max_dpd: 0.09117831450646376, min_acc: 0.5000091902473095, min_dpd: 0.0
max dist: 0.5137048783917899, acc_dist: 0.4659949517262321, dpd_dist: 0.04770992666555784


[I 2025-05-16 19:45:50,101] Trial 1 finished with values: [0.6156408818961319, 0.018745690965196255] and parameters: {'max_depth': 7, 'eta': 0.07683676210659128, 'subsample': 0.9042066844545453, 'colsample_bytree': 0.6076641144719312}.
[I 2025-05-16 19:45:50,183] Trial 2 finished with values: [0.5000091902473095, 0.0] and parameters: {'max_depth': 7, 'eta': 0.09846115378619846, 'subsample': 0.8176032213384978, 'colsample_bytree': 0.8378989637123635}.
[I 2025-05-16 19:45:50,269] Trial 3 finished with values: [0.6664231251129634, 0.025569452855030834] and parameters: {'max_depth': 6, 'eta': 0.22932211532549612, 'subsample': 0.9644510318844257, 'colsample_bytree': 0.7658888641021533}.
[I 2025-05-16 19:45:50,361] Trial 4 finished with values: [0.5000091902473095, 0.0] and parameters: {'max_depth': 4, 'eta': 0.21013295670161697, 'subsample': 0.8346069215059522, 'colsample_bytree': 0.6950624357566285}.
[I 2025-05-16 19:45:50,443] Trial 5 finished with values: [0.5000091902473095, 0.0] and pa

6
max_acc: 0.7385818835718202, max_dpd: 0.09117831450646376, min_acc: 0.5000091902473095, min_dpd: 0.0
max dist: 0.5137048783917899, acc_dist: 0.4659949517262321, dpd_dist: 0.04770992666555784


[I 2025-05-16 19:46:00,589] Trial 1 finished with values: [0.6707195657301805, 0.02661388780052924] and parameters: {'max_depth': 7, 'eta': 0.21849436295756347, 'subsample': 0.8666676315504229, 'colsample_bytree': 0.7993321544085574}.
[I 2025-05-16 19:46:00,751] Trial 2 finished with values: [0.6592991517401733, 0.041408991860887805] and parameters: {'max_depth': 4, 'eta': 0.08940802112686976, 'subsample': 0.7019764554168482, 'colsample_bytree': 0.794434666682162}.
[I 2025-05-16 19:46:00,878] Trial 3 finished with values: [0.6707195657301805, 0.02661388780052924] and parameters: {'max_depth': 5, 'eta': 0.2532493353400336, 'subsample': 0.7464012861414834, 'colsample_bytree': 0.7872817577596111}.
[I 2025-05-16 19:46:01,031] Trial 4 finished with values: [0.6707195657301805, 0.02661388780052924] and parameters: {'max_depth': 5, 'eta': 0.09062202348613405, 'subsample': 0.8706956252293125, 'colsample_bytree': 0.927650736466578}.
[I 2025-05-16 19:46:01,184] Trial 5 finished with values: [0.6

7
max_acc: 0.7385818835718202, max_dpd: 0.09117831450646376, min_acc: 0.5000091902473095, min_dpd: 0.0
max dist: 0.5209497565036451, acc_dist: 0.4741206085817544, dpd_dist: 0.0468291479218906


[I 2025-05-16 19:46:18,340] Trial 1 finished with values: [0.6707195657301805, 0.02661388780052924] and parameters: {'max_depth': 5, 'eta': 0.22272377308046565, 'subsample': 0.9291860436532793, 'colsample_bytree': 0.7935660480152006}.
[I 2025-05-16 19:46:18,716] Trial 2 finished with values: [0.6571578241170469, 0.03910618284881361] and parameters: {'max_depth': 5, 'eta': 0.18183782079601624, 'subsample': 0.8654335617122717, 'colsample_bytree': 0.6385716113581058}.
[I 2025-05-16 19:46:18,878] Trial 3 finished with values: [0.6707195657301805, 0.02661388780052924] and parameters: {'max_depth': 6, 'eta': 0.13067255833565697, 'subsample': 0.7982667724115198, 'colsample_bytree': 0.7030720342272428}.
[I 2025-05-16 19:46:19,066] Trial 4 finished with values: [0.5657072048475492, 0.022034764659799755] and parameters: {'max_depth': 7, 'eta': 0.023383919324996408, 'subsample': 0.9106554755669621, 'colsample_bytree': 0.6161397854335967}.
[I 2025-05-16 19:46:19,228] Trial 5 finished with values: 

8
max_acc: 0.7385818835718202, max_dpd: 0.09117831450646376, min_acc: 0.5000091902473095, min_dpd: 0.0
max dist: 0.5209497565036451, acc_dist: 0.4741206085817544, dpd_dist: 0.0468291479218906


[I 2025-05-16 19:46:34,113] Trial 1 finished with values: [0.6707195657301805, 0.02661388780052924] and parameters: {'max_depth': 7, 'eta': 0.08329734056473123, 'subsample': 0.9475647789312605, 'colsample_bytree': 0.7529293629089207}.
[I 2025-05-16 19:46:34,407] Trial 2 finished with values: [0.5000091902473095, 0.0] and parameters: {'max_depth': 6, 'eta': 0.01341722865651471, 'subsample': 0.6061950111223026, 'colsample_bytree': 0.7926316116601153}.
[I 2025-05-16 19:46:34,655] Trial 3 finished with values: [0.5649627948154752, 0.01727963838093281] and parameters: {'max_depth': 6, 'eta': 0.15172335915346274, 'subsample': 0.6161595940975954, 'colsample_bytree': 0.7094521373851717}.
[I 2025-05-16 19:46:34,850] Trial 4 finished with values: [0.6728608933533068, 0.02891669681260343] and parameters: {'max_depth': 6, 'eta': 0.15459313536496794, 'subsample': 0.7719175597535037, 'colsample_bytree': 0.6970112870836741}.
[I 2025-05-16 19:46:35,109] Trial 5 finished with values: [0.643563916638330

9
max_acc: 0.7385818835718202, max_dpd: 0.09117831450646376, min_acc: 0.5000091902473095, min_dpd: 0.0
max dist: 0.5209497565036451, acc_dist: 0.4741206085817544, dpd_dist: 0.0468291479218906


[I 2025-05-16 19:46:53,212] Trial 0 finished with values: [0.6707195657301805, 0.02661388780052924] and parameters: {'max_depth': 7, 'eta': 0.06375138075506995, 'subsample': 0.893926833623089, 'colsample_bytree': 0.896174704371426}.
[I 2025-05-16 19:46:53,378] Trial 1 finished with values: [0.6707195657301805, 0.02661388780052924] and parameters: {'max_depth': 5, 'eta': 0.18504083003002775, 'subsample': 0.8517916809559405, 'colsample_bytree': 0.9852380664842086}.
[I 2025-05-16 19:46:53,542] Trial 2 finished with values: [0.6471419862575168, 0.03952720838003293] and parameters: {'max_depth': 4, 'eta': 0.2976977649203955, 'subsample': 0.6642896137973189, 'colsample_bytree': 0.8384814537962242}.
[I 2025-05-16 19:46:53,703] Trial 3 finished with values: [0.6707195657301805, 0.02661388780052924] and parameters: {'max_depth': 7, 'eta': 0.06979110075178287, 'subsample': 0.8852980475795247, 'colsample_bytree': 0.6686020450085478}.
[I 2025-05-16 19:46:53,837] Trial 4 finished with values: [0.64

10
max_acc: 0.7385818835718202, max_dpd: 0.09117831450646376, min_acc: 0.5000091902473095, min_dpd: 0.0
max dist: 0.15583419503507784, acc_dist: 0.1401078397915561, dpd_dist: 0.015726355243521725
max dist: 0.21399564879954192, acc_dist: 0.05880677935479545, dpd_dist: 0.15518886944474647


[I 2025-05-16 19:47:09,880] Trial 0 finished with values: [0.6621496601140203, 0.07654668065236274] and parameters: {'max_depth': 6, 'eta': 0.059059251139890925, 'subsample': 0.7018801819836384, 'colsample_bytree': 0.608351922970119}.
[I 2025-05-16 19:47:10,136] Trial 1 finished with values: [0.6721532443104711, 0.029267967994317468] and parameters: {'max_depth': 4, 'eta': 0.08316537379390174, 'subsample': 0.8778107416694638, 'colsample_bytree': 0.8370003783192255}.
[I 2025-05-16 19:47:10,357] Trial 2 finished with values: [0.680006310636486, 0.05145875949287407] and parameters: {'max_depth': 5, 'eta': 0.22448179150976819, 'subsample': 0.7983990282933379, 'colsample_bytree': 0.7044148788949252}.
[I 2025-05-16 19:47:10,589] Trial 3 finished with values: [0.692854276375244, 0.03812251930733992] and parameters: {'max_depth': 6, 'eta': 0.27422357218678256, 'subsample': 0.7019482938163367, 'colsample_bytree': 0.9049348842930802}.
[I 2025-05-16 19:47:10,804] Trial 4 finished with values: [0.

11
max_acc: 0.7385818835718202, max_dpd: 0.09117831450646376, min_acc: 0.5000091902473095, min_dpd: 0.0
max dist: 0.15583419503507784, acc_dist: 0.1401078397915561, dpd_dist: 0.015726355243521725
max dist: 0.15997883853394967, acc_dist: 0.00108690434516979, dpd_dist: 0.1588919341887799


[I 2025-05-16 19:47:28,777] Trial 0 finished with values: [0.676431304433069, 0.04798499646374923] and parameters: {'max_depth': 4, 'eta': 0.020583954038507633, 'subsample': 0.8034721763238504, 'colsample_bytree': 0.8270527007480377}.
[I 2025-05-16 19:47:28,939] Trial 1 finished with values: [0.6864364203374046, 0.03330148282669363] and parameters: {'max_depth': 5, 'eta': 0.09686082880868643, 'subsample': 0.7327815320283826, 'colsample_bytree': 0.6786601320541945}.
[I 2025-05-16 19:47:29,129] Trial 2 finished with values: [0.6899961094619723, 0.040320492494511063] and parameters: {'max_depth': 5, 'eta': 0.04526984610328546, 'subsample': 0.9327411812083509, 'colsample_bytree': 0.8945690358810272}.
[I 2025-05-16 19:47:29,273] Trial 3 finished with values: [0.665007827027292, 0.025476572686512333] and parameters: {'max_depth': 3, 'eta': 0.08657914137235778, 'subsample': 0.759812805830487, 'colsample_bytree': 0.6828100741662607}.
[I 2025-05-16 19:47:29,425] Trial 4 finished with values: [0

12
max_acc: 0.7385818835718202, max_dpd: 0.09117831450646376, min_acc: 0.5000091902473095, min_dpd: 0.0
max dist: 0.15583419503507784, acc_dist: 0.1401078397915561, dpd_dist: 0.015726355243521725


[I 2025-05-16 19:47:44,717] Trial 1 finished with values: [0.6707195657301805, 0.02661388780052924] and parameters: {'max_depth': 4, 'eta': 0.02594327445125303, 'subsample': 0.6751540092134453, 'colsample_bytree': 0.9308644140720301}.
[I 2025-05-16 19:47:44,887] Trial 2 finished with values: [0.6571578241170469, 0.03910618284881361] and parameters: {'max_depth': 7, 'eta': 0.02180815611120479, 'subsample': 0.7809980183723055, 'colsample_bytree': 0.7286373336295384}.
[I 2025-05-16 19:47:44,994] Trial 3 finished with values: [0.6707195657301805, 0.02661388780052924] and parameters: {'max_depth': 6, 'eta': 0.14442732214892445, 'subsample': 0.776097113701689, 'colsample_bytree': 0.7999999716239393}.
[I 2025-05-16 19:47:45,138] Trial 4 finished with values: [0.642140960013234, 0.037769805860584434] and parameters: {'max_depth': 5, 'eta': 0.08229391662663246, 'subsample': 0.7810125111981521, 'colsample_bytree': 0.6693559069660306}.
[I 2025-05-16 19:47:45,247] Trial 5 finished with values: [0.

13
max_acc: 0.7385818835718202, max_dpd: 0.09117831450646376, min_acc: 0.5000091902473095, min_dpd: 0.0
max dist: 0.15583419503507784, acc_dist: 0.1401078397915561, dpd_dist: 0.015726355243521725


[I 2025-05-16 19:47:57,818] Trial 1 finished with values: [0.654988925751992, 0.02407424123842346] and parameters: {'max_depth': 3, 'eta': 0.21777481429121406, 'subsample': 0.6436881661275712, 'colsample_bytree': 0.6452105336500785}.
[I 2025-05-16 19:47:57,941] Trial 2 finished with values: [0.5956551574136193, 0.04694272855528387] and parameters: {'max_depth': 6, 'eta': 0.12635770939974889, 'subsample': 0.631656863168499, 'colsample_bytree': 0.7702297279427179}.
[I 2025-05-16 19:47:58,132] Trial 3 finished with values: [0.6707195657301805, 0.02661388780052924] and parameters: {'max_depth': 5, 'eta': 0.01640643886106074, 'subsample': 0.7703636102082423, 'colsample_bytree': 0.8024460995905257}.
[I 2025-05-16 19:47:58,271] Trial 4 finished with values: [0.6707195657301805, 0.02661388780052924] and parameters: {'max_depth': 7, 'eta': 0.1385964066212022, 'subsample': 0.9925055001938462, 'colsample_bytree': 0.7531892730696484}.
[I 2025-05-16 19:47:58,396] Trial 5 finished with values: [0.66

14
max_acc: 0.7385818835718202, max_dpd: 0.09117831450646376, min_acc: 0.5000091902473095, min_dpd: 0.0
max dist: 0.15583419503507784, acc_dist: 0.1401078397915561, dpd_dist: 0.015726355243521725


[I 2025-05-16 19:48:10,493] Trial 1 finished with values: [0.564262804312064, 0.022285755361437027] and parameters: {'max_depth': 6, 'eta': 0.10184044628368001, 'subsample': 0.6180486445703817, 'colsample_bytree': 0.924447801896132}.
[I 2025-05-16 19:48:10,648] Trial 2 finished with values: [0.6571578241170469, 0.03910618284881361] and parameters: {'max_depth': 6, 'eta': 0.11177759416915214, 'subsample': 0.7432102674908575, 'colsample_bytree': 0.6169728988922}.
[I 2025-05-16 19:48:10,892] Trial 3 finished with values: [0.6707195657301805, 0.02661388780052924] and parameters: {'max_depth': 4, 'eta': 0.018851244493403745, 'subsample': 0.9533535764777593, 'colsample_bytree': 0.8309923054261034}.
[I 2025-05-16 19:48:11,031] Trial 4 finished with values: [0.6685782381070542, 0.02245284529160844] and parameters: {'max_depth': 7, 'eta': 0.2619055415638769, 'subsample': 0.8496814008038392, 'colsample_bytree': 0.6916378592602571}.
[I 2025-05-16 19:48:11,164] Trial 5 finished with values: [0.661

15
max_acc: 0.7385818835718202, max_dpd: 0.09117831450646376, min_acc: 0.5000091902473095, min_dpd: 0.0
max dist: 0.03909385952148436, acc_dist: 0.0389915171275859, dpd_dist: 0.00010234239389846012
max dist: 0.0445663014220259, acc_dist: 0.03127490380070126, dpd_dist: 0.01329139762132464
max dist: 0.10703600601844833, acc_dist: 0.0987557943205566, dpd_dist: 0.008280211697891723
max dist: 0.11005232616190307, acc_dist: 0.0070349784683743965, dpd_dist: 0.10301734769352867


[I 2025-05-16 19:48:23,632] Trial 1 finished with values: [0.6978568343274117, 0.0575284970340881] and parameters: {'max_depth': 6, 'eta': 0.03894418946680987, 'subsample': 0.990885369171125, 'colsample_bytree': 0.9947119400493788}.
[I 2025-05-16 19:48:23,796] Trial 2 finished with values: [0.7307150318748411, 0.06019178741384353] and parameters: {'max_depth': 6, 'eta': 0.2792527038684015, 'subsample': 0.7491697533838187, 'colsample_bytree': 0.8918533419985328}.
[I 2025-05-16 19:48:23,965] Trial 3 finished with values: [0.7050053150263608, 0.056067464841921766] and parameters: {'max_depth': 6, 'eta': 0.06715268852735583, 'subsample': 0.9017712443307825, 'colsample_bytree': 0.9117483568413767}.
[I 2025-05-16 19:48:24,101] Trial 4 finished with values: [0.6564348579953619, 0.042018126746591854] and parameters: {'max_depth': 3, 'eta': 0.0905621734264842, 'subsample': 0.6263513811729876, 'colsample_bytree': 0.8493526142516448}.
[I 2025-05-16 19:48:24,260] Trial 5 finished with values: [0.7

16
max_acc: 0.7464349498978352, max_dpd: 0.09117831450646376, min_acc: 0.5000091902473095, min_dpd: 0.0
max dist: 0.03664830390295402, acc_dist: 0.036545961509055556, dpd_dist: 0.00010234239389846012
max dist: 0.04260473330815814, acc_dist: 0.029313335686833496, dpd_dist: 0.01329139762132464
max dist: 0.10084202318693146, acc_dist: 0.09256181148903973, dpd_dist: 0.008280211697891723


[I 2025-05-16 19:48:39,019] Trial 1 finished with values: [0.6707195657301805, 0.02661388780052924] and parameters: {'max_depth': 4, 'eta': 0.056095002864571114, 'subsample': 0.7453994879313074, 'colsample_bytree': 0.6292336562896789}.
[I 2025-05-16 19:48:39,149] Trial 2 finished with values: [0.6614404793632996, 0.039160973616569235] and parameters: {'max_depth': 7, 'eta': 0.09757577015086409, 'subsample': 0.7989811104402581, 'colsample_bytree': 0.8176410946981263}.
[I 2025-05-16 19:48:39,252] Trial 3 finished with values: [0.6721471174789313, 0.02683713996746338] and parameters: {'max_depth': 6, 'eta': 0.29222647683775005, 'subsample': 0.6374633865523541, 'colsample_bytree': 0.7125915854029851}.
[I 2025-05-16 19:48:39,465] Trial 4 finished with values: [0.6707195657301805, 0.02661388780052924] and parameters: {'max_depth': 6, 'eta': 0.010324114424597703, 'subsample': 0.6357877953294722, 'colsample_bytree': 0.9352767470531878}.
[I 2025-05-16 19:48:39,567] Trial 5 finished with values:

17
max_acc: 0.7464349498978352, max_dpd: 0.09117831450646376, min_acc: 0.5000091902473095, min_dpd: 0.0
max dist: 0.03664830390295402, acc_dist: 0.036545961509055556, dpd_dist: 0.00010234239389846012
max dist: 0.04260473330815814, acc_dist: 0.029313335686833496, dpd_dist: 0.01329139762132464
max dist: 0.08723208203702212, acc_dist: 0.07734803777469357, dpd_dist: 0.00988404426232855


[I 2025-05-16 19:48:53,162] Trial 1 finished with values: [0.6707195657301805, 0.02661388780052924] and parameters: {'max_depth': 4, 'eta': 0.012061242701516551, 'subsample': 0.9625993175655435, 'colsample_bytree': 0.714180329128127}.
[I 2025-05-16 19:48:53,271] Trial 2 finished with values: [0.667858335401139, 0.022817446601897124] and parameters: {'max_depth': 7, 'eta': 0.2263632917414913, 'subsample': 0.6751460122911948, 'colsample_bytree': 0.9444940094624767}.
[I 2025-05-16 19:48:53,409] Trial 3 finished with values: [0.6707195657301805, 0.02661388780052924] and parameters: {'max_depth': 5, 'eta': 0.038426763571925385, 'subsample': 0.9837519583168315, 'colsample_bytree': 0.8557007552408132}.
[I 2025-05-16 19:48:53,523] Trial 4 finished with values: [0.6707195657301805, 0.02661388780052924] and parameters: {'max_depth': 4, 'eta': 0.13810269689244062, 'subsample': 0.6610332528055362, 'colsample_bytree': 0.9685990831235356}.
[I 2025-05-16 19:48:53,647] Trial 5 finished with values: [0

18
max_acc: 0.7464349498978352, max_dpd: 0.09117831450646376, min_acc: 0.5000091902473095, min_dpd: 0.0
max dist: 0.03664830390295402, acc_dist: 0.036545961509055556, dpd_dist: 0.00010234239389846012
max dist: 0.04260473330815814, acc_dist: 0.029313335686833496, dpd_dist: 0.01329139762132464
max dist: 0.14704660378333922, acc_dist: 0.1313202485398175, dpd_dist: 0.015726355243521725
max dist: 0.46078129378605304, acc_dist: 0.4102357791762317, dpd_dist: 0.05054551460982133


[I 2025-05-16 19:49:05,868] Trial 2 finished with values: [0.6614404793632996, 0.039160973616569235] and parameters: {'max_depth': 6, 'eta': 0.14199752183660497, 'subsample': 0.9000663106913946, 'colsample_bytree': 0.7092489312520924}.
[I 2025-05-16 19:49:05,997] Trial 3 finished with values: [0.6707195657301805, 0.02661388780052924] and parameters: {'max_depth': 7, 'eta': 0.06128206150808623, 'subsample': 0.6406550340927423, 'colsample_bytree': 0.9490796175833713}.
[I 2025-05-16 19:49:06,127] Trial 4 finished with values: [0.6707195657301805, 0.02661388780052924] and parameters: {'max_depth': 4, 'eta': 0.026179061048435537, 'subsample': 0.8012226622319802, 'colsample_bytree': 0.9677228439036216}.
[I 2025-05-16 19:49:06,224] Trial 5 finished with values: [0.6664369104839278, 0.035051666034896754] and parameters: {'max_depth': 4, 'eta': 0.2551090314590642, 'subsample': 0.9509908690648581, 'colsample_bytree': 0.631974539296369}.
[I 2025-05-16 19:49:06,338] Trial 6 finished with values: [

19
max_acc: 0.7464349498978352, max_dpd: 0.09117831450646376, min_acc: 0.5000091902473095, min_dpd: 0.0
max dist: 0.03664830390295402, acc_dist: 0.036545961509055556, dpd_dist: 0.00010234239389846012
max dist: 0.04260473330815814, acc_dist: 0.029313335686833496, dpd_dist: 0.01329139762132464
max dist: 0.14704660378333922, acc_dist: 0.1313202485398175, dpd_dist: 0.015726355243521725
max dist: 0.24835743541838198, acc_dist: 0.21239457856114027, dpd_dist: 0.0359628568572417


[I 2025-05-16 19:49:17,392] Trial 1 finished with values: [0.6707195657301805, 0.02661388780052924] and parameters: {'max_depth': 4, 'eta': 0.018269644039208013, 'subsample': 0.8937133288034471, 'colsample_bytree': 0.7157141126359479}.
[I 2025-05-16 19:49:17,515] Trial 2 finished with values: [0.6707195657301805, 0.02661388780052924] and parameters: {'max_depth': 5, 'eta': 0.07204622384390555, 'subsample': 0.6622078111239567, 'colsample_bytree': 0.9528609240209303}.
[I 2025-05-16 19:49:17,613] Trial 3 finished with values: [0.6371414654768359, 0.04012584482896309] and parameters: {'max_depth': 4, 'eta': 0.2879012349783151, 'subsample': 0.6092674096878996, 'colsample_bytree': 0.9535490649553766}.
[I 2025-05-16 19:49:17,735] Trial 4 finished with values: [0.6335756495207286, 0.025908203961690246] and parameters: {'max_depth': 5, 'eta': 0.06625694777255245, 'subsample': 0.71175122410819, 'colsample_bytree': 0.6728644295629003}.
[I 2025-05-16 19:49:17,834] Trial 5 finished with values: [0.

Pareto-optimal solutions:
Params: {'alpha': 0, 'max_depth': 4, 'eta': 0.23427942936231477, 'subsample': 0.6945965931961444, 'colsample_bytree': 0.6527247741457783}, Accuracy: 0.5000, DPD: 0.0000
Params: {'alpha': 0.015625, 'max_depth': 7, 'eta': 0.09846115378619846, 'subsample': 0.8176032213384978, 'colsample_bytree': 0.8378989637123635}, Accuracy: 0.5000, DPD: 0.0000
Params: {'alpha': 0.01953125, 'max_depth': 5, 'eta': 0.04200097992618926, 'subsample': 0.6616152340584983, 'colsample_bytree': 0.9233753224185217}, Accuracy: 0.5000, DPD: 0.0000
Params: {'alpha': 0.021484375, 'max_depth': 6, 'eta': 0.01341722865651471, 'subsample': 0.6061950111223026, 'colsample_bytree': 0.7926316116601153}, Accuracy: 0.5000, DPD: 0.0000
Params: {'alpha': 0.02197265625, 'max_depth': 3, 'eta': 0.010026783869354019, 'subsample': 0.6146170236451909, 'colsample_bytree': 0.9594608842970873}, Accuracy: 0.5000, DPD: 0.0000
Params: {'alpha': 0.022216796875, 'max_depth': 6, 'eta': 0.061678436905756286, 'subsample'

In [12]:
fig = px.scatter(
    plot_df,
    x='DPD',
    y='Accuracy',
    color='alpha',
    color_continuous_scale='Viridis',
    size_max=15,
    hover_data={'alpha': True, 'DPD': True, 'Accuracy': True},
    title='Pareto Front: DPD vs. Accuracy'
)
fig.update_traces(marker=dict(line_width=0))
fig.show()