In [19]:
import pandas as pd
import numpy as np

import matplotlib.pyplot as plt
import seaborn as sns

import xgboost as xgb
from xgboost import XGBClassifier

from sklearn.model_selection import train_test_split
from sklearn.metrics import roc_auc_score

import wandb
from wandb.sklearn import plot_precision_recall, plot_feature_importances
from wandb.sklearn import plot_class_proportions, plot_learning_curve, plot_roc

import warnings 
warnings.filterwarnings('ignore')

In [20]:
train_df = pd.read_csv("../adult_train.csv")
test_df = pd.read_csv("../adult_test.csv")

train_df.replace('?', pd.NA, inplace=True)
test_df.replace('?', pd.NA, inplace=True)

X = train_df.drop("income", axis=1)
y = train_df["income"]

X_train, X_val, y_train, y_val = train_test_split(X, y, test_size=0.2, random_state=42)

In [21]:
sweep_config = {
    "method": "bayes",
    "metric": {"goal": "maximize", "name": "Validation AUC"},
    "parameters": {
        "max_depth": {"values": [6, 8, 10]},
        "learning_rate": {"min": 0.01, "max": 0.2},
        "n_estimators": {"values": [100, 150, 200]},
        "subsample": {"values": [0.8, 1.0]}
    }
}


In [22]:
sweep_id = wandb.sweep(sweep_config, project="hyperparameter-sweep")

Create sweep with ID: muykvo5n
Sweep URL: https://wandb.ai/annina_ecker-fh-st-p-lten/hyperparameter-sweep/sweeps/muykvo5n


In [23]:
def train():
    wandb.init()
    config = wandb.config

    dtrain = xgb.DMatrix(X_train, label=y_train)
    dval = xgb.DMatrix(X_val, label=y_val)

    params = {
        "objective": "binary:logistic",
        "eval_metric": "auc",
        "max_depth": config.max_depth,
        "learning_rate": config.learning_rate,
        "subsample": config.subsample
    }

    bst = xgb.train(
        params,
        dtrain,
        num_boost_round=config.n_estimators,
        evals=[(dtrain, "train"), (dval, "val")],
        early_stopping_rounds=10,
        verbose_eval=0,
    )

    y_val_preds = bst.predict(dval)
    auc_val = roc_auc_score(y_val, y_val_preds)

    wandb.log({"Validation AUC": auc_val})

    bst.save_model(f"model_{wandb.run.name}.json")

    wandb.finish()


In [24]:
wandb.agent(sweep_id, function=train, count=10)  # count=10 begrenzt auf 10 Läufe

[34m[1mwandb[0m: Agent Starting Run: 3pw8rrqw with config:
[34m[1mwandb[0m: 	learning_rate: 0.07560684031605631
[34m[1mwandb[0m: 	max_depth: 10
[34m[1mwandb[0m: 	n_estimators: 150
[34m[1mwandb[0m: 	subsample: 1


0,1
Validation AUC,▁

0,1
Validation AUC,0.929


[34m[1mwandb[0m: Agent Starting Run: stpyfl1k with config:
[34m[1mwandb[0m: 	learning_rate: 0.17194710472386168
[34m[1mwandb[0m: 	max_depth: 6
[34m[1mwandb[0m: 	n_estimators: 100
[34m[1mwandb[0m: 	subsample: 1


0,1
Validation AUC,▁

0,1
Validation AUC,0.93147


[34m[1mwandb[0m: Agent Starting Run: ytxccn5x with config:
[34m[1mwandb[0m: 	learning_rate: 0.1003989224511436
[34m[1mwandb[0m: 	max_depth: 8
[34m[1mwandb[0m: 	n_estimators: 200
[34m[1mwandb[0m: 	subsample: 1


0,1
Validation AUC,▁

0,1
Validation AUC,0.93084


[34m[1mwandb[0m: Agent Starting Run: okz8dvsx with config:
[34m[1mwandb[0m: 	learning_rate: 0.0503260092337021
[34m[1mwandb[0m: 	max_depth: 10
[34m[1mwandb[0m: 	n_estimators: 200
[34m[1mwandb[0m: 	subsample: 1


0,1
Validation AUC,▁

0,1
Validation AUC,0.92926


[34m[1mwandb[0m: Agent Starting Run: 6nyqe8ix with config:
[34m[1mwandb[0m: 	learning_rate: 0.16257305590854543
[34m[1mwandb[0m: 	max_depth: 6
[34m[1mwandb[0m: 	n_estimators: 100
[34m[1mwandb[0m: 	subsample: 1


0,1
Validation AUC,▁

0,1
Validation AUC,0.93145


[34m[1mwandb[0m: Agent Starting Run: okz7twbp with config:
[34m[1mwandb[0m: 	learning_rate: 0.15911958274268503
[34m[1mwandb[0m: 	max_depth: 6
[34m[1mwandb[0m: 	n_estimators: 200
[34m[1mwandb[0m: 	subsample: 1


0,1
Validation AUC,▁

0,1
Validation AUC,0.93132


[34m[1mwandb[0m: Agent Starting Run: b2bqyybs with config:
[34m[1mwandb[0m: 	learning_rate: 0.1912219412431304
[34m[1mwandb[0m: 	max_depth: 6
[34m[1mwandb[0m: 	n_estimators: 150
[34m[1mwandb[0m: 	subsample: 1


0,1
Validation AUC,▁

0,1
Validation AUC,0.93181


[34m[1mwandb[0m: Agent Starting Run: lnvf3gpa with config:
[34m[1mwandb[0m: 	learning_rate: 0.1787079133050409
[34m[1mwandb[0m: 	max_depth: 6
[34m[1mwandb[0m: 	n_estimators: 150
[34m[1mwandb[0m: 	subsample: 1


0,1
Validation AUC,▁

0,1
Validation AUC,0.93218


[34m[1mwandb[0m: Agent Starting Run: j5pnha9h with config:
[34m[1mwandb[0m: 	learning_rate: 0.1456579394865136
[34m[1mwandb[0m: 	max_depth: 6
[34m[1mwandb[0m: 	n_estimators: 200
[34m[1mwandb[0m: 	subsample: 1


0,1
Validation AUC,▁

0,1
Validation AUC,0.9325


[34m[1mwandb[0m: Agent Starting Run: okpxragp with config:
[34m[1mwandb[0m: 	learning_rate: 0.10781405028146396
[34m[1mwandb[0m: 	max_depth: 6
[34m[1mwandb[0m: 	n_estimators: 200
[34m[1mwandb[0m: 	subsample: 1


0,1
Validation AUC,▁

0,1
Validation AUC,0.93179
