In [1]:
import pandas as pd
import mlflow
import mlflow.xgboost
import xgboost as xgb
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler, OneHotEncoder
from sklearn.compose import ColumnTransformer
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score, roc_auc_score
from mlflow.models import infer_signature
from hyperopt import fmin, tpe, hp, Trials, STATUS_OK

# Îç∞Ïù¥ÌÑ∞ Î°úÎî© Î∞è Ï†ÑÏ≤òÎ¶¨
data = pd.read_csv('../data/churn.csv')

# Tracking Server ÏÑ§Ï†ï
mlflow.set_tracking_uri(uri="http://127.0.0.1:5000") # ÌôòÍ≤Ω Î≥ÄÏàòÎ•º ÏÑ§Ï†ïÌñàÏßÄÎßå, ÌôïÏã§ÌïòÍ≤å ÏÑ§Ï†ï

In [2]:
X = data.drop(['Exited', 'RowNumber', 'CustomerId', 'Surname'], axis=1)
y = data['Exited']

In [3]:
categorical_features = ['Geography', 'Gender']
numeric_features = ['CreditScore', 'Age', 'Tenure', 'Balance', 'NumOfProducts', 'HasCrCard', 'IsActiveMember', 'EstimatedSalary']

In [4]:
preprocessor = ColumnTransformer(
    transformers=[
        ('num', StandardScaler(), numeric_features),
        ('cat', OneHotEncoder(), categorical_features)
    ])

In [5]:
X_processed = preprocessor.fit_transform(X)

In [6]:
X_train, X_test, y_train, y_test = train_test_split(X_processed, y, test_size=0.2, random_state=42)

In [7]:
# ÌïòÏù¥ÌçºÌååÎùºÎØ∏ÌÑ∞ ÌÉêÏÉâ Í≥µÍ∞Ñ Ï†ïÏùò
space = {
    'max_depth': hp.choice('max_depth', range(3, 10)),
    'learning_rate': hp.uniform('learning_rate', 0.01, 0.2),
    'n_estimators': hp.choice('n_estimators', range(50, 200)),
    'gamma': hp.uniform('gamma', 0, 5)
}

In [8]:
# objective Ìï®Ïàò Ï†ïÏùò
def objective(params):
    with mlflow.start_run(nested=True):
        model = xgb.XGBClassifier(
            use_label_encoder=False,
            eval_metric='logloss',
            **params
        )
        model.fit(X_train, y_train)
        
        # ÏòàÏ∏°Í∞íÍ≥º ÌôïÎ•† Î∂ÑÎ¶¨
        preds = model.predict(X_test)
        probs = model.predict_proba(X_test)[:, 1]

        acc = accuracy_score(y_test, preds)
        precision = precision_score(y_test, preds)
        recall = recall_score(y_test, preds)
        f1 = f1_score(y_test, preds)
        roc_auc = roc_auc_score(y_test, probs)

        mlflow.log_params(params)
        mlflow.log_metrics({
            'accuracy': acc,
            'precision': precision,
            'recall': recall,
            'f1_score': f1,
            'roc_auc': roc_auc
        })

        signature = infer_signature(X_train, probs)

        return {
            'loss': -roc_auc,
            'status': STATUS_OK,
            'model': model,
            'signature': signature
        }

In [9]:
# Ïã§Ìóò ÏÑ§Ï†ï Î∞è fmin ÏàòÌñâ
mlflow.set_experiment("practice5")

with mlflow.start_run(run_name="XGBoost") as run:
    trials = Trials()
    best = fmin(
        fn=objective,
        space=space,
        algo=tpe.suggest,
        max_evals=20,
        trials=trials
    )

    # best run Í≤∞Í≥º Ï∂îÏ∂ú
    best_result = sorted(trials.results, key=lambda x: x["loss"])[0]
    best_model = best_result["model"]
    signature = best_result["signature"]

    mlflow.log_params(best)
    mlflow.log_metric("best_roc_auc", -best_result["loss"])

    # ÏµúÏ¢Ö best Î™®Îç∏ Ï†ÄÏû• (Î£®Ìä∏ runÏóê)
    mlflow.xgboost.log_model(best_model, "best_xgboost_model", signature=signature)

    print(f"Best parameters: {best}")
    print(f"Best ROC-AUC: {-best_result['loss']:.4f}")

2025/10/10 23:56:40 INFO mlflow.tracking.fluent: Experiment with name 'practice5' does not exist. Creating a new experiment.


  0%|          | 0/20 [00:00<?, ?trial/s, best loss=?]

Parameters: { "use_label_encoder" } are not used.

  bst.update(dtrain, iteration=i, fobj=obj)



üèÉ View run legendary-lamb-99 at: http://127.0.0.1:5000/#/experiments/1/runs/75897464a9494388bf272ab2bd425079

üß™ View experiment at: http://127.0.0.1:5000/#/experiments/1

  5%|‚ñå         | 1/20 [00:01<00:26,  1.37s/trial, best loss: -0.871954125636726]

Parameters: { "use_label_encoder" } are not used.

  bst.update(dtrain, iteration=i, fobj=obj)



üèÉ View run bustling-cat-441 at: http://127.0.0.1:5000/#/experiments/1/runs/957cd6d57bf2444e83e4184b4dd262f9

üß™ View experiment at: http://127.0.0.1:5000/#/experiments/1                    

 10%|‚ñà         | 2/20 [00:02<00:26,  1.46s/trial, best loss: -0.871954125636726]

Parameters: { "use_label_encoder" } are not used.

  bst.update(dtrain, iteration=i, fobj=obj)



üèÉ View run brawny-lamb-35 at: http://127.0.0.1:5000/#/experiments/1/runs/4c0b510df3fe4f179d30ba45ea6f8649

üß™ View experiment at: http://127.0.0.1:5000/#/experiments/1                    

 15%|‚ñà‚ñå        | 3/20 [00:03<00:21,  1.28s/trial, best loss: -0.871954125636726]

Parameters: { "use_label_encoder" } are not used.

  bst.update(dtrain, iteration=i, fobj=obj)



üèÉ View run unruly-sloth-838 at: http://127.0.0.1:5000/#/experiments/1/runs/5b0d667998ed4dc1804b572dcbfa0a6a

üß™ View experiment at: http://127.0.0.1:5000/#/experiments/1                    

 20%|‚ñà‚ñà        | 4/20 [00:05<00:19,  1.20s/trial, best loss: -0.873843125891654]

Parameters: { "use_label_encoder" } are not used.

  bst.update(dtrain, iteration=i, fobj=obj)



üèÉ View run adorable-sponge-438 at: http://127.0.0.1:5000/#/experiments/1/runs/3d420dc2bd564e4491e01799d1949a63

üß™ View experiment at: http://127.0.0.1:5000/#/experiments/1                    

 25%|‚ñà‚ñà‚ñå       | 5/20 [00:05<00:15,  1.03s/trial, best loss: -0.873843125891654]

Parameters: { "use_label_encoder" } are not used.

  bst.update(dtrain, iteration=i, fobj=obj)



üèÉ View run calm-fish-59 at: http://127.0.0.1:5000/#/experiments/1/runs/f88784d3a70c4d58af6df4907c06ff87

üß™ View experiment at: http://127.0.0.1:5000/#/experiments/1                    

 30%|‚ñà‚ñà‚ñà       | 6/20 [00:06<00:13,  1.04trial/s, best loss: -0.873843125891654]

Parameters: { "use_label_encoder" } are not used.

  bst.update(dtrain, iteration=i, fobj=obj)



üèÉ View run respected-shark-618 at: http://127.0.0.1:5000/#/experiments/1/runs/1dccd70171b149f2b70227c5f0c9168d

üß™ View experiment at: http://127.0.0.1:5000/#/experiments/1                    

 35%|‚ñà‚ñà‚ñà‚ñå      | 7/20 [00:07<00:11,  1.11trial/s, best loss: -0.8748660044873653]

Parameters: { "use_label_encoder" } are not used.

  bst.update(dtrain, iteration=i, fobj=obj)



üèÉ View run flawless-hog-386 at: http://127.0.0.1:5000/#/experiments/1/runs/e62c76881c404b52a13150f2210db9fa

üß™ View experiment at: http://127.0.0.1:5000/#/experiments/1                     

 40%|‚ñà‚ñà‚ñà‚ñà      | 8/20 [00:08<00:10,  1.13trial/s, best loss: -0.8748660044873653]

Parameters: { "use_label_encoder" } are not used.

  bst.update(dtrain, iteration=i, fobj=obj)



üèÉ View run vaunted-stoat-293 at: http://127.0.0.1:5000/#/experiments/1/runs/57d045dea1f14602b8b4fd55fa59516d

üß™ View experiment at: http://127.0.0.1:5000/#/experiments/1                     

 45%|‚ñà‚ñà‚ñà‚ñà‚ñå     | 9/20 [00:08<00:09,  1.18trial/s, best loss: -0.8753085657373672]

Parameters: { "use_label_encoder" } are not used.

  bst.update(dtrain, iteration=i, fobj=obj)



üèÉ View run powerful-mule-288 at: http://127.0.0.1:5000/#/experiments/1/runs/378f81a9d5d24e179d3c706763be619c

üß™ View experiment at: http://127.0.0.1:5000/#/experiments/1                     

 50%|‚ñà‚ñà‚ñà‚ñà‚ñà     | 10/20 [00:09<00:08,  1.13trial/s, best loss: -0.8753085657373672]

Parameters: { "use_label_encoder" } are not used.

  bst.update(dtrain, iteration=i, fobj=obj)



üèÉ View run luxuriant-fox-112 at: http://127.0.0.1:5000/#/experiments/1/runs/15b2b7e8260e4bb2b8f9dec3d0e2ff9f

üß™ View experiment at: http://127.0.0.1:5000/#/experiments/1                      

 55%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñå    | 11/20 [00:10<00:08,  1.10trial/s, best loss: -0.8753085657373672]

Parameters: { "use_label_encoder" } are not used.

  bst.update(dtrain, iteration=i, fobj=obj)



üèÉ View run colorful-ray-160 at: http://127.0.0.1:5000/#/experiments/1/runs/300b0e48936b49248aebe2676ebe48ae

üß™ View experiment at: http://127.0.0.1:5000/#/experiments/1                      

 60%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà    | 12/20 [00:11<00:07,  1.05trial/s, best loss: -0.8753085657373672]

Parameters: { "use_label_encoder" } are not used.

  bst.update(dtrain, iteration=i, fobj=obj)



üèÉ View run exultant-ray-565 at: http://127.0.0.1:5000/#/experiments/1/runs/37a2385385d84507b8b753df9f6d1f87

üß™ View experiment at: http://127.0.0.1:5000/#/experiments/1                      

 65%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñå   | 13/20 [00:12<00:06,  1.04trial/s, best loss: -0.8753085657373672]

Parameters: { "use_label_encoder" } are not used.

  bst.update(dtrain, iteration=i, fobj=obj)



üèÉ View run worried-kit-216 at: http://127.0.0.1:5000/#/experiments/1/runs/7823f0657f124c8194d5bdcbdca944a6

üß™ View experiment at: http://127.0.0.1:5000/#/experiments/1                      

 70%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà   | 14/20 [00:14<00:06,  1.02s/trial, best loss: -0.8753085657373672]

Parameters: { "use_label_encoder" } are not used.

  bst.update(dtrain, iteration=i, fobj=obj)



üèÉ View run stately-sloth-26 at: http://127.0.0.1:5000/#/experiments/1/runs/aac28baf5aa94db9a204a3d5cfaa64fb

üß™ View experiment at: http://127.0.0.1:5000/#/experiments/1                      

 75%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñå  | 15/20 [00:15<00:05,  1.00s/trial, best loss: -0.8753085657373672]

Parameters: { "use_label_encoder" } are not used.

  bst.update(dtrain, iteration=i, fobj=obj)



üèÉ View run learned-calf-964 at: http://127.0.0.1:5000/#/experiments/1/runs/971d2ac4ef4e410ea85a173862c7cac7

üß™ View experiment at: http://127.0.0.1:5000/#/experiments/1                      

 80%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà  | 16/20 [00:16<00:04,  1.00s/trial, best loss: -0.8753085657373672]

Parameters: { "use_label_encoder" } are not used.

  bst.update(dtrain, iteration=i, fobj=obj)



üèÉ View run angry-gull-757 at: http://127.0.0.1:5000/#/experiments/1/runs/bc83f4680d994c13833527d4b0d28cf1

üß™ View experiment at: http://127.0.0.1:5000/#/experiments/1                      

 85%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñå | 17/20 [00:17<00:02,  1.00trial/s, best loss: -0.8753085657373672]

Parameters: { "use_label_encoder" } are not used.

  bst.update(dtrain, iteration=i, fobj=obj)



üèÉ View run wistful-cub-31 at: http://127.0.0.1:5000/#/experiments/1/runs/5c6c46e1ab3644b9a5ec9e81dc7d8998

üß™ View experiment at: http://127.0.0.1:5000/#/experiments/1                      

 90%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà | 18/20 [00:17<00:01,  1.03trial/s, best loss: -0.8753085657373672]

Parameters: { "use_label_encoder" } are not used.

  bst.update(dtrain, iteration=i, fobj=obj)



üèÉ View run adventurous-loon-943 at: http://127.0.0.1:5000/#/experiments/1/runs/ab72710ea1d64f46ad6814686a2cd68e

üß™ View experiment at: http://127.0.0.1:5000/#/experiments/1                      

 95%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñå| 19/20 [00:18<00:00,  1.09trial/s, best loss: -0.8753085657373672]

Parameters: { "use_label_encoder" } are not used.

  bst.update(dtrain, iteration=i, fobj=obj)



üèÉ View run legendary-squid-80 at: http://127.0.0.1:5000/#/experiments/1/runs/5001c0ce02ec4c58b5d9c208c216b8b6

üß™ View experiment at: http://127.0.0.1:5000/#/experiments/1                      

100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 20/20 [00:19<00:00,  1.02trial/s, best loss: -0.8753085657373672]


  self.get_booster().save_model(fname)


Best parameters: {'gamma': 2.175485630996258, 'learning_rate': 0.13314541619495712, 'max_depth': 2, 'n_estimators': 18}
Best ROC-AUC: 0.8753
üèÉ View run XGBoost at: http://127.0.0.1:5000/#/experiments/1/runs/34850d987173402bbb32cae4d9f89456
üß™ View experiment at: http://127.0.0.1:5000/#/experiments/1
