In [1]:
import pandas as pd
import mlflow
import mlflow.xgboost
import xgboost as xgb
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler, OneHotEncoder
from sklearn.compose import ColumnTransformer
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score, roc_auc_score
from mlflow.models import infer_signature
from hyperopt import fmin, tpe, hp, Trials, STATUS_OK

# 데이터 로딩 및 전처리
data = pd.read_csv('../data/churn.csv')

# Tracking Server 설정
mlflow.set_tracking_uri(uri="http://127.0.0.1:5000") # 환경 변수를 설정했지만, 확실하게 설정

In [2]:
X = data.drop(['Exited', 'RowNumber', 'CustomerId', 'Surname'], axis=1)
y = data['Exited']

In [3]:
categorical_features = ['Geography', 'Gender']
numeric_features = ['CreditScore', 'Age', 'Tenure', 'Balance', 'NumOfProducts', 'HasCrCard', 'IsActiveMember', 'EstimatedSalary']

In [4]:
preprocessor = ColumnTransformer(
    transformers=[
        ('num', StandardScaler(), numeric_features),
        ('cat', OneHotEncoder(), categorical_features)
    ])

In [5]:
X_processed = preprocessor.fit_transform(X)

In [6]:
X_train, X_test, y_train, y_test = train_test_split(X_processed, y, test_size=0.2, random_state=42)

In [7]:
# 하이퍼파라미터 탐색 공간 정의
space = {
    'max_depth': hp.choice('max_depth', range(3, 10)),
    'learning_rate': hp.uniform('learning_rate', 0.01, 0.2),
    'n_estimators': hp.choice('n_estimators', range(50, 200)),
    'gamma': hp.uniform('gamma', 0, 5)
}

In [8]:
# objective 함수 정의
def objective(params):
    with mlflow.start_run(nested=True):
        model = xgb.XGBClassifier(
            use_label_encoder=False,
            eval_metric='logloss',
            **params
        )
        model.fit(X_train, y_train)
        
        # 예측값과 확률 분리
        preds = model.predict(X_test)
        probs = model.predict_proba(X_test)[:, 1]

        acc = accuracy_score(y_test, preds)
        precision = precision_score(y_test, preds)
        recall = recall_score(y_test, preds)
        f1 = f1_score(y_test, preds)
        roc_auc = roc_auc_score(y_test, probs)

        mlflow.log_params(params)
        mlflow.log_metrics({
            'accuracy': acc,
            'precision': precision,
            'recall': recall,
            'f1_score': f1,
            'roc_auc': roc_auc
        })

        signature = infer_signature(X_train, probs)

        return {
            'loss': -roc_auc,
            'status': STATUS_OK,
            'model': model,
            'signature': signature
        }

In [9]:
# 실험 설정 및 fmin 수행
mlflow.set_experiment("practice5")

with mlflow.start_run(run_name="XGBoost") as run:
    trials = Trials()
    best = fmin(
        fn=objective,
        space=space,
        algo=tpe.suggest,
        max_evals=20,
        trials=trials
    )

    # best run 결과 추출
    best_result = sorted(trials.results, key=lambda x: x["loss"])[0]
    best_model = best_result["model"]
    signature = best_result["signature"]

    mlflow.log_params(best)
    mlflow.log_metric("best_roc_auc", -best_result["loss"])

    # 최종 best 모델 저장 (루트 run에)
    mlflow.xgboost.log_model(best_model, "best_xgboost_model", signature=signature)

    print(f"Best parameters: {best}")
    print(f"Best ROC-AUC: {-best_result['loss']:.4f}")

2025/10/10 23:56:40 INFO mlflow.tracking.fluent: Experiment with name 'practice5' does not exist. Creating a new experiment.


  0%|          | 0/20 [00:00<?, ?trial/s, best loss=?]

Parameters: { "use_label_encoder" } are not used.

  bst.update(dtrain, iteration=i, fobj=obj)



🏃 View run legendary-lamb-99 at: http://127.0.0.1:5000/#/experiments/1/runs/75897464a9494388bf272ab2bd425079

🧪 View experiment at: http://127.0.0.1:5000/#/experiments/1

  5%|▌         | 1/20 [00:01<00:26,  1.37s/trial, best loss: -0.871954125636726]

Parameters: { "use_label_encoder" } are not used.

  bst.update(dtrain, iteration=i, fobj=obj)



🏃 View run bustling-cat-441 at: http://127.0.0.1:5000/#/experiments/1/runs/957cd6d57bf2444e83e4184b4dd262f9

🧪 View experiment at: http://127.0.0.1:5000/#/experiments/1                    

 10%|█         | 2/20 [00:02<00:26,  1.46s/trial, best loss: -0.871954125636726]

Parameters: { "use_label_encoder" } are not used.

  bst.update(dtrain, iteration=i, fobj=obj)



🏃 View run brawny-lamb-35 at: http://127.0.0.1:5000/#/experiments/1/runs/4c0b510df3fe4f179d30ba45ea6f8649

🧪 View experiment at: http://127.0.0.1:5000/#/experiments/1                    

 15%|█▌        | 3/20 [00:03<00:21,  1.28s/trial, best loss: -0.871954125636726]

Parameters: { "use_label_encoder" } are not used.

  bst.update(dtrain, iteration=i, fobj=obj)



🏃 View run unruly-sloth-838 at: http://127.0.0.1:5000/#/experiments/1/runs/5b0d667998ed4dc1804b572dcbfa0a6a

🧪 View experiment at: http://127.0.0.1:5000/#/experiments/1                    

 20%|██        | 4/20 [00:05<00:19,  1.20s/trial, best loss: -0.873843125891654]

Parameters: { "use_label_encoder" } are not used.

  bst.update(dtrain, iteration=i, fobj=obj)



🏃 View run adorable-sponge-438 at: http://127.0.0.1:5000/#/experiments/1/runs/3d420dc2bd564e4491e01799d1949a63

🧪 View experiment at: http://127.0.0.1:5000/#/experiments/1                    

 25%|██▌       | 5/20 [00:05<00:15,  1.03s/trial, best loss: -0.873843125891654]

Parameters: { "use_label_encoder" } are not used.

  bst.update(dtrain, iteration=i, fobj=obj)



🏃 View run calm-fish-59 at: http://127.0.0.1:5000/#/experiments/1/runs/f88784d3a70c4d58af6df4907c06ff87

🧪 View experiment at: http://127.0.0.1:5000/#/experiments/1                    

 30%|███       | 6/20 [00:06<00:13,  1.04trial/s, best loss: -0.873843125891654]

Parameters: { "use_label_encoder" } are not used.

  bst.update(dtrain, iteration=i, fobj=obj)



🏃 View run respected-shark-618 at: http://127.0.0.1:5000/#/experiments/1/runs/1dccd70171b149f2b70227c5f0c9168d

🧪 View experiment at: http://127.0.0.1:5000/#/experiments/1                    

 35%|███▌      | 7/20 [00:07<00:11,  1.11trial/s, best loss: -0.8748660044873653]

Parameters: { "use_label_encoder" } are not used.

  bst.update(dtrain, iteration=i, fobj=obj)



🏃 View run flawless-hog-386 at: http://127.0.0.1:5000/#/experiments/1/runs/e62c76881c404b52a13150f2210db9fa

🧪 View experiment at: http://127.0.0.1:5000/#/experiments/1                     

 40%|████      | 8/20 [00:08<00:10,  1.13trial/s, best loss: -0.8748660044873653]

Parameters: { "use_label_encoder" } are not used.

  bst.update(dtrain, iteration=i, fobj=obj)



🏃 View run vaunted-stoat-293 at: http://127.0.0.1:5000/#/experiments/1/runs/57d045dea1f14602b8b4fd55fa59516d

🧪 View experiment at: http://127.0.0.1:5000/#/experiments/1                     

 45%|████▌     | 9/20 [00:08<00:09,  1.18trial/s, best loss: -0.8753085657373672]

Parameters: { "use_label_encoder" } are not used.

  bst.update(dtrain, iteration=i, fobj=obj)



🏃 View run powerful-mule-288 at: http://127.0.0.1:5000/#/experiments/1/runs/378f81a9d5d24e179d3c706763be619c

🧪 View experiment at: http://127.0.0.1:5000/#/experiments/1                     

 50%|█████     | 10/20 [00:09<00:08,  1.13trial/s, best loss: -0.8753085657373672]

Parameters: { "use_label_encoder" } are not used.

  bst.update(dtrain, iteration=i, fobj=obj)



🏃 View run luxuriant-fox-112 at: http://127.0.0.1:5000/#/experiments/1/runs/15b2b7e8260e4bb2b8f9dec3d0e2ff9f

🧪 View experiment at: http://127.0.0.1:5000/#/experiments/1                      

 55%|█████▌    | 11/20 [00:10<00:08,  1.10trial/s, best loss: -0.8753085657373672]

Parameters: { "use_label_encoder" } are not used.

  bst.update(dtrain, iteration=i, fobj=obj)



🏃 View run colorful-ray-160 at: http://127.0.0.1:5000/#/experiments/1/runs/300b0e48936b49248aebe2676ebe48ae

🧪 View experiment at: http://127.0.0.1:5000/#/experiments/1                      

 60%|██████    | 12/20 [00:11<00:07,  1.05trial/s, best loss: -0.8753085657373672]

Parameters: { "use_label_encoder" } are not used.

  bst.update(dtrain, iteration=i, fobj=obj)



🏃 View run exultant-ray-565 at: http://127.0.0.1:5000/#/experiments/1/runs/37a2385385d84507b8b753df9f6d1f87

🧪 View experiment at: http://127.0.0.1:5000/#/experiments/1                      

 65%|██████▌   | 13/20 [00:12<00:06,  1.04trial/s, best loss: -0.8753085657373672]

Parameters: { "use_label_encoder" } are not used.

  bst.update(dtrain, iteration=i, fobj=obj)



🏃 View run worried-kit-216 at: http://127.0.0.1:5000/#/experiments/1/runs/7823f0657f124c8194d5bdcbdca944a6

🧪 View experiment at: http://127.0.0.1:5000/#/experiments/1                      

 70%|███████   | 14/20 [00:14<00:06,  1.02s/trial, best loss: -0.8753085657373672]

Parameters: { "use_label_encoder" } are not used.

  bst.update(dtrain, iteration=i, fobj=obj)



🏃 View run stately-sloth-26 at: http://127.0.0.1:5000/#/experiments/1/runs/aac28baf5aa94db9a204a3d5cfaa64fb

🧪 View experiment at: http://127.0.0.1:5000/#/experiments/1                      

 75%|███████▌  | 15/20 [00:15<00:05,  1.00s/trial, best loss: -0.8753085657373672]

Parameters: { "use_label_encoder" } are not used.

  bst.update(dtrain, iteration=i, fobj=obj)



🏃 View run learned-calf-964 at: http://127.0.0.1:5000/#/experiments/1/runs/971d2ac4ef4e410ea85a173862c7cac7

🧪 View experiment at: http://127.0.0.1:5000/#/experiments/1                      

 80%|████████  | 16/20 [00:16<00:04,  1.00s/trial, best loss: -0.8753085657373672]

Parameters: { "use_label_encoder" } are not used.

  bst.update(dtrain, iteration=i, fobj=obj)



🏃 View run angry-gull-757 at: http://127.0.0.1:5000/#/experiments/1/runs/bc83f4680d994c13833527d4b0d28cf1

🧪 View experiment at: http://127.0.0.1:5000/#/experiments/1                      

 85%|████████▌ | 17/20 [00:17<00:02,  1.00trial/s, best loss: -0.8753085657373672]

Parameters: { "use_label_encoder" } are not used.

  bst.update(dtrain, iteration=i, fobj=obj)



🏃 View run wistful-cub-31 at: http://127.0.0.1:5000/#/experiments/1/runs/5c6c46e1ab3644b9a5ec9e81dc7d8998

🧪 View experiment at: http://127.0.0.1:5000/#/experiments/1                      

 90%|█████████ | 18/20 [00:17<00:01,  1.03trial/s, best loss: -0.8753085657373672]

Parameters: { "use_label_encoder" } are not used.

  bst.update(dtrain, iteration=i, fobj=obj)



🏃 View run adventurous-loon-943 at: http://127.0.0.1:5000/#/experiments/1/runs/ab72710ea1d64f46ad6814686a2cd68e

🧪 View experiment at: http://127.0.0.1:5000/#/experiments/1                      

 95%|█████████▌| 19/20 [00:18<00:00,  1.09trial/s, best loss: -0.8753085657373672]

Parameters: { "use_label_encoder" } are not used.

  bst.update(dtrain, iteration=i, fobj=obj)



🏃 View run legendary-squid-80 at: http://127.0.0.1:5000/#/experiments/1/runs/5001c0ce02ec4c58b5d9c208c216b8b6

🧪 View experiment at: http://127.0.0.1:5000/#/experiments/1                      

100%|██████████| 20/20 [00:19<00:00,  1.02trial/s, best loss: -0.8753085657373672]


  self.get_booster().save_model(fname)


Best parameters: {'gamma': 2.175485630996258, 'learning_rate': 0.13314541619495712, 'max_depth': 2, 'n_estimators': 18}
Best ROC-AUC: 0.8753
🏃 View run XGBoost at: http://127.0.0.1:5000/#/experiments/1/runs/34850d987173402bbb32cae4d9f89456
🧪 View experiment at: http://127.0.0.1:5000/#/experiments/1
