In [1]:
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.compose import ColumnTransformer
from sklearn.preprocessing import StandardScaler, OneHotEncoder
from sklearn.metrics import roc_auc_score
import xgboost as xgb
from hyperopt import hp, fmin, tpe, Trials, STATUS_OK
import mlflow
import mlflow.sklearn

# 데이터 로딩
data = pd.read_csv('../data/dataset.csv')

# Tracking Server 설정
mlflow.set_tracking_uri(uri="http://127.0.0.1:5000") # 환경 변수를 설정했지만, 확실하게 설정

In [2]:
# 예측 타겟과 특성 분리
X = data.drop('Default', axis=1)
y = data['Default']

In [3]:
# 범주형과 수치형 컬럼 구분
categorical_cols = ['State', 'BankState', 'NewExist', 'UrbanRural', 'RealEstate']
numerical_cols = ['DisbursementGross', 'GrAppv', 'daysterm']


In [4]:
# 데이터 전처리 파이프라인 구성
preprocessor = ColumnTransformer(transformers=[
    ('num', StandardScaler(), numerical_cols),
    ('cat', OneHotEncoder(handle_unknown='ignore'), categorical_cols)
])

In [5]:
# 데이터 전처리 실행
X_processed = preprocessor.fit_transform(X)

In [6]:
# 데이터 분할
X_train, X_test, y_train, y_test = train_test_split(X_processed, y, test_size=0.2, random_state=42)

# 하이퍼파라미터 탐색 공간 설정
space = {
    'max_depth': hp.choice('max_depth', range(3, 10)),
    'learning_rate': hp.uniform('learning_rate', 0.01, 0.2),
    'n_estimators': hp.choice('n_estimators', range(50, 200)),
    'gamma': hp.uniform('gamma', 0, 5)
}


In [9]:
# 최적화를 위한 목적 함수 정의
def objective(params):
    # MLflow에 실험 이름 설정
    mlflow.set_experiment("assignment1")

    # 각 하이퍼파라미터 조합별 실험 시작
    with mlflow.start_run(nested=True):
        # XGBoost 모델 초기화 및 훈련
        model = xgb.XGBClassifier(eval_metric='logloss', use_label_encoder=False, **params)
        model.fit(X_train, y_train)

        # 예측 확률 및 ROC-AUC 계산
        probs = model.predict_proba(X_test)[:, 1]
        auc = roc_auc_score(y_test, probs)

        # MLflow에 파라미터 및 메트릭 로깅
        mlflow.log_params(params)
        mlflow.log_metric("roc_auc", auc)

        # 목적 함수 결과 반환 (손실 최소화)
        return {'loss': -auc, 'status': STATUS_OK}


In [10]:
# Hyperopt를 사용한 최적화 실행
trials = Trials()
best_params = fmin(
    fn=objective,
    space=space,
    algo=tpe.suggest,
    max_evals=30,
    trials=trials
)

  0%|          | 0/30 [00:00<?, ?trial/s, best loss=?]

Parameters: { "use_label_encoder" } are not used.

  bst.update(dtrain, iteration=i, fobj=obj)



🏃 View run upset-bear-623 at: http://127.0.0.1:5000/#/experiments/1/runs/d05b938601014c44bb19d75cfc79b031

🧪 View experiment at: http://127.0.0.1:5000/#/experiments/1

🏃 View run resilient-goose-236 at: http://127.0.0.1:5000/#/experiments/1/runs/850498acafd74b51927f4d6a5d7a6fdd

🧪 View experiment at: http://127.0.0.1:5000/#/experiments/1                     

  7%|▋         | 2/30 [00:00<00:07,  3.77trial/s, best loss: -0.9784674784674785]

Parameters: { "use_label_encoder" } are not used.

  bst.update(dtrain, iteration=i, fobj=obj)



🏃 View run amusing-cod-648 at: http://127.0.0.1:5000/#/experiments/1/runs/d5e83e546a0d48d8a4f0da61cfdd46e9

🧪 View experiment at: http://127.0.0.1:5000/#/experiments/1                     

 10%|█         | 3/30 [00:00<00:06,  4.21trial/s, best loss: -0.9784674784674785]

Parameters: { "use_label_encoder" } are not used.

  bst.update(dtrain, iteration=i, fobj=obj)



🏃 View run rebellious-yak-214 at: http://127.0.0.1:5000/#/experiments/1/runs/a075267b5f484015bc6db9f14fe59334

🧪 View experiment at: http://127.0.0.1:5000/#/experiments/1                     

 13%|█▎        | 4/30 [00:00<00:05,  4.40trial/s, best loss: -0.9784674784674785]

Parameters: { "use_label_encoder" } are not used.

  bst.update(dtrain, iteration=i, fobj=obj)



🏃 View run righteous-conch-192 at: http://127.0.0.1:5000/#/experiments/1/runs/4620d4eb9dcb4838a35c13935349a6e2

🧪 View experiment at: http://127.0.0.1:5000/#/experiments/1                     

 17%|█▋        | 5/30 [00:01<00:05,  4.47trial/s, best loss: -0.9784674784674785]

Parameters: { "use_label_encoder" } are not used.

  bst.update(dtrain, iteration=i, fobj=obj)

Parameters: { "use_label_encoder" } are not used.

  bst.update(dtrain, iteration=i, fobj=obj)



🏃 View run smiling-seal-957 at: http://127.0.0.1:5000/#/experiments/1/runs/e45f9d44b16840d9a66434c0e52aa34d

🧪 View experiment at: http://127.0.0.1:5000/#/experiments/1                     

🏃 View run colorful-roo-651 at: http://127.0.0.1:5000/#/experiments/1/runs/79515eacd6c14373a95a077d6782da22

🧪 View experiment at: http://127.0.0.1:5000/#/experiments/1                     

 23%|██▎       | 7/30 [00:01<00:04,  4.70trial/s, best loss: -0.9784674784674785]

Parameters: { "use_label_encoder" } are not used.

  bst.update(dtrain, iteration=i, fobj=obj)



🏃 View run rare-smelt-29 at: http://127.0.0.1:5000/#/experiments/1/runs/49122e9d55ab4713a4454cd60af506ee

🧪 View experiment at: http://127.0.0.1:5000/#/experiments/1                     

 27%|██▋       | 8/30 [00:01<00:05,  4.31trial/s, best loss: -0.9784674784674785]

Parameters: { "use_label_encoder" } are not used.

  bst.update(dtrain, iteration=i, fobj=obj)



🏃 View run victorious-crab-104 at: http://127.0.0.1:5000/#/experiments/1/runs/e8a8c76c94494dcea2ff5cee9665212a

🧪 View experiment at: http://127.0.0.1:5000/#/experiments/1                     

 30%|███       | 9/30 [00:02<00:04,  4.42trial/s, best loss: -0.9784674784674785]

Parameters: { "use_label_encoder" } are not used.

  bst.update(dtrain, iteration=i, fobj=obj)



🏃 View run bold-mole-928 at: http://127.0.0.1:5000/#/experiments/1/runs/baabd236da184d2aa5ec54ae19717fa4

🧪 View experiment at: http://127.0.0.1:5000/#/experiments/1                     

 30%|███       | 9/30 [00:02<00:04,  4.42trial/s, best loss: -0.9784674784674785]

Parameters: { "use_label_encoder" } are not used.

  bst.update(dtrain, iteration=i, fobj=obj)



🏃 View run selective-gnu-741 at: http://127.0.0.1:5000/#/experiments/1/runs/320b3d09f23d4008ad253d3d214827fc

🧪 View experiment at: http://127.0.0.1:5000/#/experiments/1                      

 37%|███▋      | 11/30 [00:02<00:04,  4.25trial/s, best loss: -0.9784674784674785]

Parameters: { "use_label_encoder" } are not used.

  bst.update(dtrain, iteration=i, fobj=obj)



🏃 View run upbeat-gnu-756 at: http://127.0.0.1:5000/#/experiments/1/runs/3139ba5d88794995b20eb0f6e463624d

🧪 View experiment at: http://127.0.0.1:5000/#/experiments/1                      

 40%|████      | 12/30 [00:02<00:04,  4.45trial/s, best loss: -0.9784674784674785]

Parameters: { "use_label_encoder" } are not used.

  bst.update(dtrain, iteration=i, fobj=obj)



🏃 View run calm-squid-618 at: http://127.0.0.1:5000/#/experiments/1/runs/d8f31b9ff25a435d8310a7cc77fc6bb0

🧪 View experiment at: http://127.0.0.1:5000/#/experiments/1                      

 43%|████▎     | 13/30 [00:03<00:03,  4.36trial/s, best loss: -0.9784674784674785]

Parameters: { "use_label_encoder" } are not used.

  bst.update(dtrain, iteration=i, fobj=obj)



🏃 View run skittish-flea-530 at: http://127.0.0.1:5000/#/experiments/1/runs/720464f7441947d09707d35ee0d07a44

🧪 View experiment at: http://127.0.0.1:5000/#/experiments/1                      

 47%|████▋     | 14/30 [00:03<00:03,  4.46trial/s, best loss: -0.9784674784674785]

Parameters: { "use_label_encoder" } are not used.

  bst.update(dtrain, iteration=i, fobj=obj)

Parameters: { "use_label_encoder" } are not used.

  bst.update(dtrain, iteration=i, fobj=obj)



🏃 View run burly-snake-473 at: http://127.0.0.1:5000/#/experiments/1/runs/a87899a90ba84f65a45e8f2227eeec38

🧪 View experiment at: http://127.0.0.1:5000/#/experiments/1                      

🏃 View run funny-moth-866 at: http://127.0.0.1:5000/#/experiments/1/runs/8ad2222a71334dc7a97bb11482e661b5

🧪 View experiment at: http://127.0.0.1:5000/#/experiments/1                      

 53%|█████▎    | 16/30 [00:03<00:03,  4.44trial/s, best loss: -0.9784674784674785]

Parameters: { "use_label_encoder" } are not used.

  bst.update(dtrain, iteration=i, fobj=obj)



🏃 View run upbeat-newt-495 at: http://127.0.0.1:5000/#/experiments/1/runs/bbb4f63cadce4e9694fdaa89bfcb81cb

🧪 View experiment at: http://127.0.0.1:5000/#/experiments/1                      

 57%|█████▋    | 17/30 [00:03<00:02,  4.43trial/s, best loss: -0.9788634788634788]

Parameters: { "use_label_encoder" } are not used.

  bst.update(dtrain, iteration=i, fobj=obj)



🏃 View run capricious-smelt-828 at: http://127.0.0.1:5000/#/experiments/1/runs/e68ea9ca38834867b11e720c51b75e38

🧪 View experiment at: http://127.0.0.1:5000/#/experiments/1                      

 60%|██████    | 18/30 [00:04<00:02,  4.40trial/s, best loss: -0.9788634788634788]

Parameters: { "use_label_encoder" } are not used.

  bst.update(dtrain, iteration=i, fobj=obj)

Parameters: { "use_label_encoder" } are not used.

  bst.update(dtrain, iteration=i, fobj=obj)



🏃 View run ambitious-zebra-481 at: http://127.0.0.1:5000/#/experiments/1/runs/8e260161a352415aaf62588caa55aad2

🧪 View experiment at: http://127.0.0.1:5000/#/experiments/1                      

🏃 View run gregarious-wolf-391 at: http://127.0.0.1:5000/#/experiments/1/runs/4e0a17a233534e23b676f32885c720e0

🧪 View experiment at: http://127.0.0.1:5000/#/experiments/1                      

 67%|██████▋   | 20/30 [00:04<00:02,  4.77trial/s, best loss: -0.9788634788634788]

Parameters: { "use_label_encoder" } are not used.

  bst.update(dtrain, iteration=i, fobj=obj)

Parameters: { "use_label_encoder" } are not used.

  bst.update(dtrain, iteration=i, fobj=obj)



🏃 View run mysterious-rook-454 at: http://127.0.0.1:5000/#/experiments/1/runs/8ad09e963be04b0ab2f93d06fa69f2c0

🧪 View experiment at: http://127.0.0.1:5000/#/experiments/1                      

🏃 View run crawling-grub-905 at: http://127.0.0.1:5000/#/experiments/1/runs/01c3e9b8b67f417eb050d2871e97c709

🧪 View experiment at: http://127.0.0.1:5000/#/experiments/1                      

 73%|███████▎  | 22/30 [00:05<00:02,  3.88trial/s, best loss: -0.9788634788634788]

Parameters: { "use_label_encoder" } are not used.

  bst.update(dtrain, iteration=i, fobj=obj)



🏃 View run gentle-ape-900 at: http://127.0.0.1:5000/#/experiments/1/runs/1757d240f95f4c7883093460593ec08a

🧪 View experiment at: http://127.0.0.1:5000/#/experiments/1                      

 77%|███████▋  | 23/30 [00:05<00:01,  4.11trial/s, best loss: -0.9788634788634788]

Parameters: { "use_label_encoder" } are not used.

  bst.update(dtrain, iteration=i, fobj=obj)

Parameters: { "use_label_encoder" } are not used.

  bst.update(dtrain, iteration=i, fobj=obj)



🏃 View run righteous-lamb-987 at: http://127.0.0.1:5000/#/experiments/1/runs/270b55f0e8cf4882b1e706ba71e3072d

🧪 View experiment at: http://127.0.0.1:5000/#/experiments/1                      

🏃 View run treasured-quail-890 at: http://127.0.0.1:5000/#/experiments/1/runs/13bf1f27c2be466fbc72320280fabaf2

🧪 View experiment at: http://127.0.0.1:5000/#/experiments/1                      

 83%|████████▎ | 25/30 [00:05<00:01,  4.24trial/s, best loss: -0.9793708543708544]

Parameters: { "use_label_encoder" } are not used.

  bst.update(dtrain, iteration=i, fobj=obj)

Parameters: { "use_label_encoder" } are not used.

  bst.update(dtrain, iteration=i, fobj=obj)



🏃 View run fun-mare-200 at: http://127.0.0.1:5000/#/experiments/1/runs/02d963af0ead44cf8f1fb388b9445cf0

🧪 View experiment at: http://127.0.0.1:5000/#/experiments/1                      

🏃 View run peaceful-squirrel-88 at: http://127.0.0.1:5000/#/experiments/1/runs/b11d76f24d6d4e47afd67664880fc5fd

🧪 View experiment at: http://127.0.0.1:5000/#/experiments/1                      

 90%|█████████ | 27/30 [00:06<00:00,  4.14trial/s, best loss: -0.9793708543708544]

Parameters: { "use_label_encoder" } are not used.

  bst.update(dtrain, iteration=i, fobj=obj)



🏃 View run gentle-sloth-143 at: http://127.0.0.1:5000/#/experiments/1/runs/97727b3c65834659927c8034c70cdd2a

🧪 View experiment at: http://127.0.0.1:5000/#/experiments/1                      

 93%|█████████▎| 28/30 [00:06<00:00,  4.13trial/s, best loss: -0.9803361053361053]

Parameters: { "use_label_encoder" } are not used.

  bst.update(dtrain, iteration=i, fobj=obj)



🏃 View run fearless-snail-270 at: http://127.0.0.1:5000/#/experiments/1/runs/b502811404ae4d348278e6f206bf5a3e

🧪 View experiment at: http://127.0.0.1:5000/#/experiments/1                      

 97%|█████████▋| 29/30 [00:06<00:00,  4.30trial/s, best loss: -0.9803361053361053]

Parameters: { "use_label_encoder" } are not used.

  bst.update(dtrain, iteration=i, fobj=obj)



🏃 View run nervous-toad-355 at: http://127.0.0.1:5000/#/experiments/1/runs/6190e810409c4c6aa8dd64ed9d0cedd9

🧪 View experiment at: http://127.0.0.1:5000/#/experiments/1                      

100%|██████████| 30/30 [00:06<00:00,  4.32trial/s, best loss: -0.9803361053361053]


Parameters: { "use_label_encoder" } are not used.

  bst.update(dtrain, iteration=i, fobj=obj)

