In [None]:
import mlflow
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.tree import DecisionTreeClassifier
from sklearn.ensemble import RandomForestClassifier
from xgboost import XGBClassifier
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score
import time

In [58]:
dataset_df = pd.read_csv("./personality_preprocessing/personality_preprocessing.csv")

x_train, x_test, y_train, y_test = train_test_split(
    dataset_df.drop(columns=["Personality"]),
    dataset_df["Personality"],
    test_size=0.2,
    random_state=42,
)

In [59]:
mlflow.set_tracking_uri("http://localhost:5000")
mlflow.set_experiment("Personality Prediction")

<Experiment: artifact_location='mlflow-artifacts:/398136347545907697', creation_time=1750476836211, experiment_id='398136347545907697', last_update_time=1750476836211, lifecycle_stage='active', name='Personality Prediction', tags={}>

In [60]:
models = {
    "DecisionTreeClassifier": DecisionTreeClassifier(),
    "RandomForestClassifier": RandomForestClassifier(),
    "XGBClassifier": XGBClassifier(use_label_encoder=False, eval_metric='logloss')
}

In [61]:
for name, model in models.items():
    with mlflow.start_run():
        mlflow.log_param("model_type", name)

        start_time = time.time()
        model.fit(x_train, y_train)
        train_time = time.time() - start_time

        y_pred = model.predict(x_test)

        # Log metrics
        mlflow.log_metric("accuracy", accuracy_score(y_test, y_pred))
        mlflow.log_metric("precision", precision_score(y_test, y_pred, average='binary'))
        mlflow.log_metric("recall", recall_score(y_test, y_pred, average='binary'))
        mlflow.log_metric("f1_score", f1_score(y_test, y_pred, average='binary'))
        mlflow.log_metric("train_time", train_time)

        # Extra metrics
        mlflow.log_metric("true_positive_rate", recall_score(y_test, y_pred, pos_label=1))
        mlflow.log_metric("true_negative_rate", recall_score(y_test, y_pred, pos_label=0))

        # Log model
        if name == "XGBClassifier":
            mlflow.xgboost.log_model(model, "model")
        else:
            mlflow.sklearn.log_model(model, "model")



üèÉ View run rumbling-steed-520 at: http://localhost:5000/#/experiments/398136347545907697/runs/fb33f6d0a9ce41fb9c0289e00e6835e4
üß™ View experiment at: http://localhost:5000/#/experiments/398136347545907697




üèÉ View run clean-owl-148 at: http://localhost:5000/#/experiments/398136347545907697/runs/009d59af0c7548eb95a757365a4720c1
üß™ View experiment at: http://localhost:5000/#/experiments/398136347545907697


Parameters: { "use_label_encoder" } are not used.

  bst.update(dtrain, iteration=i, fobj=obj)
  self.get_booster().save_model(fname)


üèÉ View run efficient-bat-22 at: http://localhost:5000/#/experiments/398136347545907697/runs/483f1572f0704c07aa63ba0b772183dc
üß™ View experiment at: http://localhost:5000/#/experiments/398136347545907697


In [64]:
from xgboost import XGBClassifier
from sklearn.model_selection import GridSearchCV
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score
import mlflow
import mlflow.xgboost
import time

xgb = XGBClassifier(use_label_encoder=False, eval_metric='logloss')
param_grid = {
    'n_estimators': [50, 100, 200],
    'max_depth': [3, 5, 7],
    'learning_rate': [0.01, 0.1, 0.2]
}

# Run utama (global)
with mlflow.start_run(run_name="XGB_GridSearch_Full") as main_run:
    grid_search = GridSearchCV(xgb, param_grid, scoring='accuracy', cv=3, return_train_score=True)
    
    start_time = time.time()
    grid_search.fit(x_train, y_train)
    train_time = time.time() - start_time

    # Loop semua kombinasi parameter (log semua)
    for i in range(len(grid_search.cv_results_['params'])):
        params = grid_search.cv_results_['params'][i]

        # Buat ulang model dengan param ini
        model = XGBClassifier(
            use_label_encoder=False,
            eval_metric='logloss',
            **params
        )
        model.fit(x_train, y_train)
        y_pred = model.predict(x_test)

        with mlflow.start_run(run_name=f"Trial_{i+1}", nested=True):
            for param_name, param_value in params.items():
                mlflow.log_param(param_name, param_value)

            mlflow.log_metric("accuracy", accuracy_score(y_test, y_pred))
            mlflow.log_metric("precision", precision_score(y_test, y_pred, average='binary'))
            mlflow.log_metric("recall", recall_score(y_test, y_pred, average='binary'))
            mlflow.log_metric("f1_score", f1_score(y_test, y_pred, average='binary'))

            # Extra
            mlflow.log_metric("train_time", train_time)
            mlflow.log_metric("true_positive_rate", recall_score(y_test, y_pred, pos_label=1))
            mlflow.log_metric("true_negative_rate", recall_score(y_test, y_pred, pos_label=0))

            # Simpan model ini
            mlflow.xgboost.log_model(model, "model")

    # Log model terbaik sebagai run utama
    y_pred_best = grid_search.best_estimator_.predict(x_test)

    mlflow.log_param("model_type", "XGBClassifier_Best")
    mlflow.log_params(grid_search.best_params_)
    mlflow.log_metric("accuracy", accuracy_score(y_test, y_pred_best))
    mlflow.log_metric("precision", precision_score(y_test, y_pred_best, average='binary'))
    mlflow.log_metric("recall", recall_score(y_test, y_pred_best, average='binary'))
    mlflow.log_metric("f1_score", f1_score(y_test, y_pred_best, average='binary'))
    mlflow.log_metric("train_time", train_time)

    mlflow.xgboost.log_model(grid_search.best_estimator_, "model_best")

Parameters: { "use_label_encoder" } are not used.

  bst.update(dtrain, iteration=i, fobj=obj)
Parameters: { "use_label_encoder" } are not used.

  bst.update(dtrain, iteration=i, fobj=obj)
Parameters: { "use_label_encoder" } are not used.

  bst.update(dtrain, iteration=i, fobj=obj)
Parameters: { "use_label_encoder" } are not used.

  bst.update(dtrain, iteration=i, fobj=obj)
Parameters: { "use_label_encoder" } are not used.

  bst.update(dtrain, iteration=i, fobj=obj)
Parameters: { "use_label_encoder" } are not used.

  bst.update(dtrain, iteration=i, fobj=obj)
Parameters: { "use_label_encoder" } are not used.

  bst.update(dtrain, iteration=i, fobj=obj)
Parameters: { "use_label_encoder" } are not used.

  bst.update(dtrain, iteration=i, fobj=obj)
Parameters: { "use_label_encoder" } are not used.

  bst.update(dtrain, iteration=i, fobj=obj)
Parameters: { "use_label_encoder" } are not used.

  bst.update(dtrain, iteration=i, fobj=obj)
Parameters: { "use_label_encoder" } are not used.


üèÉ View run Trial_1 at: http://localhost:5000/#/experiments/398136347545907697/runs/238d1b04e9e94d5da8eda97c4ea10386
üß™ View experiment at: http://localhost:5000/#/experiments/398136347545907697


  self.get_booster().save_model(fname)
Parameters: { "use_label_encoder" } are not used.

  bst.update(dtrain, iteration=i, fobj=obj)


üèÉ View run Trial_2 at: http://localhost:5000/#/experiments/398136347545907697/runs/1c8b854942aa4b4da5bdb34452a67105
üß™ View experiment at: http://localhost:5000/#/experiments/398136347545907697


  self.get_booster().save_model(fname)
Parameters: { "use_label_encoder" } are not used.

  bst.update(dtrain, iteration=i, fobj=obj)


üèÉ View run Trial_3 at: http://localhost:5000/#/experiments/398136347545907697/runs/21625e34c1dd4aa4b3d856cec7b0119b
üß™ View experiment at: http://localhost:5000/#/experiments/398136347545907697


  self.get_booster().save_model(fname)
Parameters: { "use_label_encoder" } are not used.

  bst.update(dtrain, iteration=i, fobj=obj)


üèÉ View run Trial_4 at: http://localhost:5000/#/experiments/398136347545907697/runs/5133f50df5e84cecb761ba41e4736f22
üß™ View experiment at: http://localhost:5000/#/experiments/398136347545907697


  self.get_booster().save_model(fname)
Parameters: { "use_label_encoder" } are not used.

  bst.update(dtrain, iteration=i, fobj=obj)


üèÉ View run Trial_5 at: http://localhost:5000/#/experiments/398136347545907697/runs/831c3598089f4326a3a32f93a8e26c04
üß™ View experiment at: http://localhost:5000/#/experiments/398136347545907697


  self.get_booster().save_model(fname)
Parameters: { "use_label_encoder" } are not used.

  bst.update(dtrain, iteration=i, fobj=obj)


üèÉ View run Trial_6 at: http://localhost:5000/#/experiments/398136347545907697/runs/32b525c136284f1aa3c437ee9c01551c
üß™ View experiment at: http://localhost:5000/#/experiments/398136347545907697


  self.get_booster().save_model(fname)
Parameters: { "use_label_encoder" } are not used.

  bst.update(dtrain, iteration=i, fobj=obj)


üèÉ View run Trial_7 at: http://localhost:5000/#/experiments/398136347545907697/runs/5af2018191d8431c80cd944303b1336e
üß™ View experiment at: http://localhost:5000/#/experiments/398136347545907697


  self.get_booster().save_model(fname)
Parameters: { "use_label_encoder" } are not used.

  bst.update(dtrain, iteration=i, fobj=obj)


üèÉ View run Trial_8 at: http://localhost:5000/#/experiments/398136347545907697/runs/9ecd1c03a76243928a9779be36f2103e
üß™ View experiment at: http://localhost:5000/#/experiments/398136347545907697


  self.get_booster().save_model(fname)
Parameters: { "use_label_encoder" } are not used.

  bst.update(dtrain, iteration=i, fobj=obj)


üèÉ View run Trial_9 at: http://localhost:5000/#/experiments/398136347545907697/runs/5af59cc4a32743d3a951e252d44e3772
üß™ View experiment at: http://localhost:5000/#/experiments/398136347545907697


  self.get_booster().save_model(fname)
Parameters: { "use_label_encoder" } are not used.

  bst.update(dtrain, iteration=i, fobj=obj)


üèÉ View run Trial_10 at: http://localhost:5000/#/experiments/398136347545907697/runs/fad9ea50539d4b299228b8a57c3ef4a2
üß™ View experiment at: http://localhost:5000/#/experiments/398136347545907697


  self.get_booster().save_model(fname)
Parameters: { "use_label_encoder" } are not used.

  bst.update(dtrain, iteration=i, fobj=obj)


üèÉ View run Trial_11 at: http://localhost:5000/#/experiments/398136347545907697/runs/ced413120a9b46678c333e353b58c3f3
üß™ View experiment at: http://localhost:5000/#/experiments/398136347545907697


  self.get_booster().save_model(fname)
Parameters: { "use_label_encoder" } are not used.

  bst.update(dtrain, iteration=i, fobj=obj)


üèÉ View run Trial_12 at: http://localhost:5000/#/experiments/398136347545907697/runs/cad425b3c8e84dedab63fbe5b1bce6c0
üß™ View experiment at: http://localhost:5000/#/experiments/398136347545907697


  self.get_booster().save_model(fname)
Parameters: { "use_label_encoder" } are not used.

  bst.update(dtrain, iteration=i, fobj=obj)


üèÉ View run Trial_13 at: http://localhost:5000/#/experiments/398136347545907697/runs/6ac7dc03dd2d4a72b819c751c6d618e6
üß™ View experiment at: http://localhost:5000/#/experiments/398136347545907697


  self.get_booster().save_model(fname)
Parameters: { "use_label_encoder" } are not used.

  bst.update(dtrain, iteration=i, fobj=obj)


üèÉ View run Trial_14 at: http://localhost:5000/#/experiments/398136347545907697/runs/f14788ac06214910b96b9bdf1b63f9e6
üß™ View experiment at: http://localhost:5000/#/experiments/398136347545907697


  self.get_booster().save_model(fname)
Parameters: { "use_label_encoder" } are not used.

  bst.update(dtrain, iteration=i, fobj=obj)


üèÉ View run Trial_15 at: http://localhost:5000/#/experiments/398136347545907697/runs/b9b8f7ea1b524136bd42930889ea93b5
üß™ View experiment at: http://localhost:5000/#/experiments/398136347545907697


  self.get_booster().save_model(fname)
Parameters: { "use_label_encoder" } are not used.

  bst.update(dtrain, iteration=i, fobj=obj)


üèÉ View run Trial_16 at: http://localhost:5000/#/experiments/398136347545907697/runs/f6a345af099f4bc480834971e6ab858f
üß™ View experiment at: http://localhost:5000/#/experiments/398136347545907697


  self.get_booster().save_model(fname)
Parameters: { "use_label_encoder" } are not used.

  bst.update(dtrain, iteration=i, fobj=obj)


üèÉ View run Trial_17 at: http://localhost:5000/#/experiments/398136347545907697/runs/064e1802159a4510870760b3785d289d
üß™ View experiment at: http://localhost:5000/#/experiments/398136347545907697


  self.get_booster().save_model(fname)
Parameters: { "use_label_encoder" } are not used.

  bst.update(dtrain, iteration=i, fobj=obj)


üèÉ View run Trial_18 at: http://localhost:5000/#/experiments/398136347545907697/runs/e88a1644cebf4254ba31dd38db3e5829
üß™ View experiment at: http://localhost:5000/#/experiments/398136347545907697


  self.get_booster().save_model(fname)
Parameters: { "use_label_encoder" } are not used.

  bst.update(dtrain, iteration=i, fobj=obj)


üèÉ View run Trial_19 at: http://localhost:5000/#/experiments/398136347545907697/runs/56f9598301814c939468d4f357386809
üß™ View experiment at: http://localhost:5000/#/experiments/398136347545907697


  self.get_booster().save_model(fname)
Parameters: { "use_label_encoder" } are not used.

  bst.update(dtrain, iteration=i, fobj=obj)


üèÉ View run Trial_20 at: http://localhost:5000/#/experiments/398136347545907697/runs/53ce69b5319f4bfa82ee7db7952fd8fe
üß™ View experiment at: http://localhost:5000/#/experiments/398136347545907697


  self.get_booster().save_model(fname)
Parameters: { "use_label_encoder" } are not used.

  bst.update(dtrain, iteration=i, fobj=obj)


üèÉ View run Trial_21 at: http://localhost:5000/#/experiments/398136347545907697/runs/758735340c3e41fbb9dfa6aed01c5d80
üß™ View experiment at: http://localhost:5000/#/experiments/398136347545907697


  self.get_booster().save_model(fname)
Parameters: { "use_label_encoder" } are not used.

  bst.update(dtrain, iteration=i, fobj=obj)


üèÉ View run Trial_22 at: http://localhost:5000/#/experiments/398136347545907697/runs/8cdeba6bb6e94780802ba2039cc7cfa3
üß™ View experiment at: http://localhost:5000/#/experiments/398136347545907697


  self.get_booster().save_model(fname)
Parameters: { "use_label_encoder" } are not used.

  bst.update(dtrain, iteration=i, fobj=obj)


üèÉ View run Trial_23 at: http://localhost:5000/#/experiments/398136347545907697/runs/3a0c737dbfa2475582911b3456dc26db
üß™ View experiment at: http://localhost:5000/#/experiments/398136347545907697


  self.get_booster().save_model(fname)
Parameters: { "use_label_encoder" } are not used.

  bst.update(dtrain, iteration=i, fobj=obj)


üèÉ View run Trial_24 at: http://localhost:5000/#/experiments/398136347545907697/runs/410435c913924a35a43e215d9d0d3bec
üß™ View experiment at: http://localhost:5000/#/experiments/398136347545907697


  self.get_booster().save_model(fname)
Parameters: { "use_label_encoder" } are not used.

  bst.update(dtrain, iteration=i, fobj=obj)


üèÉ View run Trial_25 at: http://localhost:5000/#/experiments/398136347545907697/runs/165730b3f96c4271ac166b4180495816
üß™ View experiment at: http://localhost:5000/#/experiments/398136347545907697


  self.get_booster().save_model(fname)
Parameters: { "use_label_encoder" } are not used.

  bst.update(dtrain, iteration=i, fobj=obj)


üèÉ View run Trial_26 at: http://localhost:5000/#/experiments/398136347545907697/runs/88171d63c1cd4244aeb16fa86b2aa70e
üß™ View experiment at: http://localhost:5000/#/experiments/398136347545907697


  self.get_booster().save_model(fname)
  self.get_booster().save_model(fname)


üèÉ View run Trial_27 at: http://localhost:5000/#/experiments/398136347545907697/runs/0d6b97fc0d474ae3a7369e68ab093d90
üß™ View experiment at: http://localhost:5000/#/experiments/398136347545907697




üèÉ View run XGB_GridSearch_Full at: http://localhost:5000/#/experiments/398136347545907697/runs/3f0991262aef4f0e904320b3cb8d8035
üß™ View experiment at: http://localhost:5000/#/experiments/398136347545907697


In [1]:
import mlflow
import mlflow.xgboost
import pandas as pd
from sklearn.model_selection import train_test_split
from xgboost import XGBClassifier
import time

# === Load data ===
dataset_df = pd.read_csv("./personality_preprocessing/personality_preprocessing.csv")

x_train, x_test, y_train, y_test = train_test_split(
    dataset_df.drop(columns=["Personality"]),
    dataset_df["Personality"],
    test_size=0.2,
    random_state=42
)

# === MLflow setup ===
mlflow.set_tracking_uri("http://localhost:5000")
mlflow.set_experiment("Personality_Prediction_Experiment")

# Enable MLflow autologging for XGBoost
mlflow.xgboost.autolog()

# === Training ===
if __name__ == "__main__":
    model = XGBClassifier(use_label_encoder=False, eval_metric='mlogloss')
    
    print("Training XGBoost model with MLflow autologging...")
    with mlflow.start_run(run_name="XGBoost"):
        start_time = time.time()
        model.fit(x_train, y_train)
        train_time = time.time() - start_time

        # Manual log: training time (autolog doesn't do this)
        mlflow.log_metric("train_time", train_time)

        print(f"[‚úì] Training completed in {train_time:.2f} seconds.")

2025/06/21 16:27:33 INFO mlflow.tracking.fluent: Experiment with name 'Personality_Prediction_Experiment' does not exist. Creating a new experiment.


Training XGBoost model with MLflow autologging...


Parameters: { "use_label_encoder" } are not used.

  bst.update(dtrain, iteration=i, fobj=obj)


[‚úì] Training completed in 9.58 seconds.
üèÉ View run XGBoost at: http://localhost:5000/#/experiments/738617894064911806/runs/84ed0f1694ac44178205deb7f849c549
üß™ View experiment at: http://localhost:5000/#/experiments/738617894064911806
