In [3]:
import pip
import pickle
import numpy as np
import mlflow
import mlflow.xgboost
from hyperopt import fmin, tpe, hp, STATUS_OK, Trials
from xgboost import XGBClassifier
from sklearn.metrics import roc_auc_score, accuracy_score, f1_score
from mlflow.models.signature import infer_signature


# # Initialize MLflow
# mlflow.set_tracking_uri('sqlite:///mlflow.db')
# mlflow.set_experiment('Spine-disease-exp')
# mlflow.sklearn.autolog()


  import pkg_resources


In [4]:
with open('./data/X_train.pkl', 'rb') as f_in:
    X_train = pickle.load(f_in)

with open('./data/X_test.pkl', 'rb') as f_in:
    X_test = pickle.load(f_in)

with open('./data/y_train.pkl', 'rb') as f_in:
    y_train = pickle.load(f_in)

with open('./data/y_test.pkl', 'rb') as f_in:
    y_test = pickle.load(f_in)

In [4]:
X_val = X_test
y_val = y_test

In [17]:
# 3. Hyperopt Objective Function
def objective(params):
    with mlflow.start_run(nested=True):
        # Convert params to int where needed
        params = {
            'n_estimators': int(params['n_estimators']),
            'max_depth': int(params['max_depth']),
            'learning_rate': params['lr'],
            'subsample': params['subsample'],
            'colsample_bytree': params['colsample'],
            'gamma': params['gamma']
        }
        
        # Train model
        model = XGBClassifier(
            **params,
            random_state=42,
            eval_metric='logloss',
            use_label_encoder=False
        )
        
        model.fit(X_train, y_train, 
                 eval_set=[(X_val, y_val)],
                 verbose=0)
        
        # Predict
        y_pred_proba = model.predict_proba(X_val)[:, 1]
        y_pred = (y_pred_proba > 0.5).astype(int)
        
        # Calculate metrics
        auc = roc_auc_score(y_val, y_pred_proba)
        accuracy = accuracy_score(y_val, y_pred)
        f1 = f1_score(y_val, y_pred)
        
        # MLflow Tracking
        mlflow.log_params(params)
        mlflow.log_metrics({
            "val_auc": auc,
            "val_accuracy": accuracy,
            "val_f1": f1
        })
        
        # Model logging with all fixes
        mlflow.xgboost.log_model(
            xgb_model=model,
            name=f"xgb_trial",
            signature=infer_signature(X_train, model.predict(X_train)),
            input_example=X_train[:1],
            registered_model_name="XGBoost_Hyperopt"
        )
        
        return {'loss': -auc, 'status': STATUS_OK}

In [15]:
# 4. Search Space
space = {
    'n_estimators': hp.quniform('n_estimators', 50, 500, 25),
    'max_depth': hp.quniform('max_depth', 3, 10, 1),
    'lr': hp.loguniform('lr', np.log(0.001), np.log(0.3)),
    'subsample': hp.uniform('subsample', 0.6, 1),
    'colsample': hp.uniform('colsample', 0.6, 1),
    'gamma': hp.uniform('gamma', 0, 5)
}

In [16]:
# 5. Main Execution
with mlflow.start_run(run_name="XGBoost_Hyperopt_Search"):
    trials = Trials()
    best = fmin(
        fn=objective,
        space=space,
        algo=tpe.suggest,
        max_evals=10, #100
        trials=trials
    )
    
    # Train final model
    best_params = {
        'n_estimators': int(best['n_estimators']),
        'max_depth': int(best['max_depth']),
        'learning_rate': best['lr'],
        'subsample': best['subsample'],
        'colsample_bytree': best['colsample'],
        'gamma': best['gamma']
    }
    
    final_model = XGBClassifier(**best_params)
    final_model.fit(X_train, y_train)
    
    # Final evaluation
    test_pred_proba = final_model.predict_proba(X_test)[:, 1]
    test_pred = (test_pred_proba > 0.5).astype(int)
    
    mlflow.log_metrics({
        "test_auc": roc_auc_score(y_test, test_pred_proba),
        "test_accuracy": accuracy_score(y_test, test_pred),
        "test_f1": f1_score(y_test, test_pred)
    })
    
    # Log final model with all fixes
    mlflow.xgboost.log_model(
        xgb_model=final_model,
        artifact_path="final_model",
        signature=infer_signature(X_test, test_pred),
        input_example=X_test[:1],
        registered_model_name="XGBoost_Final"
    )

print("Optimization complete! View results with: mlflow ui --backend-store-uri sqlite:///mlflow.db")

  0%|          | 0/10 [00:00<?, ?trial/s, best loss=?]

Parameters: { "use_label_encoder" } are not used.

  bst.update(dtrain, iteration=i, fobj=obj)


  self.get_booster().save_model(fname)

2025/07/22 23:47:21 INFO mlflow.store.db.utils: Creating initial MLflow database tables...

2025/07/22 23:47:21 INFO mlflow.store.db.utils: Updating database tables

INFO  [alembic.runtime.migration] Context impl SQLiteImpl.

INFO  [alembic.runtime.migration] Will assume non-transactional DDL.



 10%|█         | 1/10 [00:52<07:48, 52.04s/trial, best loss: -0.9452380952380953]

Successfully registered model 'XGBoost_Hyperopt'.
Created version '1' of model 'XGBoost_Hyperopt'.
Parameters: { "use_label_encoder" } are not used.

  bst.update(dtrain, iteration=i, fobj=obj)


  self.get_booster().save_model(fname)

Registered model 'XGBoost_Hyperopt' already exists. Creating a new version of this model...
Created version '2' of model 'XGBoost_Hyperopt'.


 20%|██        | 2/10 [01:49<07:19, 54.95s/trial, best loss: -0.9452380952380953]

Parameters: { "use_label_encoder" } are not used.

  bst.update(dtrain, iteration=i, fobj=obj)


  self.get_booster().save_model(fname)

Registered model 'XGBoost_Hyperopt' already exists. Creating a new version of this model...
Created version '3' of model 'XGBoost_Hyperopt'.


 30%|███       | 3/10 [02:45<06:28, 55.50s/trial, best loss: -0.9452380952380953]

Parameters: { "use_label_encoder" } are not used.

  bst.update(dtrain, iteration=i, fobj=obj)


  self.get_booster().save_model(fname)



 40%|████      | 4/10 [03:41<05:34, 55.69s/trial, best loss: -0.9452380952380953]

Registered model 'XGBoost_Hyperopt' already exists. Creating a new version of this model...
Created version '4' of model 'XGBoost_Hyperopt'.
Parameters: { "use_label_encoder" } are not used.

  bst.update(dtrain, iteration=i, fobj=obj)


  self.get_booster().save_model(fname)

Registered model 'XGBoost_Hyperopt' already exists. Creating a new version of this model...
Created version '5' of model 'XGBoost_Hyperopt'.


 50%|█████     | 5/10 [04:38<04:41, 56.25s/trial, best loss: -0.9452380952380953]

Parameters: { "use_label_encoder" } are not used.

  bst.update(dtrain, iteration=i, fobj=obj)


  self.get_booster().save_model(fname)

Registered model 'XGBoost_Hyperopt' already exists. Creating a new version of this model...
Created version '6' of model 'XGBoost_Hyperopt'.


 60%|██████    | 6/10 [05:35<03:46, 56.64s/trial, best loss: -0.9452380952380953]

Parameters: { "use_label_encoder" } are not used.

  bst.update(dtrain, iteration=i, fobj=obj)


  self.get_booster().save_model(fname)



 70%|███████   | 7/10 [06:33<02:50, 56.85s/trial, best loss: -0.9452380952380953]

Registered model 'XGBoost_Hyperopt' already exists. Creating a new version of this model...
Created version '7' of model 'XGBoost_Hyperopt'.
Parameters: { "use_label_encoder" } are not used.

  bst.update(dtrain, iteration=i, fobj=obj)


  self.get_booster().save_model(fname)

Registered model 'XGBoost_Hyperopt' already exists. Creating a new version of this model...
Created version '8' of model 'XGBoost_Hyperopt'.


 80%|████████  | 8/10 [07:24<01:50, 55.09s/trial, best loss: -0.9452380952380953]

Parameters: { "use_label_encoder" } are not used.

  bst.update(dtrain, iteration=i, fobj=obj)


  self.get_booster().save_model(fname)



 90%|█████████ | 9/10 [08:10<00:52, 52.36s/trial, best loss: -0.9452380952380953]

Registered model 'XGBoost_Hyperopt' already exists. Creating a new version of this model...
Created version '9' of model 'XGBoost_Hyperopt'.
Parameters: { "use_label_encoder" } are not used.

  bst.update(dtrain, iteration=i, fobj=obj)


  self.get_booster().save_model(fname)

Registered model 'XGBoost_Hyperopt' already exists. Creating a new version of this model...
Created version '10' of model 'XGBoost_Hyperopt'.


100%|██████████| 10/10 [09:06<00:00, 54.66s/trial, best loss: -0.9452380952380953]


  self.get_booster().save_model(fname)
Successfully registered model 'XGBoost_Final'.
Created version '1' of model 'XGBoost_Final'.


Optimization complete! View results with: mlflow ui --backend-store-uri sqlite:///mlflow.db


## final logging

In [None]:
import mlflow
from xgboost import XGBClassifier
from mlflow.models.signature import infer_signature

# Best parameters from Hyperopt
best_params = {
    'n_estimators': 150,
    'max_depth': 10,
    'learning_rate': 0.1086,
    'subsample': 0.7775,
    'colsample_bytree': 0.6674,
    'gamma': 3.2692
}

# Start MLflow run
with mlflow.start_run(run_name="xgb_Final_Model_Training"):
    # Initialize model with **unpacked** parameters
    final_model = XGBClassifier(
        **best_params,
        random_state=42,
        eval_metric='logloss',
        use_label_encoder=False
    )
    
    # Train with MLflow autologging
    mlflow.xgboost.autolog(
        log_input_examples=True,
        log_model_signatures=True,
        log_models=True
    )
    
    final_model.fit(
        X_train, 
        y_train,
        eval_set=[(X_test, y_test)],
        verbose=True
    )
    
    # Manual logging to ensure all metrics are captured
    y_pred = final_model.predict(X_test)
    y_proba = final_model.predict_proba(X_test)[:, 1]
    
    mlflow.log_metrics({
        "test_accuracy": accuracy_score(y_test, y_pred),
        "test_auc": roc_auc_score(y_test, y_proba),
        "test_f1": f1_score(y_test, y_pred)
    })
    
    # Explicit model logging (redundant but ensures capture)
    mlflow.xgboost.log_model(
        xgb_model=final_model,
        name="production_model",
        signature=infer_signature(X_train, final_model.predict(X_train)),
        input_example=X_train[:1]
    )

    #mlflow.log_artifact("requirements.txt")  # Log your environment
    mlflow.log_param("sklearn_version", sklearn.__version__)

print("Final model trained and logged successfully!")

[0]	validation_0-logloss:0.57831
[1]	validation_0-logloss:0.53912
[2]	validation_0-logloss:0.50484
[3]	validation_0-logloss:0.49163
[4]	validation_0-logloss:0.47939
[5]	validation_0-logloss:0.46449
[6]	validation_0-logloss:0.45762
[7]	validation_0-logloss:0.43405
[8]	validation_0-logloss:0.42758
[9]	validation_0-logloss:0.40361
[10]	validation_0-logloss:0.39940
[11]	validation_0-logloss:0.39640
[12]	validation_0-logloss:0.38898
[13]	validation_0-logloss:0.37424
[14]	validation_0-logloss:0.37158
[15]	validation_0-logloss:0.35887
[16]	validation_0-logloss:0.35311
[17]	validation_0-logloss:0.34174
[18]	validation_0-logloss:0.33370
[19]	validation_0-logloss:0.33421
[20]	validation_0-logloss:0.33074
[21]	validation_0-logloss:0.32853
[22]	validation_0-logloss:0.33095
[23]	validation_0-logloss:0.32818
[24]	validation_0-logloss:0.32514
[25]	validation_0-logloss:0.31778
[26]	validation_0-logloss:0.31624


Parameters: { "use_label_encoder" } are not used.

  bst.update(dtrain, iteration=i, fobj=obj)


[27]	validation_0-logloss:0.31488
[28]	validation_0-logloss:0.31385
[29]	validation_0-logloss:0.30760
[30]	validation_0-logloss:0.30473
[31]	validation_0-logloss:0.30319
[32]	validation_0-logloss:0.30308
[33]	validation_0-logloss:0.30784
[34]	validation_0-logloss:0.30687
[35]	validation_0-logloss:0.30668
[36]	validation_0-logloss:0.30662
[37]	validation_0-logloss:0.30670
[38]	validation_0-logloss:0.30667
[39]	validation_0-logloss:0.30656
[40]	validation_0-logloss:0.30360
[41]	validation_0-logloss:0.30351
[42]	validation_0-logloss:0.30351
[43]	validation_0-logloss:0.30345
[44]	validation_0-logloss:0.30342
[45]	validation_0-logloss:0.30321
[46]	validation_0-logloss:0.30197
[47]	validation_0-logloss:0.30195
[48]	validation_0-logloss:0.30212
[49]	validation_0-logloss:0.30210
[50]	validation_0-logloss:0.30224
[51]	validation_0-logloss:0.30232
[52]	validation_0-logloss:0.30255
[53]	validation_0-logloss:0.30242
[54]	validation_0-logloss:0.30260
[55]	validation_0-logloss:0.30281
[56]	validatio

  self.get_booster().save_model(fname)


Final model trained and logged successfully!


# OR

In [None]:
import mlflow
from xgboost import XGBClassifier

# 1. Configure autolog (do this ONCE at start of script)
mlflow.set_tracking_uri("sqlite:///mlflow.db")
mlflow.set_experiment('Spine-disease-exp')
mlflow.xgboost.autolog(
    log_input_examples=True,
    log_model_signatures=True,
    log_models=True
)

# 2. Your best parameters
params = {
    'n_estimators': 150,
    'max_depth': 10,
    'learning_rate': 0.1086,
    'subsample': 0.7775,
    'colsample_bytree': 0.6674,
    'gamma': 3.2692
}

# 3. Train with automatic tracking
with mlflow.start_run():
    xgb = XGBClassifier(**params, use_label_encoder=False)
    xgb.fit(X_train, y_train, eval_set=[(X_test, y_test)])

        # Manual logging to ensure all metrics are captured
    y_pred = xgb.predict(X_test)
    y_proba = xgb.predict_proba(X_test)[:, 1]
    
    mlflow.log_metrics({
        "test_accuracy": accuracy_score(y_test, y_pred),
        "test_auc": roc_auc_score(y_test, y_proba),
        "test_f1": f1_score(y_test, y_pred)
    })

2025/07/23 21:02:18 INFO mlflow.store.db.utils: Creating initial MLflow database tables...
2025/07/23 21:02:18 INFO mlflow.store.db.utils: Updating database tables
INFO  [alembic.runtime.migration] Context impl SQLiteImpl.
INFO  [alembic.runtime.migration] Will assume non-transactional DDL.
INFO  [alembic.runtime.migration] Context impl SQLiteImpl.
INFO  [alembic.runtime.migration] Will assume non-transactional DDL.
Parameters: { "use_label_encoder" } are not used.

  bst.update(dtrain, iteration=i, fobj=obj)


[0]	validation_0-logloss:0.58281
[1]	validation_0-logloss:0.55368
[2]	validation_0-logloss:0.51455
[3]	validation_0-logloss:0.49465
[4]	validation_0-logloss:0.48265
[5]	validation_0-logloss:0.46781
[6]	validation_0-logloss:0.45646
[7]	validation_0-logloss:0.43860
[8]	validation_0-logloss:0.43029
[9]	validation_0-logloss:0.41051
[10]	validation_0-logloss:0.41172
[11]	validation_0-logloss:0.40452
[12]	validation_0-logloss:0.40778
[13]	validation_0-logloss:0.40634
[14]	validation_0-logloss:0.40490
[15]	validation_0-logloss:0.39234
[16]	validation_0-logloss:0.38291
[17]	validation_0-logloss:0.38301
[18]	validation_0-logloss:0.37018
[19]	validation_0-logloss:0.36516
[20]	validation_0-logloss:0.36249
[21]	validation_0-logloss:0.36251
[22]	validation_0-logloss:0.35126
[23]	validation_0-logloss:0.35111
[24]	validation_0-logloss:0.34354
[25]	validation_0-logloss:0.34246
[26]	validation_0-logloss:0.33855
[27]	validation_0-logloss:0.33597
[28]	validation_0-logloss:0.33577
[29]	validation_0-loglos

