## Import Modules

In [53]:
import os
import mlflow
import mlflow.xgboost
import xgboost as xgb
from sklearn.datasets import load_breast_cancer
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score

## Prepare Data

In [56]:
# 1. Load data
data = load_breast_cancer()

In [57]:
# 2. Split data
X_train, X_test, y_train, y_test = train_test_split(
    data.data,
    data.target,
    test_size=0.2,
    random_state=717
    )

## Train XGBoost Model with MLFlow

In [58]:
mlruns_directory = os.path.join(os.getcwd(), "..", "mlruns")
os.makedirs(mlruns_directory, exist_ok=True)

In [63]:
# Set tracking URI to parent directory
mlflow.set_tracking_uri(f"file:{mlruns_directory}")

In [64]:
# Create or set experiment
mlflow.set_experiment("xgboost-breast-cancer")

2025/07/04 17:50:49 INFO mlflow.tracking.fluent: Experiment with name 'xgboost-breast-cancer' does not exist. Creating a new experiment.


<Experiment: artifact_location='file:f:\\GitHub\\xgboost-mlops-pipeline\\model\\notebooks\\..\\mlruns/815168156842703501', creation_time=1751665849318, experiment_id='815168156842703501', last_update_time=1751665849318, lifecycle_stage='active', name='xgboost-breast-cancer', tags={}>

In [69]:
with mlflow.start_run():

    # Define model and hyperparameters
    params = {
        "n_estimators": 100,
        "learning_rate": 0.1,
        "max_depth": 3,
        "eval_metric": "logloss",
        "random_state": 717
    }


    # Log hyperparameters
    mlflow.log_params(params)

    
    # Initialize the model
    model = xgb.XGBClassifier(**params)


    # Train the model
    model.fit(X_train, y_train)


    # Make predictions
    y_pred = model.predict(X_test)
    

    # Calculate accuracy
    accuracy = accuracy_score(y_test, y_pred)
    

    # Log metrics
    mlflow.log_metric("accuracy", accuracy)
    

    # Log model
    mlflow.xgboost.log_model(model, "model")
    
    
    print(f"Run complete. Accuracy: {accuracy:.4f}")


  self.get_booster().save_model(fname)


Run complete. Accuracy: 0.9474
