In [18]:
import pandas as pd

from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder
import xgboost

import mlflow
from mlflow.data.pandas_dataset import PandasDataset


In [19]:

# Send all MLflow calls in this notebook to the tracking server at this URI.
mlflow.set_tracking_uri("http://localhost:5000")

# Read the semicolon-separated white-wine CSV straight from its URL.
dataset_source_url = "https://raw.githubusercontent.com/mlflow/mlflow/master/tests/datasets/winequality-white.csv"
raw_data = pd.read_csv(dataset_source_url, sep=";")

# Features are every column except "quality"; "quality" itself is the target.
X = raw_data.drop(columns="quality")
y = raw_data["quality"]

# Hold out 35% of the rows for testing; the fixed seed keeps the split repeatable.
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.35, random_state=34)



In [20]:
# Map the raw quality scores to integer class ids (0..n_classes-1) for the classifier.
le = LabelEncoder()

# Fit the encoder on the training labels only, then reuse it for the test labels.
# NOTE: le.transform raises ValueError if y_test holds a score never seen in y_train.
y_train_encoded = le.fit_transform(y_train)
y_test_encoded = le.transform(y_test)

# Fit an XGBoost classifier on the training split. The quality target has more
# than two levels, so this is a multiclass fit rather than a binary one.
model = xgboost.XGBClassifier().fit(X_train, y_train_encoded)

# Predict encoded class ids for the held-out features.
y_test_pred = model.predict(X=X_test)

# Build the evaluation frame on a COPY of the test features. Assigning the new
# columns to X_test itself would leak "label"/"predictions" into the feature
# frame, breaking model.predict on a re-run of this cell and polluting anything
# downstream that treats X_test as pure features.
eval_data = X_test.copy()
eval_data["label"] = y_test

# Decode the predicted class ids back to the original quality scores so that
# "predictions" and "label" are on the same scale.
eval_data["predictions"] = le.inverse_transform(y_test_pred)

# Wrap the frame as an MLflow dataset, naming the prediction and target columns
# so mlflow.evaluate can score it.
pd_dataset = mlflow.data.from_pandas(
    eval_data, predictions="predictions", targets="label"
)


In [None]:
mlflow.set_experiment("White Wine Quality Demo")

# The input example must look like what the model was trained on: feature
# columns only (X.columns), and just a handful of rows. Selecting by X.columns
# keeps any "label"/"predictions" columns out of the inferred model signature,
# and head(5) avoids storing the whole test split as an artifact.
input_example = X_test[X.columns].head(5)

# Log the Dataset, model, and execute an evaluation run using the configured Dataset
with mlflow.start_run() as run:
    # NOTE(review): pd_dataset is built from the test split, yet it is tagged with
    # context="training" — confirm whether an evaluation-style context is intended.
    mlflow.log_input(pd_dataset, context="training")

    # Log the fitted model and register it under a named registry entry.
    mlflow.xgboost.log_model(
        artifact_path="white-wine-xgb",
        xgb_model=model,
        input_example=input_example,
        registered_model_name="white-wine-xgb-model",
    )

    # predictions=None: the dataset already names its predictions column, so the
    # evaluation scores those stored predictions against the "label" targets.
    result = mlflow.evaluate(data=pd_dataset, predictions=None, model_type="classifier")

In [None]:
# Display the metrics attribute of `result`, the value returned by mlflow.evaluate.
result.metrics