In [14]:
!pip install xgboost boto3

Collecting boto3
  Downloading boto3-1.23.10-py3-none-any.whl (132 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m132.5/132.5 kB[0m [31m11.0 MB/s[0m eta [36m0:00:00[0m
Collecting jmespath<2.0.0,>=0.7.1
  Downloading jmespath-1.0.0-py3-none-any.whl (23 kB)
Collecting botocore<1.27.0,>=1.26.10
  Downloading botocore-1.26.10-py3-none-any.whl (8.8 MB)
     ━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━ 8.8/8.8 MB 80.9 MB/s eta 0:00:00
Collecting s3transfer<0.6.0,>=0.5.0
  Downloading s3transfer-0.5.2-py3-none-any.whl (79 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m79.5/79.5 kB[0m [31m15.0 MB/s[0m eta [36m0:00:00[0m
Installing collected packages: jmespath, botocore, s3transfer, boto3
Successfully installed boto3-1.23.10 botocore-1.26.10 jmespath-1.0.0 s3transfer-0.5.2


In [28]:
import pandas as pd
import numpy as np
from sklearn.metrics import mean_squared_error, mean_absolute_error, r2_score
from sklearn.model_selection import train_test_split
import xgboost as xgb
import mlflow
import mlflow.xgboost


# Direct all MLflow calls in this notebook to the tracking server at this URI.
mlflow.set_tracking_uri("http://mlflow.mlflow:5000")

# Echo library versions and the active tracking URI so the environment the
# runs were produced in is visible in the cell output.
for label, value in (
    ("MLflow Version:", mlflow.__version__),
    ("MLflow Tracking URI:", mlflow.get_tracking_uri()),
    ("XGBoost version:", xgb.__version__),
):
    print(label, value)

# Module-level client; train() uses it to look up the experiment name.
client = mlflow.tracking.MlflowClient()

def build_data():
    """Download the red-wine quality CSV and split it into train/test sets.

    Returns:
        Tuple ``(X_train, X_test, y_train, y_test)`` from a 70/30 split with a
        fixed random seed. The ``X_*`` frames hold every column except
        ``quality``; the ``y_*`` series hold the ``quality`` column.
    """
    source_url = "http://archive.ics.uci.edu/ml/machine-learning-databases/wine-quality/winequality-red.csv"
    wine = pd.read_csv(source_url, sep=";")

    # Separate the target from the features first, then split both in a single
    # call so the feature rows and target rows stay aligned.
    target = wine["quality"]
    features = wine.drop(columns=["quality"])
    X_train, X_test, y_train, y_test = train_test_split(
        features, target, test_size=0.30, random_state=2019
    )

    return X_train, X_test, y_train, y_test

def train(experiment_id, run_name, max_depth, min_child_weight, estimators, model_name):
    """Train an XGBoost regressor on the wine-quality data and record it in MLflow.

    Args:
        experiment_id: ID of the MLflow experiment the run is created under.
        run_name: Display name given to the MLflow run.
        max_depth: ``max_depth`` passed to ``xgb.XGBRegressor``.
        min_child_weight: ``min_child_weight`` passed to ``xgb.XGBRegressor``.
        estimators: Number of boosting rounds; passed to the regressor as
            ``n_estimators`` and logged under the ``estimators`` param key.
        model_name: Forwarded to ``mlflow.xgboost.log_model`` as
            ``registered_model_name``; ``None`` logs the model without
            registering it.

    Returns:
        None. Params, test-split metrics (rmse, mae, r2) and the fitted model
        are logged to the MLflow run and echoed to stdout.
    """
    X_train, X_test, y_train, y_test = build_data()

    with mlflow.start_run(experiment_id=experiment_id, run_name=run_name) as run:
        run_id = run.info.run_id
        # Re-read the ID from the run so the lookup below uses the value
        # MLflow actually recorded rather than the caller's argument.
        experiment_id = run.info.experiment_id
        print("MLflow:")
        print("  run_id:", run_id)
        print("  experiment_id:", experiment_id)
        print("  experiment_name:", client.get_experiment(experiment_id).name)

        # MLflow params
        params = {
            "max_depth": max_depth,
            "min_child_weight": min_child_weight,
            "estimators": estimators,
        }
        print("Parameters:")
        for key, value in params.items():
            print(f"  {key}:", value)
        mlflow.log_params(params)

        # Create and fit model.
        # n_estimators was previously omitted, so the logged "estimators"
        # value never influenced the trained model.
        model = xgb.XGBRegressor(
            max_depth=max_depth,
            min_child_weight=min_child_weight,
            n_estimators=estimators,
            random_state=42,
        )
        model.fit(X_train, y_train)

        # MLflow metrics, computed on the held-out test split
        predictions = model.predict(X_test)
        metrics = {
            "rmse": np.sqrt(mean_squared_error(y_test, predictions)),
            "mae": mean_absolute_error(y_test, predictions),
            "r2": r2_score(y_test, predictions),
        }
        print("Metrics:")
        for key, value in metrics.items():
            print(f"  {key}:", value)
        mlflow.log_metrics(metrics)

        # Log model
        mlflow.xgboost.log_model(model, "xgboost-model", registered_model_name=model_name)


MLflow Version: 1.26.1
MLflow Tracking URI: http://mlflow.mlflow:5000
XGBoost version: 1.6.1


In [30]:
# Record three MLflow runs with the same configuration under experiment 6.
for attempt in range(3):
    train(
        experiment_id=6,
        run_name="decrease min child weight",
        max_depth=2,
        min_child_weight=0.5,
        estimators=100,
        model_name=None,
    )

MLflow:
  run_id: c8aafba1d91d4ab4a47d68132e90e79f
  experiment_id: 6
  experiment_name: winequality
Parameters:
  max_depth: 2
  min_child_weight: 0.5
  estimators: 100
Metrics:
  rmse: 0.6576366783667097
  mae: 0.5122278740008672
  r2: 0.24454302828711816
MLflow:
  run_id: 22c985ad6152454f9e0ad759b9225948
  experiment_id: 6
  experiment_name: winequality
Parameters:
  max_depth: 2
  min_child_weight: 0.5
  estimators: 100
Metrics:
  rmse: 0.6576366783667097
  mae: 0.5122278740008672
  r2: 0.24454302828711816
MLflow:
  run_id: d7a0ea8ac6164737a0501669bc7bea03
  experiment_id: 6
  experiment_name: winequality
Parameters:
  max_depth: 2
  min_child_weight: 0.5
  estimators: 100
Metrics:
  rmse: 0.6576366783667097
  mae: 0.5122278740008672
  r2: 0.24454302828711816
