In [51]:
import time

import mlflow
from mlflow.models import infer_signature

import numpy as np
import pandas as pd
from sklearn import datasets
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score
from sklearn.metrics import mean_absolute_error, mean_squared_error, r2_score


# Shared settings: one seed drives both the split and the forest so the
# whole cell is reproducible from a fresh kernel.
SEED = 42
TEST_SIZE = 0.2  # fraction of samples held out for evaluation

# Load the breast cancer dataset as (features, labels) arrays
X, y = datasets.load_breast_cancer(return_X_y=True)

# Split the data into training and test sets
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=TEST_SIZE, random_state=SEED
)

# Candidate hyperparameter sets, keyed by a short label. The second set used
# to live in a bare triple-quoted string (dead code that had to be pasted over
# `params` by hand); keeping both here makes switching a one-line change.
PARAM_SETS = {
    "small_gini": {
        "n_estimators": 5,
        "max_depth": 5,
        "min_samples_split": 2,
        "min_samples_leaf": 1,
        "max_features": "sqrt",
        "bootstrap": True,
        "criterion": "gini",
        "random_state": SEED,
    },
    "balanced_entropy": {
        "n_estimators": 20,
        "max_depth": 10,
        "min_samples_split": 5,
        "min_samples_leaf": 2,
        "max_features": "log2",
        "bootstrap": False,
        "criterion": "entropy",
        "class_weight": "balanced",
        "random_state": SEED,
    },
}

# Which set this run trains with; `params` is what later cells log to MLflow.
ACTIVE_PARAM_SET = "small_gini"
params = dict(PARAM_SETS[ACTIVE_PARAM_SET])

# Train the model
rf = RandomForestClassifier(**params)
rf.fit(X_train, y_train)

# Predict on the test set
y_pred = rf.predict(X_test)

# Classification metrics on the held-out test set
accuracy = accuracy_score(y_test, y_pred)
precision = precision_score(y_test, y_pred)
recall = recall_score(y_test, y_pred)
f1 = f1_score(y_test, y_pred)

# Regression-style error metrics, computed directly on the class labels.
mae = mean_absolute_error(y_test, y_pred)
# RMSE is the square root of the mean *squared* error. The previous version
# took the root of the MAE, which only coincides with RMSE when every error
# is exactly 0 or 1 (as with 0/1 labels); it is wrong in general.
rmse = np.sqrt(mean_squared_error(y_test, y_pred))
# Coefficient of determination via scikit-learn instead of a hand-rolled formula
r2 = r2_score(y_test, y_pred)

In [52]:
# Display the F1 score computed from y_test and y_pred
f1

0.9722222222222222

In [53]:
# Display the mean absolute error computed from y_test and y_pred
mae

0.03508771929824561

In [54]:
# Display the value stored in `rmse` by the metrics cell
rmse

0.1873171623163388

In [55]:
# Display the value stored in `r2` by the metrics cell
r2

0.8506387160170324

In [56]:
# Seconds to keep the run open before logging.
# NOTE(review): presumably this gives the system-metrics monitor time to
# collect at least one sample before the run ends — confirm against the
# configured sampling interval.
SYSTEM_METRICS_WARMUP_SECONDS = 15

# Number of training rows stored with the model as its input example.
# A handful of rows is enough to illustrate the schema; passing all of
# X_train would serialize the entire training set into the model artifacts.
INPUT_EXAMPLE_ROWS = 5

# Set our tracking server uri for logging
mlflow.set_tracking_uri(uri="http://127.0.0.1:5000")

# Create a new MLflow Experiment (or reuse it if it already exists)
mlflow.set_experiment("MLflow Cancer")

mlflow.enable_system_metrics_logging()

# Start an MLflow run
with mlflow.start_run(log_system_metrics=True):
    time.sleep(SYSTEM_METRICS_WARMUP_SECONDS)

    # Log the hyperparameters (once — the duplicate call was redundant)
    mlflow.log_params(params)

    # Log all evaluation metrics in a single call
    mlflow.log_metrics(
        {
            "accuracy": accuracy,
            "precision": precision,
            "recall": recall,
            "f1_score": f1,
            "mae": mae,
            "rmse": rmse,
            "r2": r2,
        }
    )

    # Set a tag that we can use to remind ourselves what this run was for
    mlflow.set_tag("Training Info", "Random Forest model for Breast cancer data")

    # Infer the model signature from training inputs and the model's outputs
    signature = infer_signature(X_train, rf.predict(X_train))

    # Log the model and register it as a new version under the given name
    model_info = mlflow.sklearn.log_model(
        sk_model=rf,
        artifact_path="breast_cancer_rf_model",
        signature=signature,
        input_example=X_train[:INPUT_EXAMPLE_ROWS],
        registered_model_name="breast_cancer_rf_model_demo",
    )

2024/07/15 23:44:24 INFO mlflow.system_metrics.system_metrics_monitor: Started monitoring system metrics.
Registered model 'breast_cancer_rf_model_demo' already exists. Creating a new version of this model...
2024/07/15 23:44:41 INFO mlflow.store.model_registry.abstract_store: Waiting up to 300 seconds for model version to finish creation. Model name: breast_cancer_rf_model_demo, version 2
Created version '2' of model 'breast_cancer_rf_model_demo'.
2024/07/15 23:44:41 INFO mlflow.system_metrics.system_metrics_monitor: Stopping system metrics monitoring...
2024/07/15 23:44:41 INFO mlflow.system_metrics.system_metrics_monitor: Successfully terminated system metrics monitoring!


In [57]:
# Display the URI of the model logged by the MLflow run
model_info.model_uri

'runs:/76cb02990d80482eaf567c7706f12a00/breast_cancer_rf_model'

In [58]:
# Reload the logged model through MLflow's generic Python Function (pyfunc)
# interface and score the held-out test features with it.
loaded_model = mlflow.pyfunc.load_model(model_info.model_uri)
predictions = loaded_model.predict(X_test)

# Column labels for the feature matrix come from the dataset itself.
cancer_feature_names = datasets.load_breast_cancer().feature_names

# Test features side by side with the true and predicted class labels.
result = pd.DataFrame(X_test, columns=cancer_feature_names).assign(
    actual_class=y_test,
    predicted_class=predictions,
)

# Preview the first four rows
result.head(4)

Downloading artifacts: 100%|██████████| 6/6 [00:00<00:00, 118.27it/s]


Unnamed: 0,mean radius,mean texture,mean perimeter,mean area,mean smoothness,mean compactness,mean concavity,mean concave points,mean symmetry,mean fractal dimension,...,worst perimeter,worst area,worst smoothness,worst compactness,worst concavity,worst concave points,worst symmetry,worst fractal dimension,actual_class,predicted_class
0,12.47,18.6,81.09,481.9,0.09965,0.1058,0.08005,0.03821,0.1925,0.06373,...,96.05,677.9,0.1426,0.2378,0.2671,0.1015,0.3014,0.0875,1,1
1,18.94,21.31,123.6,1130.0,0.09009,0.1029,0.108,0.07951,0.1582,0.05461,...,165.9,1866.0,0.1193,0.2336,0.2687,0.1789,0.2551,0.06589,0,0
2,15.46,19.48,101.7,748.9,0.1092,0.1223,0.1466,0.08087,0.1931,0.05796,...,124.9,1156.0,0.1546,0.2394,0.3791,0.1514,0.2837,0.08019,0,0
3,12.4,17.68,81.47,467.8,0.1054,0.1316,0.07741,0.02799,0.1811,0.07102,...,89.61,515.8,0.145,0.2629,0.2403,0.0737,0.2556,0.09359,1,1
