In [1]:
import numpy as np
import matplotlib as plt
import sklearn.datasets as datasets
from sklearn.ensemble import RandomForestClassifier
import pandas as pd
from sklearn.model_selection import train_test_split

import mlflow
from mlflow.models import infer_signature

from sklearn.metrics import accuracy_score, f1_score, precision_score, recall_score

In [2]:
iris_data = datasets.load_iris()

In [3]:
iris_data.keys()

dict_keys(['data', 'target', 'frame', 'target_names', 'DESCR', 'feature_names', 'filename', 'data_module'])

In [4]:
X = iris_data["data"]
y = iris_data["target"]

In [5]:
iris_data["target_names"]

array(['setosa', 'versicolor', 'virginica'], dtype='<U10')

In [6]:
iris_data["feature_names"]

['sepal length (cm)',
 'sepal width (cm)',
 'petal length (cm)',
 'petal width (cm)']

In [7]:
X_df = pd.DataFrame(X, columns=iris_data["feature_names"])
y_df = pd.DataFrame(y, columns=["target"])

In [8]:
df = pd.concat((X_df, y_df), axis=1)

In [9]:
df

Unnamed: 0,sepal length (cm),sepal width (cm),petal length (cm),petal width (cm),target
0,5.1,3.5,1.4,0.2,0
1,4.9,3.0,1.4,0.2,0
2,4.7,3.2,1.3,0.2,0
3,4.6,3.1,1.5,0.2,0
4,5.0,3.6,1.4,0.2,0
...,...,...,...,...,...
145,6.7,3.0,5.2,2.3,2
146,6.3,2.5,5.0,1.9,2
147,6.5,3.0,5.2,2.0,2
148,6.2,3.4,5.4,2.3,2


In [10]:
X_train, X_test, y_train, y_test = train_test_split(X_df, y_df, test_size=0.2, random_state=42)

In [11]:
rf = RandomForestClassifier()
rf.fit(X_train, y_train)

  return fit_method(estimator, *args, **kwargs)


In [12]:
pred = rf.predict(X_test)
pred

array([1, 0, 2, 1, 1, 0, 1, 2, 1, 1, 2, 0, 0, 0, 0, 1, 2, 1, 1, 2, 0, 2,
       0, 2, 2, 2, 2, 2, 0, 0])

In [13]:
dir(mlflow)

['ActiveRun',
 'Image',
 'LazyLoader',
 'MLFLOW_CONFIGURE_LOGGING',
 'MlflowClient',
 'MlflowException',
 'RunOperations',
 'VERSION',
 '__all__',
 '__builtins__',
 '__cached__',
 '__doc__',
 '__file__',
 '__loader__',
 '__name__',
 '__package__',
 '__path__',
 '__spec__',
 '__version__',
 '_configure_mlflow_loggers',
 'active_run',
 'add_trace',
 'anthropic',
 'artifacts',
 'autogen',
 'autolog',
 'azure',
 'bedrock',
 'catboost',
 'client',
 'config',
 'contextlib',
 'create_experiment',
 'crewai',
 'data',
 'delete_experiment',
 'delete_run',
 'delete_tag',
 'disable_system_metrics_logging',
 'diviner',
 'doctor',
 'dspy',
 'enable_system_metrics_logging',
 'end_run',
 'entities',
 'environment_variables',
 'evaluate',
 'exceptions',
 'fastai',
 'flush_artifact_async_logging',
 'flush_async_logging',
 'flush_trace_async_logging',
 'gateway',
 'gemini',
 'get_artifact_uri',
 'get_current_active_span',
 'get_experiment',
 'get_experiment_by_name',
 'get_last_active_trace',
 'get_paren

In [14]:
rf.get_params()

{'bootstrap': True,
 'ccp_alpha': 0.0,
 'class_weight': None,
 'criterion': 'gini',
 'max_depth': None,
 'max_features': 'sqrt',
 'max_leaf_nodes': None,
 'max_samples': None,
 'min_impurity_decrease': 0.0,
 'min_samples_leaf': 1,
 'min_samples_split': 2,
 'min_weight_fraction_leaf': 0.0,
 'monotonic_cst': None,
 'n_estimators': 100,
 'n_jobs': None,
 'oob_score': False,
 'random_state': None,
 'verbose': 0,
 'warm_start': False}

In [15]:
metrics = {"accuracy_score": accuracy_score(y_test, pred),
           "precision_score_0": precision_score(y_test, pred, average=None)[0],
           "precision_score_1": precision_score(y_test, pred, average=None)[1],
           "precision_score_2": precision_score(y_test, pred, average=None)[2],

           "recall_score_0": recall_score(y_test, pred, average=None)[0],
           "recall_score_1": recall_score(y_test, pred, average=None)[1],
           "recall_score_2": recall_score(y_test, pred, average=None)[2],

           "f1_score_0": f1_score(y_test, pred, average=None)[0],
           "f1_score_1": f1_score(y_test, pred, average=None)[1],
           "f1_score_2": f1_score(y_test, pred, average=None)[2],
           }

In [16]:
metrics

{'accuracy_score': 1.0,
 'precision_score_0': 1.0,
 'precision_score_1': 1.0,
 'precision_score_2': 1.0,
 'recall_score_0': 1.0,
 'recall_score_1': 1.0,
 'recall_score_2': 1.0,
 'f1_score_0': 1.0,
 'f1_score_1': 1.0,
 'f1_score_2': 1.0}

In [17]:
mlflow.set_tracking_uri(uri="http://127.0.0.1:5000")

mlflow.set_experiment("test")

with mlflow.start_run():
    mlflow.log_params(rf.get_params())
    mlflow.log_metrics(metrics)

    mlflow.set_tag("Training Info", "Basic RF model for iris data")

    signature = infer_signature(X_train, rf.predict(X_train))
    
    model_info = mlflow.sklearn.log_model(
        sk_model = rf,
        artifact_path = "iris_model",
        signature = signature,
        input_example = X_train,
        registered_model_name = "tracking-quickstart"
    )

Downloading artifacts:   0%|          | 0/7 [00:00<?, ?it/s]

Registered model 'tracking-quickstart' already exists. Creating a new version of this model...
2025/02/26 11:19:49 INFO mlflow.store.model_registry.abstract_store: Waiting up to 300 seconds for model version to finish creation. Model name: tracking-quickstart, version 3


🏃 View run skittish-sow-719 at: http://127.0.0.1:5000/#/experiments/209548166850343233/runs/f0be3302993147499a88aa6f2ba497db
🧪 View experiment at: http://127.0.0.1:5000/#/experiments/209548166850343233


Created version '3' of model 'tracking-quickstart'.
