In [1]:
import numpy as np
from sklearn import datasets
from sklearn.model_selection import train_test_split, GridSearchCV
from sklearn.metrics import accuracy_score, classification_report
from sklearn.linear_model import LogisticRegression
from sklearn.ensemble import RandomForestClassifier
from sklearn.tree import DecisionTreeClassifier
from sklearn.svm import SVC
import mlflow
from mlflow.tracking import MlflowClient



In [2]:
# Prerequisite: a tracking server must already be running, e.g. from a terminal:
#   mlflow server --host 127.0.0.1 --port 5000
mlflow.set_tracking_uri("http://127.0.0.1:5000")

# Look the experiment up by name so an existing one is reused.
experiment_name = "iris_model_experiment"
client = MlflowClient()
experiment = client.get_experiment_by_name(experiment_name)

if experiment is not None:
    # Found: reuse its ID.
    experiment_id = experiment.experiment_id
    print(f"Using existing experiment: {experiment_name} with ID: {experiment_id}")
else:
    # Not found: create it and keep the new ID.
    experiment_id = client.create_experiment(experiment_name)
    print(f"Created new experiment: {experiment_name} with ID: {experiment_id}")

# Activate the experiment; as the cell's last expression its repr is displayed.
mlflow.set_experiment(experiment_name)



Using existing experiment: iris_model_experiment with ID: 235202613938940406


<Experiment: artifact_location='mlflow-artifacts:/235202613938940406', creation_time=1745139652040, experiment_id='235202613938940406', last_update_time=1745139652040, lifecycle_stage='active', name='iris_model_experiment', tags={}>

In [3]:
# Fetch the Iris data; features are cast to float32, labels are kept as loaded.
iris = datasets.load_iris()
X, y = iris.data.astype(np.float32), iris.target

# Hold out 30% of the rows for testing; the fixed seed keeps the split repeatable.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.3,
    random_state=1,
)

# One-row, 2-D copy of the first training sample, used as the MLflow input example.
input_example = X_train[:1].copy()



In [5]:
# Define the models and their hyperparameters
#
# Seed shared by every estimator that draws random numbers, so repeated
# executions of the notebook produce the same tuned models and metrics.
# The value matches the seed used for the train/test split.
RANDOM_STATE = 1

# Maps a display name to an estimator ("model") and the hyperparameter grid
# ("params") that GridSearchCV explores for it. The display name is also used
# as the prefix of the logged parameter, metric and artifact names.
models = {
    "LogisticRegression": {
        # NOTE(review): recent scikit-learn releases reportedly deprecate the
        # liblinear solver for multiclass targets — confirm against the
        # installed version before upgrading.
        "model": LogisticRegression(random_state=RANDOM_STATE),
        "params": {
            "C": [0.1, 1, 10],
            "solver": ["liblinear"]
        }
    },
    "RandomForest": {
        "model": RandomForestClassifier(random_state=RANDOM_STATE),
        "params": {
            "n_estimators": [10, 50, 100],
            "max_depth": [None, 10, 20]
        }
    },
    # The key is misspelled, but the logged names and the model URI used at
    # registration are derived from it, so it is intentionally left unchanged.
    "DesicisionTree": {
        "model": DecisionTreeClassifier(random_state=RANDOM_STATE),
        "params": {
            'max_depth' : [2,3,4,5],
            'min_samples_split': [2,3,4,5],
            'min_samples_leaf': [1,2,3,4,5]
        }
    },
    "SVM": {
        # No seed here: SVC only uses random_state when probability estimates
        # are enabled, which this configuration does not do.
        "model": SVC(),
        "params": {
            "C": [0.1, 1, 10],
            "kernel": ["linear", "rbf"]
        }
    }
}




# Start an MLflow run
#
# Inside a single run: tune every candidate in `models` with a 5-fold grid
# search on the training split, score the tuned estimator on the test split,
# log its parameters, metrics and fitted model, and finally register the
# decision tree model under the name "iris_model".
with mlflow.start_run() as run:
    best_model = None
    best_score = 0
    best_params = None

    for model_name, model_info in models.items():
        clf = GridSearchCV(model_info["model"], model_info["params"], cv=5)
        clf.fit(X_train, y_train)
        y_pred = clf.predict(X_test)
        accuracy = accuracy_score(y_test, y_pred)
        report = classification_report(y_test, y_pred, output_dict=True)

        # Log the winning hyperparameters and the refitted best estimator.
        mlflow.log_param(f"{model_name}_params", clf.best_params_)
        mlflow.sklearn.log_model(
            sk_model=clf.best_estimator_,
            artifact_path=f"{model_name}_model",
            input_example=input_example,
        )

        # Collect all metrics for this model and send them in one batch call
        # instead of one tracking-server request per metric. Names and values
        # are the same as with individual log_metric calls.
        model_metrics = {f"{model_name}_accuracy": float(accuracy)}
        for label, metrics in report.items():
            # Only per-class and averaged entries are dicts; the report's
            # scalar "accuracy" entry is skipped (it is already logged above).
            if isinstance(metrics, dict):
                for metric_name, value in metrics.items():
                    model_metrics[f"{model_name}_{label}_{metric_name}"] = float(value)
        mlflow.log_metrics(model_metrics)

        print(f"{model_name} accuracy: {accuracy}")

        # Strict comparison: on ties the earlier model in `models` is kept.
        if accuracy > best_score:
            best_model = model_name
            best_score = accuracy
            best_params = clf.best_params_

    print(f"Best model: {best_model} with accuracy: {best_score} and params: {best_params}")

    # Retrieve the run ID
    run_id = run.info.run_id

    # Register the decision tree model
    # The artifact path must match the (misspelled) "DesicisionTree" key of
    # `models`, because that key names the logged model artifact.
    # NOTE(review): the decision tree is registered even when another model
    # scores higher on the test split — confirm this is intended.
    model_uri = f"runs:/{run_id}/DesicisionTree_model"
    mlflow.register_model(model_uri, "iris_model")

    print("DecicisionTreeModel registered with name: iris_model.")




Downloading artifacts:   0%|          | 0/7 [00:00<?, ?it/s]

LogisticRegression accuracy: 0.9555555555555556


Downloading artifacts:   0%|          | 0/7 [00:00<?, ?it/s]

RandomForest accuracy: 0.9555555555555556


Downloading artifacts:   0%|          | 0/7 [00:00<?, ?it/s]

DesicisionTree accuracy: 0.9555555555555556


Downloading artifacts:   0%|          | 0/7 [00:00<?, ?it/s]

SVM accuracy: 1.0
Best model: SVM with accuracy: 1.0 and params: {'C': 1, 'kernel': 'linear'}


Registered model 'iris_model' already exists. Creating a new version of this model...
2025/04/21 11:41:03 INFO mlflow.store.model_registry.abstract_store: Waiting up to 300 seconds for model version to finish creation. Model name: iris_model, version 2


DecicisionTreeModel registered with name: iris_model.
🏃 View run kindly-goose-594 at: http://127.0.0.1:5000/#/experiments/235202613938940406/runs/49d06e18a3d547fc82094d02b5bdc620
🧪 View experiment at: http://127.0.0.1:5000/#/experiments/235202613938940406


Created version '2' of model 'iris_model'.
