In [34]:
import numpy as np
from sklearn import datasets
from sklearn.model_selection import train_test_split, GridSearchCV
from sklearn.metrics import accuracy_score, classification_report
from sklearn.linear_model import LogisticRegression
from sklearn.ensemble import RandomForestClassifier
from sklearn.tree import DecisionTreeClassifier
from sklearn.svm import SVC
import mlflow
from mlflow.tracking import MlflowClient



In [35]:
# start mlflow in terminal: mlflow server --host 127.0.0.1 --port 5000
# Set MLflow tracking URI
mlflow.set_tracking_uri("http://127.0.0.1:5000")

# Check if the experiment exists
experiment_name = "iris_models_experiment"
client = MlflowClient()
experiment = client.get_experiment_by_name(experiment_name)

if experiment is None:
    # Create the experiment if it does not exist
    experiment_id = client.create_experiment(experiment_name)
    print(f"Created new experiment: {experiment_name} with ID: {experiment_id}")
else:
    # Use the existing experiment
    experiment_id = experiment.experiment_id
    print(f"Using existing experiment: {experiment_name} with ID: {experiment_id}")

# Set the experiment
mlflow.set_experiment(experiment_name)



Using existing experiment: iris_models_experiment with ID: 124924383425761890


<Experiment: artifact_location='mlflow-artifacts:/124924383425761890', creation_time=1745066609852, experiment_id='124924383425761890', last_update_time=1745066609852, lifecycle_stage='active', name='iris_models_experiment', tags={}>

In [36]:
# Load the Iris dataset
iris = datasets.load_iris()
X = iris.data.astype(np.float32)
y = iris.target

# Split the dataset into training and testing sets
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.3, random_state=1)

# Define an input example for MLflow logging
input_example = np.array([X_train[0]])



In [54]:
# Define the models and their hyperparameters
models = {
    "LogisticRegression": {
        "model": LogisticRegression(),
        "params": {
            "C": [0.1, 1, 10],
            "solver": ["liblinear"]
        }
    },
    "RandomForest": {
        "model": RandomForestClassifier(),
        "params": {
            "n_estimators": [10, 50, 100],
            "max_depth": [None, 10, 20]
        }
    },
    "DesicisionTree": {
        "model": DecisionTreeClassifier(),
        "params": {
            'max_depth' : [2,3,4,5],
            'min_samples_split': [2,3,4,5],
            'min_samples_leaf': [1,2,3,4,5]
        }
    },
    "SVM": {
        "model": SVC(),
        "params": {
            "C": [0.1, 1, 10],
            "kernel": ["linear", "rbf"]
        }
    }
}




# Start an MLflow run
with mlflow.start_run():
    best_model = None
    best_score = 0
    best_params = None

    for model_name, model_info in models.items():
        clf = GridSearchCV(model_info["model"], model_info["params"], cv=5)
        clf.fit(X_train, y_train)
        y_pred = clf.predict(X_test)
        accuracy = accuracy_score(y_test, y_pred)
        report = classification_report(y_test, y_pred, output_dict=True)
        


        # Log parameters, metrics, and the model with MLflow
        mlflow.log_param(f"{model_name}_params", clf.best_params_)
        mlflow.log_metric(f"{model_name}_accuracy", accuracy)
        mlflow.sklearn.log_model(sk_model=clf.best_estimator_, artifact_path=f"{model_name}_model", input_example=input_example)

        for label, metrics in report.items():
            if isinstance(metrics, dict):
                for metric_name, value in metrics.items():
                    mlflow.log_metric(f"{model_name}_{label}_{metric_name}", value)

        print(f"{model_name} accuracy: {accuracy}")

        if accuracy > best_score:
            best_model = model_name
            best_score = accuracy
            best_params = clf.best_params_

    print(f"Best model: {best_model} with accuracy: {best_score} and params: {best_params}")

# To view results in MLflow UI, run:
# mlflow server --backend-store-uri sqlite:///mlflow.db --default-artifact-root ./mlruns --host 127.0.0.1 --port 5000

Downloading artifacts:   0%|          | 0/7 [00:00<?, ?it/s]

LogisticRegression accuracy: 0.9555555555555556


Downloading artifacts:   0%|          | 0/7 [00:00<?, ?it/s]

RandomForest accuracy: 0.9555555555555556


Downloading artifacts:   0%|          | 0/7 [00:00<?, ?it/s]

DesicisionTree accuracy: 0.9555555555555556


Downloading artifacts:   0%|          | 0/7 [00:00<?, ?it/s]

SVM accuracy: 1.0
Best model: SVM with accuracy: 1.0 and params: {'C': 1, 'kernel': 'linear'}
🏃 View run mysterious-hog-944 at: http://127.0.0.1:5000/#/experiments/124924383425761890/runs/fb815fb22b184623a0b85c1063dbccce
🧪 View experiment at: http://127.0.0.1:5000/#/experiments/124924383425761890
