In [1]:
from sklearn.datasets import load_iris
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LogisticRegression
from sklearn.ensemble import RandomForestClassifier
from sklearn.svm import SVC
from sklearn.metrics import accuracy_score, f1_score, precision_score, recall_score, confusion_matrix
import joblib
import os
import mlflow
import mlflow.sklearn
import matplotlib.pyplot as plt
import seaborn as sns

In [2]:
# Ensure folders exist
os.makedirs("../models", exist_ok=True)
os.makedirs("../results", exist_ok=True)

# Load dataset
iris = load_iris()
X = iris.data
y = iris.target

df = pd.DataFrame(X, columns=iris.feature_names)
df['target'] = y

# Train-test split
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)


In [3]:
# Initialize models
models = {
    "Logistic Regression": LogisticRegression(max_iter=200),
    "Random Forest": RandomForestClassifier(n_estimators=100, random_state=42),
    "SVM": SVC(probability=True)  # probability=True for better logging/plots
}

In [4]:
# Set MLflow experiment
mlflow.set_experiment("iris_model_comparison")

for name, model in models.items():
    with mlflow.start_run(run_name=name):
        # Log model parameters
        if hasattr(model, "get_params"):
            mlflow.log_params(model.get_params())
        
        # Train
        model.fit(X_train, y_train)
        y_pred = model.predict(X_test)

        # Metrics
        acc = accuracy_score(y_test, y_pred)
        f1 = f1_score(y_test, y_pred, average='weighted')
        prec = precision_score(y_test, y_pred, average='weighted')
        rec = recall_score(y_test, y_pred, average='weighted')

        # Print
        print(f"{name}: Accuracy={acc:.3f}, F1={f1:.3f}, Precision={prec:.3f}, Recall={rec:.3f}")

        # Log metrics
        mlflow.log_metric("accuracy", acc)
        mlflow.log_metric("f1_score", f1)
        mlflow.log_metric("precision", prec)
        mlflow.log_metric("recall", rec)

        # Save model locally
        model_path = f"../models/{name.replace(' ', '_').lower()}.pkl"
        joblib.dump(model, model_path)

        # Log model to MLflow
        mlflow.sklearn.log_model(model, name="model")
        
        # Confusion Matrix
        cm = confusion_matrix(y_test, y_pred)
        plt.figure(figsize=(5,4))
        sns.heatmap(cm, annot=True, fmt="d", cmap="Blues",
                    xticklabels=iris.target_names,
                    yticklabels=iris.target_names)
        plt.title(f"Confusion Matrix - {name}")
        plt.xlabel("Predicted")
        plt.ylabel("True")
        plot_path = f"../results/{name.replace(' ', '_').lower()}_cm.png"
        plt.savefig(plot_path)
        plt.close()

        # Log confusion matrix to MLflow
        mlflow.log_artifact(plot_path)

Logistic Regression: Accuracy=1.000, F1=1.000, Precision=1.000, Recall=1.000




Random Forest: Accuracy=1.000, F1=1.000, Precision=1.000, Recall=1.000




SVM: Accuracy=1.000, F1=1.000, Precision=1.000, Recall=1.000


