In [11]:
import os
from pathlib import Path

import joblib
import matplotlib.pyplot as plt
import mlflow
import mlflow.sklearn
import pandas as pd
import seaborn as sns
from sklearn.datasets import load_iris
from sklearn.ensemble import RandomForestClassifier
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import accuracy_score, f1_score, precision_score, recall_score, confusion_matrix
from sklearn.model_selection import train_test_split
from sklearn.svm import SVC

In [12]:
# ---------------------------
# MLflow Centralized Setup
# ---------------------------
# Keep the file-based tracking store in the project-level `mlruns` folder,
# addressed relative to the working directory instead of through a
# user-specific absolute path.
# NOTE(review): this assumes the notebook runs from a direct subfolder of the
# project root (the same assumption the "../models" and "../results" paths in
# this notebook make) — confirm before relying on it.
MLRUNS_DIR = Path("../mlruns").resolve()

# Path.as_uri() builds a well-formed file:// URI and percent-encodes characters
# such as spaces, which are not valid in a raw URI.
mlflow.set_tracking_uri(MLRUNS_DIR.as_uri())
mlflow.set_experiment("iris_model_comparison")
# ---------------------------

Traceback (most recent call last):
  File "C:\Users\Ausaf raza\AppData\Local\Packages\PythonSoftwareFoundation.Python.3.13_qbz5n2kfra8p0\LocalCache\local-packages\Python313\site-packages\mlflow\store\tracking\file_store.py", line 367, in search_experiments
    exp = self._get_experiment(exp_id, view_type)
  File "C:\Users\Ausaf raza\AppData\Local\Packages\PythonSoftwareFoundation.Python.3.13_qbz5n2kfra8p0\LocalCache\local-packages\Python313\site-packages\mlflow\store\tracking\file_store.py", line 465, in _get_experiment
    meta = FileStore._read_yaml(experiment_dir, FileStore.META_DATA_FILE_NAME)
  File "C:\Users\Ausaf raza\AppData\Local\Packages\PythonSoftwareFoundation.Python.3.13_qbz5n2kfra8p0\LocalCache\local-packages\Python313\site-packages\mlflow\store\tracking\file_store.py", line 1635, in _read_yaml
    return _read_helper(root, file_name, attempts_remaining=retries)
  File "C:\Users\Ausaf raza\AppData\Local\Packages\PythonSoftwareFoundation.Python.3.13_qbz5n2kfra8p0\LocalCach

<Experiment: artifact_location='file:///C:/Users/Ausaf raza/mlops-assignment-1/mlruns/868616821754101766', creation_time=1758212798089, experiment_id='868616821754101766', last_update_time=1758212798089, lifecycle_stage='active', name='iris_model_comparison', tags={}>

In [13]:
# Make sure the output folders exist (no error if they are already there)
for output_dir in ("../models", "../results"):
    os.makedirs(output_dir, exist_ok=True)

# Iris dataset: feature matrix and class labels
iris = load_iris()
X, y = iris.data, iris.target

# Hold out 20% of the samples for evaluation; the fixed seed makes the split
# reproducible across runs
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=42,
)


In [14]:
# Candidate classifiers to compare, keyed by the display name that is later
# used as the MLflow run name and as the basis for the output file names.
models = {
    "Logistic Regression": LogisticRegression(max_iter=200),
    "Random Forest": RandomForestClassifier(n_estimators=100, random_state=42),
    # probability=True enables predict_proba on the fitted model. The
    # probability estimates involve shuffling the data, so the seed is fixed
    # to keep them reproducible between runs.
    "SVM": SVC(probability=True, random_state=42),
}

In [15]:

# Train, evaluate and track every candidate model in its own MLflow run.
for name, model in models.items():
    # File-name friendly form of the display name,
    # e.g. "Random Forest" -> "random_forest". Computed once and reused for
    # both the pickled model and the confusion-matrix image.
    slug = name.replace(" ", "_").lower()

    with mlflow.start_run(run_name=name):
        # Log the hyperparameters the estimator was configured with
        mlflow.log_params(model.get_params())

        # Train on the training split, predict on the held-out split
        model.fit(X_train, y_train)
        y_pred = model.predict(X_test)

        # Metrics (precision/recall/F1 are support-weighted across classes)
        acc = accuracy_score(y_test, y_pred)
        f1 = f1_score(y_test, y_pred, average='weighted')
        prec = precision_score(y_test, y_pred, average='weighted')
        rec = recall_score(y_test, y_pred, average='weighted')

        # Print
        print(f"{name}: Accuracy={acc:.3f}, F1={f1:.3f}, Precision={prec:.3f}, Recall={rec:.3f}")

        # Log all metrics in a single call
        mlflow.log_metrics(
            {
                "accuracy": acc,
                "f1_score": f1,
                "precision": prec,
                "recall": rec,
            }
        )

        # Save model locally
        model_path = f"../models/{slug}.pkl"
        joblib.dump(model, model_path)

        # Log model to MLflow. Passing one test row as the input example lets
        # MLflow store an example and infer the model signature.
        mlflow.sklearn.log_model(model, name="model", input_example=X_test[:1])

        # Confusion matrix. The label list is passed explicitly so the matrix
        # always has one row/column per class and lines up with the tick
        # labels, even if some class is absent from the test split.
        class_labels = list(range(len(iris.target_names)))
        cm = confusion_matrix(y_test, y_pred, labels=class_labels)

        fig, ax = plt.subplots(figsize=(5, 4))
        sns.heatmap(
            cm,
            annot=True,
            fmt="d",
            cmap="Blues",
            xticklabels=iris.target_names,
            yticklabels=iris.target_names,
            ax=ax,
        )
        ax.set_title(f"Confusion Matrix - {name}")
        ax.set_xlabel("Predicted")
        ax.set_ylabel("True")

        plot_path = f"../results/{slug}_cm.png"
        fig.savefig(plot_path)
        # Close this specific figure so figures do not accumulate across iterations
        plt.close(fig)

        # Log confusion matrix to MLflow
        mlflow.log_artifact(plot_path)

Logistic Regression: Accuracy=1.000, F1=1.000, Precision=1.000, Recall=1.000




Random Forest: Accuracy=1.000, F1=1.000, Precision=1.000, Recall=1.000




SVM: Accuracy=1.000, F1=1.000, Precision=1.000, Recall=1.000




In [16]:

# The Random Forest is taken as the best model. This is an assumption made
# here, not a selection computed from the logged metrics.
model_name = "iris_best_model"
best_model = models["Random Forest"]

with mlflow.start_run(run_name="Best_Model_Registration"):
    # A single test row is stored with the logged model as its input example
    input_example = X_test[:1]

    # Passing registered_model_name registers the logged model in the
    # Model Registry under that name
    mlflow.sklearn.log_model(
        sk_model=best_model,
        name="model",
        input_example=input_example,
        registered_model_name=model_name,
    )

    # Confusion matrix of the chosen model on the held-out test split
    y_pred = best_model.predict(X_test)
    cm = confusion_matrix(y_test, y_pred)

    fig, ax = plt.subplots(figsize=(5, 4))
    sns.heatmap(
        cm,
        annot=True,
        fmt="d",
        cmap="Blues",
        xticklabels=iris.target_names,
        yticklabels=iris.target_names,
        ax=ax,
    )
    ax.set_title(f"Confusion Matrix - {model_name}")
    ax.set_xlabel("Predicted")
    ax.set_ylabel("True")

    # Write the plot to disk, then release the figure
    plot_path = "../results/best_model_confusion_matrix.png"
    fig.savefig(plot_path)
    plt.close(fig)

    # Attach the saved image to the current MLflow run
    mlflow.log_artifact(plot_path)

    print(f"Registered model '{model_name}' in MLflow Model Registry with confusion matrix artifact")


Downloading artifacts: 100%|██████████| 7/7 [00:00<00:00, 432.71it/s]  
Successfully registered model 'iris_best_model'.
Created version '1' of model 'iris_best_model'.


Registered model 'iris_best_model' in MLflow Model Registry with confusion matrix artifact
