In [3]:
import sys, mlflow
# Show which interpreter the kernel is running on and which MLflow version it
# imports, one value per line.
print(sys.executable, mlflow.__version__, sep="\n")






C:\Users\JHON WICK\PycharmProjects\mlops-assignment-1-Abdullah\.venv\Scripts\python.exe
3.4.0


In [6]:

import os
import joblib
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
import mlflow
import mlflow.sklearn
from sklearn.datasets import load_iris
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LogisticRegression
from sklearn.ensemble import RandomForestClassifier
from sklearn.svm import SVC
from sklearn.metrics import (
    accuracy_score, precision_score, recall_score, f1_score,
    confusion_matrix
)

# --- Output folders: serialized models and evaluation artefacts ---
for output_dir in ("models", "results"):
    os.makedirs(output_dir, exist_ok=True)

# --- Step 1: load Iris and hold out 20% of the rows for testing (fixed seed) ---
iris = load_iris()
X = iris.data
y = iris.target
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=42
)

# --- Step 2: candidate classifiers, keyed by the display name used for runs/files ---
models = {}
models["Logistic Regression"] = LogisticRegression(max_iter=200)
models["Random Forest"] = RandomForestClassifier(n_estimators=100, random_state=42)
models["SVM"] = SVC(kernel='linear', probability=True)

# --- MLflow: select the experiment that subsequent runs are recorded under ---
mlflow.set_experiment("mlops-assignment-1")

# 3. Train, evaluate, save
# Each entry is [model name, accuracy, precision, recall, f1]; the list is
# turned into the metrics table after the loop.
results = []

for name, model in models.items():
    # File-system friendly form of the display name, shared by the model file
    # and the confusion-matrix image.
    file_stem = name.replace(" ", "_")

    # One MLflow run per model, scoped by the `with` block.
    with mlflow.start_run(run_name=name):
        model.fit(X_train, y_train)
        y_pred = model.predict(X_test)

        # Precision/recall/F1 are requested with average="weighted".
        acc = accuracy_score(y_test, y_pred)
        prec = precision_score(y_test, y_pred, average="weighted")
        rec = recall_score(y_test, y_pred, average="weighted")
        f1 = f1_score(y_test, y_pred, average="weighted")

        results.append([name, acc, prec, rec, f1])

        # Save trained model locally
        model_path = f"models/{file_stem}.joblib"
        joblib.dump(model, model_path)

        # Save confusion matrix as plot
        cm = confusion_matrix(y_test, y_pred)
        cm_path = f"results/{file_stem}_confusion_matrix.png"
        fig, ax = plt.subplots(figsize=(5, 4))
        try:
            sns.heatmap(cm, annot=True, fmt="d", cmap="Blues",
                        xticklabels=iris.target_names,
                        yticklabels=iris.target_names,
                        ax=ax)
            ax.set_title(f"Confusion Matrix - {name}")
            ax.set_ylabel("True Label")
            ax.set_xlabel("Predicted Label")
            fig.tight_layout()
            fig.savefig(cm_path)
        finally:
            # Close the figure even when plotting or saving fails, so open
            # figures do not accumulate in the kernel.
            plt.close(fig)

        # === MLflow Logging ===
        mlflow.log_params(model.get_params())
        mlflow.log_metrics({
            "accuracy": acc,
            "precision": prec,
            "recall": rec,
            "f1_score": f1
        })
        mlflow.log_artifact(cm_path)
        # MLflow 3 deprecates `artifact_path` for log_model; `name` is the
        # replacement keyword.
        mlflow.sklearn.log_model(model, name="model")

# 4. Save metrics table
# One row per trained model, written to CSV without the index column.
results_df = pd.DataFrame(
    results,
    columns=["Model", "Accuracy", "Precision", "Recall", "F1-score"]
)
metrics_path = "results/metrics.csv"
results_df.to_csv(metrics_path, index=False)

# Log the metrics table to MLflow.
# No run is active here (every per-model run was closed by its `with` block),
# so a bare mlflow.log_artifact() would implicitly start a new run and leave it
# active for the rest of the kernel session. Use an explicit run that is ended
# when the block exits.
with mlflow.start_run(run_name="metrics-summary"):
    mlflow.log_artifact(metrics_path)

print(" Models saved in /models")
print(" Metrics and confusion matrices saved in /results")
print(" Metrics & artifacts logged to MLflow")



 Models saved in /models
 Metrics and confusion matrices saved in /results
 Metrics & artifacts logged to MLflow


In [17]:
!where python
!python -m pip show mlflow


C:\Users\JHON WICK\AppData\Local\Programs\Python\Python313\python.exe
C:\Users\JHON WICK\AppData\Local\Microsoft\WindowsApps\python.exe
Name: mlflow
Version: 3.3.2
Summary: MLflow is an open source platform for the complete machine learning lifecycle
Home-page: https://mlflow.org
Author: 
Author-email: 
License: Copyright 2018 Databricks, Inc.  All rights reserved.

                                Apache License
                           Version 2.0, January 2004
                        http://www.apache.org/licenses/

   TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION

   1. Definitions.

      "License" shall mean the terms and conditions for use, reproduction,
      and distribution as defined by Sections 1 through 9 of this document.

      "Licensor" shall mean the copyright owner or entity authorized by
      the copyright owner that is granting the License.

      "Legal Entity" shall mean the union of the acting entity and all
      other entities that control, ar

In [8]:
import os
import joblib
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
import mlflow
import mlflow.sklearn
from sklearn.datasets import load_iris
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LogisticRegression
from sklearn.ensemble import RandomForestClassifier
from sklearn.svm import SVC
from sklearn.metrics import (
    accuracy_score, precision_score, recall_score, f1_score,
    confusion_matrix
)
from mlflow.models.signature import infer_signature

# --- Output folders: serialized models and evaluation artefacts ---
for output_dir in ("models", "results"):
    os.makedirs(output_dir, exist_ok=True)

# --- Step 1: load Iris and hold out 20% of the rows for testing (fixed seed) ---
iris = load_iris()
X = iris.data
y = iris.target
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=42
)

# --- Step 2: candidate classifiers, keyed by the display name used for runs/files ---
models = {}
models["Logistic Regression"] = LogisticRegression(max_iter=200)
models["Random Forest"] = RandomForestClassifier(n_estimators=100, random_state=42)
models["SVM"] = SVC(kernel='linear', probability=True)

# --- MLflow: select the experiment that subsequent runs are recorded under ---
mlflow.set_experiment("mlops-assignment-1")

# --- Step 3: fit each candidate, score it, persist it and register it ---
# Rows are [model name, accuracy, precision, recall, f1].
results = []

# End any run that an earlier cell left active before opening new ones.
mlflow.end_run()

for name, model in models.items():
    # Underscore form of the display name: used for file names and as the
    # registered model name.
    file_stem = name.replace(" ", "_")
    try:
        with mlflow.start_run(run_name=name):
            model.fit(X_train, y_train)
            y_pred = model.predict(X_test)

            # Accuracy takes no averaging argument; the other three scorers are
            # all called with average="weighted".
            acc = accuracy_score(y_test, y_pred)
            prec, rec, f1 = (
                scorer(y_test, y_pred, average="weighted")
                for scorer in (precision_score, recall_score, f1_score)
            )
            results.append([name, acc, prec, rec, f1])

            # Local copy of the fitted estimator.
            model_path = f"models/{file_stem}.joblib"
            joblib.dump(model, model_path)

            # Confusion-matrix heatmap written to disk as a PNG.
            cm = confusion_matrix(y_test, y_pred)
            cm_path = f"results/{file_stem}_confusion_matrix.png"
            fig, ax = plt.subplots(figsize=(5, 4))
            sns.heatmap(
                cm,
                annot=True,
                fmt="d",
                cmap="Blues",
                xticklabels=iris.target_names,
                yticklabels=iris.target_names,
                ax=ax,
            )
            ax.set_title(f"Confusion Matrix - {name}")
            ax.set_ylabel("True Label")
            ax.set_xlabel("Predicted Label")
            fig.tight_layout()
            fig.savefig(cm_path)
            plt.close(fig)

            # Record hyper-parameters, scores and the plot on the active run.
            mlflow.log_params(model.get_params())
            logged_metrics = {
                "accuracy": acc,
                "precision": prec,
                "recall": rec,
                "f1_score": f1,
            }
            mlflow.log_metrics(logged_metrics)
            mlflow.log_artifact(cm_path)

            # Signature inferred from the training inputs and the model's
            # predictions on them.
            train_predictions = model.predict(X_train)
            signature = infer_signature(X_train, train_predictions)

            # Log the model to the run and register it under the underscore name.
            mlflow.sklearn.log_model(
                sk_model=model,
                name="model",
                input_example=X_test[:5],
                signature=signature,
                registered_model_name=file_stem,
            )
    finally:
        # Make sure no run stays active, whether or not the body raised.
        mlflow.end_run()

# 4. Save metrics table
# One row per trained model, written to CSV without the index column.
results_df = pd.DataFrame(
    results,
    columns=["Model", "Accuracy", "Precision", "Recall", "F1-score"]
)
metrics_path = "results/metrics.csv"
results_df.to_csv(metrics_path, index=False)

# No run is active after the training loop (each iteration ends its run), so a
# bare mlflow.log_artifact() would implicitly start a new run and leave it
# active. Log the table inside an explicit run that ends with the block.
with mlflow.start_run(run_name="metrics-summary"):
    mlflow.log_artifact(metrics_path)

# === Best Model Selection ===
# idxmax returns the label of the first row holding the maximum accuracy, so
# ties are resolved deterministically in favour of the model listed first
# (sort_values with its default sort kind gives no such guarantee).
best_model = results_df.loc[results_df["Accuracy"].idxmax()]
print("\n=== Best Model Selection ===")
print("Best Model:", best_model["Model"])
print("Accuracy:", best_model["Accuracy"])
print("Metrics saved in results/metrics.csv")
print("This model has been registered in MLflow Model Registry with versioning.")


Successfully registered model 'Logistic_Regression'.
Created version '1' of model 'Logistic_Regression'.
Successfully registered model 'Random_Forest'.
Created version '1' of model 'Random_Forest'.
Successfully registered model 'SVM'.
Created version '1' of model 'SVM'.



=== Best Model Selection ===
Best Model: Logistic Regression
Accuracy: 1.0
Metrics saved in results/metrics.csv
This model has been registered in MLflow Model Registry with versioning.
