In [16]:
import os
import joblib
import pandas as pd
from sklearn.datasets import load_iris
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.linear_model import LogisticRegression
from sklearn.ensemble import RandomForestClassifier
from sklearn.svm import SVC
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score

os.makedirs("models", exist_ok=True)
os.makedirs("results", exist_ok=True)

iris = load_iris()
X = iris.data
y = iris.target
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42, stratify=y)

scaler = StandardScaler()
X_train = scaler.fit_transform(X_train)
X_test = scaler.transform(X_test)

def evaluate_and_save(model, model_name):
    model.fit(X_train, y_train)
    y_pred = model.predict(X_test)

    acc = accuracy_score(y_test, y_pred)
    prec = precision_score(y_test, y_pred, average="weighted")
    rec = recall_score(y_test, y_pred, average="weighted")
    f1 = f1_score(y_test, y_pred, average="weighted")

    model_path = os.path.join("models", f"{model_name}.pkl")
    joblib.dump(model, model_path)

    print(f"\n{model_name} Results (saved at {model_path}):")
    print(f"Accuracy: {acc:.3f}, Precision: {prec:.3f}, Recall: {rec:.3f}, F1-score: {f1:.3f}")

    return [model_name, acc, prec, rec, f1]

log_reg = LogisticRegression(max_iter=200)
log_reg_results = evaluate_and_save(log_reg, "logistic_regression")

rf = RandomForestClassifier(n_estimators=100, random_state=42)
rf_results = evaluate_and_save(rf, "random_forest")

svm = SVC(kernel="linear", probability=True, random_state=42)
svm_results = evaluate_and_save(svm, "svm")

results = [log_reg_results, rf_results, svm_results]
df_results = pd.DataFrame(results, columns=["Model", "Accuracy", "Precision", "Recall", "F1-score"])
df_results.to_csv("results/model_comparison.csv", index=False)

print("\nComparison table saved to results/model_comparison.csv")
print(df_results)


logistic_regression Results (saved at models\logistic_regression.pkl):
Accuracy: 0.933, Precision: 0.933, Recall: 0.933, F1-score: 0.933

random_forest Results (saved at models\random_forest.pkl):
Accuracy: 0.900, Precision: 0.902, Recall: 0.900, F1-score: 0.900

svm Results (saved at models\svm.pkl):
Accuracy: 1.000, Precision: 1.000, Recall: 1.000, F1-score: 1.000

Comparison table saved to results/model_comparison.csv
                 Model  Accuracy  Precision    Recall  F1-score
0  logistic_regression  0.933333   0.933333  0.933333  0.933333
1        random_forest  0.900000   0.902357  0.900000  0.899749
2                  svm  1.000000   1.000000  1.000000  1.000000


In [5]:
import mlflow
import mlflow.sklearn
import matplotlib.pyplot as plt
import seaborn as sns
import os
import joblib

from sklearn.datasets import load_iris
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LogisticRegression
from sklearn.ensemble import RandomForestClassifier
from sklearn.svm import SVC
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score, confusion_matrix


os.makedirs("models", exist_ok=True)
os.makedirs("results", exist_ok=True)

iris = load_iris()
X, y = iris.data, iris.target
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

def train_and_log(model, model_name, params=None):
    with mlflow.start_run(run_name=model_name):
        model.fit(X_train, y_train)
        y_pred = model.predict(X_test)
        acc = accuracy_score(y_test, y_pred)
        prec = precision_score(y_test, y_pred, average="macro")
        rec = recall_score(y_test, y_pred, average="macro")
        f1 = f1_score(y_test, y_pred, average="macro")
        if params:
            for k, v in params.items():
                mlflow.log_param(k, v)
        mlflow.log_metric("accuracy", acc)
        mlflow.log_metric("precision", prec)
        mlflow.log_metric("recall", rec)
        mlflow.log_metric("f1_score", f1)

        model_path = f"models/{model_name}.pkl"
        joblib.dump(model, model_path)
        mlflow.log_artifact(model_path, artifact_path="models")

        cm = confusion_matrix(y_test, y_pred)
        plt.figure(figsize=(5,4))
        sns.heatmap(cm, annot=True, fmt="d", cmap="Blues",
                    xticklabels=iris.target_names,
                    yticklabels=iris.target_names)
        plt.title(f"Confusion Matrix - {model_name}")
        plt.xlabel("Predicted")
        plt.ylabel("True")
        plot_path = f"results/{model_name}_cm.png"
        plt.savefig(plot_path)
        plt.close()
        mlflow.log_artifact(plot_path, artifact_path="plots")

        print(f"{model_name} -> Accuracy: {acc:.4f}, Precision: {prec:.4f}, Recall: {rec:.4f}, F1: {f1:.4f}")


train_and_log(LogisticRegression(max_iter=200), "LogisticRegression", {"max_iter": 200})
train_and_log(RandomForestClassifier(n_estimators=100, random_state=42), "RandomForest", {"n_estimators": 100})
train_and_log(SVC(kernel="linear"), "SVM", {"kernel": "linear"})

LogisticRegression -> Accuracy: 1.0000, Precision: 1.0000, Recall: 1.0000, F1: 1.0000
RandomForest -> Accuracy: 1.0000, Precision: 1.0000, Recall: 1.0000, F1: 1.0000
SVM -> Accuracy: 1.0000, Precision: 1.0000, Recall: 1.0000, F1: 1.0000


In [8]:
import os
import joblib
import mlflow
import mlflow.sklearn
import matplotlib.pyplot as plt
import seaborn as sns
import pandas as pd
from sklearn.datasets import load_iris
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.linear_model import LogisticRegression
from sklearn.ensemble import RandomForestClassifier
from sklearn.svm import SVC
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score, confusion_matrix

# Load dataset
iris = load_iris()
X = iris.data
y = iris.target
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=42, stratify=y
)

# Scale data
scaler = StandardScaler()
X_train = scaler.fit_transform(X_train)
X_test = scaler.transform(X_test)

# Ensure directories exist
os.makedirs("../models", exist_ok=True)
os.makedirs("../results", exist_ok=True)

# Models to train
models = {
    "Logistic Regression": LogisticRegression(max_iter=200),
    "Random Forest": RandomForestClassifier(n_estimators=100, random_state=42),
    "SVM": SVC(kernel="linear", probability=True, random_state=42)
}

# Start MLflow experiment
mlflow.set_experiment("Iris-Models")

for model_name, model in models.items():
    with mlflow.start_run(run_name=model_name):
        # Train model
        model.fit(X_train, y_train)
        y_pred = model.predict(X_test)

        # Metrics
        acc = accuracy_score(y_test, y_pred)
        prec = precision_score(y_test, y_pred, average="weighted")
        rec = recall_score(y_test, y_pred, average="weighted")
        f1 = f1_score(y_test, y_pred, average="weighted")

        # Log metrics
        mlflow.log_param("model", model_name)
        if model_name == "Random Forest":
            mlflow.log_param("n_estimators", 100)
        if model_name == "Logistic Regression":
            mlflow.log_param("max_iter", 200)
        if model_name == "SVM":
            mlflow.log_param("kernel", "linear")

        mlflow.log_metric("accuracy", acc)
        mlflow.log_metric("precision", prec)
        mlflow.log_metric("recall", rec)
        mlflow.log_metric("f1_score", f1)

        # Save model
        model_path = f"../models/{model_name.replace(' ', '_').lower()}.pkl"
        joblib.dump(model, model_path)
        mlflow.log_artifact(model_path)

        # Confusion Matrix
        cm = confusion_matrix(y_test, y_pred)
        plt.figure(figsize=(5, 4))
        sns.heatmap(cm, annot=True, fmt="d", cmap="Blues",
                    xticklabels=iris.target_names,
                    yticklabels=iris.target_names)
        plt.title(f"{model_name} - Confusion Matrix")
        plt.xlabel("Predicted")
        plt.ylabel("True")

        cm_path = f"../results/{model_name.replace(' ', '_').lower()}_cm.png"
        plt.savefig(cm_path)
        plt.close()
        mlflow.log_artifact(cm_path)

        print(f"{model_name} logged with Accuracy={acc:.3f}, F1={f1:.3f}")


2025/09/16 22:04:12 INFO mlflow.tracking.fluent: Experiment with name 'Iris-Models' does not exist. Creating a new experiment.


Logistic Regression logged with Accuracy=0.933, F1=0.933
Random Forest logged with Accuracy=0.900, F1=0.900
SVM logged with Accuracy=1.000, F1=1.000
