In [None]:
import numpy as np
import pandas as pd
from sklearn.datasets import load_iris
from sklearn.model_selection import train_test_split
from sklearn.neighbors import KNeighborsClassifier
from sklearn.metrics import accuracy_score, confusion_matrix, classification_report
import mlflow
import mlflow.sklearn
import random
import os


# Single seed shared by every source of randomness in this experiment.
SEED = 42

# NOTE(review): the interpreter normally reads PYTHONHASHSEED at startup, so
# this assignment presumably only influences processes spawned later --
# confirm that this is the intent.
os.environ["PYTHONHASHSEED"] = f"{SEED}"

# Seed both the stdlib and the NumPy global generators.
random.seed(SEED)
np.random.seed(SEED)

# Load the Iris dataset and separate the feature matrix from the labels.
iris = load_iris()
X = iris.data
y = iris.target

# Hold out 20% of the samples for evaluation; the split is seeded with SEED.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=SEED,
)

# Hyperparameter grid: each distance metric is combined with each k below.
k_values = list(range(1, 10, 2))  # 1, 3, 5, 7, 9
distance_metrics = ["euclidean", "manhattan", "chebyshev"]

# One dict per trained model (K, Metric, Accuracy), filled during the sweep.
results = []

# Select the MLflow experiment that all runs of this sweep are recorded under.
mlflow.set_experiment("KNN_Iris_Experiment")

# Grid sweep: fit and evaluate one KNN classifier per (metric, k) pair, each
# inside its own MLflow run so the combinations can be compared afterwards.
for metric in distance_metrics:
    for k in k_values:
        with mlflow.start_run(run_name=f"KNN_k={k}_{metric}"):

            # Train model
            knn = KNeighborsClassifier(n_neighbors=k, metric=metric)
            knn.fit(X_train, y_train)

            # Predict on the held-out split
            y_pred = knn.predict(X_test)

            # Evaluate
            acc = accuracy_score(y_test, y_pred)
            cm = confusion_matrix(y_test, y_pred)
            report = classification_report(y_test, y_pred, output_dict=True)

            # Store results for the in-memory summary table
            results.append({
                "K": k,
                "Metric": metric,
                "Accuracy": acc,
            })

            # Log hyperparameters and the headline metric with MLflow
            mlflow.log_param("K", k)
            mlflow.log_param("Metric", metric)
            mlflow.log_metric("Accuracy", acc)
            # Persist the per-class report with the run so those metrics are
            # not lost once the loop moves on to the next configuration.
            mlflow.log_dict(report, "classification_report.json")
            mlflow.sklearn.log_model(knn, f"knn_model_k{k}_{metric}")

            print(f"[INFO] k={k}, metric={metric}, accuracy={acc:.4f}")
            print("Confusion Matrix:\n", cm)

# Turn the collected per-run records into a table and print it under a header.
df_results = pd.DataFrame(results)
print("\nSummary of Results:", df_results, sep="\n")
