# MLOps Example Notebook - Iris Classifier

This notebook demonstrates the required cell tags for the MLOps platform.
Each code cell has a `tags` entry in its metadata that the platform uses
to identify pipeline phases.

**Required tags:** `mlops:config`, `mlops:preprocessing`, `mlops:training`, `mlops:export`

**Optional tags:** `mlops:data`, `mlops:evaluation`

In [None]:
# Papermill injected parameters (do not edit this cell manually)
# These are overwritten at runtime by the pipeline.
# The literals below are the values in effect when nothing overrides them
# (presumably a local/development run, given "local-dev" - confirm).
# File path the export cell passes to joblib.dump() for the trained model.
MODEL_OUTPUT_PATH = "./model.joblib"
# Run identifier; within this notebook it is only printed by the config cell.
PIPELINE_ID = "local-dev"
# Tracking server address handed to mlflow.set_tracking_uri() in the
# evaluation cell.
MLFLOW_TRACKING_URI = "http://localhost:5000"

In [None]:
# ============================================================
# mlops:config - Model metadata
# ============================================================
# MODEL_NAME and VERSION identify the model; set them for your project.
# The platform reads these values to register the model, so keep them
# as plain literal assignments.

MODEL_NAME = "iris-classifier"
VERSION = "1"

# Echo the run identity so it is visible in the executed notebook output.
print(
    f"Model: {MODEL_NAME} v{VERSION}",
    f"Pipeline ID: {PIPELINE_ID}",
    sep="\n",
)

In [None]:
# ============================================================
# mlops:data - Data loading
# ============================================================
# Replace this with your own data loading logic.
# The platform does not require a specific data source.
# Later cells expect three names from this cell: X (feature frame),
# y (label series) and iris (used for the class names in reports).

from sklearn.datasets import load_iris
import pandas as pd

iris = load_iris()
X = pd.DataFrame(iris.data, columns=iris.feature_names)
y = pd.Series(iris.target, name="target")

print(f"Dataset shape: {X.shape}")
# target_names is a NumPy string array; .tolist() converts its items to
# plain Python str. list(...) would keep NumPy scalars, which print as
# np.str_('setosa') under NumPy >= 2.0.
print(f"Classes: {iris.target_names.tolist()}")

In [None]:
# ============================================================
# mlops:preprocessing - Data preparation
# ============================================================
# Feature engineering, splitting and scaling belong in this cell.

from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler

# Hold out 20% of the rows, stratified on the label, with a fixed seed
# so the split is reproducible.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=42,
    stratify=y,
)

# Learn the scaling statistics from the training rows only, then apply
# the same fitted transform to both splits.
scaler = StandardScaler().fit(X_train)
X_train_scaled = scaler.transform(X_train)
X_test_scaled = scaler.transform(X_test)

print(f"Train: {X_train_scaled.shape}, Test: {X_test_scaled.shape}")

In [None]:
# ============================================================
# mlops:training - Model training
# ============================================================
# Swap RandomForestClassifier for your own estimator as needed.

from sklearn.ensemble import RandomForestClassifier

# fit() returns the estimator itself, so construction and training can
# be chained; `model` ends up bound to the fitted classifier.
model = RandomForestClassifier(
    n_estimators=100,
    max_depth=5,
    random_state=42,
).fit(X_train_scaled, y_train)

print("Training complete.")

In [None]:
# ============================================================
# mlops:evaluation - Metrics and logging
# ============================================================
# Score the trained model on the held-out test split and log the
# results to MLflow. The platform reads 'accuracy' for
# auto-deployment decisions.

from sklearn.metrics import accuracy_score, classification_report
import mlflow

mlflow.set_tracking_uri(MLFLOW_TRACKING_URI)

y_pred = model.predict(X_test_scaled)
score = accuracy_score(y_test, y_pred)

print(classification_report(y_test, y_pred, target_names=list(iris.target_names)))
print(f"Accuracy: {score:.4f}")

# Log to MLflow (the pipeline wraps execution in an MLflow run)
mlflow.log_metric("accuracy", score)

# Hyperparameters are inputs to the run, not measured results, so they
# are recorded as params rather than metrics. The values are read from
# the fitted estimator instead of repeating the training cell's literals,
# so the logged values always match the model that was actually trained.
# Adjust the keys if you replace the estimator with one that has
# different hyperparameters.
mlflow.log_params(
    {
        "n_estimators": model.n_estimators,
        "max_depth": model.max_depth,
    }
)

In [None]:
# ============================================================
# mlops:export - Save model artifact
# ============================================================
# Save the trained model to MODEL_OUTPUT_PATH.
# The platform picks up this file and registers it in MLflow.
#
# The classifier was trained on StandardScaler output, so exporting it
# alone would drop the fitted scaler and produce wrong predictions on
# raw features at serving time. The scaler and classifier are therefore
# bundled into one Pipeline: the saved artifact accepts the same raw
# feature columns as X and applies the scaling internally.

import joblib
from sklearn.pipeline import Pipeline

# Pipeline does not refit or clone its steps on construction, so the
# already-fitted scaler and model are stored as they are.
inference_pipeline = Pipeline(
    [
        ("scaler", scaler),
        ("model", model),
    ]
)

joblib.dump(inference_pipeline, MODEL_OUTPUT_PATH)
print(f"Model saved to {MODEL_OUTPUT_PATH}")