In [0]:
# Install required packages in Databricks.
# The %pip magic (rather than !pip) targets the environment used by this notebook's kernel.
# TODO(review): versions are unpinned, so a later re-run may resolve different releases —
# consider pinning (e.g. `pkg==x.y.z`) once a known-good set is confirmed.
%pip install mlflow pandas numpy scikit-learn

In [0]:
import mlflow
import mlflow.sklearn
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestClassifier
from sklearn.datasets import load_iris
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score
import pandas as pd
import numpy as np
from mlflow.models import infer_signature

# Train a random-forest classifier on Iris, evaluate it on a held-out split,
# and record parameters, metrics, and the fitted model in MLflow before
# registering the logged model under a fixed registry name.

# One seed shared by the split and the model so a fresh re-run is reproducible.
RANDOM_SEED = 42

# Load dataset and hold out 20% of the rows for evaluation.
data = load_iris()
X_train, X_test, y_train, y_test = train_test_split(
    data.data, data.target, test_size=0.2, random_state=RANDOM_SEED
)

# Hyperparameters live in a single dict that is used both to build the model
# and to log the run, so the logged values cannot drift from what was trained.
rf_params = {"n_estimators": 100, "random_state": RANDOM_SEED}

# Select the MLflow experiment that the run below is recorded under.
mlflow.set_experiment("/mlops_experiment")

with mlflow.start_run():
    # Train model
    model = RandomForestClassifier(**rf_params)
    model.fit(X_train, y_train)

    # Predictions on the held-out split
    y_pred = model.predict(X_test)

    # Compute metrics; precision/recall/F1 use the "weighted" average across classes.
    accuracy = accuracy_score(y_test, y_pred)
    precision = precision_score(y_test, y_pred, average="weighted")
    recall = recall_score(y_test, y_pred, average="weighted")
    f1 = f1_score(y_test, y_pred, average="weighted")

    # Log parameters and metrics
    mlflow.log_params(rf_params)
    mlflow.log_metrics({"accuracy": accuracy, "precision": precision, "recall": recall, "f1_score": f1})

    # Define model signature from the evaluation inputs and the model's predictions.
    input_example = np.array([X_test[0]])  # Single input example (one row, 2-D)
    signature = infer_signature(X_test, y_pred)

    # Log model with signature.
    # NOTE: despite its name, `model_uri` holds the object returned by
    # log_model; the URI string itself is that object's `.model_uri` attribute.
    # The variable name is kept so any later cell that reads it keeps working.
    model_uri = mlflow.sklearn.log_model(
        model,
        "model",
        signature=signature,
        input_example=input_example
    )

    # Register model
    mlflow.register_model(model_uri.model_uri, "RandomForestClassifierModel_new_2")

print("Model training, logging, and registration complete!")


In [0]:
# Placeholder cell: inference is not implemented yet; this only prints a reminder.
# TODO: replace with code that loads the registered model from MLflow and runs predictions.
print("TODO add inference logic. Loading model from MLflow")