**Umgebungseinstellungen**


In [None]:
# Install ZenML (with the server extra) and the sklearn / mlflow / evidently
# integrations, then initialize a ZenML repository in the working directory.
%pip install "zenml[server]"  # install ZenML
!zenml integration install sklearn mlflow evidently -y  # install ZenML integrations
!zenml init  # Initialize a ZenML repository
%pip install pyparsing==2.4.2  # required for Colab

import IPython

# automatically restart kernel
# NOTE(review): presumably needed so the freshly installed packages become
# importable. Statements placed after this call in the same cell may never
# run, because the kernel is shut down here - confirm the cell boundaries.
IPython.Application.instance().kernel.do_shutdown(restart=True)
NGROK_TOKEN = "2PwBKgYHkhfGIKihk3LZ77LAUo4_4yLTDK9r7FtXeDeHz562y"
from zenml.environment import Environment

if Environment.in_google_colab():  # Colab only setup
    # install ngrok and set auth token
    !pip install pyngrok
    !ngrok authtoken {NGROK_TOKEN}

**Stacks zum Projekt hinzufügen**

In [None]:
# Register the MLflow experiment tracker
!zenml experiment-tracker register mlflow_tracker --flavor=mlflow

# Register the MLflow model registry
!zenml model-registry register mlflow_registry --flavor=mlflow

# Register the MLflow model deployer
!zenml model-deployer register mlflow_deployer --flavor=mlflow

# Register the Evidently data validator
!zenml data-validator register evidently_validator --flavor=evidently

# Register a new stack with the new stack components
!zenml stack register my_project_stack -a default\
                                       -o default\
                                       -d mlflow_deployer\
                                       -e mlflow_tracker\
                                       -r mlflow_registry\
                                       -dv evidently_validator\

!zenml stack set my_project_stack

# Visualize the current ZenML stack
!zenml stack describe

**Datenbeschaffung für die Training-Pipeline und die Inference-Pipeline**

In [None]:
import numpy as np
import pandas as pd
from sklearn.datasets import load_iris
from sklearn.model_selection import train_test_split

from zenml import step
from zenml.steps import Output


@step
def training_data_loader() -> Output(
    X_train=pd.DataFrame,
    X_test=pd.DataFrame,
    y_train=pd.Series,
    y_test=pd.Series,
):
    """Split the iris dataset into training and test partitions.

    Returns:
        The tuple ``(X_train, X_test, y_train, y_test)`` obtained from
        ``train_test_split`` with a test share of 0.2, shuffling enabled
        and ``random_state=42``.
    """
    dataset = load_iris(as_frame=True)
    features, labels = dataset.data, dataset.target
    train_features, test_features, train_labels, test_labels = train_test_split(
        features,
        labels,
        test_size=0.2,
        shuffle=True,
        random_state=42,
    )
    return train_features, test_features, train_labels, test_labels

@step
def inference_data_loader() -> pd.DataFrame:
    """Generate a small frame of random values to run inference on.

    Returns:
        A DataFrame of shape (10, 4) that reuses the iris feature column
        names and holds unseeded random values scaled by a factor of 10.
    """
    random_values = np.random.rand(10, 4) * 10  # assume range [0, 10]
    feature_columns = load_iris(as_frame=True).data.columns
    return pd.DataFrame(data=random_values, columns=feature_columns)

**Modell trainieren (SVC-Algorithmus)**

In [None]:
import mlflow

from sklearn.base import ClassifierMixin
from sklearn.svm import SVC

from zenml.client import Client

# Experiment tracker component of the currently active stack; its name is
# passed to the training step below.
experiment_tracker = Client().active_stack.experiment_tracker


@step(enable_cache=False, experiment_tracker=experiment_tracker.name)
def svc_trainer_mlflow(
    X_train: pd.DataFrame,
    y_train: pd.Series,
) -> ClassifierMixin:
    """Fit an sklearn SVC classifier with MLflow autologging switched on.

    Args:
        X_train: Training features.
        y_train: Training labels.

    Returns:
        The fitted ``SVC(gamma=0.01)`` model.
    """
    # Enable autologging before fitting so the run is captured by MLflow.
    mlflow.sklearn.autolog()

    features = X_train.to_numpy()
    labels = y_train.to_numpy()

    classifier = SVC(gamma=0.01)
    classifier.fit(features, labels)

    train_acc = classifier.score(features, labels)
    print(f"Train accuracy: {train_acc}")
    return classifier

**Modellbewertung**

In [None]:
@step
def evaluator(
    X_test: pd.DataFrame,
    y_test: pd.Series,
    model: ClassifierMixin,
) -> float:
    """Score the model on the test set.

    Args:
        X_test: Test features.
        y_test: Test labels.
        model: Fitted classifier to evaluate.

    Returns:
        The value of ``model.score`` on the test data.
    """
    features, labels = X_test.to_numpy(), y_test.to_numpy()
    test_acc = model.score(features, labels)
    print(f"Test accuracy: {test_acc}")
    return test_acc

# Deployment Trigger: Das Modell nur deployen, wenn die Test-Accuracy über 90 % liegt

In [None]:
@step
def deployment_trigger(test_acc: float) -> bool:
    """Decide whether the model is good enough to be deployed.

    Args:
        test_acc: Accuracy measured on the test set.

    Returns:
        True only when ``test_acc`` is strictly greater than 0.9.
    """
    min_accuracy = 0.9
    return test_acc > min_accuracy

# Modell Deployment

In [None]:
from zenml.integrations.mlflow.steps.mlflow_deployer import mlflow_model_registry_deployer_step
from zenml.integrations.mlflow.steps.mlflow_registry import mlflow_register_model_step
from zenml.model_registries.base_model_registry import ModelRegistryModelMetadata

# Deployer step configured to serve version "1" of the registered model
# "my-model".
model_deployer = mlflow_model_registry_deployer_step.with_options(
    parameters={
        "registry_model_name": "my-model",
        "registry_model_version": "1",
        # Instead of a fixed version, a registry stage ("Staging",
        # "Production", "Archived") can be selected if one has been set in
        # the MLflow registry.
        # NOTE(review): the option is presumably `registry_model_stage`
        # (the original comment said `registered_model_stage`) - confirm
        # against the installed ZenML version.
    },
)

# Data Drift

In [None]:
from zenml.integrations.evidently.steps import (
    evidently_profile_step,
)

# Evidently profile step restricted to the "datadrift" profile section.
drift_detector = evidently_profile_step.with_options(
    parameters={"profile_sections": ["datadrift"]},
)

# Prediction Loader

In [None]:
from zenml.services import BaseService
from zenml.client import Client


@step(enable_cache=False)
def prediction_service_loader() -> BaseService:
    """Load the model service deployed by the training pipeline.

    Looks up running model servers on the active stack's model deployer,
    filtered by pipeline name "training_pipeline" and step name
    "model_deployer", and returns the first match.

    Returns:
        The first matching prediction service.

    Raises:
        RuntimeError: If the active stack has no model deployer, or if no
            running model server matches the lookup.
    """
    # Local name differs from the module-level `model_deployer` step on purpose
    # so the step is not shadowed inside this function.
    deployer = Client().active_stack.model_deployer
    if deployer is None:
        raise RuntimeError(
            "The active ZenML stack has no model deployer component; "
            "register one and add it to the stack first."
        )

    # NOTE(review): these filters must match the pipeline/step that actually
    # performed the deployment - verify the names against the deploying run.
    services = deployer.find_model_server(
        pipeline_name="training_pipeline",
        pipeline_step_name="model_deployer",
        running=True,
    )
    if not services:
        # Fail with an actionable message instead of a bare IndexError.
        raise RuntimeError(
            "No running model server found for pipeline 'training_pipeline' "
            "and step 'model_deployer'; deploy a model before loading the "
            "prediction service."
        )
    return services[0]

# Predictor

In [None]:
@step
def predictor(
    service: BaseService,
    data: pd.DataFrame,
) -> Output(predictions=list):
    """Run an inference request against a prediction service.

    Args:
        service: Prediction service to query; it is started first if needed.
        data: Feature rows to predict on.

    Returns:
        A one-element list wrapping the per-row predictions.
    """
    service.start(timeout=10)  # should be a NOP if already started
    prediction = service.predict(data.to_numpy())
    # Only collapse a trailing score axis into class indices. A 1-D result
    # already holds one prediction per row (presumably the case for the SVC
    # served here - confirm); running argmax over it would reduce all rows
    # to a single meaningless index.
    if prediction.ndim > 1:
        prediction = prediction.argmax(axis=-1)
    predictions = [prediction.tolist()]
    print(f"Prediction is: {predictions}")
    return predictions

# Training Pipeline ausführen

In [None]:
from zenml import pipeline

@pipeline(enable_cache=False)
def training_pipeline():
    """Load data, train and evaluate the SVC, then register the model.

    The trained model is registered under the name "my-model" through the
    MLflow model registry step.
    """
    train_features, test_features, train_labels, test_labels = training_data_loader()
    model = svc_trainer_mlflow(X_train=train_features, y_train=train_labels)
    evaluator(X_test=test_features, y_test=test_labels, model=model)

    # Configure the registration step first, then apply it to the model.
    register_model = mlflow_register_model_step.with_options(
        parameters={
            "name": "my-model",
            "metadata": ModelRegistryModelMetadata(gamma=0.01, arch="svc"),
            "description": "The first run of the Quickstart pipeline.",
        },
    )
    register_model(model)


# Run the training pipeline.
training_pipeline()

# Log

In [None]:
# List the models registered in the model registry
!zenml model-registry models list

# List the registered versions of the model "my-model"
!zenml model-registry models list-versions my-model

# Inference Pipeline deployen

In [None]:
@pipeline()
def inference_pipeline():
    """Deploy the registered model, predict on new data and profile drift.

    The drift step receives the training features as first argument and the
    inference data as second argument.
    """
    incoming_data = inference_data_loader()
    deployed_service = model_deployer()
    predictor(service=deployed_service, data=incoming_data)

    # Only the training features are needed for the drift comparison.
    reference_data, _, _, _ = training_data_loader()
    drift_detector(reference_data, incoming_data)


# Run the inference pipeline.
inference_pipeline()

# Alle Modelle in Produktion anzeigen

In [None]:
# List the models managed by the model deployer
!zenml model-deployer models list