# MLflow example


## Логирование sklearn модели

In [None]:
!pip install mlflow

Простое автологирование

In [None]:
import mlflow
from sklearn.model_selection import train_test_split
from sklearn.datasets import load_diabetes
from sklearn.ensemble import RandomForestRegressor

# Enable MLflow autologging before any model is fitted.
mlflow.autolog()

# Diabetes regression dataset, split into train and test parts.
# The seed is fixed so that repeated executions produce the same split and
# therefore comparable logged runs.
db = load_diabetes()
X_train, X_test, y_train, y_test = train_test_split(
    db.data, db.target, random_state=42
)

# The forest is seeded as well; otherwise every execution would fit a
# different model on the same data.
rf = RandomForestRegressor(
    n_estimators=100, max_depth=6, max_features=3, random_state=42
)
# MLflow triggers logging automatically upon model fitting
rf.fit(X_train, y_train)


In [None]:
import mlflow
import pandas as pd
from sklearn import datasets
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score
from mlflow.models import infer_signature
import mlflow.sklearn
import mlflow.exceptions

# Iris features and labels, with 20% held out for testing (fixed split seed).
X, y = datasets.load_iris(return_X_y=True)
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=42
)

# Hyperparameters of the logistic regression.
# NOTE: use hydra for configuration management.
params = dict(solver="lbfgs", max_iter=1000, random_state=8888)

# Fit on the training split, then predict the held-out split.
lr = LogisticRegression(**params).fit(X_train, y_train)
y_pred = lr.predict(X_test)

# Accuracy plus macro-averaged precision, recall and F1 on the test split.
accuracy = accuracy_score(y_test, y_pred)
precision, recall, f1 = (
    score(y_test, y_pred, average="macro")
    for score in (precision_score, recall_score, f1_score)
)
print(accuracy, precision, recall, f1)

experiment_name = "MLflow experiment 01"
run_name = "run 01"

# Reuse the experiment when it already exists, otherwise create it.
# Looking it up explicitly avoids interpreting every MlflowException (for
# example a tracking-server failure) as "the experiment already exists" and
# then failing on a missing lookup result.
experiment = mlflow.get_experiment_by_name(experiment_name)
if experiment is None:
    # Create a new MLflow Experiment
    experiment_id = mlflow.create_experiment(name=experiment_name)
else:
    experiment_id = experiment.experiment_id
    print(experiment_id)

# Record the hyperparameters, test metrics, fitted model and its evaluation
# inside a single MLflow run.
with mlflow.start_run(run_name=run_name, experiment_id=experiment_id) as run:
    # Log the hyperparameters
    mlflow.log_params(params=params)
    # Log every computed test metric exactly once, in a single call
    mlflow.log_metrics(
        {
            "accuracy": accuracy,
            "precision": precision,
            "recall": recall,
            "f1": f1,
        }
    )
    # Set a tag that we can use to remind ourselves what this run was for
    mlflow.set_tag("Training Info", "Basic LR model for iris run was for data")
    # Infer the model signature from the inputs and from model outputs.
    # The model is logged with pyfunc_predict_fn="predict_proba", so the
    # output side is described by predicted probabilities rather than by the
    # ground-truth labels.
    signature = infer_signature(X_test, lr.predict_proba(X_test))
    # Log the model
    model_info = mlflow.sklearn.log_model(
        sk_model=lr,
        artifact_path="iris_model",
        signature=signature,
        input_example=X_test,
        registered_model_name="LR_model_01",
        pyfunc_predict_fn="predict_proba",
    )
    # Reload through the sklearn flavor and call predict() on the result;
    # the returned labels feed the evaluation table below.
    loaded_model = mlflow.sklearn.load_model(model_uri=model_info.model_uri)
    predictions = loaded_model.predict(X_test)
    print(predictions)

    # Static evaluation table: true labels next to the predicted labels.
    eval_data = pd.DataFrame({"label": y_test, "predictions": predictions})

    results = mlflow.evaluate(
        data=eval_data,
        model_type="classifier",
        targets="label",
        predictions="predictions",
        evaluators=["default"],
    )
    # A real newline separates the heading from the printed value.
    print(f"metrics:\n{results.metrics}")
    print(f"artifacts:\n{results.artifacts}")

## Логирование pytorch модели

In [None]:
import numpy as np
import mlflow
from mlflow.models import infer_signature
import torch
from torch import nn
import pandas as pd

# Toy regression setup: one linear layer trained with L1 loss and Adam on
# random data.
n_samples, n_features = 100, 10

net = nn.Linear(n_features, 1)
loss_function = nn.L1Loss()
optimizer = torch.optim.Adam(net.parameters(), lr=1e-4)

# Random inputs and targets, drawn after the layer has been initialised.
X = torch.randn(n_samples, n_features)
y = torch.randn(n_samples, 1)
print(X.shape, y.shape)

# A few full-batch optimisation steps.
epochs = 5
for epoch in range(epochs):
    optimizer.zero_grad()
    outputs = net(X)
    loss = loss_function(outputs, y)
    loss.backward()
    optimizer.step()

# Log the trained network to MLflow, reload it as a pyfunc model and evaluate
# its outputs on the training data as a regressor.
with mlflow.start_run() as run:
    # Forward pass on the training inputs, computed once and reused for both
    # the signature and the evaluation table (the network is not modified in
    # between, so the values are the same).
    train_outputs = net(X).detach().numpy()
    signature = infer_signature(X.numpy(), train_outputs)
    model_info = mlflow.pytorch.log_model(
        pytorch_model=net,
        artifact_path="pytorch model",
        signature=signature,
        input_example=X.numpy(),
        registered_model_name="pytorch_model",
    )
    # Reload the logged model through the pyfunc interface and predict on
    # fresh random inputs.
    pytorch_pyfunc = mlflow.pyfunc.load_model(model_uri=model_info.model_uri)
    X_test = torch.randn(20, 10).numpy()
    predictions = pytorch_pyfunc.predict(X_test)
    print(predictions)

    # Static evaluation table: targets next to the network outputs.
    eval_data = pd.DataFrame(y.numpy())
    print(eval_data)
    eval_data.columns = ["label"]
    eval_data["predictions"] = train_outputs
    print(eval_data.shape)
    results = mlflow.evaluate(
        data=eval_data,
        model_type="regressor",
        targets="label",
        predictions="predictions",
        evaluators=["default"],
    )
    # A real newline separates the heading from the printed value.
    print(f"metrics:\n{results.metrics}")
    print(f"artifacts:\n{results.artifacts}")