In [7]:
import mlflow
import mlflow.sklearn
import numpy as np
import pandas as pd
from sklearn.datasets import load_iris
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score, confusion_matrix
import matplotlib.pyplot as plt
import seaborn as sns
import os
from datetime import datetime

In [8]:
mlflow.set_tracking_uri("file:./mlruns")
mlflow.set_experiment("Iris_Classification")

class MLflowModelManager:
    """Класс для управления моделями с MLflow"""
    
    def __init__(self, experiment_name="Iris_Classification"):
        self.experiment_name = experiment_name
        mlflow.set_experiment(experiment_name)
        
    def train_and_log_model(self, params=None, model_name="logistic_regression"):

        iris = load_iris()
        X, y = iris.data, iris.target
        
        X_train, X_test, y_train, y_test = train_test_split(
            X, y, test_size=0.2, random_state=42, stratify=y
        )
        
        if params is None:
            params = {
                "C": 1.0,
                "max_iter": 100,
                "solver": "lbfgs",
                "multi_class": "auto",
                "random_state": 42
            }
        
        with mlflow.start_run(run_name=f"{model_name}_{datetime.now().strftime('%Y%m%d_%H%M%S')}"):
            
            mlflow.log_params(params)
            
            model = LogisticRegression(**params)
            model.fit(X_train, y_train)
            
            y_pred = model.predict(X_test)
            y_pred_proba = model.predict_proba(X_test)
            
            metrics = {
                "accuracy": accuracy_score(y_test, y_pred),
                "precision_macro": precision_score(y_test, y_pred, average='macro'),
                "recall_macro": recall_score(y_test, y_pred, average='macro'),
                "f1_macro": f1_score(y_test, y_pred, average='macro'),
                "precision_weighted": precision_score(y_test, y_pred, average='weighted'),
                "recall_weighted": recall_score(y_test, y_pred, average='weighted'),
                "f1_weighted": f1_score(y_test, y_pred, average='weighted')
            }
            
            mlflow.log_metrics(metrics)
            
            class_names = iris.target_names
            cm = confusion_matrix(y_test, y_pred)
            
            plt.figure(figsize=(8, 6))
            sns.heatmap(cm, annot=True, fmt='d', cmap='Blues', 
                       xticklabels=class_names, yticklabels=class_names)
            plt.title('Confusion Matrix')
            plt.ylabel('True Label')
            plt.xlabel('Predicted Label')
            
            temp_plot = "confusion_matrix.png"
            plt.savefig(temp_plot)
            mlflow.log_artifact(temp_plot, "plots")
            plt.close()
            os.remove(temp_plot)
            
            # Логируем модель
            mlflow.sklearn.log_model(
                sk_model=model,
                artifact_path="models",
                registered_model_name=model_name
            )
            
            # Сохраняем информацию о данных
            data_info = {
                "feature_names": iris.feature_names,
                "target_names": iris.target_names,
                "n_samples": len(X),
                "n_features": X.shape[1],
                "n_classes": len(np.unique(y))
            }
            
            # Логируем как JSON
            mlflow.log_dict(data_info, "data_info.json")
            
            # Выводим информацию о run
            print(f"\nRun ID: {mlflow.active_run().info.run_id}")
            print(f"Метрики:")
            for metric_name, metric_value in metrics.items():
                print(f"  {metric_name}: {metric_value:.4f}")
            
            return model, mlflow.active_run().info.run_id
    
    def load_model(self, run_id=None, model_uri=None):
        """
        Загрузка модели из MLflow
        
        Args:
            run_id: ID run'а
            model_uri: URI модели (например: "runs:/<run_id>/models")
        """
        if model_uri is None and run_id is not None:
            model_uri = f"runs:/{run_id}/models"
        
        if model_uri is None:
            model_uri = "models:/logistic_regression/latest"
        
        try:
            model = mlflow.sklearn.load_model(model_uri)
            print(f"Модель загружена из {model_uri}")
            return model
        except Exception as e:
            print(f"Ошибка загрузки модели: {e}")
            return None
    
    def compare_runs(self):
        """Сравнение всех run'ов в эксперименте"""
        experiment = mlflow.get_experiment_by_name(self.experiment_name)
        if experiment is None:
            print("Эксперимент не найден")
            return
        
        runs = mlflow.search_runs(experiment_ids=[experiment.experiment_id])
        
        if len(runs) == 0:
            print("Нет запусков в эксперименте")
            return
        
        print(f"\nСравнение запусков в эксперименте '{self.experiment_name}':")
        print("-" * 100)
        
        columns = ['run_id', 'status', 'start_time', 'params.C', 'metrics.accuracy', 
                  'metrics.f1_macro', 'tags.mlflow.runName']
        
        available_columns = [col for col in columns if col in runs.columns]
        print(runs[available_columns].to_string())
        
        best_run = runs.loc[runs['metrics.accuracy'].idxmax()]
        print(f"\nЛучший run (accuracy={best_run['metrics.accuracy']:.4f}):")
        print(f"  Run ID: {best_run['run_id']}")
        print(f"  Run Name: {best_run.get('tags.mlflow.runName', 'N/A')}")
        
        return runs

In [9]:
def train_multiple_versions():
    """Обучение нескольких версий модели с разными параметрами"""
    
    manager = MLflowModelManager()
    
    param_configs = [
        {"C": 0.1, "max_iter": 100, "solver": "lbfgs"},
        {"C": 1.0, "max_iter": 100, "solver": "lbfgs"},
        {"C": 10.0, "max_iter": 100, "solver": "lbfgs"},
        {"C": 1.0, "max_iter": 200, "solver": "sag"},
        {"C": 1.0, "max_iter": 100, "solver": "newton-cg"},
    ]
    
    models = []
    run_ids = []
    
    print("Обучение нескольких версий модели...")
    for i, params in enumerate(param_configs):
        print(f"\n--- Версия {i+1} ---")
        model, run_id = manager.train_and_log_model(
            params=params,
            model_name=f"logistic_regression_v{i+1}"
        )
        models.append(model)
        run_ids.append(run_id)
    
    return manager, models, run_ids

In [10]:
def demonstrate_loading_and_prediction(manager, run_id):
    """Демонстрация загрузки модели и предсказания"""  

    model = manager.load_model(run_id=run_id)
    
    if model is None:
        print("Не удалось загрузить модель")
        return
    
    iris = load_iris()
    X, y = iris.data, iris.target
    
    np.random.seed(42)
    indices = np.random.choice(len(X), 5, replace=False)
    samples = X[indices]
    true_labels = y[indices]
    
    predictions = model.predict(samples)
    probabilities = model.predict_proba(samples)
    
    print("\nПримеры предсказаний:")
    for i, (sample, true, pred, proba) in enumerate(zip(samples, true_labels, predictions, probabilities)):
        print(f"\nПример {i+1}:")
        print(f"  Признаки: {sample}")
        print(f"  Истинный класс: {iris.target_names[true]} ({true})")
        print(f"  Предсказанный класс: {iris.target_names[pred]} ({pred})")
        print(f"  Вероятности: {dict(zip(iris.target_names, proba))}")
        print(f"  {'да' if true == pred else 'нет'}")

In [11]:
def main():
    """Основная функция"""
    
    print("MLflow Model Versioning Demo")
    
    manager = MLflowModelManager()
    
    manager, models, run_ids = train_multiple_versions()
    
    manager.compare_runs()
    
    if run_ids:
        last_run_id = run_ids[-1]
        demonstrate_loading_and_prediction(manager, last_run_id)
    
    print("ЗАГРУЗКА КОНКРЕТНОЙ ВЕРСИИ МОДЕЛИ")
    
    if len(run_ids) > 1:
        print(f"\nЗагрузка модели с run_id: {run_ids[0]}")
        model_v1 = manager.load_model(run_id=run_ids[0])
        
        if model_v1:
            print("Модель успешно загружена")
            print(f"   Параметры модели: {model_v1.get_params()}")
    

if __name__ == "__main__":
    main()

MLflow Model Versioning Demo
Обучение нескольких версий модели...

--- Версия 1 ---


Registered model 'logistic_regression_v1' already exists. Creating a new version of this model...
Created version '2' of model 'logistic_regression_v1'.



Run ID: c04702f098bf4fb59f6d74d92b424509
Метрики:
  accuracy: 0.9667
  precision_macro: 0.9697
  recall_macro: 0.9667
  f1_macro: 0.9666
  precision_weighted: 0.9697
  recall_weighted: 0.9667
  f1_weighted: 0.9666

--- Версия 2 ---


Registered model 'logistic_regression_v2' already exists. Creating a new version of this model...
Created version '2' of model 'logistic_regression_v2'.
STOP: TOTAL NO. OF ITERATIONS REACHED LIMIT

Increase the number of iterations to improve the convergence (max_iter=100).
You might also want to scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(



Run ID: f421563f813c4cec81dc34196667d9a0
Метрики:
  accuracy: 0.9667
  precision_macro: 0.9697
  recall_macro: 0.9667
  f1_macro: 0.9666
  precision_weighted: 0.9697
  recall_weighted: 0.9667
  f1_weighted: 0.9666

--- Версия 3 ---


Registered model 'logistic_regression_v3' already exists. Creating a new version of this model...
Created version '2' of model 'logistic_regression_v3'.



Run ID: 1c4192608f7e43c5bb92db2f30c343f0
Метрики:
  accuracy: 1.0000
  precision_macro: 1.0000
  recall_macro: 1.0000
  f1_macro: 1.0000
  precision_weighted: 1.0000
  recall_weighted: 1.0000
  f1_weighted: 1.0000

--- Версия 4 ---


Registered model 'logistic_regression_v4' already exists. Creating a new version of this model...
Created version '2' of model 'logistic_regression_v4'.



Run ID: d0627b96bf714f4bb236e999bb6bf9a8
Метрики:
  accuracy: 1.0000
  precision_macro: 1.0000
  recall_macro: 1.0000
  f1_macro: 1.0000
  precision_weighted: 1.0000
  recall_weighted: 1.0000
  f1_weighted: 1.0000

--- Версия 5 ---


Registered model 'logistic_regression_v5' already exists. Creating a new version of this model...
Created version '2' of model 'logistic_regression_v5'.



Run ID: a8218be663a9417ba9bd05a03fcd8681
Метрики:
  accuracy: 0.9667
  precision_macro: 0.9697
  recall_macro: 0.9667
  f1_macro: 0.9666
  precision_weighted: 0.9697
  recall_weighted: 0.9667
  f1_weighted: 0.9666

Сравнение запусков в эксперименте 'Iris_Classification':
----------------------------------------------------------------------------------------------------
                             run_id    status                       start_time params.C  metrics.accuracy  metrics.f1_macro                     tags.mlflow.runName
0  a8218be663a9417ba9bd05a03fcd8681  FINISHED 2026-02-27 16:25:26.880000+00:00      1.0          0.966667          0.966583  logistic_regression_v5_20260227_232526
1  d0627b96bf714f4bb236e999bb6bf9a8  FINISHED 2026-02-27 16:25:22.020000+00:00      1.0          1.000000          1.000000  logistic_regression_v4_20260227_232522
2  1c4192608f7e43c5bb92db2f30c343f0  FINISHED 2026-02-27 16:25:17.112000+00:00     10.0          1.000000          1.000000  logistic_

Downloading artifacts:   0%|          | 0/1 [00:00<?, ?it/s]
Downloading artifacts: 100%|██████████| 5/5 [00:00<00:00, 2296.99it/s] 


Модель загружена из runs:/a8218be663a9417ba9bd05a03fcd8681/models

Примеры предсказаний:

Пример 1:
  Признаки: [6.1 2.8 4.7 1.2]
  Истинный класс: versicolor (1)
  Предсказанный класс: versicolor (1)
  Вероятности: {np.str_('setosa'): np.float64(0.003972631304512017), np.str_('versicolor'): np.float64(0.8392158202496013), np.str_('virginica'): np.float64(0.15681154844588674)}
  да

Пример 2:
  Признаки: [5.7 3.8 1.7 0.3]
  Истинный класс: setosa (0)
  Предсказанный класс: setosa (0)
  Вероятности: {np.str_('setosa'): np.float64(0.9453756103005178), np.str_('versicolor'): np.float64(0.05462410943414651), np.str_('virginica'): np.float64(2.802653358802878e-07)}
  да

Пример 3:
  Признаки: [7.7 2.6 6.9 2.3]
  Истинный класс: virginica (2)
  Предсказанный класс: virginica (2)
  Вероятности: {np.str_('setosa'): np.float64(8.155871481494184e-09), np.str_('versicolor'): np.float64(0.0016426990229465369), np.str_('virginica'): np.float64(0.9983572928211819)}
  да

Пример 4:
  Признаки: [6.  2

Downloading artifacts:   0%|          | 0/1 [00:00<?, ?it/s]
Downloading artifacts: 100%|██████████| 5/5 [00:00<00:00, 2343.97it/s] 


Модель загружена из runs:/c04702f098bf4fb59f6d74d92b424509/models
Модель успешно загружена
   Параметры модели: {'C': 0.1, 'class_weight': None, 'dual': False, 'fit_intercept': True, 'intercept_scaling': 1, 'l1_ratio': None, 'max_iter': 100, 'multi_class': 'deprecated', 'n_jobs': None, 'penalty': 'l2', 'random_state': None, 'solver': 'lbfgs', 'tol': 0.0001, 'verbose': 0, 'warm_start': False}
