In [1]:
import mlflow
import os
import pandas as pd

from mlflow.tracking import MlflowClient
from mlflow.models import infer_signature
from mlflow.store.artifact.artifact_repository_registry import get_artifact_repository

from sklearn.datasets import fetch_california_housing
from sklearn.ensemble import RandomForestRegressor, HistGradientBoostingRegressor
from sklearn.linear_model import LinearRegression
from sklearn.metrics import mean_squared_error, r2_score
from sklearn.model_selection import train_test_split

In [2]:
os.getenv("MLFLOW_TRACKING_URI", "No env")

'postgresql://test:test@postgres:5432/mlflow'

In [3]:
mlflow.get_registry_uri()

'postgresql://test:test@postgres:5432/mlflow'

# Управление экспериментами

In [12]:
exp_id = mlflow.create_experiment(name="test2")

In [6]:
exp_id

'3'

In [7]:
mlflow.delete_experiment(experiment_id=exp_id)

Не забудьте про rm -rf mlruns/trash/*

In [8]:
mlflow.search_experiments(
    filter_string="name = 'test'"
)

[]

In [9]:
mlflow.search_experiments()

[<Experiment: artifact_location='s3://mlflow/0', creation_time=1727086739990, experiment_id='0', last_update_time=1727086739990, lifecycle_stage='active', name='Default', tags={}>]

In [14]:
mlflow.set_experiment(experiment_name="test2")

with mlflow.start_run():
    # Обучим модель.
    model.fit(X_train, y_train)

# Что можно отслеживать в рамках эксперимента

In [15]:
# Прочитаем данные.
housing = fetch_california_housing(as_frame=True)

In [16]:
X_train, X_test, y_train, y_test = train_test_split(housing['data'], housing['target'])
X_val, X_test, y_val, y_test = train_test_split(X_test, y_test, test_size=0.5)

In [17]:
X_test.head()

Unnamed: 0,MedInc,HouseAge,AveRooms,AveBedrms,Population,AveOccup,Latitude,Longitude
8255,2.346,38.0,3.273392,1.054094,984.0,1.438596,33.77,-118.17
872,4.3594,23.0,5.225573,1.033776,2461.0,2.968637,37.56,-122.02
17609,5.1765,34.0,6.784173,0.971223,378.0,2.719424,37.29,-121.92
13655,3.39,5.0,5.195896,1.076493,1310.0,2.44403,34.04,-117.31
11985,4.4545,21.0,5.8375,1.153125,2021.0,3.157812,34.02,-117.49


## Первый запуск run

In [18]:
model = LinearRegression()

In [19]:
# Запустим свой первый run в рамках созданного выше эксперимента.
with mlflow.start_run(experiment_id=exp_id) as run:
    # Обучим модель.
    model.fit(X_train, y_train)

## Ручное логирование метрик

In [20]:
model = RandomForestRegressor()

In [22]:
params = {'n_estimators':50, 'min_samples_split':3}

In [23]:
# Запустим свой первый run в рамках созданного выше эксперимента.
with mlflow.start_run(experiment_id=exp_id) as run:

    # Обучим модель.
    model.fit(X_train, y_train)

    # Сделаем предсказание.
    prediction = model.predict(X_val)

    mlflow.log_metrics({'r2':r2_score(y_val, prediction),
                        'mse':mean_squared_error(y_val, prediction)})

    mlflow.log_params(params)

## Автоматическая оценка модели

In [24]:
# Запустим свой первый run в рамках созданного выше эксперимента.
with mlflow.start_run(experiment_id=exp_id) as run:
    # Обучим модель.
    model.fit(X_train, y_train)

    # Сделаем предсказание.
    prediction = model.predict(X_val)

    # Создадим валидационный датасет.
    eval_df = X_val.copy()
    eval_df["target"] = y_val
    eval_df["prediction"] = prediction

    # Сохраним результаты обучения с помощью MLFlow.
    mlflow.evaluate(
        data=eval_df,
        targets="target",
        predictions="prediction",
        model_type="regressor",
        evaluators=["default"],
    )

Matplotlib created a temporary cache directory at /tmp/matplotlib-zw_6hm8c because the default path (/home/airflow/.cache/matplotlib) is not a writable directory; it is highly recommended to set the MPLCONFIGDIR environment variable to a writable directory, in particular to speed up the import of Matplotlib and to better support multiprocessing.
2024/10/02 11:11:03 INFO mlflow.models.evaluation.default_evaluator: Testing metrics on first row...


## Логирование модели в хранилище артефактов

In [25]:
model = LinearRegression()

In [26]:
# Запустим свой первый run в рамках созданного выше эксперимента.
with mlflow.start_run(experiment_id=exp_id) as run:
    # Обучим модель.
    model.fit(X_train, y_train)

    # Сделаем предсказание.
    prediction = model.predict(X_val)

    # Создадим валидационный датасет.
    eval_df = X_val.copy()
    eval_df["target"] = y_val
    #eval_df["prediction"] = prediction

    # Сохраним результаты обучения с помощью MLFlow.
    signature = infer_signature(housing['data'], prediction)
    model_info = mlflow.sklearn.log_model(model, 'logreg', signature=signature)
    mlflow.evaluate(
        model=model_info.model_uri,
        data=eval_df,
        targets="target",
        #predictions="prediction",
        model_type="regressor",
        evaluators=["default"],
    )

2024/10/02 11:12:33 INFO mlflow.models.evaluation.default_evaluator: Computing model predictions.
2024/10/02 11:12:33 INFO mlflow.models.evaluation.default_evaluator: Testing metrics on first row...


In [27]:
os.environ["MLFLOW_ENABLE_SYSTEM_METRICS_LOGGING"] = "true"

In [28]:
# Запустим свой первый run в рамках созданного выше эксперимента.
with mlflow.start_run(experiment_id=exp_id) as run:
    # Обучим модель.
    model.fit(X_train, y_train)

    # Сделаем предсказание.
    prediction = model.predict(X_val)

    # Создадим валидационный датасет.
    eval_df = X_val.copy()
    eval_df["target"] = y_val
    #eval_df["prediction"] = prediction

    # Сохраним результаты обучения с помощью MLFlow.
    signature = infer_signature(housing['data'], prediction)
    model_info = mlflow.sklearn.log_model(model, 'logreg', signature=signature)
    mlflow.evaluate(
        model=model_info.model_uri,
        data=eval_df,
        targets="target",
        #predictions="prediction",
        model_type="regressor",
        evaluators=["default"],
    )

2024/10/02 11:13:37 INFO mlflow.system_metrics.system_metrics_monitor: Started monitoring system metrics.
2024/10/02 11:13:42 INFO mlflow.models.evaluation.default_evaluator: Computing model predictions.
2024/10/02 11:13:42 INFO mlflow.models.evaluation.default_evaluator: Testing metrics on first row...
2024/10/02 11:13:42 INFO mlflow.system_metrics.system_metrics_monitor: Stopping system metrics monitoring...
2024/10/02 11:13:42 INFO mlflow.system_metrics.system_metrics_monitor: Successfully terminated system metrics monitoring!


In [29]:
os.environ["MLFLOW_ENABLE_SYSTEM_METRICS_LOGGING"] = "false"

In [30]:
# Удалим эксперимент из MLFlow.
mlflow.delete_experiment(experiment_id=exp_id)

In [32]:
# Удалим эксперимент из S3.
repository = get_artifact_repository(f's3://mlflow/{exp_id}')
repository.delete_artifacts()

## Работа с дочерними ранами

In [40]:
# Создать новый эксперимент
exp_name = "parent_run_experiment"
#experiment_id = mlflow.create_experiment(exp_name)
experiment = mlflow.set_experiment(exp_name)

In [36]:
models = dict(zip(["RandomForest", "LinearRegression", "HistGB"], 
                  [RandomForestRegressor(), LinearRegression(), HistGradientBoostingRegressor()]))

In [37]:
# Создадим parent run.
with mlflow.start_run(run_name="parent_run", experiment_id = experiment.experiment_id, description = "parent") as parent_run:
    for model_name in models.keys():
        # Запустим child run на каждую модель.
        with mlflow.start_run(run_name=model_name, experiment_id=experiment.experiment_id, nested=True) as child_run:
            model = models[model_name]
            
            # Обучим модель.
            model.fit(pd.DataFrame(X_train), y_train)
        
            # Сделаем предсказание.
            prediction = model.predict(X_val)
        
            # Создадим валидационный датасет.
            eval_df = X_val.copy()
            eval_df["target"] = y_val
        
            # Сохраним результаты обучения с помощью MLFlow.
            signature = infer_signature(X_test, prediction)
            model_info = mlflow.sklearn.log_model(model, "logreg", signature=signature)
            mlflow.evaluate(
                model=model_info.model_uri,
                data=eval_df,
                targets="target",
                model_type="regressor",
                evaluators=["default"],
            )

NameError: name 'experiment_id' is not defined

# Работа с реджистри

In [210]:
# Запустим свой первый run в рамках созданного выше эксперимента.
with mlflow.start_run() as run:
    # Обучим модель.
    model.fit(pd.DataFrame(X_train), y_train)

    # Сделаем предсказание.
    prediction = model.predict(X_val)

    # Создадим валидационный датасет.
    eval_df = X_val.copy()
    eval_df["target"] = y_val

    # Сохраним результаты обучения с помощью MLFlow.
    signature = infer_signature(X_test, prediction)
    model_info = mlflow.sklearn.log_model(model, 'linreg', signature=signature)
    mlflow.evaluate(
        model=model_info.model_uri,
        data=eval_df,
        targets="target",
        model_type="regressor",
        evaluators=["default"],
    )



Downloading artifacts:   0%|          | 0/5 [00:00<?, ?it/s]

2024/10/01 16:17:01 INFO mlflow.models.evaluation.default_evaluator: Computing model predictions.
2024/10/01 16:17:01 INFO mlflow.models.evaluation.default_evaluator: Testing metrics on first row...
2024/10/01 16:17:01 INFO mlflow.tracking._tracking_service.client: 🏃 View run valuable-donkey-150 at: http://mlflow-service:5000/#/experiments/637350945412675068/runs/dbb5f886c2d647e59d964a5be27362c5.
2024/10/01 16:17:01 INFO mlflow.tracking._tracking_service.client: 🧪 View experiment at: http://mlflow-service:5000/#/experiments/637350945412675068.


In [212]:
model_info.model_uri

'runs:/dbb5f886c2d647e59d964a5be27362c5/logreg'

Регистрация модели и версии способ 1

In [213]:
mlflow.register_model(model_uri=model_info.model_uri, name='linreg-model')

Successfully registered model 'linreg-model'.
2024/10/01 16:17:35 INFO mlflow.store.model_registry.abstract_store: Waiting up to 300 seconds for model version to finish creation. Model name: linreg-model, version 1
Created version '1' of model 'linreg-model'.


<ModelVersion: aliases=[], creation_timestamp=1727799455595, current_stage='None', description='', last_updated_timestamp=1727799455595, name='linreg-model', run_id='dbb5f886c2d647e59d964a5be27362c5', run_link='', source='s3://lizvladi-mlflow-artifacts/parent_run_experiment/dbb5f886c2d647e59d964a5be27362c5/artifacts/logreg', status='READY', status_message='', tags={}, user_id='', version='1'>

Регистрация версии способ 2

In [214]:
client = MlflowClient()

In [216]:
client.create_model_version(name='linreg-model', source=model_info.model_uri, run_id=run.info.run_id)

2024/10/01 16:19:44 INFO mlflow.store.model_registry.abstract_store: Waiting up to 300 seconds for model version to finish creation. Model name: linreg-model, version 2


<ModelVersion: aliases=[], creation_timestamp=1727799584334, current_stage='None', description='', last_updated_timestamp=1727799584334, name='linreg-model', run_id='dbb5f886c2d647e59d964a5be27362c5', run_link='', source='runs:/dbb5f886c2d647e59d964a5be27362c5/logreg', status='READY', status_message='', tags={}, user_id='', version='2'>

Регистрация версии способ 3

In [217]:
# Создадим parent run.
with mlflow.start_run(run_name="parent_run", experiment_id = experiment_id, description = "parent") as parent_run:
    for model_name in models.keys():
        # Запустим child run на каждую модель.
        with mlflow.start_run(run_name=model_name, experiment_id=experiment_id, nested=True) as child_run:
            model = models[model_name]
            
            # Обучим модель.
            model.fit(pd.DataFrame(X_train), y_train)
        
            # Сделаем предсказание.
            prediction = model.predict(X_val)

            # Создадим валидационный датасет.
            eval_df = X_val.copy()
            eval_df["target"] = y_val
        
            # Сохраним результаты обучения с помощью MLFlow.
            signature = infer_signature(X_test, prediction)
            model_info = mlflow.sklearn.log_model(model, "linreg", signature=signature, 
                                                  registered_model_name=f"sk-learn-{model_name}-reg-model")
            mlflow.evaluate(
                model=model_info.model_uri,
                data=eval_df,
                targets="target",
                model_type="regressor",
                evaluators=["default"],
            )

Successfully registered model 'sk-learn-RandomForest-reg-model'.
2024/10/01 16:20:50 INFO mlflow.store.model_registry.abstract_store: Waiting up to 300 seconds for model version to finish creation. Model name: sk-learn-RandomForest-reg-model, version 1
Created version '1' of model 'sk-learn-RandomForest-reg-model'.


Downloading artifacts:   0%|          | 0/5 [00:00<?, ?it/s]

2024/10/01 16:20:56 INFO mlflow.models.evaluation.default_evaluator: Computing model predictions.
2024/10/01 16:20:56 INFO mlflow.models.evaluation.default_evaluator: Testing metrics on first row...
2024/10/01 16:20:56 INFO mlflow.tracking._tracking_service.client: 🏃 View run RandomForest at: http://mlflow-service:5000/#/experiments/281429030876195101/runs/9532f97ad3334314aedd97517b00f4cb.
2024/10/01 16:20:56 INFO mlflow.tracking._tracking_service.client: 🧪 View experiment at: http://mlflow-service:5000/#/experiments/281429030876195101.
Successfully registered model 'sk-learn-LinearRegression-reg-model'.
2024/10/01 16:21:00 INFO mlflow.store.model_registry.abstract_store: Waiting up to 300 seconds for model version to finish creation. Model name: sk-learn-LinearRegression-reg-model, version 1
Created version '1' of model 'sk-learn-LinearRegression-reg-model'.


Downloading artifacts:   0%|          | 0/5 [00:00<?, ?it/s]

2024/10/01 16:21:01 INFO mlflow.models.evaluation.default_evaluator: Computing model predictions.
2024/10/01 16:21:01 INFO mlflow.models.evaluation.default_evaluator: Testing metrics on first row...
2024/10/01 16:21:01 INFO mlflow.tracking._tracking_service.client: 🏃 View run LinearRegression at: http://mlflow-service:5000/#/experiments/281429030876195101/runs/f082e1f0bc1340b692b497d9b06ed9db.
2024/10/01 16:21:01 INFO mlflow.tracking._tracking_service.client: 🧪 View experiment at: http://mlflow-service:5000/#/experiments/281429030876195101.
Successfully registered model 'sk-learn-HistGB-reg-model'.
2024/10/01 16:21:07 INFO mlflow.store.model_registry.abstract_store: Waiting up to 300 seconds for model version to finish creation. Model name: sk-learn-HistGB-reg-model, version 1
Created version '1' of model 'sk-learn-HistGB-reg-model'.


Downloading artifacts:   0%|          | 0/5 [00:00<?, ?it/s]

2024/10/01 16:21:09 INFO mlflow.models.evaluation.default_evaluator: Computing model predictions.
2024/10/01 16:21:09 INFO mlflow.models.evaluation.default_evaluator: Testing metrics on first row...
2024/10/01 16:21:09 INFO mlflow.tracking._tracking_service.client: 🏃 View run HistGB at: http://mlflow-service:5000/#/experiments/281429030876195101/runs/3322b4506cf54da88f325a248799cc9f.
2024/10/01 16:21:09 INFO mlflow.tracking._tracking_service.client: 🧪 View experiment at: http://mlflow-service:5000/#/experiments/281429030876195101.
2024/10/01 16:21:09 INFO mlflow.tracking._tracking_service.client: 🏃 View run parent_run at: http://mlflow-service:5000/#/experiments/281429030876195101/runs/b7f35d1ab419468fb6c2549922069677.
2024/10/01 16:21:09 INFO mlflow.tracking._tracking_service.client: 🧪 View experiment at: http://mlflow-service:5000/#/experiments/281429030876195101.


In [220]:
model = mlflow.sklearn.load_model(model_uri='s3://lizvladi-mlflow-artifacts/mlflow/281429030876195101/3322b4506cf54da88f325a248799cc9f/artifacts/linreg')

Downloading artifacts:   0%|          | 0/5 [00:00<?, ?it/s]

In [222]:
model.predict(X_test)

array([1.8427382 , 0.94766565, 4.7691683 , ..., 3.62601169, 2.77058856,
       2.32143739])