> Не забываем в CLI переключаться в нужный environment!

## Import Libraries

In [1]:
import numpy as np
import pandas as pd
from sklearn.datasets import load_iris
from sklearn.linear_model import LogisticRegression
from sklearn.model_selection import train_test_split

In [2]:
import mlflow
from mlflow.models.signature import infer_signature

In [3]:
# Connection settings for the locally running MLflow tracking server
TRACKING_URI = "http://127.0.0.1:5000"
EXPERIMENT_NAME = "ML_Experiment"

# Point the client at the server, then select the experiment to log into;
# the returned Experiment object is displayed as the cell output
mlflow.set_tracking_uri(TRACKING_URI)
mlflow.set_experiment(EXPERIMENT_NAME)

<Experiment: artifact_location='mlflow-artifacts:/371774194363685445', creation_time=1742546665181, experiment_id='371774194363685445', last_update_time=1742546665181, lifecycle_stage='active', name='ML_Experiment', tags={}>

## MLFlow examples

> Недопустимо перезаписывать уже залогированный param другим значением в рамках одного run

In [12]:
# Log a single experiment parameter
train_size = 0.8
mlflow.log_param(key="train_size", value=train_size)

0.8

In [13]:
# Log several parameters in one call
penalty, fit_intercept = "l2", False

model_params = dict(penalty=penalty, fit_intercept=fit_intercept)
mlflow.log_params(model_params)

In [14]:
# Build the (not yet fitted) classifier from the hyper-parameters logged above
lr_kwargs = {"penalty": penalty, "fit_intercept": fit_intercept}
lr = LogisticRegression(**lr_kwargs)

In [15]:
# Load the iris features and labels as plain arrays
X, y = load_iris(return_X_y=True)

# Fix the shuffle seed: without it every Restart & Run All produces a different
# split, so the logged datasets and accuracy metrics are not reproducible
random_state = 42
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    train_size=train_size,
    random_state=random_state,
)

In [16]:
# Attach the training features and labels to the run as dataset inputs,
# both under the "train" context
for train_array, dataset_name in ((X_train, "X_train"), (y_train, "y_train")):
    train_dataset = mlflow.data.from_numpy(train_array, name=dataset_name)
    mlflow.log_input(train_dataset, context="train")

In [17]:
# Alternative: save the data locally first, then upload the file as an artifact
local_parquet = "X_train_1.parquet"
X_train_frame = pd.DataFrame(X_train)
X_train_frame.to_parquet(local_parquet)

# The file ends up under <run artifacts>/X_train_1/X_train_1.parquet
mlflow.log_artifact(local_path=local_parquet, artifact_path="X_train_1")

In [18]:
# Fit the logistic regression on the training split
lr.fit(X_train, y_train)

In [19]:
# Predictions on the training data itself (reused below for the train accuracy)
pred = lr.predict(X_train)

In [20]:
# Record the training-set predictions as a dataset input of the run
pred_dataset = mlflow.data.from_numpy(pred, name="pred")
mlflow.log_input(pred_dataset, context="train_predict")

In [21]:
# Accuracy = share of predictions that equal the true label
accuracy_train = np.mean(pred == y_train)

test_pred = lr.predict(X_test)
accuracy_test = np.mean(test_pred == y_test)

In [22]:
# Log both accuracy values to the run in a single call
accuracy_metrics = dict(
    accuracy_train=accuracy_train,
    accuracy_test=accuracy_test,
)
mlflow.log_metrics(accuracy_metrics)

In [23]:
# Attach the held-out features and labels to the run under the "test" context
for test_array, dataset_name in ((X_test, "X_test"), (y_test, "y_test")):
    test_dataset = mlflow.data.from_numpy(test_array, name=dataset_name)
    mlflow.log_input(test_dataset, context="test")

In [24]:
# One test row serves as the input example; the model signature is inferred
# from that row together with the model's prediction for it
input_example = X_test[:1]
example_output = lr.predict(input_example)
signature = infer_signature(input_example, example_output)

# Store the fitted model in the run; the returned ModelInfo is the cell output
mlflow.sklearn.log_model(
    sk_model=lr,
    artifact_path="Logistic Regression v1",
    input_example=input_example,
    signature=signature,
)

<mlflow.models.model.ModelInfo at 0x149f58cb0>

## Previous Runs

In [25]:
# Show previously logged runs as a table (one row per run, with metrics.*,
# params.* and tags.* columns); presumably scoped to the experiment selected
# with set_experiment — confirm against the MLflow docs
mlflow.search_runs()

Unnamed: 0,run_id,experiment_id,status,artifact_uri,start_time,end_time,metrics.accuracy_train,metrics.accuracy_test,params.fit_intercept,params.penalty,params.train_size,tags.mlflow.source.type,tags.mlflow.runName,tags.mlflow.user,tags.mlflow.source.name,tags.mlflow.log-model.history
0,16b3fef7470a4a3c8e7c1c0153b35683,511522482551123436,RUNNING,mlflow-artifacts:/511522482551123436/16b3fef74...,2025-03-19 09:04:46.967000+00:00,,0.975,0.966667,False,l2,0.8,LOCAL,grandiose-robin-894,dakorolkov,/opt/anaconda3/envs/my_env/lib/python3.12/site...,"[{""run_id"": ""16b3fef7470a4a3c8e7c1c0153b35683""..."


In [38]:
# Metadata (run_id, artifact_uri, status, ...) of the most recently active run
mlflow.last_active_run().info

<RunInfo: artifact_uri='mlflow-artifacts:/511522482551123436/16b3fef7470a4a3c8e7c1c0153b35683/artifacts', end_time=None, experiment_id='511522482551123436', lifecycle_stage='active', run_id='16b3fef7470a4a3c8e7c1c0153b35683', run_name='grandiose-robin-894', run_uuid='16b3fef7470a4a3c8e7c1c0153b35683', start_time=1742375086967, status='RUNNING', user_id='dakorolkov'>

In [26]:
# Metadata of the run that is currently open in this kernel.
# NOTE(review): active_run() is documented to return None when no run is
# active, in which case `.info` would raise AttributeError — this cell relies
# on the run implicitly started by the first log_* call still being open.
mlflow.active_run().info

<RunInfo: artifact_uri='mlflow-artifacts:/511522482551123436/16b3fef7470a4a3c8e7c1c0153b35683/artifacts', end_time=None, experiment_id='511522482551123436', lifecycle_stage='active', run_id='16b3fef7470a4a3c8e7c1c0153b35683', run_name='grandiose-robin-894', run_uuid='16b3fef7470a4a3c8e7c1c0153b35683', start_time=1742375086967, status='RUNNING', user_id='dakorolkov'>

## Load Artefacts From MLFlow

### Model

In [35]:
# The artifact location can be copied from the tracking service or read from the run.
# Layout of the artifact root: ../experiment_id/run_id/artifacts
artifact_root = mlflow.last_active_run().info.artifact_uri
uri = f"{artifact_root}/Logistic Regression v1"

# Download the logged model and deserialize it back into a scikit-learn estimator
lr_load = mlflow.sklearn.load_model(model_uri=uri)

Downloading artifacts: 100%|██████████| 7/7 [00:00<00:00, 177.05it/s]


In [36]:
# Coefficients of the model loaded back from MLflow
lr_load.coef_

array([[ 0.73017082,  1.58710603, -2.1822981 , -1.04488072],
       [ 0.6395672 , -0.24415205, -0.08927362, -0.78516571],
       [-1.36973802, -1.34295398,  2.27157172,  1.83004643]])

In [37]:
# Coefficients of the in-memory model, for comparison with the loaded copy
lr.coef_

array([[ 0.73017082,  1.58710603, -2.1822981 , -1.04488072],
       [ 0.6395672 , -0.24415205, -0.08927362, -0.78516571],
       [-1.36973802, -1.34295398,  2.27157172,  1.83004643]])

### Dataset

> Если сохраняли как артефакт, то его можно скачать и просто через UI

In [40]:
# Root artifact URI of the most recently active run (base for the downloads below)
uri = mlflow.last_active_run().info.artifact_uri


In [41]:
# Variant 1: the data was logged as a plain artifact file
parquet_uri = f"{uri}/X_train_1/X_train_1.parquet"

# download_artifacts returns the local path of the downloaded copy
artifact = mlflow.artifacts.download_artifacts(artifact_uri=parquet_uri)
pd.read_parquet(artifact)

Downloading artifacts:   0%|          | 0/1 [00:00<?, ?it/s]

Downloading artifacts: 100%|██████████| 1/1 [00:00<00:00, 49.65it/s]


Unnamed: 0,0,1,2,3
0,4.4,2.9,1.4,0.2
1,6.4,2.8,5.6,2.2
2,5.8,2.7,4.1,1.0
3,6.3,2.5,5.0,1.9
4,5.1,3.8,1.5,0.3
...,...,...,...,...
115,6.0,2.2,4.0,1.0
116,6.2,3.4,5.4,2.3
117,4.8,3.1,1.6,0.2
118,6.4,3.1,5.5,1.8


In [42]:
# Variant 2: the data was logged via mlflow.data / log_input.
# TODO: it is still unclear how to download such datasets back.

# Re-fetch the run from the tracking server to get its dataset inputs
last_run_id = mlflow.last_active_run().info.run_id
run = mlflow.get_run(last_run_id)

# Print the position and name of every dataset attached to the run
for position, dataset_input in enumerate(run.inputs.dataset_inputs):
    print(position, dataset_input.dataset.name)

0 y_train
1 pred
2 X_train
3 X_test
4 y_test


In [None]:
# Unfinished experiment (kept commented out): try to resolve the source of a
# logged dataset and load it back.
# NOTE(review): log_input appears to store only dataset metadata (name, digest,
# schema, profile), and datasets created with from_numpy without an explicit
# `source` presumably get a code-based source that cannot be loaded — confirm
# against the mlflow.data documentation before relying on this approach.
# dataset_info = run.inputs.dataset_inputs[0].dataset
# d = mlflow.data.get_source(dataset_info)
# d.load()