In [17]:
import numpy as np
import pandas as pd
import mlflow
import optuna
import mlflow.sklearn
from mlflow.tracking import MlflowClient
from sklearn.datasets import make_regression
from sklearn.linear_model import Lasso
from sklearn.metrics import mean_squared_error
from sklearn.model_selection import train_test_split, cross_val_score
from mlflow.models import infer_signature

# MLFlow

# Step 1 - Start ML Tracking Server

In the terminal, navigate to the notebook folder and the start the ml server

Run in the terminal (command line): mlflow server --host 127.0.0.1 --port 8080

This will start the server, visit 127.0.0.1:8080 or localhost:8080

# Step 2

In [2]:
# Set the tracking uri
mlflow.set_tracking_uri(uri="http://127.0.0.1:8080")

In [3]:
print("Current Tracking URI:", mlflow.get_tracking_uri())

Current Tracking URI: http://127.0.0.1:8080


__Detailed Explanation__
mlflow.set_tracking_uri() configures where MLflow will store experiment metadata.
uri="http://127.0.0.1:8080" points to an MLflow Tracking Server running locally on your machine at port 8080.

__How It Works__


-> Experiment Metadata Logging

All logs of experiments (parameters, metrics, tags) will be stored on the MLflow server running at http://127.0.0.1:8080.

Also, by default, it stores the artifacts on the mlruns folder created locally


-> Model Artifact Logging

Artifacts like models, plots, and files will be stored in the location configured on the server (--default-artifact-root).

__UI Access__
You can view experiments in the MLflow UI by visiting:
🔗 http://127.0.0.1:8080

# Step 3

Create an MLFLow Experiment. It will create a folder with artifact_location= ... name in the mlruns folder

In [23]:
mlflow.set_experiment("First Experiment")

<Experiment: artifact_location='mlflow-artifacts:/516757630045126067', creation_time=1754054035158, experiment_id='516757630045126067', last_update_time=1754054035158, lifecycle_stage='active', name='First Experiment', tags={}>

# Step 4

Prepare the data and fit the model

In [24]:
# Load datasets
df = pd.read_csv('WineQT.csv', sep=',')

In [25]:
df.head(3)

Unnamed: 0,fixed acidity,volatile acidity,citric acid,residual sugar,chlorides,free sulfur dioxide,total sulfur dioxide,density,pH,sulphates,alcohol,quality,Id
0,7.4,0.7,0.0,1.9,0.076,11.0,34.0,0.9978,3.51,0.56,9.4,5,0
1,7.8,0.88,0.0,2.6,0.098,25.0,67.0,0.9968,3.2,0.68,9.8,5,1
2,7.8,0.76,0.04,2.3,0.092,15.0,54.0,0.997,3.26,0.65,9.8,5,2


In [26]:
# Split the data into training, validation, and test sets
train, test = train_test_split(df, test_size=0.25, random_state=42)
x_train = train.drop(["quality"], axis=1).values
y_train = train[["quality"]].values.ravel()
x_test= test.drop(["quality"], axis=1).values
y_test = test[["quality"]].values.ravel()

In [27]:
x_train, x_val, y_train, y_val = train_test_split(
    x_train, y_train, test_size=0.2, random_state=42
)

In [28]:
alpha = 0.1
model = Lasso(alpha=alpha)
model.fit(x_train, y_train)

In [29]:
y_pred_val = model.predict(x_val)
y_pred_test = model.predict(x_test)

In [30]:
rmse_val = np.sqrt(mean_squared_error(y_val, y_pred_val))
rmse_test = np.sqrt(mean_squared_error(y_test, y_pred_test))
print(f'RMSE val = {rmse_val}, RMSE test = {rmse_test}')

RMSE val = 0.6356127117266147, RMSE test = 0.638554128638353


In [31]:
params = {
    'alpha': model.alpha
}

In [32]:
# Start an MLflow run
with mlflow.start_run(run_name='lasso_baseline'):
    # Log the hyperparameters
    mlflow.log_params(params)
    # Log the loss metric
    mlflow.log_metric("rmse_val", rmse_val)
    mlflow.log_metric("rmse_test", rmse_test)
    # Set a tag that we can use to remind ourselves what this run was for
    mlflow.set_tag("model_version", "baseline")

    # Log the model
    model_info = mlflow.sklearn.log_model(
        sk_model=model,
        name="wine_model_lasso",
        input_example=x_train,
        registered_model_name="lasso_baseline_1",
    )

Registered model 'lasso_baseline_1' already exists. Creating a new version of this model...
2025/08/01 17:41:49 INFO mlflow.store.model_registry.abstract_store: Waiting up to 300 seconds for model version to finish creation. Model name: lasso_baseline_1, version 3


🏃 View run lasso_baseline at: http://127.0.0.1:8080/#/experiments/516757630045126067/runs/184179c5c21c4adc890edf7cca462a03
🧪 View experiment at: http://127.0.0.1:8080/#/experiments/516757630045126067


Created version '3' of model 'lasso_baseline_1'.


## Child runs

Within the same run, you can create child runs with different parameters

In [50]:
# Define a function to log parameters and metrics and add tag
# logging for search_runs functionality
def log_run(run_name, iteration, params, rmse_val, rmse_test):
    with mlflow.start_run(run_name=f"{run_name}_{iteration}", nested=True):
        # Log the hyperparameters
        mlflow.log_params(params)
        # Log the loss metric
        mlflow.log_metric("rmse_error_val", rmse_val)
        mlflow.log_metric("rmse_error_test", rmse_test)

In [45]:
# Specify several alphas
alpha_range = np.logspace(0, -3, num=10).tolist()
print(alpha_range)

[1.0, 0.4641588833612779, 0.2154434690031884, 0.1, 0.046415888336127795, 0.021544346900318846, 0.01, 0.004641588833612782, 0.0021544346900318843, 0.001]


In [47]:
run_name = 'Lasso_nested'
with mlflow.start_run(run_name=run_name):
    for idx, alpha in enumerate(alpha_range):
        model = Lasso(alpha=alpha)
        model.fit(x_train, y_train)
        # Predictions
        y_pred_val = model.predict(x_val)
        y_pred_test = model.predict(x_test)
        # Erros
        rmse_val = np.sqrt(mean_squared_error(y_val, y_pred_val))
        rmse_test = np.sqrt(mean_squared_error(y_test, y_pred_test))
        print(f'Iteration {idx}, rmse={rmse_test}')
        # Writing params
        params = {'alpha': alpha}
        log_run(run_name, idx, params, rmse_val, rmse_test)

Iteration 0, rmse=0.7374379792573892
🏃 View run Lasso_nested_0 at: http://127.0.0.1:8080/#/experiments/516757630045126067/runs/6f9fc596e26e41a4b2ca7da25f60e43d
🧪 View experiment at: http://127.0.0.1:8080/#/experiments/516757630045126067
Iteration 1, rmse=0.7366319489063405
🏃 View run Lasso_nested_1 at: http://127.0.0.1:8080/#/experiments/516757630045126067/runs/a8a8a49b7ee24291851614e8a1a531ea
🧪 View experiment at: http://127.0.0.1:8080/#/experiments/516757630045126067
Iteration 2, rmse=0.6743324662326164
🏃 View run Lasso_nested_2 at: http://127.0.0.1:8080/#/experiments/516757630045126067/runs/b31bfac145a041a79bfc32483ac0d2a1
🧪 View experiment at: http://127.0.0.1:8080/#/experiments/516757630045126067
Iteration 3, rmse=0.638554128638353
🏃 View run Lasso_nested_3 at: http://127.0.0.1:8080/#/experiments/516757630045126067/runs/ce7ddb15126a4965833f0d4d2a74d918
🧪 View experiment at: http://127.0.0.1:8080/#/experiments/516757630045126067
Iteration 4, rmse=0.628923587071802
🏃 View run Lasso_

### Child runs with Optuna

Child runs are useful when tuning hyperparameters under the same run. Here's an example of how to rune hyperparameters wth Optuna.

In [64]:
# Specify the objective function with Optuna
def objective(trial, x_train, y_train, x_val, y_val, x_test, y_test):
    # Sample alpha
    alpha = trial.suggest_float('alpha', 0.0001, 1, log=True)
    model = Lasso(alpha=alpha)
    model.fit(x_train, y_train)
    # Predictions
    y_pred_val = model.predict(x_val)
    y_pred_test = model.predict(x_test)
    # Erros
    rmse_val = np.sqrt(mean_squared_error(y_val, y_pred_val))
    rmse_test = np.sqrt(mean_squared_error(y_test, y_pred_test))
    params = {'alpha': alpha}
    log_run(run_name, trial.number, params, rmse_val, rmse_test)
    return round(rmse_val, 3)

In [65]:
study = optuna.create_study(direction="minimize")

[I 2025-08-02 14:03:24,962] A new study created in memory with name: no-name-7b154b66-c55b-451a-a7fd-82aef6d089e0


In [66]:
run_name = 'Lasso_optuna'
with mlflow.start_run(run_name=run_name):
    study.optimize(
    lambda trial: objective(trial, x_train, y_train, x_val, y_val, x_test, y_test),
    n_trials=10
)
    # Log the best metrics and parameters info
    mlflow.log_metric("best_rmse", study.best_value)
    mlflow.log_params(study.best_params)
    mlflow.set_tag('best_model', 'true')
    # Refit and log the best model
    model = Lasso(**study.best_params)
    model.fit(x_train, y_train)
    # Log the model
    model_info = mlflow.sklearn.log_model(
        sk_model=model,
        name="best_model_lasso",
        input_example=x_train,
#         registered_model_name="best_model_lasso",
    )

[I 2025-08-02 14:03:25,217] Trial 0 finished with value: 0.572 and parameters: {'alpha': 0.0012763906181106578}. Best is trial 0 with value: 0.572.
[I 2025-08-02 14:03:25,311] Trial 1 finished with value: 0.579 and parameters: {'alpha': 0.008637636160407839}. Best is trial 0 with value: 0.572.


Alpha = 0.0012763906181106578
RMSE val = 0.5715216132633616, RMSE test = 0.609019662261828
🏃 View run Lasso_optuna_0 at: http://127.0.0.1:8080/#/experiments/516757630045126067/runs/10d2cac749374ae7a827f64d27c7f9bd
🧪 View experiment at: http://127.0.0.1:8080/#/experiments/516757630045126067
Alpha = 0.008637636160407839
RMSE val = 0.5794276266600845, RMSE test = 0.6056065029825204
🏃 View run Lasso_optuna_1 at: http://127.0.0.1:8080/#/experiments/516757630045126067/runs/af23ba2897ea40b3b97c02c8c9ff3842
🧪 View experiment at: http://127.0.0.1:8080/#/experiments/516757630045126067
Alpha = 0.14109067380360615
RMSE val = 0.6474581339947599, RMSE test = 0.6513333206284194


[I 2025-08-02 14:03:25,400] Trial 2 finished with value: 0.647 and parameters: {'alpha': 0.14109067380360615}. Best is trial 0 with value: 0.572.
[I 2025-08-02 14:03:25,487] Trial 3 finished with value: 0.571 and parameters: {'alpha': 0.0003134351413728864}. Best is trial 3 with value: 0.571.
[I 2025-08-02 14:03:25,570] Trial 4 finished with value: 0.572 and parameters: {'alpha': 0.001552855110295442}. Best is trial 3 with value: 0.571.


🏃 View run Lasso_optuna_2 at: http://127.0.0.1:8080/#/experiments/516757630045126067/runs/b59221ff85514ce7aab1a2be2c19f369
🧪 View experiment at: http://127.0.0.1:8080/#/experiments/516757630045126067
Alpha = 0.0003134351413728864
RMSE val = 0.5711530951272646, RMSE test = 0.6122679319127128
🏃 View run Lasso_optuna_3 at: http://127.0.0.1:8080/#/experiments/516757630045126067/runs/91d49b5a1c984a24bb460e745893fff9
🧪 View experiment at: http://127.0.0.1:8080/#/experiments/516757630045126067
Alpha = 0.001552855110295442
RMSE val = 0.5718007267437641, RMSE test = 0.6083471053016355
🏃 View run Lasso_optuna_4 at: http://127.0.0.1:8080/#/experiments/516757630045126067/runs/8fea6c4275354961bdd21bfc60ed903b
🧪 View experiment at: http://127.0.0.1:8080/#/experiments/516757630045126067
Alpha = 0.0012427079085965739
RMSE val = 0.5714928872439479, RMSE test = 0.609109561490189


[I 2025-08-02 14:03:25,653] Trial 5 finished with value: 0.571 and parameters: {'alpha': 0.0012427079085965739}. Best is trial 3 with value: 0.571.
[I 2025-08-02 14:03:25,739] Trial 6 finished with value: 0.577 and parameters: {'alpha': 0.004346113270371531}. Best is trial 3 with value: 0.571.
[I 2025-08-02 14:03:25,819] Trial 7 finished with value: 0.577 and parameters: {'alpha': 0.0049069405724022725}. Best is trial 3 with value: 0.571.


🏃 View run Lasso_optuna_5 at: http://127.0.0.1:8080/#/experiments/516757630045126067/runs/77eb88aaa1254c2ca4d12d687cbe9158
🧪 View experiment at: http://127.0.0.1:8080/#/experiments/516757630045126067
Alpha = 0.004346113270371531
RMSE val = 0.577035647476002, RMSE test = 0.6061785448193832
🏃 View run Lasso_optuna_6 at: http://127.0.0.1:8080/#/experiments/516757630045126067/runs/1393d92301044a34924da22640823c4a
🧪 View experiment at: http://127.0.0.1:8080/#/experiments/516757630045126067
Alpha = 0.0049069405724022725
RMSE val = 0.5772927165625208, RMSE test = 0.6060153497229979
🏃 View run Lasso_optuna_7 at: http://127.0.0.1:8080/#/experiments/516757630045126067/runs/de7f47df5e0648ed9a9fa133ef36a7d9
🧪 View experiment at: http://127.0.0.1:8080/#/experiments/516757630045126067
Alpha = 0.00026521823057008744
RMSE val = 0.5711593196633804, RMSE test = 0.6124673246369146


[I 2025-08-02 14:03:25,898] Trial 8 finished with value: 0.571 and parameters: {'alpha': 0.00026521823057008744}. Best is trial 3 with value: 0.571.
[I 2025-08-02 14:03:25,968] Trial 9 finished with value: 0.577 and parameters: {'alpha': 0.005334063295402457}. Best is trial 3 with value: 0.571.


🏃 View run Lasso_optuna_8 at: http://127.0.0.1:8080/#/experiments/516757630045126067/runs/e86929c9cda1458db59fb8d2367dd814
🧪 View experiment at: http://127.0.0.1:8080/#/experiments/516757630045126067
Alpha = 0.005334063295402457
RMSE val = 0.5774996111639723, RMSE test = 0.605908854850016
🏃 View run Lasso_optuna_9 at: http://127.0.0.1:8080/#/experiments/516757630045126067/runs/fb2366d8c5544774a0576a842dde20c8
🧪 View experiment at: http://127.0.0.1:8080/#/experiments/516757630045126067
🏃 View run Lasso_optuna at: http://127.0.0.1:8080/#/experiments/516757630045126067/runs/34405e1852b04624bcb1fe1a93bd5146
🧪 View experiment at: http://127.0.0.1:8080/#/experiments/516757630045126067


### After the run, we can compare the runs, check the scatter and countour plots of the parameter

# Loading and registering the best optuna model

In [67]:
# Search for the best model
client = MlflowClient()

In [68]:
# Search for runs with the "best_model" tag set to "true"
runs = mlflow.search_runs(
    filter_string="tags.best_model = 'true'",
    order_by=["start_time DESC"],  # Sort by the latest run
    max_results=1  # Get only the most recent run
)

In [69]:
runs

Unnamed: 0,run_id,experiment_id,status,artifact_uri,start_time,end_time,metrics.best_rmse,params.alpha,tags.mlflow.source.type,tags.mlflow.source.name,tags.mlflow.runName,tags.best_model,tags.mlflow.user
0,34405e1852b04624bcb1fe1a93bd5146,516757630045126067,FINISHED,mlflow-artifacts:/516757630045126067/34405e185...,2025-08-02 10:03:25.121000+00:00,2025-08-02 10:03:27.047000+00:00,0.571,0.0003134351413728,LOCAL,/Users/timurbikmuhametov/miniconda3/envs/ml_bl...,Lasso_optuna,True,timurbikmuhametov


In [70]:
# Get the run ID of the latest "best_model" if there are several
latest_exp_id = runs.iloc[0].experiment_id
latest_run_id = runs.iloc[0].run_id

In [71]:
# List all artifacts for the given run_id
artifacts = client.list_artifacts(latest_run_id)
for artifact in artifacts:
    print(artifact.path)

In [None]:
# Load the latest best model (adjust artifact path if needed)
model_uri = f"runs:/{latest_run_id}/best_model_lasso"  # or ridge_model, etc.
latest_best_model = mlflow.sklearn.load_model(model_uri)

Downloading artifacts:   0%|          | 0/1 [00:00<?, ?it/s]

In [None]:
# Register the best model
mlflow.register_model(model_uri, "prod_lasso_model")

# Loading the registered model

In [73]:
model_name = "prod_lasso_model"
model_version = "latest"

# Load the model from the Model Registry
model_uri = f"models:/{model_name}/{model_version}"
model = mlflow.sklearn.load_model(model_uri)

  latest = client.get_latest_versions(name, None if stage is None else [stage])


Downloading artifacts:   0%|          | 0/7 [00:00<?, ?it/s]