In [1]:
import numpy as np
import pandas as pd
import mlflow
import optuna
import pickle
import mlflow.sklearn
from catboost import CatBoostRegressor
from mlflow.tracking import MlflowClient
from sklearn.datasets import make_regression
from sklearn.linear_model import Lasso
from sklearn.metrics import mean_squared_error
from sklearn.model_selection import train_test_split, cross_val_score
from mlflow.models import infer_signature

# Starting ML Tracking Server

In the terminal, navigate to the notebook folder and the start the ml server

Run in the terminal (command line): mlflow server --host 127.0.0.1 --port 8080

**This is what it does:**

1. Starts the MLflow Tracking Server
A dedicated process that manages and serves your MLflow experiments.

2. Provides a Web UI
Accessible at http://127.0.0.1:8080 (or localhost:8080), where you can browse experiments, runs, parameters, metrics, and artifacts.

3. Exposes a Tracking API Endpoint
Other scripts or notebooks can log directly to this server if you set - **mlflow.set_tracking_uri("http://127.0.0.1:8080")**

4. MLflow automatically creates a folder mlruns/ in your working directory the first time you log something.
Inside mlruns/, it creates subfolders for:

- each experiment (default is 0)

- each run within that experiment

# Setting URI

In [4]:
# Set the tracking uri
mlflow.set_tracking_uri(uri="http://127.0.0.1:8080")

What it does:

1. It tells your MLflow client (your script/notebook) where to send all logging data (experiments, runs, params, metrics, artifacts).

3. Since our host is local, it will still write to mlruns, but here you could configure a remote host URI.

In [5]:
print("Current Tracking URI:", mlflow.get_tracking_uri())

Current Tracking URI: http://127.0.0.1:8080


# Fitting the model

Prepare the data and fit the model

In [6]:
# Load datasets
df = pd.read_csv('WineQT.csv', sep=',')

# Split the data into training, validation, and test sets
train, test = train_test_split(df, test_size=0.25, random_state=42)
x_train = train.drop(["quality"], axis=1).values
y_train = train[["quality"]].values.ravel()
x_test= test.drop(["quality"], axis=1).values
y_test = test[["quality"]].values.ravel()

# Split the data into training and validation sets
x_train, x_val, y_train, y_val = train_test_split(
    x_train, y_train, test_size=0.2, random_state=42
)

# Setting hyperparameter
alpha = 0.1
model = Lasso(alpha=alpha)
model.fit(x_train, y_train)

# Predicting
y_pred_val = model.predict(x_val)
y_pred_test = model.predict(x_test)

# Calculating metrics
rmse_val = np.sqrt(mean_squared_error(y_val, y_pred_val))
rmse_test = np.sqrt(mean_squared_error(y_test, y_pred_test))
print(f'RMSE val = {rmse_val}, RMSE test = {rmse_test}')

RMSE val = 0.6356127117266147, RMSE test = 0.638554128638353


# Running first experiment

Create an MLFLow Experiment. It will create a folder with artifact_location= ... name in the mlruns folder

In [7]:
# Set the experiment name - it also creates an experiment if it doesn't exist
mlflow.set_experiment("Wine Quality Regression")

2025/08/19 14:27:04 INFO mlflow.tracking.fluent: Experiment with name 'Wine Quality Regression' does not exist. Creating a new experiment.


<Experiment: artifact_location='mlflow-artifacts:/980501120094527569', creation_time=1755599224007, experiment_id='980501120094527569', last_update_time=1755599224007, lifecycle_stage='active', name='Wine Quality Regression', tags={}>

In [8]:
# Setting hyperparameters
params = {
    'alpha': model.alpha
}

# Start an MLflow run
with mlflow.start_run(run_name='lasso_baseline'):
    # Log the hyperparameters
    mlflow.log_params(params)
    # Log the loss metric
    mlflow.log_metric("rmse_val", rmse_val)
    mlflow.log_metric("rmse_test", rmse_test)
    # Set a tag that we can use to remind ourselves what this run was for
    mlflow.set_tag("model_version", "baseline")

    # Log the model
    model_info = mlflow.sklearn.log_model(
        sk_model=model,
        name="wine_model_lasso",
        input_example=x_train,
        registered_model_name="lasso_baseline_1",
    )

Successfully registered model 'lasso_baseline_1'.
2025/08/19 14:27:49 INFO mlflow.store.model_registry.abstract_store: Waiting up to 300 seconds for model version to finish creation. Model name: lasso_baseline_1, version 1


🏃 View run lasso_baseline at: http://127.0.0.1:8080/#/experiments/980501120094527569/runs/a89e0db8af594231ac37b978719b3e0f
🧪 View experiment at: http://127.0.0.1:8080/#/experiments/980501120094527569


Created version '1' of model 'lasso_baseline_1'.


# Running MLflow experiments with different models

In [9]:
def train_model(df: pd.DataFrame, params: dict, model='Lasso'):
    """
    Train a model and return the metrics
    """
    # Split the data into training, validation, and test sets
    train, test = train_test_split(df, test_size=0.25, random_state=42)
    x_train = train.drop(["quality"], axis=1).values
    y_train = train[["quality"]].values.ravel()
    x_test= test.drop(["quality"], axis=1).values
    y_test = test[["quality"]].values.ravel()

    # Split the data into training and validation sets
    x_train, x_val, y_train, y_val = train_test_split(
        x_train, y_train, test_size=0.2, random_state=42
    )

    if model == 'Lasso':
        model = Lasso(**params)
    elif model == 'CatBoost':
        model = CatBoostRegressor(**params)
    else:
        raise ValueError(f"Model {model} not supported")

    # Fitting the model
    model.fit(x_train, y_train)
    # Predicting
    y_pred_val = model.predict(x_val)
    y_pred_test = model.predict(x_test)

    # Calculating metrics
    rmse_val = np.sqrt(mean_squared_error(y_val, y_pred_val))
    rmse_test = np.sqrt(mean_squared_error(y_test, y_pred_test))
    print(f'RMSE val = {rmse_val}, RMSE test = {rmse_test}')
    return model,rmse_val, rmse_test

In [10]:
def log_run(
        run_name: str,
        params: dict,
        metrics: dict,
        tags: dict,
        trained_model,
        model_type='Lasso',
        input_example=None,
        registered_model_name=None):
    """
    Log a run to MLflow
    """
    mlflow.log_params(params)
    mlflow.log_metrics(metrics)
    mlflow.set_tag("model_version", run_name)
    mlflow.set_tags(tags)

    if model_type == 'Lasso':
        # Log sklearn model
        model_info = mlflow.sklearn.log_model(
            sk_model=trained_model,
            artifact_path="wine_model_lasso",
            input_example=input_example,
            registered_model_name=registered_model_name,
        )
    elif model_type == 'CatBoost':
        # Log CatBoost model
        model_info = mlflow.catboost.log_model(
            cb_model=trained_model,
            artifact_path="wine_model_catboost",
            input_example=input_example,
            registered_model_name=registered_model_name,
        )
    else:
        raise ValueError(f"Model type {model_type} not supported")
    
    return model_info

### Running LASSO

In [11]:
# Setting hyperparameters

lasso_params = {
    'alpha': 0.1,
    'max_iter': 1000,
    'random_state': 42
}

# Train the model
trained_model, rmse_val, rmse_test = train_model(df, lasso_params, model='Lasso')

# Start an MLflow run
with mlflow.start_run(run_name='lasso_baseline'):
    # Prepare metrics and tags
    metrics = {
        'rmse_val': rmse_val,
        'rmse_test': rmse_test
    }
    
    # Let's add some tags to the run
    tags = {
        'model_version': 'baseline',
        'experiment_type': 'regression',
    }
    
    # Log everything using our function
    model_info = log_run(
        run_name="lasso_baseline",
        params=lasso_params,
        metrics=metrics,
        tags=tags,
        trained_model=trained_model,
        model_type='Lasso',
        input_example=x_train  # Make sure x_train is available from train_model
    )
    
    print(f"Model logged with URI: {model_info.model_uri}")
    print(f"RMSE validation: {rmse_val:.4f}")
    print(f"RMSE test: {rmse_test:.4f}")



RMSE val = 0.6356127117266147, RMSE test = 0.638554128638353
Model logged with URI: models:/m-cd9c0c005af146b8abaca7b26acf4d9b
RMSE validation: 0.6356
RMSE test: 0.6386
🏃 View run lasso_baseline at: http://127.0.0.1:8080/#/experiments/980501120094527569/runs/0097e4f10f1646dd888859fba62b0abb
🧪 View experiment at: http://127.0.0.1:8080/#/experiments/980501120094527569


In [12]:
# Setting hyperparameters for CatBoost
catboost_params = {
    'iterations': 100,
    'learning_rate': 0.1,
    'depth': 6,
    'random_seed': 42,
    'verbose': False
}

# Train the model
trained_model, rmse_val, rmse_test = train_model(df, catboost_params, model='CatBoost')

# Start an MLflow run
with mlflow.start_run(run_name='catboost_baseline'):
    # Prepare metrics and tags
    metrics = {
        'rmse_val': rmse_val,
        'rmse_test': rmse_test
    }
    
    tags = {
        'model_version': 'baseline',
        'experiment_type': 'regression',
        'data_version': 'v1',
        'algorithm': 'gradient_boosting'
    }
    
    # Log everything using our function
    model_info = log_run(
        run_name="catboost_baseline",
        params=catboost_params,
        metrics=metrics,
        tags=tags,
        trained_model=trained_model,
        model_type='CatBoost',
    )
    
    print(f"Model logged with URI: {model_info.model_uri}")
    print(f"RMSE validation: {rmse_val:.4f}")
    print(f"RMSE test: {rmse_test:.4f}")



RMSE val = 0.5558687336128091, RMSE test = 0.5729224664412881




Model logged with URI: models:/m-81e3db93e807433895f306b8140b9bc4
RMSE validation: 0.5559
RMSE test: 0.5729
🏃 View run catboost_baseline at: http://127.0.0.1:8080/#/experiments/980501120094527569/runs/f426c0014e634410ab587649d87f0022
🧪 View experiment at: http://127.0.0.1:8080/#/experiments/980501120094527569


# Searching for the best model

In [11]:
# Get experiment by name
experiment_name = "Wine Quality Regression"
experiment = mlflow.get_experiment_by_name(experiment_name)

if experiment:
    experiment_id = experiment.experiment_id
    print(f"Experiment '{experiment_name}' has ID: {experiment_id}")
else:
    print(f"Experiment '{experiment_name}' not found")

Experiment 'Wine Quality Regression' has ID: 253826154506420569


In [21]:
# Find high-performing models across experiments
top_models = mlflow.search_logged_models(
    experiment_ids=[str(experiment_id)],
    filter_string="metrics.rmse_test < 0.9",
    order_by=[{"field_name": "metrics.rmse_val", "ascending": True}],
)
best_model = top_models.iloc[0]

In [22]:
top_models

Unnamed: 0,artifact_location,creation_timestamp,experiment_id,last_updated_timestamp,metrics,model_id,model_type,name,params,source_run_id,status,status_message,tags
0,mlflow-artifacts:/253826154506420569/models/m-...,1755598652618,253826154506420569,1755598653674,"[<Metric: dataset_digest=None, dataset_name=No...",m-c20d70c629ef4099b308680e30d44dc3,,wine_model_catboost,"{'depth': '6', 'learning_rate': '0.1', 'verbos...",0e3e37bcc73d455b824e8d021ac3b0cc,READY,,"{'mlflow.user': 'timurbikmuhametov', 'mlflow.s..."
1,mlflow-artifacts:/253826154506420569/models/m-...,1755598651571,253826154506420569,1755598652453,"[<Metric: dataset_digest=None, dataset_name=No...",m-e218d3503c184427bbdb2f4f46b96dfb,,wine_model_lasso,"{'random_state': '42', 'max_iter': '1000', 'al...",ae542ea41b72470a8bf268408b13087f,READY,,"{'mlflow.user': 'timurbikmuhametov', 'mlflow.s..."
2,mlflow-artifacts:/253826154506420569/models/m-...,1755598650524,253826154506420569,1755598651493,"[<Metric: dataset_digest=None, dataset_name=No...",m-0252824ac5ba4db697dd61089d324603,,wine_model_lasso,{'alpha': '0.1'},3504206dacd147999652dd3a0c0a529e,READY,,"{'mlflow.user': 'timurbikmuhametov', 'mlflow.m..."
3,mlflow-artifacts:/253826154506420569/models/m-...,1755598140354,253826154506420569,1755598141227,"[<Metric: dataset_digest=None, dataset_name=No...",m-140cb057f327474295042ef4426fff64,,wine_model_lasso,"{'random_state': '42', 'max_iter': '1000', 'al...",6dc98f84eb3244a9990e122c48b20547,READY,,"{'mlflow.user': 'timurbikmuhametov', 'mlflow.s..."
4,mlflow-artifacts:/253826154506420569/models/m-...,1755598121990,253826154506420569,1755598123034,"[<Metric: dataset_digest=None, dataset_name=No...",m-16764069e6e94a57a5ed5d10cd7c833d,,wine_model_lasso,{'alpha': '0.1'},5b05dbda731547a7b16d091b2ae48713,READY,,"{'mlflow.user': 'timurbikmuhametov', 'mlflow.m..."


In [23]:
best_model

artifact_location         mlflow-artifacts:/253826154506420569/models/m-...
creation_timestamp                                            1755598652618
experiment_id                                            253826154506420569
last_updated_timestamp                                        1755598653674
metrics                   [<Metric: dataset_digest=None, dataset_name=No...
model_id                                 m-c20d70c629ef4099b308680e30d44dc3
model_type                                                                 
name                                                    wine_model_catboost
params                    {'depth': '6', 'learning_rate': '0.1', 'verbos...
source_run_id                              0e3e37bcc73d455b824e8d021ac3b0cc
status                                                                READY
status_message                                                             
tags                      {'mlflow.user': 'timurbikmuhametov', 'mlflow.s...
Name: 0, dty

In [24]:
# loaded_model = mlflow.pyfunc.load_model(f"mlflow-artifacts:/{best_model.experiment_id}/models/{best_model.model_id}/artifacts")
loaded_model = mlflow.pyfunc.load_model(f"{best_model.artifact_location}")

Downloading artifacts:   0%|          | 0/5 [00:00<?, ?it/s]

In [25]:
loaded_model

mlflow.pyfunc.loaded_model:
  artifact_path: mlflow-artifacts:/253826154506420569/models/m-c20d70c629ef4099b308680e30d44dc3/artifacts
  flavor: mlflow.catboost
  run_id: 0e3e37bcc73d455b824e8d021ac3b0cc

In [26]:
# Load the best model directly
loaded_model = mlflow.pyfunc.load_model(f"models:/{best_model.model_id}")

Downloading artifacts:   0%|          | 0/5 [00:00<?, ?it/s]

In [27]:
loaded_model.predict(x_test)

array([5.26025073, 4.9595734 , 5.36203291, 5.23560873, 6.07836592,
       6.60682351, 5.31153175, 5.12167314, 5.78321961, 5.03908073,
       6.41653014, 6.29602144, 5.34480938, 6.28064416, 5.1472709 ,
       5.06744798, 5.7992744 , 5.45268741, 5.1313353 , 6.91733278,
       6.51237718, 6.06400099, 5.09038892, 6.75156149, 5.50293257,
       5.30706481, 6.33206477, 6.17691675, 5.02974341, 5.64910286,
       5.35402858, 5.64737727, 6.48740719, 5.8854732 , 5.46302839,
       5.37344437, 5.52181087, 6.42136897, 6.5928192 , 5.77839632,
       6.5483293 , 5.15017947, 6.16860697, 5.32383387, 5.13514657,
       5.23559906, 5.55455713, 6.05801224, 4.89755257, 5.65701363,
       6.78133425, 5.16450426, 5.70759668, 6.29083085, 5.55479106,
       6.48507469, 5.69471691, 5.78451992, 6.08785042, 5.55812174,
       5.18716616, 6.0141601 , 6.02013041, 6.40557773, 6.57592665,
       5.58508846, 6.87301101, 5.19324708, 5.25059116, 5.37486675,
       5.72759617, 5.45203135, 5.69588958, 5.64892336, 5.08725

To find the best model, we need an experiment ID for which we want to find the model

In [10]:
# Get all experiments
experiments = mlflow.search_experiments()

for exp in experiments:
    print(f"Experiment ID: {exp.experiment_id}, Name: {exp.name}")

Experiment ID: 726012541908738036, Name: Wine Quality Regression
Experiment ID: 0, Name: Default


In [29]:
# Search runs instead of registered models (much faster and more comprehensive)
runs = mlflow.search_runs(
    experiment_ids=['726012541908738036'],
    filter_string="metrics.rmse_test < 0.9",
    order_by=["metrics.rmse_val ASC"],  # Lower RMSE is better
    max_results=5
)
runs

Unnamed: 0,run_id,experiment_id,status,artifact_uri,start_time,end_time,metrics.rmse_val,metrics.rmse_test,params.iterations,params.depth,...,params.random_state,params.alpha,tags.algorithm,tags.mlflow.source.type,tags.data_version,tags.experiment_type,tags.mlflow.user,tags.mlflow.runName,tags.mlflow.source.name,tags.model_version
0,7af575228fd94f52a2926e220a2da5d2,726012541908738036,FINISHED,mlflow-artifacts:/726012541908738036/7af575228...,2025-08-19 09:09:50.304000+00:00,2025-08-19 09:09:51.605000+00:00,0.555869,0.572922,100.0,6.0,...,,,gradient_boosting,LOCAL,v1,regression,timurbikmuhametov,catboost_baseline,/Users/timurbikmuhametov/miniconda3/envs/maist...,baseline
1,64386ede47074b06b5474942a70009ca,726012541908738036,FINISHED,mlflow-artifacts:/726012541908738036/64386ede4...,2025-08-19 09:32:15.188000+00:00,2025-08-19 09:32:16.181000+00:00,0.635613,0.638554,,,...,42.0,0.1,,LOCAL,,regression,timurbikmuhametov,lasso_baseline,/Users/timurbikmuhametov/miniconda3/envs/maist...,baseline
2,8f1dbc55bfea453091c4e18c18f81401,726012541908738036,FINISHED,mlflow-artifacts:/726012541908738036/8f1dbc55b...,2025-08-19 09:32:13.973000+00:00,2025-08-19 09:32:15.169000+00:00,0.635613,0.638554,,,...,,0.1,,LOCAL,,,timurbikmuhametov,lasso_baseline,/Users/timurbikmuhametov/miniconda3/envs/maist...,baseline
3,f522cecef12544c4a64dcf5c7407484f,726012541908738036,FINISHED,mlflow-artifacts:/726012541908738036/f522cecef...,2025-08-19 09:31:50.755000+00:00,2025-08-19 09:31:51.749000+00:00,0.635613,0.638554,,,...,42.0,0.1,,LOCAL,,regression,timurbikmuhametov,lasso_baseline,/Users/timurbikmuhametov/miniconda3/envs/maist...,baseline
4,09fd041c2c47487ab9ebd17f229d91b5,726012541908738036,FINISHED,mlflow-artifacts:/726012541908738036/09fd041c2...,2025-08-19 09:31:49.336000+00:00,2025-08-19 09:31:50.734000+00:00,0.635613,0.638554,,,...,,0.1,,LOCAL,,,timurbikmuhametov,lasso_baseline,/Users/timurbikmuhametov/miniconda3/envs/maist...,baseline


In [12]:
runs.iloc[0]

run_id                                      7af575228fd94f52a2926e220a2da5d2
experiment_id                                             726012541908738036
status                                                              FINISHED
artifact_uri               mlflow-artifacts:/726012541908738036/7af575228...
start_time                                  2025-08-19 09:09:50.304000+00:00
end_time                                    2025-08-19 09:09:51.605000+00:00
metrics.rmse_val                                                    0.555869
metrics.rmse_test                                                   0.572922
params.iterations                                                        100
params.depth                                                               6
params.learning_rate                                                     0.1
params.verbose                                                         False
params.random_seed                                                        42

In [20]:
# Get the run ID of the latest "best_model" if there are several
best_exp_id = runs.iloc[0].experiment_id
best_run_id = runs.iloc[0].run_id
artifact_location = runs.iloc[0].artifact_uri
print(artifact_location)

mlflow-artifacts:/726012541908738036/7af575228fd94f52a2926e220a2da5d2/artifacts


In [51]:
loaded_model = mlflow.pyfunc.load_model("./mlartifacts/726012541908738036/models/m-51cde1c7c1c64d89b5a4cdc13c3cd7ae/artifacts/")

In [52]:
loaded_model

mlflow.pyfunc.loaded_model:
  artifact_path: mlflow-artifacts:/726012541908738036/models/m-51cde1c7c1c64d89b5a4cdc13c3cd7ae/artifacts
  flavor: mlflow.sklearn
  run_id: d7478d26be944533aaad05b44c27ef20

In [22]:
"./mlartifacts/726012541908738036/models/m-51cde1c7c1c64d89b5a4cdc13c3cd7ae/artifacts/model.pkl"

'./mlartifacts/726012541908738036/models/m-51cde1c7c1c64d89b5a4cdc13c3cd7ae/artifacts/model.pkl'

In [23]:
f"{artifact_location}/model.pkl"

'mlflow-artifacts:/726012541908738036/7af575228fd94f52a2926e220a2da5d2/artifacts/model.pkl'

In [14]:
# Search for the best model
client = MlflowClient()

In [49]:
with open(f"{artifact_location}/model.pkl", 'rb') as f:
    model = pickle.load(f)

FileNotFoundError: [Errno 2] No such file or directory: 'mlflow-artifacts:/726012541908738036/7af575228fd94f52a2926e220a2da5d2/artifacts/model.pkl'

In [19]:
# Load the best model directly
loaded_model = mlflow.pyfunc.load_model(f"{artifact_location}/model.pkl")

Downloading artifacts:   0%|          | 0/1 [00:00<?, ?it/s]

KeyboardInterrupt: 

In [55]:
model_path = "./mlartifacts/726012541908738036/models/m-51cde1c7c1c64d89b5a4cdc13c3cd7ae/artifacts/model.pkl"
with open(model_path, 'rb') as f:
    model = pickle.load(f)

In [56]:
model

In [48]:
runs.iloc[0].run_id

'7af575228fd94f52a2926e220a2da5d2'

In [32]:
best_model.model_id

'm-d741678831344f049a6ea35fd421bbcc'

## Child runs

Within the same run, you can create child runs with different parameters

In [50]:
# Define a function to log parameters and metrics and add tag
# logging for search_runs functionality
def log_run(run_name, iteration, params, rmse_val, rmse_test):
    with mlflow.start_run(run_name=f"{run_name}_{iteration}", nested=True):
        # Log the hyperparameters
        mlflow.log_params(params)
        # Log the loss metric
        mlflow.log_metric("rmse_error_val", rmse_val)
        mlflow.log_metric("rmse_error_test", rmse_test)

In [45]:
# Specify several alphas
alpha_range = np.logspace(0, -3, num=10).tolist()
print(alpha_range)

[1.0, 0.4641588833612779, 0.2154434690031884, 0.1, 0.046415888336127795, 0.021544346900318846, 0.01, 0.004641588833612782, 0.0021544346900318843, 0.001]


In [47]:
run_name = 'Lasso_nested'
with mlflow.start_run(run_name=run_name):
    for idx, alpha in enumerate(alpha_range):
        model = Lasso(alpha=alpha)
        model.fit(x_train, y_train)
        # Predictions
        y_pred_val = model.predict(x_val)
        y_pred_test = model.predict(x_test)
        # Erros
        rmse_val = np.sqrt(mean_squared_error(y_val, y_pred_val))
        rmse_test = np.sqrt(mean_squared_error(y_test, y_pred_test))
        print(f'Iteration {idx}, rmse={rmse_test}')
        # Writing params
        params = {'alpha': alpha}
        log_run(run_name, idx, params, rmse_val, rmse_test)

Iteration 0, rmse=0.7374379792573892
🏃 View run Lasso_nested_0 at: http://127.0.0.1:8080/#/experiments/516757630045126067/runs/6f9fc596e26e41a4b2ca7da25f60e43d
🧪 View experiment at: http://127.0.0.1:8080/#/experiments/516757630045126067
Iteration 1, rmse=0.7366319489063405
🏃 View run Lasso_nested_1 at: http://127.0.0.1:8080/#/experiments/516757630045126067/runs/a8a8a49b7ee24291851614e8a1a531ea
🧪 View experiment at: http://127.0.0.1:8080/#/experiments/516757630045126067
Iteration 2, rmse=0.6743324662326164
🏃 View run Lasso_nested_2 at: http://127.0.0.1:8080/#/experiments/516757630045126067/runs/b31bfac145a041a79bfc32483ac0d2a1
🧪 View experiment at: http://127.0.0.1:8080/#/experiments/516757630045126067
Iteration 3, rmse=0.638554128638353
🏃 View run Lasso_nested_3 at: http://127.0.0.1:8080/#/experiments/516757630045126067/runs/ce7ddb15126a4965833f0d4d2a74d918
🧪 View experiment at: http://127.0.0.1:8080/#/experiments/516757630045126067
Iteration 4, rmse=0.628923587071802
🏃 View run Lasso_

### Child runs with Optuna

Child runs are useful when tuning hyperparameters under the same run. Here's an example of how to rune hyperparameters wth Optuna.

In [64]:
# Specify the objective function with Optuna
def objective(trial, x_train, y_train, x_val, y_val, x_test, y_test):
    # Sample alpha
    alpha = trial.suggest_float('alpha', 0.0001, 1, log=True)
    model = Lasso(alpha=alpha)
    model.fit(x_train, y_train)
    # Predictions
    y_pred_val = model.predict(x_val)
    y_pred_test = model.predict(x_test)
    # Erros
    rmse_val = np.sqrt(mean_squared_error(y_val, y_pred_val))
    rmse_test = np.sqrt(mean_squared_error(y_test, y_pred_test))
    params = {'alpha': alpha}
    log_run(run_name, trial.number, params, rmse_val, rmse_test)
    return round(rmse_val, 3)

In [65]:
study = optuna.create_study(direction="minimize")

[I 2025-08-02 14:03:24,962] A new study created in memory with name: no-name-7b154b66-c55b-451a-a7fd-82aef6d089e0


In [66]:
run_name = 'Lasso_optuna'
with mlflow.start_run(run_name=run_name):
    study.optimize(
    lambda trial: objective(trial, x_train, y_train, x_val, y_val, x_test, y_test),
    n_trials=10
)
    # Log the best metrics and parameters info
    mlflow.log_metric("best_rmse", study.best_value)
    mlflow.log_params(study.best_params)
    mlflow.set_tag('best_model', 'true')
    # Refit and log the best model
    model = Lasso(**study.best_params)
    model.fit(x_train, y_train)
    # Log the model
    model_info = mlflow.sklearn.log_model(
        sk_model=model,
        name="best_model_lasso",
        input_example=x_train,
#         registered_model_name="best_model_lasso",
    )

[I 2025-08-02 14:03:25,217] Trial 0 finished with value: 0.572 and parameters: {'alpha': 0.0012763906181106578}. Best is trial 0 with value: 0.572.
[I 2025-08-02 14:03:25,311] Trial 1 finished with value: 0.579 and parameters: {'alpha': 0.008637636160407839}. Best is trial 0 with value: 0.572.


Alpha = 0.0012763906181106578
RMSE val = 0.5715216132633616, RMSE test = 0.609019662261828
🏃 View run Lasso_optuna_0 at: http://127.0.0.1:8080/#/experiments/516757630045126067/runs/10d2cac749374ae7a827f64d27c7f9bd
🧪 View experiment at: http://127.0.0.1:8080/#/experiments/516757630045126067
Alpha = 0.008637636160407839
RMSE val = 0.5794276266600845, RMSE test = 0.6056065029825204
🏃 View run Lasso_optuna_1 at: http://127.0.0.1:8080/#/experiments/516757630045126067/runs/af23ba2897ea40b3b97c02c8c9ff3842
🧪 View experiment at: http://127.0.0.1:8080/#/experiments/516757630045126067
Alpha = 0.14109067380360615
RMSE val = 0.6474581339947599, RMSE test = 0.6513333206284194


[I 2025-08-02 14:03:25,400] Trial 2 finished with value: 0.647 and parameters: {'alpha': 0.14109067380360615}. Best is trial 0 with value: 0.572.
[I 2025-08-02 14:03:25,487] Trial 3 finished with value: 0.571 and parameters: {'alpha': 0.0003134351413728864}. Best is trial 3 with value: 0.571.
[I 2025-08-02 14:03:25,570] Trial 4 finished with value: 0.572 and parameters: {'alpha': 0.001552855110295442}. Best is trial 3 with value: 0.571.


🏃 View run Lasso_optuna_2 at: http://127.0.0.1:8080/#/experiments/516757630045126067/runs/b59221ff85514ce7aab1a2be2c19f369
🧪 View experiment at: http://127.0.0.1:8080/#/experiments/516757630045126067
Alpha = 0.0003134351413728864
RMSE val = 0.5711530951272646, RMSE test = 0.6122679319127128
🏃 View run Lasso_optuna_3 at: http://127.0.0.1:8080/#/experiments/516757630045126067/runs/91d49b5a1c984a24bb460e745893fff9
🧪 View experiment at: http://127.0.0.1:8080/#/experiments/516757630045126067
Alpha = 0.001552855110295442
RMSE val = 0.5718007267437641, RMSE test = 0.6083471053016355
🏃 View run Lasso_optuna_4 at: http://127.0.0.1:8080/#/experiments/516757630045126067/runs/8fea6c4275354961bdd21bfc60ed903b
🧪 View experiment at: http://127.0.0.1:8080/#/experiments/516757630045126067
Alpha = 0.0012427079085965739
RMSE val = 0.5714928872439479, RMSE test = 0.609109561490189


[I 2025-08-02 14:03:25,653] Trial 5 finished with value: 0.571 and parameters: {'alpha': 0.0012427079085965739}. Best is trial 3 with value: 0.571.
[I 2025-08-02 14:03:25,739] Trial 6 finished with value: 0.577 and parameters: {'alpha': 0.004346113270371531}. Best is trial 3 with value: 0.571.
[I 2025-08-02 14:03:25,819] Trial 7 finished with value: 0.577 and parameters: {'alpha': 0.0049069405724022725}. Best is trial 3 with value: 0.571.


🏃 View run Lasso_optuna_5 at: http://127.0.0.1:8080/#/experiments/516757630045126067/runs/77eb88aaa1254c2ca4d12d687cbe9158
🧪 View experiment at: http://127.0.0.1:8080/#/experiments/516757630045126067
Alpha = 0.004346113270371531
RMSE val = 0.577035647476002, RMSE test = 0.6061785448193832
🏃 View run Lasso_optuna_6 at: http://127.0.0.1:8080/#/experiments/516757630045126067/runs/1393d92301044a34924da22640823c4a
🧪 View experiment at: http://127.0.0.1:8080/#/experiments/516757630045126067
Alpha = 0.0049069405724022725
RMSE val = 0.5772927165625208, RMSE test = 0.6060153497229979
🏃 View run Lasso_optuna_7 at: http://127.0.0.1:8080/#/experiments/516757630045126067/runs/de7f47df5e0648ed9a9fa133ef36a7d9
🧪 View experiment at: http://127.0.0.1:8080/#/experiments/516757630045126067
Alpha = 0.00026521823057008744
RMSE val = 0.5711593196633804, RMSE test = 0.6124673246369146


[I 2025-08-02 14:03:25,898] Trial 8 finished with value: 0.571 and parameters: {'alpha': 0.00026521823057008744}. Best is trial 3 with value: 0.571.
[I 2025-08-02 14:03:25,968] Trial 9 finished with value: 0.577 and parameters: {'alpha': 0.005334063295402457}. Best is trial 3 with value: 0.571.


🏃 View run Lasso_optuna_8 at: http://127.0.0.1:8080/#/experiments/516757630045126067/runs/e86929c9cda1458db59fb8d2367dd814
🧪 View experiment at: http://127.0.0.1:8080/#/experiments/516757630045126067
Alpha = 0.005334063295402457
RMSE val = 0.5774996111639723, RMSE test = 0.605908854850016
🏃 View run Lasso_optuna_9 at: http://127.0.0.1:8080/#/experiments/516757630045126067/runs/fb2366d8c5544774a0576a842dde20c8
🧪 View experiment at: http://127.0.0.1:8080/#/experiments/516757630045126067
🏃 View run Lasso_optuna at: http://127.0.0.1:8080/#/experiments/516757630045126067/runs/34405e1852b04624bcb1fe1a93bd5146
🧪 View experiment at: http://127.0.0.1:8080/#/experiments/516757630045126067


### After the run, we can compare the runs, check the scatter and countour plots of the parameter

# Loading and registering the best optuna model

In [67]:
# Search for the best model
client = MlflowClient()

In [68]:
# Search for runs with the "best_model" tag set to "true"
runs = mlflow.search_runs(
    filter_string="tags.best_model = 'true'",
    order_by=["start_time DESC"],  # Sort by the latest run
    max_results=1  # Get only the most recent run
)

In [69]:
runs

Unnamed: 0,run_id,experiment_id,status,artifact_uri,start_time,end_time,metrics.best_rmse,params.alpha,tags.mlflow.source.type,tags.mlflow.source.name,tags.mlflow.runName,tags.best_model,tags.mlflow.user
0,34405e1852b04624bcb1fe1a93bd5146,516757630045126067,FINISHED,mlflow-artifacts:/516757630045126067/34405e185...,2025-08-02 10:03:25.121000+00:00,2025-08-02 10:03:27.047000+00:00,0.571,0.0003134351413728,LOCAL,/Users/timurbikmuhametov/miniconda3/envs/ml_bl...,Lasso_optuna,True,timurbikmuhametov


In [70]:
# Get the run ID of the latest "best_model" if there are several
latest_exp_id = runs.iloc[0].experiment_id
latest_run_id = runs.iloc[0].run_id

In [71]:
# List all artifacts for the given run_id
artifacts = client.list_artifacts(latest_run_id)
for artifact in artifacts:
    print(artifact.path)

In [72]:
# Load the latest best model (adjust artifact path if needed)
model_uri = f"runs:/{latest_run_id}/best_model_lasso"  # or ridge_model, etc.
latest_best_model = mlflow.sklearn.load_model(model_uri)

Downloading artifacts:   0%|          | 0/1 [00:00<?, ?it/s]

Downloading artifacts:   0%|          | 0/7 [00:00<?, ?it/s]

In [74]:
latest_best_model

In [73]:
# Register the best model
mlflow.register_model(model_uri, "prod_lasso_model")

Successfully registered model 'prod_lasso_model'.
2025/08/02 14:08:37 INFO mlflow.store.model_registry.abstract_store: Waiting up to 300 seconds for model version to finish creation. Model name: prod_lasso_model, version 1
Created version '1' of model 'prod_lasso_model'.


<ModelVersion: aliases=[], creation_timestamp=1754129317360, current_stage='None', deployment_job_state=<ModelVersionDeploymentJobState: current_task_name='', job_id='', job_state='DEPLOYMENT_JOB_CONNECTION_STATE_UNSPECIFIED', run_id='', run_state='DEPLOYMENT_JOB_RUN_STATE_UNSPECIFIED'>, description='', last_updated_timestamp=1754129317360, metrics=None, model_id=None, name='prod_lasso_model', params=None, run_id='34405e1852b04624bcb1fe1a93bd5146', run_link='', source='models:/m-279c6abed3a24012b3169306a0d3726f', status='READY', status_message=None, tags={}, user_id='', version='1'>

# Loading the registered model

In [73]:
model_name = "prod_lasso_model"
model_version = "latest"

# Load the model from the Model Registry
model_uri = f"models:/{model_name}/{model_version}"
model = mlflow.sklearn.load_model(model_uri)

  latest = client.get_latest_versions(name, None if stage is None else [stage])


Downloading artifacts:   0%|          | 0/7 [00:00<?, ?it/s]