In [2]:
import mlflow
import mlflow.sklearn
import pandas as pd
import numpy as np
from sklearn.ensemble import RandomForestClassifier
from sklearn.linear_model import LogisticRegression
from sklearn.svm import SVC
from mlflow.models import infer_signature
import warnings

# Ignore warnings whose message matches 'Failed to log model config as params'
# so they do not clutter the cell output.
warnings.filterwarnings(
    action='ignore',
    message='Failed to log model config as params',
)

# Tracking backend (a local SQLite file) and the experiment used by this notebook.
TRACKING_URI = 'sqlite:///search_models.db'
EXPERIMENT_NAME = 'model_search_guide'

mlflow.set_tracking_uri(TRACKING_URI)
mlflow.set_experiment(EXPERIMENT_NAME)

# Model configurations: one dict per run. "model_type" names the estimator and
# the remaining keys are the hyperparameters passed to it.
model_configs = [
    {"model_type": "RandomForest", "n_estimators": 100, "max_depth": 10},
    {"model_type": "RandomForest", "n_estimators": 200, "max_depth": 20},
    {"model_type": "LogisticRegression", "C": 1.0, "solver": "lbfgs"},
    {"model_type": "LogisticRegression", "C": 0.1, "solver": "saga"},
    {"model_type": "SVM", "kernel": "rbf", "C": 1.0},
    {"model_type": "SVM", "kernel": "linear", "C": 0.5},
]

# Performance metrics: hard-coded demo values, index-aligned with model_configs.
accuracy_scores = [0.92, 0.94, 0.88, 0.86, 0.90, 0.87]
precision_scores = [0.91, 0.93, 0.87, 0.85, 0.89, 0.86]
recall_scores = [0.93, 0.95, 0.89, 0.87, 0.91, 0.88]
f1_scores = [0.92, 0.94, 0.88, 0.86, 0.90, 0.87]

# Model metadata (logged as tags), index-aligned with model_configs.
versions = ["v1.0", "v1.1", "v1.0", "v2.0", "v1.0", "v1.1"]
environments = [
    'production', 'staging', 'production', 'development', 'staging', 'production'
]
frameworks = ["sklearn", "sklearn", "sklearn", "sklearn", "sklearn", "sklearn"]

# The lists above are consumed by position, so a length mismatch would only
# surface later as an IndexError; fail early with a readable message instead.
assert all(
    len(values) == len(model_configs)
    for values in (
        accuracy_scores, precision_scores, recall_scores, f1_scores,
        versions, environments, frameworks,
    )
), "metric and tag lists must have one entry per model config"

# Create dummy training data from a seeded generator so that a fresh
# Restart & Run All reproduces exactly the same arrays.
SEED = 42
rng = np.random.default_rng(SEED)
X_train = rng.random((100, 10))       # 100 samples x 10 features in [0, 1)
y_train = rng.integers(0, 2, 100)     # binary labels (0 or 1)

# Create input example for model signature: first five rows with named columns.
input_example = pd.DataFrame(X_train[:5], columns=[f'feature_{i}' for i in range(10)])

def _build_model(config):
    """Instantiate the scikit-learn estimator described by ``config``.

    Args:
        config: One entry of ``model_configs``. ``config['model_type']`` selects
            the estimator; the other keys are read as its hyperparameters.

    Returns:
        An unfitted ``RandomForestClassifier``, ``LogisticRegression`` or ``SVC``.

    Raises:
        ValueError: If ``config['model_type']`` is not a supported name
            (previously any unknown type silently fell through to SVC).
    """
    model_type = config['model_type']
    if model_type == 'RandomForest':
        return RandomForestClassifier(
            n_estimators=config['n_estimators'],
            max_depth=config['max_depth'],
            random_state=42,
        )
    if model_type == 'LogisticRegression':
        return LogisticRegression(
            C=config['C'],
            solver=config['solver'],
            random_state=42,
        )
    if model_type == 'SVM':
        return SVC(
            kernel=config['kernel'],
            C=config['C'],
            random_state=42,
            probability=True,
        )
    raise ValueError(f"Unsupported model_type: {model_type!r}")


# Train on a frame that carries the same column names as input_example, so the
# fitted model, the inferred signature and the stored input example all agree.
train_features = pd.DataFrame(X_train, columns=input_example.columns)

for i, config in enumerate(model_configs):
    with mlflow.start_run():
        # create the model for this config
        model = _build_model(config)

        # every key in config (hyperparameters plus model_type) becomes a run parameter
        mlflow.log_params(config)

        # fit model
        model.fit(train_features, y_train)

        # infer the signature from the exact frame that is logged as the input example
        predictions = model.predict(input_example)
        signature = infer_signature(input_example, predictions)

        # log metrics (hard-coded demo values, looked up by position)
        mlflow.log_metrics({
            'accuracy': accuracy_scores[i],
            'precision': precision_scores[i],
            'recall': recall_scores[i],
            'f1_score': f1_scores[i],
        })

        # set tags
        mlflow.set_tags({
            'version': versions[i],
            'environment': environments[i],
            'framework': frameworks[i],
        })

        # log the model with its signature and input example.
        # Inner quotes differ from the outer ones so the f-strings also parse
        # on Python versions older than 3.12.
        model_name = f"{config['model_type']}_model_{i}"
        mlflow.sklearn.log_model(
            model,
            name=model_name,
            signature=signature,
            input_example=input_example,
            registered_model_name=f"SearchGuide{config['model_type']}",
        )

2026/01/07 06:57:13 INFO mlflow.store.db.utils: Creating initial MLflow database tables...
2026/01/07 06:57:13 INFO mlflow.store.db.utils: Updating database tables
2026-01-07 06:57:13 INFO  [alembic.runtime.migration] Context impl SQLiteImpl.
2026-01-07 06:57:13 INFO  [alembic.runtime.migration] Will assume non-transactional DDL.
2026-01-07 06:57:13 INFO  [alembic.runtime.migration] Context impl SQLiteImpl.
2026-01-07 06:57:13 INFO  [alembic.runtime.migration] Will assume non-transactional DDL.
  from .autonotebook import tqdm as notebook_tqdm
Downloading artifacts: 100%|██████████| 7/7 [00:00<00:00, 837.35it/s]
2026/01/07 06:57:48 INFO mlflow.store.db.utils: Creating initial MLflow database tables...
2026/01/07 06:57:48 INFO mlflow.store.db.utils: Updating database tables
2026-01-07 06:57:48 INFO  [alembic.runtime.migration] Context impl SQLiteImpl.
2026-01-07 06:57:48 INFO  [alembic.runtime.migration] Will assume non-transactional DDL.
Registered model 'SearchGuideRandomForest' alrea

In [3]:
# Logged models whose accuracy metric is above 0.9.
accuracy_filter = "metrics.accuracy > 0.9"
high_accuracy_models = mlflow.search_logged_models(filter_string=accuracy_filter)

# Logged models that clear both the recall and the precision threshold.
balance_filter = "metrics.recall > 0.90 AND metrics.precision > 0.88"
balanced_models = mlflow.search_logged_models(filter_string=balance_filter)

# Show both result sets, high-accuracy first.
for search_result in (high_accuracy_models, balanced_models):
    print(search_result)

                                   artifact_location  creation_timestamp  \
0  file:c:/Users/Maxwe/OneDrive/Documents/ML-from...       1767769070295   
1  file:c:/Users/Maxwe/OneDrive/Documents/ML-from...       1767769035243   
2  file:c:/Users/Maxwe/OneDrive/Documents/ML-from...       1767736948249   
3  file:c:/Users/Maxwe/OneDrive/Documents/ML-from...       1767736923088   

  experiment_id  last_updated_timestamp  \
0             1           1767769081729   
1             1           1767769068769   
2             1           1767736958717   
3             1           1767736946969   

                                             metrics  \
0  [<Metric: dataset_digest=None, dataset_name=No...   
1  [<Metric: dataset_digest=None, dataset_name=No...   
2  [<Metric: dataset_digest=None, dataset_name=No...   
3  [<Metric: dataset_digest=None, dataset_name=No...   

                             model_id model_type                  name  \
0  m-d77549707c6e42d69110bdb98ee12a9b       None

In [5]:
from mlflow import MlflowClient

client = MlflowClient()

# Resolve the experiment id from its name rather than hard-coding '1': the id
# assigned by the tracking store is not guaranteed to be the same everywhere.
experiment = mlflow.get_experiment_by_name('model_search_guide')
if experiment is None:
    raise LookupError(
        "Experiment 'model_search_guide' not found; run the model-logging cell first"
    )

page_token = None
all_models = []

# Fetch the matching models page by page (10 per request) until the returned
# page carries no continuation token.
while True:
    result = client.search_logged_models(
        experiment_ids=[experiment.experiment_id],
        filter_string='metrics.accuracy > 0.85',
        max_results=10,
        page_token=page_token
    )

    all_models.extend(result.to_list())

    if not result.token:
        break
    page_token = result.token

print(f"Found {len(all_models)} models")

Found 12 models


In [13]:
# Resolve the experiment id from its name rather than hard-coding '1'.
experiment = mlflow.get_experiment_by_name('model_search_guide')
if experiment is None:
    raise LookupError(
        "Experiment 'model_search_guide' not found; run the model-logging cell first"
    )

# Ask for the single logged model with the highest f1_score.
top_models = mlflow.search_logged_models(
    experiment_ids=[experiment.experiment_id],
    max_results=1,
    order_by=[
        {
            'field_name': 'metrics.f1_score', 'ascending': False
        }
    ],
    output_format='list'
)
if not top_models:
    # Same exception type as the former bare `[0]`, but with an explanation.
    raise IndexError("No logged models found in experiment 'model_search_guide'")
best_model = top_models[0]

# The model's metrics are a list of entries with `.key` / `.value`; pick the
# accuracy entry if one exists (the list itself may be missing).
accuracy_metric = next(
    (metric for metric in (best_model.metrics or []) if metric.key == 'accuracy'), None
)
accuracy_text = accuracy_metric.value if accuracy_metric is not None else 'not logged'
print(f'Model_id: {best_model.model_id}, Accuracy: {accuracy_text}')

Model_id: m-d77549707c6e42d69110bdb98ee12a9b, Accuracy: 0.94
