In [0]:
%pip install --quiet mlflow==2.22.0
# Restart the Python process so the pinned mlflow version installed above is
# the one that gets imported by the following cells.
dbutils.library.restartPython()

[43mNote: you may need to restart the kernel using %restart_python or dbutils.library.restartPython() to use updated packages.[0m


In [0]:
# Declare the notebook's text widgets together with their default values.
for _widget_name, _default_value in (
    ("catalog", "mlops"),
    ("schema", "mlops_zoomcamp_prj"),
    ("tbl_name", "soft_quality_features"),
):
    dbutils.widgets.text(_widget_name, _default_value)

# Read the current widget values back into plain Python variables
# (note that the "schema" widget is exposed as `db`).
catalog, db, tbl_name = (
    dbutils.widgets.get(_key) for _key in ("catalog", "schema", "tbl_name")
)

# Name of the user running this notebook, taken from the notebook context.
_notebook_context = dbutils.notebook.entry_point.getDbutils().notebook().getContext()
current_user = _notebook_context.userName().get()

In [0]:
import mlflow



# Experiments are looked up by name prefix: every experiment whose name starts
# with "/Users/<current_user>/mlops_prj/software-defects" is a match.
xp_name = "software-defects"
xp_path = f"/Users/{current_user}/mlops_prj"
experiment_prefix = f"{xp_path}/{xp_name}"

# Fully qualified (catalog.schema.model) name the new model version is pushed to.
model_name = f"{catalog}.{db}.software_defects"
# Log the prefix that is actually searched (the previous message printed an
# unrelated, hard-coded experiment name).
print(f"Finding best run from {experiment_prefix}* and pushing new model version to {model_name}")

# Most recently updated experiment first; only the first hit is used.
matching_experiments = mlflow.search_experiments(
    filter_string=f"name LIKE '{experiment_prefix}%'",
    order_by=["last_update_time DESC"],
)
if not matching_experiments:
    # Fail with an actionable message instead of a bare IndexError on `[0]`.
    raise RuntimeError(
        f"No MLflow experiment found with a name starting with '{experiment_prefix}'."
    )

experiment_id = matching_experiments[0].experiment_id
print(experiment_id)

Finding best run from churn_auto_ml_* and pushing new model version to mlops.mlops_zoomcamp_prj.software_defects
207411282372909


In [0]:
# Fetch the single best run of the experiment, ranked by the logged accuracy.
# Only FINISHED runs are considered so that a failed or still-running run is
# never selected for registration.
best_model = mlflow.search_runs(
    experiment_ids=[experiment_id],  # the API expects a list of experiment ids
    filter_string="attributes.status = 'FINISHED'",
    order_by=["metrics.accuracy DESC"],
    max_results=1,
)

# `search_runs` returns a pandas DataFrame; stop here with a clear message if
# it is empty rather than failing later when row 0 is read.
if best_model.empty:
    raise RuntimeError(
        f"No FINISHED run found in experiment {experiment_id}; nothing to register."
    )

# Optional: the whole experiment can also be loaded as a Spark DataFrame to
# inspect all runs:
#   spark.read.format("mlflow-experiment").load(experiment_id)
best_model

Unnamed: 0,run_id,experiment_id,status,artifact_uri,start_time,end_time,metrics.accuracy,params.threeshold,params.n_estimators,params.max_depth,tags.mlflow.databricks.cluster.info,tags.mlflow.source.name,tags.mlflow.user,tags.mlflow.runName,tags.mlflow.runColor,tags.mlflow.databricks.notebook.commandID,tags.mlflow.databricks.workspaceURL,tags.mlflow.databricks.notebookRevisionID,tags.mlflow.log-model.history,tags.mlflow.databricks.cluster.libraries,tags.mlflow.databricks.cluster.id,tags.mlflow.databricks.notebookID,tags.mlflow.databricks.notebookPath,tags.mlflow.databricks.workspaceID,tags.mlflow.databricks.webappURL,tags.mlflow.source.type
0,ed0db8e48c4342d1b96ec92b258f878f,207411282372909,FINISHED,dbfs:/databricks/mlflow-tracking/2074112823729...,2025-07-12 10:28:06.729000+00:00,2025-07-12 10:28:11.173000+00:00,0.896739,0.5,40,6,"{""cluster_name"":"""",""spark_version"":""client.2.5...",/Users/dmitriy.shametko@gmail.com/2_train_model,dmitriy.shametko@gmail.com,resilient-pig-561,#7d54b2,1752315826002_5681728457939667123_99249bc39968...,https://dbc-88b955c3-50c9.cloud.databricks.com,1752316091225,"[{""artifact_path"":""sklearn_model"",""flavors"":{""...","{""installable"":[],""redacted"":[]}",0712-102343-6l9qw845-v2n,3017689002890933,/Users/dmitriy.shametko@gmail.com/2_train_model,4145713868919298,https://dbc-88b955c3-50c9.cloud.databricks.com,NOTEBOOK


In [0]:
# Register the "sklearn_model" artifact of the best run as a new version of
# the registered model `model_name` (built in an earlier cell of this notebook).
print(f"Registering model to {model_name}")

# The best-run frame holds a single row; take its run id.
run_id = best_model["run_id"].iloc[0]

# `runs:/<run_id>/<artifact_path>` URI pointing at the model logged by that run.
model_uri = f"runs:/{run_id}/sklearn_model"
model_details = mlflow.register_model(model_uri, model_name)

Registering model to mlops.mlops_zoomcamp_prj.software_defects


Successfully registered model 'mlops.mlops_zoomcamp_prj.software_defects'.
Created version '1' of model 'mlops.mlops_zoomcamp_prj.software_defects'.


In [0]:
from mlflow import MlflowClient

# Client used for all registry metadata updates in the following cells.
client = MlflowClient()

# Model-level description: applies to the registered model as a whole, not to
# one particular version, so it normally only needs to be set once.
model_description = "This model predicts whether a software has defects or not"
client.update_registered_model(name=model_details.name, description=model_description)

<RegisteredModel: aliases={}, creation_timestamp=1752317285359, description='This model predicts whether a software has defects or not', last_updated_timestamp=1752317337959, latest_versions=None, name='mlops.mlops_zoomcamp_prj.software_defects', tags={}>

In [0]:
# Provide more details on this specific model version.
# Accuracy of the best run, read from the single-row `best_model` frame.
best_score = float(best_model["metrics.accuracy"].iloc[0])
accuracy_rounded = round(best_score, 4)

# Format with the `%` presentation type instead of `round(...) * 100`: the
# multiplication can expose binary floating-point noise in the description
# (e.g. 0.57 * 100 == 56.99999999999999).
version_desc = f"This model version has an accuracy metric of {best_score:.2%}. Follow the link to its training run for more details."

client.update_model_version(
    name=model_details.name,
    version=model_details.version,
    description=version_desc,
)

# Also tag the model version with the accuracy score for visibility.
client.set_model_version_tag(
    name=model_details.name,
    version=model_details.version,
    key="accuracy",
    value=f"{accuracy_rounded}",
)

In [0]:
# Mark the freshly registered version as the candidate model by pointing the
# "candidate" alias of the registered model at it.
candidate_version = model_details.version
client.set_registered_model_alias(name=model_name, alias="candidate", version=candidate_version)