This notebook provides a quick example of using a Databricks tracking server to access and use a logged model.

In [31]:
import os

# Connection settings read by MLflow and the Databricks client.
# These mirror the explicit set_tracking_uri / set_registry_uri calls made
# further down: runs are read from the workspace tracking server
# ('databricks'), registered models from Unity Catalog ('databricks-uc').
os.environ['MLFLOW_TRACKING_URI'] = 'databricks'
os.environ['MLFLOW_REGISTRY_URI'] = 'databricks-uc'

# The workspace host is given as a full URL, scheme included.
os.environ['DATABRICKS_HOST'] = 'https://adb-984752964297111.11.azuredatabricks.net'

# 'dapi..' is only a placeholder, not a usable token. Prefer exporting
# DATABRICKS_TOKEN in the shell before starting Jupyter: setdefault keeps a
# token that is already present instead of clobbering it with the placeholder.
# Never commit a real token to the notebook.
os.environ.setdefault('DATABRICKS_TOKEN', 'dapi..')

In [3]:
import mlflow
# Point the client at the workspace: tracking goes through 'databricks',
# the model registry through 'databricks-uc'.
mlflow.set_tracking_uri(uri='databricks')
mlflow.set_registry_uri(uri='databricks-uc')

# Resolve the experiment record from its ID; the run search below uses its name.
experiment_id = 'c4b67a6b622a4467b79e8f81753c1fb6'
experiment = mlflow.get_experiment(experiment_id=experiment_id)

We can search for runs that belong to this experiment.

In [9]:
# Fetch at most three runs of the experiment (looked up by name) and display them.
runs = mlflow.search_runs(experiment_names=[experiment.name], max_results=3)
runs

Unnamed: 0,run_id,experiment_id,status,artifact_uri,start_time,end_time,metrics.valid_0-l2,metrics.custom_metric,params.learning_rate,params.num_leaves,...,tags.mlflow.databricks.gitRepoReferenceType,tags.mlflow.databricks.workspaceID,tags.sparkDatasourceInfo,tags.mlflow.databricks.gitRepoReference,tags.mlflow.databricks.cluster.info,tags.mlflow.databricks.gitRepoUrl,tags.mlflow.source.type,tags.mlflow.databricks.notebook.commandID,tags.mlflow.user,tags.mlflow.loggedArtifacts
0,b1f92d1f13f64aaab5df646c51230098,c4b67a6b622a4467b79e8f81753c1fb6,FINISHED,dbfs:/databricks/mlflow-tracking/c4b67a6b622a4...,2025-02-20 16:44:00.791000+00:00,2025-02-20 16:44:07.867000+00:00,,,,,...,branch,984752964297111,path=abfss:REDACTED_LOCAL_PART@unitydemo.dfs.c...,m5_benchmark,"{""cluster_name"":""shm_ml_cpu_d16"",""spark_versio...",https://github.com/ScottHMcKean/timeseries_res...,NOTEBOOK,1740065384976_7028051407422611977_45601b3af8ea...,scott.mckean@databricks.com,
1,090ed983105341dd93aa2c8004dc6b28,c4b67a6b622a4467b79e8f81753c1fb6,FINISHED,dbfs:/databricks/mlflow-tracking/c4b67a6b622a4...,2025-02-20 16:43:21.543000+00:00,2025-02-20 16:43:59.275000+00:00,0.71504,1.767597e-12,0.05,31.0,...,branch,984752964297111,path=abfss:REDACTED_LOCAL_PART@unitydemo.dfs.c...,m5_benchmark,"{""cluster_name"":""shm_ml_cpu_d16"",""spark_versio...",https://github.com/ScottHMcKean/timeseries_res...,NOTEBOOK,1740065384976_5708687477746039863_022a30d7fa03...,scott.mckean@databricks.com,"[{""path"": ""train.parquet"", ""type"": ""table""}, {..."
2,e743843437e3436cafff68d9dc365d47,c4b67a6b622a4467b79e8f81753c1fb6,FINISHED,dbfs:/databricks/mlflow-tracking/c4b67a6b622a4...,2025-02-20 16:36:01.237000+00:00,2025-02-20 16:36:09.441000+00:00,,,,,...,branch,984752964297111,path=abfss:REDACTED_LOCAL_PART@unitydemo.dfs.c...,m5_benchmark,"{""cluster_name"":""shm_ml_cpu_d16"",""spark_versio...",https://github.com/ScottHMcKean/timeseries_res...,NOTEBOOK,1740065384976_6045933118849256093_ad10346ca09d...,scott.mckean@databricks.com,


We can also download a run's artifacts, including the model files.

In [29]:
# Download the artifacts of the first run in `runs`; the cell output is the
# value returned by the call.
mlflow.artifacts.download_artifacts(run_id=runs['run_id'].iloc[0])

Downloading artifacts:   0%|          | 0/9 [00:00<?, ?it/s]

'/var/folders/2x/pf13chqx4614qjlmjdgmndv00000gp/T/tmp3v75flfa/'

We can also load the model directly using an MLflow flavor and use it for prediction.

In [24]:
# Despite its name, `model_name` holds a full "models:/" URI
# (shm.timeseries.lightgbm_model, version 11), which is what load_model expects.
model_name = "models:/shm.timeseries.lightgbm_model/11"
model = mlflow.lightgbm.load_model(model_uri=model_name)

In [27]:
import pandas as pd

# Feature names, in the same order as the values in each row of `data`.
columns = [
    "lag1",
    "lag2",
    "lag3",
    "lag7",
    "lag14",
    "lag30",
    "lag360",
    "expanding_mean_lag7",
    "rolling_mean_lag14_window_size7",
    "rolling_mean_lag30_window_size14",
]

# Two example rows of feature values, one inner list per row.
data = [
    [3.0, 11.0, 2.0, 2.0, 3.0, 0.0, 27.0, 10.954802513122559, 3.7142856121063232, 0.0],
    [3.0, 3.0, 11.0, 4.0, 5.0, 0.0, 32.0, 10.935211181640625, 4.0, 0.0],
]

# Assemble the rows into a labelled frame and show it as the cell output.
df = pd.DataFrame(data, columns=columns)
df

Unnamed: 0,lag1,lag2,lag3,lag7,lag14,lag30,lag360,expanding_mean_lag7,rolling_mean_lag14_window_size7,rolling_mean_lag30_window_size14
0,3.0,11.0,2.0,2.0,3.0,0.0,27.0,10.954803,3.714286,0.0
1,3.0,3.0,11.0,4.0,5.0,0.0,32.0,10.935211,4.0,0.0


In [28]:
# Run the loaded model on the two example rows in `df`; the predictions are
# shown as the cell output.
# NOTE(review): assumes the columns of `df` match the features the model was
# trained on, in name and order — confirm against the logged model signature.
model.predict(df)

array([3.46303008, 7.58185502])