This notebook provides a minimal reproducible example of SQL toolkits in LangGraph that connect to Unity Catalog tables. We profile tool calls against a standard Delta table versus an online table.

Some resources:

- https://python.langchain.com/docs/integrations/providers/databricks/#sqldatabase
- https://docs.databricks.com/aws/en/generative-ai/agent-framework/structured-retrieval-tools#table-query-tool

In [0]:
# Install the Unity Catalog AI client and its LangChain integration, each with the `databricks` extra.
# NOTE(review): versions are unpinned — consider pinning them so the profiling results stay reproducible.
%pip install unitycatalog-ai[databricks]
%pip install unitycatalog-langchain[databricks]

In [0]:
from unitycatalog.ai.core.databricks import DatabricksFunctionClient
from unitycatalog.ai.langchain.toolkit import UCFunctionToolkit
# Shared function client, built with default arguments; it is passed to every UCFunctionToolkit created below.
client = DatabricksFunctionClient()

## Standard Delta Table Toolkit

In [0]:
# Unity Catalog namespace used to build the fully qualified function names for the toolkits.
CATALOG, SCHEMA = "shm", "iot_turbine"

In [0]:
%sql
-- Scalar UC function exposed to the agent as a tool: for the requested region it returns a
-- one-line summary (EAN, region, stock) of the blade part with the lowest available stock.
CREATE OR REPLACE FUNCTION shm.iot_turbine.part_info(
  stock_location STRING COMMENT 'Region, e.g. America/Denver'
)
RETURNS STRING
COMMENT 'Returns an string of EAN, location, and stock available. The EAN can be used for other queries'
RETURN SELECT CONCAT(
  'EAN: ', p.EAN, ', ', 'Region: ', p.stock_location, ', ', 'Stock: ', p.stock_available
)
FROM shm.iot_turbine.parts AS p
-- The parameter has the same name as the table column. An unqualified reference resolves to
-- the column, which turns the filter into a self-comparison, so the parameter is qualified
-- with the function name and the column with the table alias.
WHERE p.stock_location = part_info.stock_location
AND p.TYPE = 'blade'
ORDER BY p.stock_available
LIMIT 1

In [0]:
# Fully qualified name (catalog.schema.function) of the UC function to expose as a tool
func_name = ".".join((CATALOG, SCHEMA, "part_info"))

# Wrap the function in a toolkit and keep its list of tools for the profiling cell
toolkit = UCFunctionToolkit(client=client, function_names=[func_name])
tools = toolkit.tools

In [0]:
import time
import numpy as np

# Number of timed invocations of the tool
N_RUNS = 10

# Elapsed time of each call, in seconds
response_times = []

for _ in range(N_RUNS):
    # perf_counter() is monotonic and high-resolution, so the measured interval cannot be
    # skewed by system clock adjustments the way time.time() can.
    start_time = time.perf_counter()
    tools[0].run({'stock_location': 'America/Denver'})
    response_times.append(time.perf_counter() - start_time)

mean_response_time = np.mean(response_times)
variance_response_time = np.var(response_times)

# Last expression of the cell: rendered as the cell output
"mean: {:.2f}, variance: {:.2f}".format(mean_response_time, variance_response_time)

## Online Table Toolkit

In [0]:
%sql
-- Same lookup as part_info, but reading from the parts_live table: for the requested region it
-- returns a one-line summary (EAN, region, stock) of the blade part with the lowest available stock.
CREATE OR REPLACE FUNCTION shm.iot_turbine.part_info_live(
  stock_location STRING COMMENT 'Region, e.g. America/Denver'
)
RETURNS STRING
COMMENT 'Returns an string of EAN, location, and stock available. The EAN can be used for other queries'
RETURN SELECT CONCAT(
  'EAN: ', p.EAN, ', ', 'Region: ', p.stock_location, ', ', 'Stock: ', p.stock_available
)
FROM shm.iot_turbine.parts_live AS p
-- The parameter has the same name as the table column. An unqualified reference resolves to
-- the column, which turns the filter into a self-comparison, so the parameter is qualified
-- with the function name and the column with the table alias.
WHERE p.stock_location = part_info_live.stock_location
AND p.TYPE = 'blade'
ORDER BY p.stock_available
LIMIT 1

In [0]:
# Fully qualified name (catalog.schema.function) of the online-table UC function
func_name = ".".join((CATALOG, SCHEMA, "part_info_live"))

# Wrap the function in a toolkit and keep its list of tools for the profiling cell
live_toolkit = UCFunctionToolkit(client=client, function_names=[func_name])
live_tools = live_toolkit.tools

In [0]:
import time
import numpy as np

# Number of timed invocations of the tool
N_RUNS = 10

# Elapsed time of each call, in seconds
response_times = []

for _ in range(N_RUNS):
    # perf_counter() is monotonic and high-resolution, so the measured interval cannot be
    # skewed by system clock adjustments the way time.time() can.
    start_time = time.perf_counter()
    live_tools[0].run({'stock_location': 'America/Denver'})
    response_times.append(time.perf_counter() - start_time)

mean_response_time = np.mean(response_times)
variance_response_time = np.var(response_times)

# Last expression of the cell: rendered as the cell output
"mean: {:.2f}, variance: {:.2f}".format(mean_response_time, variance_response_time)