In [0]:
# Databricks notebook source
!pip install langchain databricks-langchain --quiet
dbutils.library.restartPython()

import os
from databricks_langchain import ChatDatabricks


llm = ChatDatabricks(endpoint="databricks-claude-sonnet-4-5")


ai_msg = llm.invoke("Hello world!")


ai_msg

[43mNote: you may need to restart the kernel using %restart_python or dbutils.library.restartPython() to use updated packages.[0m


AIMessage(content='Hello! 👋 How can I help you today?', additional_kwargs={}, response_metadata={'usage': {'prompt_tokens': 10, 'completion_tokens': 16, 'total_tokens': 26}, 'prompt_tokens': 10, 'completion_tokens': 16, 'total_tokens': 26, 'model': 'eu.anthropic.claude-sonnet-4-5-20250929-v1:0', 'model_name': 'eu.anthropic.claude-sonnet-4-5-20250929-v1:0', 'finish_reason': 'stop'}, id='lc_run--019af8a3-232e-7223-a949-0a6a1e51b72e-0')

In [0]:
%sql
-- Schema that holds the LLM observability tables used below.
-- IF NOT EXISTS keeps this cell re-runnable: a plain CREATE SCHEMA fails on the
-- second execution because the schema already exists.
CREATE SCHEMA IF NOT EXISTS llm_observability;

In [0]:
%sql
-- Audit table written to by TokenUsageCallbackHandler: one row per completed LLM call.
-- Caution: CREATE OR REPLACE discards any rows already in the table when re-run.
CREATE OR REPLACE TABLE llm_observability.token_usage_audit (
    user_id            STRING,      -- caller identity supplied to the handler
    application_id     STRING,      -- application the call was made from
    model_name         STRING,      -- model name reported in the LLM response
    prompt_tokens      INT,         -- input token count
    completion_tokens  INT,         -- output token count
    total_tokens       INT,         -- total token count reported for the call
    created_at         TIMESTAMP    -- time the audit row was inserted
)
USING DELTA;

In [0]:
%sql
-- Baseline check: the audit table should be empty right after creation.
SELECT *
FROM llm_observability.token_usage_audit;

user_id,application_id,model_name,prompt_tokens,completion_tokens,total_tokens,created_at


In [0]:
from datetime import datetime
from langchain_core.callbacks.base import BaseCallbackHandler   # Base class for creating custom callbacks in LangChain
from databricks_langchain import ChatDatabricks                 # Databricks LLM interface
import pandas as pd                                             # Optional import (not used currently, but helpful for future enhancements)


class TokenUsageCallbackHandler(BaseCallbackHandler):
    """
    LangChain callback handler that audits token usage of LLM calls.

    When an LLM call finishes, one row (user, application, model name, token
    counts, insertion timestamp) is appended to the Delta table named by
    ``delta_table_name``. Relies on the notebook-provided global ``spark`` session.
    """

    def __init__(self, user_id: str, application_id: str):
        """
        Store the caller context that is attached to every audit row.

        Parameters:
        user_id (str): Unique identifier of the logged-in user.
        application_id (str): Unique identifier for the application where LLM is being used.
        """
        super().__init__()
        self.user_id = user_id                                     # Written to the user_id column
        self.application_id = application_id                       # Written to the application_id column
        self.delta_table_name = "llm_observability.token_usage_audit"   # Delta table where records will be inserted

    @staticmethod
    def _response_metadata(response) -> dict:
        """Return the metadata dict that carries ``usage`` / ``model_name`` for this response."""
        # llm_output may be None, so never call .get() on it directly.
        llm_output = getattr(response, "llm_output", None) or {}
        if llm_output.get("usage"):
            return llm_output

        # NOTE(review): fallback assumes chat generations expose a message whose
        # `response_metadata` has the same 'usage' / 'model_name' keys as shown in
        # this notebook's AIMessage outputs - confirm for other providers.
        for candidates in getattr(response, "generations", None) or []:
            for generation in candidates:
                message = getattr(generation, "message", None)
                metadata = getattr(message, "response_metadata", None) or {}
                if metadata.get("usage"):
                    return metadata

        return llm_output

    @staticmethod
    def _as_int(value) -> int:
        """Coerce a token count to int; missing or malformed values become 0."""
        try:
            return int(value)
        except (TypeError, ValueError):
            return 0

    def on_llm_end(self, response, **kwargs):
        """
        Method triggered automatically when the LLM finishes processing the request.
        This extracts token usage and writes a new row into the Delta table.

        Parameters:
        response: LLM response object which contains metadata including token usage.
        """
        metadata = self._response_metadata(response)
        usage = metadata.get("usage") or {}

        # Retrieve token information; defaults to 0 if not available
        prompt_tokens = self._as_int(usage.get("prompt_tokens"))
        completion_tokens = self._as_int(usage.get("completion_tokens"))
        if usage.get("total_tokens") is None:
            # Total was not reported: derive it as prompt + completion.
            total_tokens = prompt_tokens + completion_tokens
        else:
            total_tokens = self._as_int(usage.get("total_tokens"))

        # Model name returned by the platform for tracking / billing
        model_name = str(metadata.get("model_name") or "Unknown")

        # Values are bound through named parameter markers instead of being
        # spliced into the SQL text, so quotes in user_id / application_id /
        # model_name can neither break the statement nor inject SQL.
        # Only the table name (a constant set in __init__) is interpolated.
        # The explicit column list keeps the insert independent of column order.
        insert_query = f"""
        INSERT INTO {self.delta_table_name}
            (user_id, application_id, model_name,
             prompt_tokens, completion_tokens, total_tokens, created_at)
        VALUES (
            :user_id,
            :application_id,
            :model_name,
            CAST(:prompt_tokens AS INT),
            CAST(:completion_tokens AS INT),
            CAST(:total_tokens AS INT),
            current_timestamp()
        )
        """

        # NOTE(review): passing Python values through `args` needs a Spark version
        # that supports parameterized SQL with Python objects - confirm the
        # cluster runtime.
        spark.sql(
            insert_query,
            args={
                "user_id": str(self.user_id),
                "application_id": str(self.application_id),
                "model_name": model_name,
                "prompt_tokens": prompt_tokens,
                "completion_tokens": completion_tokens,
                "total_tokens": total_tokens,
            },
        )

    def on_llm_error(self, error, **kwargs):
        """
        Method triggered if LLM execution results in failure.
        Only reports the error; no audit row is written for failed calls.
        """
        print(f"Error occurred during model execution: {error}")   # Print exception details for observability


In [0]:
# Chat client for the "databricks-claude-sonnet-4-5" serving endpoint.
chat = ChatDatabricks(endpoint="databricks-claude-sonnet-4-5")

# Handler carrying the user / application context recorded with each call.
custom_handler = TokenUsageCallbackHandler(
    user_id="436540",
    application_id="test_llm_observability",
)

# The handler is attached per invocation through the `callbacks` config entry.
prompt = "Who is the Prime Minister of India?"
response = chat.invoke(
    prompt,
    config={"callbacks": [custom_handler]},
)

# Bare last expression so the notebook renders the returned message.
response

AIMessage(content='As of my last update in April 2024, **Narendra Modi** is the Prime Minister of India. He has held this position since May 2014 and is serving his second consecutive term after his party, the Bharatiya Janata Party (BJP), won the general elections in 2019.', additional_kwargs={}, response_metadata={'usage': {'prompt_tokens': 15, 'completion_tokens': 70, 'total_tokens': 85}, 'prompt_tokens': 15, 'completion_tokens': 70, 'total_tokens': 85, 'model': 'eu.anthropic.claude-sonnet-4-5-20250929-v1:0', 'model_name': 'eu.anthropic.claude-sonnet-4-5-20250929-v1:0', 'finish_reason': 'stop'}, id='lc_run--019af8c9-bcac-7a20-8422-12b858cdd08f-0')

In [0]:
%sql
-- Verify that the first audited call produced a row in the audit table.
SELECT *
FROM llm_observability.token_usage_audit;

user_id,application_id,model_name,prompt_tokens,completion_tokens,total_tokens,created_at
436540,test_llm_observability,eu.anthropic.claude-sonnet-4-5-20250929-v1:0,15,70,85,2025-12-07T12:29:25.191Z


In [0]:
# Second request, reusing the `chat` client and `custom_handler` defined in the
# previous code cell (that cell must have been run first).
prompt = "What is capital of India?"
response = chat.invoke(
    prompt,
    config={"callbacks": [custom_handler]},
)

# Bare last expression so the notebook renders the returned message.
response

AIMessage(content='The capital of India is **New Delhi**.\n\nNew Delhi serves as the seat of all three branches of the Government of India and is located within the larger metropolitan area of Delhi.', additional_kwargs={}, response_metadata={'usage': {'prompt_tokens': 13, 'completion_tokens': 39, 'total_tokens': 52}, 'prompt_tokens': 13, 'completion_tokens': 39, 'total_tokens': 52, 'model': 'eu.anthropic.claude-sonnet-4-5-20250929-v1:0', 'model_name': 'eu.anthropic.claude-sonnet-4-5-20250929-v1:0', 'finish_reason': 'stop'}, id='lc_run--019af8d0-01fb-7882-b5aa-ad0ebb0f60a6-0')

In [0]:
%sql
-- Verify that the second audited call appended another row.
-- No ORDER BY is given, so the row order of the result is not guaranteed.
SELECT *
FROM llm_observability.token_usage_audit;

user_id,application_id,model_name,prompt_tokens,completion_tokens,total_tokens,created_at
436540,test_llm_observability,eu.anthropic.claude-sonnet-4-5-20250929-v1:0,13,39,52,2025-12-07T12:36:15.845Z
436540,test_llm_observability,eu.anthropic.claude-sonnet-4-5-20250929-v1:0,15,70,85,2025-12-07T12:29:25.191Z
