In [49]:
from langchain.llms import OpenAI
from langchain.chat_models import ChatOpenAI
from langchain.prompts import PromptTemplate
from langchain.schema import HumanMessage, AgentAction
from langchain.chains import LLMChain
from langchain.agents import (
    AgentType,
    initialize_agent,
    load_tools
)
from langchain.callbacks import StdOutCallbackHandler
from langchain.callbacks.streaming_stdout_final_only import FinalStreamingStdOutCallbackHandler
from langchain.callbacks.base import AsyncCallbackHandler

from typing import Dict, List, Union, Any
import asyncio
from langchain.schema import AgentAction, AgentFinish, LLMResult

## Setting APIs

In [41]:
# Load the API keys from local text files so they are not hardcoded in the notebook.
# Surrounding whitespace is stripped: a trailing newline left in the file would
# otherwise become part of the key string handed to the API clients.
with open("openai_api.txt", 'r') as f1, open("serpapi_api.txt", 'r') as f2:
    OPENAI_API = f1.read().strip()
    SERPAPI_API = f2.read().strip()

## Callbacks

LangChain provides a `callback system` that allows us to hook into the various stages of an LLM application. This is useful for **logging**, **monitoring**, **streaming**, and other tasks.

We can subscribe to these events using the `callbacks` argument. There are two main callback mechanisms:
* _Constructor Callbacks_ - defined in the constructor (e.g. `LLMChain(callbacks=[handler])`). They are used for all calls made on that object and are scoped to that object only (if we pass a handler to the `LLMChain` constructor, it will not be used by the model attached to that chain).

* _Request callbacks_ - defined in the call()/run()/apply(), which will be used for that specific request only, and all sub-requests that it contains (a call to an LLMChain triggers a call to a Model, which uses the same handler passed through).

In [25]:
## The base class of all Callback Handlers is the following

class BaseCallbackHandler:
    """Skeleton callback handler: one no-op hook per LangChain event.

    Every method body consists of a docstring only, so each hook returns
    ``None`` until a subclass overrides it.
    """

    # ---- LLM events ----

    def on_llm_start(self, serialized: Dict[str, Any], prompts: List[str], **kwargs: Any) -> Any:
        """Hook invoked when an LLM starts running."""

    def on_llm_new_token(self, token: str, **kwargs: Any) -> Any:
        """Hook invoked for each new LLM token (streaming mode only)."""

    def on_llm_end(self, response: LLMResult, **kwargs: Any) -> Any:
        """Hook invoked when an LLM finishes running."""

    def on_llm_error(self, error: Union[Exception, KeyboardInterrupt], **kwargs: Any) -> Any:
        """Hook invoked when an LLM raises an error."""

    # ---- Chain events ----

    def on_chain_start(self, serialized: Dict[str, Any], inputs: Dict[str, Any], **kwargs: Any) -> Any:
        """Hook invoked when a chain starts running."""

    def on_chain_end(self, outputs: Dict[str, Any], **kwargs: Any) -> Any:
        """Hook invoked when a chain finishes running."""

    def on_chain_error(self, error: Union[Exception, KeyboardInterrupt], **kwargs: Any) -> Any:
        """Hook invoked when a chain raises an error."""

    # ---- Tool events ----

    def on_tool_start(self, serialized: Dict[str, Any], input_str: str, **kwargs: Any) -> Any:
        """Hook invoked when a tool starts running."""

    def on_tool_end(self, output: str, **kwargs: Any) -> Any:
        """Hook invoked when a tool finishes running."""

    def on_tool_error(self, error: Union[Exception, KeyboardInterrupt], **kwargs: Any) -> Any:
        """Hook invoked when a tool raises an error."""

    # ---- Text and agent events ----

    def on_text(self, text: str, **kwargs: Any) -> Any:
        """Hook invoked on arbitrary text."""

    def on_agent_action(self, action: AgentAction, **kwargs: Any) -> Any:
        """Hook invoked when an agent takes an action."""

    def on_agent_finish(self, finish: AgentFinish, **kwargs: Any) -> Any:
        """Hook invoked when an agent finishes."""

## When Using each Mechanism

* _Constructor Callbacks_ are most useful for use cases such as logging and monitoring, which are not specific to a single request but rather apply to the entire chain.

* _Request Callbacks_ are most useful for use cases such as streaming, where we want to stream the output of a single request to a specific websocket connection, or other similar use cases.

## Using an Existing Handler

LangChain provides a few built-in handlers that we can use to get started. These are available in the `langchain/callbacks` module. The most basic handler is the `StdOutCallbackHandler`, which simply logs all events to `stdout`.

In [9]:
# Constructor callback: create the handler first ...
handler = StdOutCallbackHandler()

# ... then register it on the chain when the chain is built
chain = LLMChain(
    llm=OpenAI(openai_api_key=OPENAI_API),
    prompt=PromptTemplate.from_template("1 + {number} = "),
    callbacks=[handler],
)

chain.run(number=2)



[1m> Entering new LLMChain chain...[0m
Prompt after formatting:
[32;1m[1;3m1 + 2 = [0m

[1m> Finished chain.[0m


'\n\n3'

In [10]:
# Same chain, but with `verbose=True` instead of an explicit handler

chain = LLMChain(
    llm=OpenAI(openai_api_key=OPENAI_API),
    prompt=PromptTemplate.from_template("1 + {number} = "),
    verbose=True,
)

chain.run(number=2)



[1m> Entering new LLMChain chain...[0m
Prompt after formatting:
[32;1m[1;3m1 + 2 = [0m

[1m> Finished chain.[0m


'\n3'

In [14]:
# Request callback: the chain is built without handlers ...

chain = LLMChain(
    llm=OpenAI(openai_api_key=OPENAI_API),
    prompt=PromptTemplate.from_template("1 + {number} = "),
)

# ... and the handler is supplied for this single run only
chain.run(number=2, callbacks=[handler])



[1m> Entering new LLMChain chain...[0m
Prompt after formatting:
[32;1m[1;3m1 + 2 = [0m

[1m> Finished chain.[0m


'\n\n3'

## Creating Custom Handler


In [18]:
class MyCustomHandler(BaseCallbackHandler):
    """Handler that reports every newly generated token on stdout."""

    def on_llm_new_token(self, token: str, **kwargs) -> None:
        """Print the received token behind a fixed prefix."""
        print(f"My custom handler, token: {token}")

# Chat model in `streaming` mode with the custom handler registered on it
chat = ChatOpenAI(
    max_tokens=25,
    streaming=True,
    callbacks=[MyCustomHandler()],
    openai_api_key=OPENAI_API,
)

In [19]:
# Send a single human message to the chat model; the cell's value is the model's reply.
chat([HumanMessage(content="Tell me a joke")])

My custom handler, token: 
My custom handler, token: Why
My custom handler, token:  did
My custom handler, token:  the
My custom handler, token:  tomato
My custom handler, token:  turn
My custom handler, token:  red
My custom handler, token: ?


My custom handler, token: Because
My custom handler, token:  it
My custom handler, token:  saw
My custom handler, token:  the
My custom handler, token:  salad
My custom handler, token:  dressing
My custom handler, token: !
My custom handler, token: 


AIMessage(content='Why did the tomato turn red?\n\nBecause it saw the salad dressing!', additional_kwargs={}, example=False)

## Async Callbacks

When using the asynchronous API, it's recommended to use the `AsyncCallbackHandler` to avoid blocking the run loop.

In [29]:
class MyCustomSyncHandler(BaseCallbackHandler):
    """Synchronous handler that announces each new token on stdout."""

    def on_llm_new_token(self, token: str, **kwargs) -> None:
        """Print the token behind a fixed "sync handler" prefix."""
        print(f"Sync handler being called in a `thread_pool_executor`: token: {token}")

class MyCustomAsyncHandler(AsyncCallbackHandler):
    """Async callback handler that pauses for two seconds at LLM start and end.

    Both hooks wait with ``await asyncio.sleep(...)`` and print a message
    before and after the pause.
    """

    async def on_llm_start(
        self, serialized: Dict[str, Any], prompts: List[str], **kwargs: Any
    ) -> None:
        """Run when the LLM starts running.

        Args:
            serialized: Serialized description of the LLM (not used here).
            prompts: Prompts being sent to the LLM (not used here).
            **kwargs: Additional callback metadata (not used here).
        """
        # The former `class_name = serialized["name"]` lookup was never used
        # and raised KeyError whenever the key was absent, so it is removed.
        print("zzzz....")
        await asyncio.sleep(2)
        print("Hi! I just woke up. Your llm is starting")

    async def on_llm_end(self, response: LLMResult, **kwargs: Any) -> None:
        """Run when the LLM ends running.

        Args:
            response: Result produced by the LLM (not used here).
            **kwargs: Additional callback metadata (not used here).
        """
        print("zzzz....")
        await asyncio.sleep(2)
        print("Hi! I just woke up. Your llm is ending")

# To enable streaming, we pass in `streaming=True` to the ChatModel constructor
# Additionally, we pass in a list with our custom handler
chat = ChatOpenAI(max_tokens=25, streaming=True, callbacks=[MyCustomSyncHandler(), MyCustomAsyncHandler()], openai_api_key=OPENAI_API)

await chat.agenerate([[HumanMessage(content="Tell me a joke")]])

zzzz....
Hi! I just woke up. Your llm is starting
Sync handler being called in a `thread_pool_executor`: token: 
Sync handler being called in a `thread_pool_executor`: token: Why
Sync handler being called in a `thread_pool_executor`: token:  did
Sync handler being called in a `thread_pool_executor`: token:  the
Sync handler being called in a `thread_pool_executor`: token:  tomato
Sync handler being called in a `thread_pool_executor`: token:  turn
Sync handler being called in a `thread_pool_executor`: token:  red
Sync handler being called in a `thread_pool_executor`: token: ?
Sync handler being called in a `thread_pool_executor`: token:  Because
Sync handler being called in a `thread_pool_executor`: token:  it
Sync handler being called in a `thread_pool_executor`: token:  saw
Sync handler being called in a `thread_pool_executor`: token:  the
Sync handler being called in a `thread_pool_executor`: token:  salad
Sync handler being called in a `thread_pool_executor`: token:  dressing
Sync h

LLMResult(generations=[[ChatGeneration(text='Why did the tomato turn red? Because it saw the salad dressing!', generation_info=None, message=AIMessage(content='Why did the tomato turn red? Because it saw the salad dressing!', additional_kwargs={}, example=False))]], llm_output={'token_usage': {}, 'model_name': 'gpt-3.5-turbo'})

## Using Multiple Handlers

In previous examples, we passed in callback handlers upon creation of an object using `callbacks=[...]`. In this case, the callbacks will be scoped to that particular object.

However in many cases, it's advantageous to pass in handlers instead when running the object. When we pass through `CallbackHandlers` using the `callbacks` keyword when executing a run, those callbacks will be issued by all nested objects involved in the execution. 

For example, when a handler is passed through to an `Agent`, it will be used for all callbacks related to the agent and all the objects involved in the agent's execution (Tools, Chains and LLM).

In [32]:
class MyCustomHandlerOne(BaseCallbackHandler):
    """Handler that prints one line for LLM, chain, tool and agent-action events."""

    def on_llm_start(self, serialized: Dict[str, Any], prompts: List[str], **kwargs: Any) -> Any:
        """Print the `name` entry of the serialized LLM."""
        print(f"on_llm_start {serialized['name']}")

    def on_llm_new_token(self, token: str, **kwargs: Any) -> Any:
        """Print each new token."""
        print(f"on_new_token {token}")

    def on_llm_error(self, error: Union[Exception, KeyboardInterrupt], **kwargs: Any) -> Any:
        """Run when LLM errors (intentionally does nothing)."""

    def on_chain_start(self, serialized: Dict[str, Any], inputs: Dict[str, Any], **kwargs: Any) -> Any:
        """Print the `name` entry of the serialized chain."""
        print(f"on_chain_start {serialized['name']}")

    def on_tool_start(self, serialized: Dict[str, Any], input_str: str, **kwargs: Any) -> Any:
        """Print the `name` entry of the serialized tool."""
        print(f"on_tool_start {serialized['name']}")

    def on_agent_action(self, action: AgentAction, **kwargs: Any) -> Any:
        """Print the agent action."""
        print(f"on_agent_action {action}")


class MyCustomHandlerTwo(BaseCallbackHandler):
    """Second handler: reacts to LLM start only, with its own marker text."""

    def on_llm_start(self, serialized: Dict[str, Any], prompts: List[str], **kwargs: Any) -> Any:
        """Print the `name` entry of the serialized LLM, tagged as the second handler."""
        print(f"on_llm_start (I'm the second handler!!) {serialized['name']}")


# One instance of each handler
handler1 = MyCustomHandlerOne()
handler2 = MyCustomHandlerTwo()

# Build the agent; handler2 is attached to the `llm` only, so only the llm issues callbacks for it
llm = OpenAI(
    temperature=0,
    streaming=True,
    callbacks=[handler2],
    openai_api_key=OPENAI_API,
)
tools = load_tools(["llm-math"], llm=llm)
agent = initialize_agent(tools, llm, agent=AgentType.ZERO_SHOT_REACT_DESCRIPTION)

# handler1 is passed per request, so every object involved in the agent execution
# (llm, llmchain, tool, agent executor) issues callbacks for it
agent.run("What is 2 raised to the 0.235 power?", callbacks=[handler1])

on_chain_start AgentExecutor
on_chain_start LLMChain
on_llm_start OpenAI
on_llm_start (I'm the second handler!!) OpenAI
on_new_token  I
on_new_token  need
on_new_token  to
on_new_token  use
on_new_token  a
on_new_token  calculator
on_new_token  to
on_new_token  solve
on_new_token  this
on_new_token .
on_new_token 
Action
on_new_token :
on_new_token  Calculator
on_new_token 
Action
on_new_token  Input
on_new_token :
on_new_token  2
on_new_token ^
on_new_token 0
on_new_token .
on_new_token 235
on_new_token 
on_agent_action AgentAction(tool='Calculator', tool_input='2^0.235', log=' I need to use a calculator to solve this.\nAction: Calculator\nAction Input: 2^0.235')
on_tool_start Calculator
on_chain_start LLMMathChain
on_chain_start LLMChain
on_llm_start OpenAI
on_llm_start (I'm the second handler!!) OpenAI
on_new_token 
on_new_token ```text
on_new_token 

on_new_token 2
on_new_token **
on_new_token 0
on_new_token .
on_new_token 235
on_new_token 

on_new_token ```

on_new_token ...
on_ne

'1.1769067372187674'

## Streaming Only the Output

By default, LangChain assumes that the token sequence `Final`, `Answer`, `:` indicates that the agent has reached an answer. We can manually set a different answer sequence with the `answer_prefix_tokens` argument.

For convenience, the callback automatically strips whitespaces and new line characters when comparing to `answer_prefix_tokens`

In [54]:
# LLM with the final-answer-only streaming handler registered on it
llm = OpenAI(
    temperature=0,
    streaming=True,
    openai_api_key=OPENAI_API,
    callbacks=[FinalStreamingStdOutCallbackHandler()],
)
tools = load_tools(["llm-math", "serpapi"], llm=llm, serpapi_api_key=SERPAPI_API)
agent = initialize_agent(tools, llm, agent=AgentType.ZERO_SHOT_REACT_DESCRIPTION)

res = agent.run("Expain me what democracy is")

 Democracy is a form of government in which the people have the authority to deliberate and decide legislation, or to choose governing officials to do so. Examples of democracy include voting in elections and contacting elected officials.

In [60]:
## Seeing what the ideal `answer_prefix_tokens` is:

class MyCallbackHandler(BaseCallbackHandler):
    """Handler that makes token boundaries visible by wrapping each token in `#`."""

    def on_llm_new_token(self, token, **kwargs) -> None:
        """Print the token, delimited by `#` on both sides, on its own line."""
        print(f"#{token}#")

# Streaming LLM with the token-printing handler attached
llm = OpenAI(
    streaming=True,
    callbacks=[MyCallbackHandler()],
    openai_api_key=OPENAI_API,
)
tools = load_tools(["wikipedia", "llm-math"], llm=llm)
agent = initialize_agent(
    tools,
    llm,
    agent=AgentType.ZERO_SHOT_REACT_DESCRIPTION,
    verbose=False,
)
agent.run("It's 2023 now. How many years ago did Konrad Adenauer become Chancellor of Germany.")

# I#
# need#
# to#
# find#
# out#
# when#
# he#
# became#
# Chancellor#
#.#
#
Action#
#:#
# Wikipedia#
#
Action#
# Input#
#:#
# Kon#
#rad#
# Aden#
#auer#
##
# I#
# now#
# know#
# that#
# Kon#
#rad#
# Aden#
#auer#
# became#
# Chancellor#
# of#
# Germany#
# in#
# 1949#
#.#
#
Final#
# Answer#
#:#
# Kon#
#rad#
# Aden#
#auer#
# became#
# Chancellor#
# of#
# Germany#
# in#
# 1949#
#,#
# 74#
# years#
# ago#
#.#
##


'Konrad Adenauer became Chancellor of Germany in 1949, 74 years ago.'

Here, the answer prefix is [`Final`, `Answer`, `:`].

In [62]:
# Streaming LLM whose handler is given the answer prefix tokens explicitly
llm = OpenAI(
    streaming=True,
    callbacks=[
        FinalStreamingStdOutCallbackHandler(answer_prefix_tokens=["Final", "Answer", ":"])
    ],
    openai_api_key=OPENAI_API,
)
tools = load_tools(["wikipedia", "llm-math"], llm=llm)
agent = initialize_agent(
    tools,
    llm,
    agent=AgentType.ZERO_SHOT_REACT_DESCRIPTION,
    verbose=False,
)
agent.run("It's 2023 now. How many years ago did Konrad Adenauer become Chancellor of Germany.")

 Konrad Adenauer became Chancellor of Germany in 1949, which is 74 years ago from 2023.

'Konrad Adenauer became Chancellor of Germany in 1949, which is 74 years ago from 2023.'