In [4]:
from langchain.callbacks.base import BaseCallbackHandler
from langchain_core.messages import HumanMessage
from langchain_core.outputs import LLMResult
from langchain_groq import ChatGroq
from typing import Dict,List,Any
import os

In [17]:
# class MyCustomSyncHandler(BaseCallbackHandler):
#     """Async callback handler that can be used to handle callbacks from langchain."""

    # def on_llm_start(
    #     self, serialized: Dict[str, Any], prompts: List[str], **kwargs: Any
    # ) -> None:
    #     """Run when chain starts running."""
    #     print("zzzz....")
    #     class_name = serialized["name"]
    #     print("Hi! I just woke up. Your llm is starting")
  

    # def on_llm_end(self, response: LLMResult, **kwargs: Any) -> None:
    #     """Run when chain ends running."""
    #     print("zzzz....")
    #     print("Hi! I just woke up. Your llm is ending")
    
class MyCustomSyncHandler(BaseCallbackHandler):
    async def on_llm_new_token(self, token: str, **kwargs) -> None:
        print(f"Sync handler being called in a `thread_pool_executor`: token: {token}")

In [32]:
llm = ChatGroq(
    api_key= os.getenv('GROQ_API_KEY'),
    model='mixtral-8x7B-32768',
    max_tokens=30,
    callbacks=[MyCustomSyncHandler()]
)
# from langchain_openai import ChatOpenAI

# llm = ChatOpenAI(
#     model='gpt-3.5-turbo',
#     api_key=os.getenv("OPENAI_API_KEY"),
#     max_tokens=30
# )

In [29]:
from langchain_core.callbacks import BaseCallbackHandler
from langchain_core.prompts import ChatPromptTemplate


class MyCustomHandler(BaseCallbackHandler):
    def on_llm_new_token(self, token: str, **kwargs) -> None:
        print(f"My custom handler, token: {token}")


prompt = ChatPromptTemplate.from_messages(["Tell me a joke about {animal}"])

# To enable streaming, we pass in `streaming=True` to the ChatModel constructor
# Additionally, we pass in our custom handler as a list to the callbacks parameter

chain = prompt | llm



In [30]:
response = chain.invoke({"animal": "bears"})

In [31]:
print(response)

content='Why do bears have hairy coats?\n\nFur protection!' response_metadata={'token_usage': {'completion_tokens': 11, 'prompt_tokens': 13, 'total_tokens': 24}, 'model_name': 'gpt-3.5-turbo-0125', 'system_fingerprint': None, 'finish_reason': 'stop', 'logprobs': None} id='run-c18f8909-cf93-4090-876d-4bd986e2e6f3-0' usage_metadata={'input_tokens': 13, 'output_tokens': 11, 'total_tokens': 24}


In [37]:
from langchain_core.callbacks import FileCallbackHandler, StdOutCallbackHandler
from langchain_core.prompts import PromptTemplate
from loguru import logger

logfile = "output.log"

logger.add(logfile, colorize=True, enqueue=True)
handler_1 = FileCallbackHandler(logfile)
handler_2 = StdOutCallbackHandler()

prompt = PromptTemplate.from_template("1 + {number} = ")

# this chain will both print to stdout (because verbose=True) and write to 'output.log'
# if verbose=False, the FileCallbackHandler will still write to 'output.log'
chain = prompt | llm

response = chain.invoke({"number": 2}, {"callbacks":  [handler_1,handler_2]})
logger.info(response)



[1m> Entering new RunnableSequence chain...[0m


[1m> Entering new PromptTemplate chain...[0m

[1m> Finished chain.[0m


[32m2024-08-01 20:43:22.065[0m | [1mINFO    [0m | [36m__main__[0m:[36m<module>[0m:[36m18[0m - [1mcontent=" The sum of 1 + 2 is 3. Is there anything specific you would like to know about this mathematical operation? I'm" response_metadata={'token_usage': {'completion_tokens': 30, 'prompt_tokens': 16, 'total_tokens': 46, 'completion_time': 0.046979804, 'prompt_time': 0.002297612, 'queue_time': None, 'total_time': 0.049277416}, 'model_name': 'mixtral-8x7B-32768', 'system_fingerprint': 'fp_c5f20b5bb1', 'finish_reason': 'length', 'logprobs': None} id='run-5d649132-700b-49aa-a2b2-fe2107f9f2c3-0' usage_metadata={'input_tokens': 16, 'output_tokens': 30, 'total_tokens': 46}[0m



[1m> Finished chain.[0m


In [38]:
from ansi2html import Ansi2HTMLConverter
from IPython.display import HTML, display

with open("output.log", "r") as f:
    content = f.read()

conv = Ansi2HTMLConverter()
html = conv.convert(content, full=True)

display(HTML(html))

In [41]:
from typing import Any, Dict, List, Union

from langchain.agents import AgentType, initialize_agent, load_tools
from langchain.callbacks.base import BaseCallbackHandler
from langchain_core.agents import AgentAction
from langchain_openai import OpenAI
from langchain_google_genai import ChatGoogleGenerativeAI

# First, define custom callback handler implementations
class MyCustomHandlerOne(BaseCallbackHandler):
    def on_llm_start(
        self, serialized: Dict[str, Any], prompts: List[str], **kwargs: Any
    ) -> Any:
        print(f"on_llm_start {serialized['name']}")

    def on_llm_new_token(self, token: str, **kwargs: Any) -> Any:
        print(f"on_new_token {token}")

    def on_llm_error(
        self, error: Union[Exception, KeyboardInterrupt], **kwargs: Any
    ) -> Any:
        """Run when LLM errors."""

    def on_chain_start(
        self, serialized: Dict[str, Any], inputs: Dict[str, Any], **kwargs: Any
    ) -> Any:
        print(f"on_chain_start {serialized['name']}")

    def on_tool_start(
        self, serialized: Dict[str, Any], input_str: str, **kwargs: Any
    ) -> Any:
        print(f"on_tool_start {serialized['name']}")

    def on_agent_action(self, action: AgentAction, **kwargs: Any) -> Any:
        print(f"on_agent_action {action}")


class MyCustomHandlerTwo(BaseCallbackHandler):
    def on_llm_start(
        self, serialized: Dict[str, Any], prompts: List[str], **kwargs: Any
    ) -> Any:
        print(f"on_llm_start (I'm the second handler!!) {serialized['name']}")


# Instantiate the handlers
handler1 = MyCustomHandlerOne()
handler2 = MyCustomHandlerTwo()

# Setup the agent. Only the `llm` will issue callbacks for handler2
# llm = OpenAI(temperature=0, streaming=True, callbacks=[handler2])
llm = ChatGoogleGenerativeAI(api_key=os.getenv('GOOGLE_API_KEY'),model='gemini-1.5-flash')
tools = load_tools(["llm-math"], llm=llm)
agent = initialize_agent(tools, llm, agent=AgentType.ZERO_SHOT_REACT_DESCRIPTION)

# Callbacks for handler1 will be issued by every object involved in the
# Agent execution (llm, llmchain, tool, agent executor)
agent.run("What is 2 raised to the 0.235 power?", callbacks=[handler1])

on_chain_start AgentExecutor
on_chain_start LLMChain
on_llm_start ChatGoogleGenerativeAI
on_agent_action tool='Calculator' tool_input='2**0.235' log='Thought: I need to calculate 2 raised to the power of 0.235.\nAction: Calculator\nAction Input: 2**0.235'
on_tool_start Calculator
on_chain_start LLMMathChain
on_chain_start LLMChain
on_llm_start ChatGoogleGenerativeAI
on_chain_start LLMChain
on_llm_start ChatGoogleGenerativeAI


'1.1769067372187674'