In [11]:
import asyncio
import sys
import time
import logging
import typing as ty

import aioitertools
from langchain.input import get_color_mapping
from langchain.agents import AgentExecutor
from langchain.agents.agent import AgentAction, AgentFinish
from langchain.callbacks.manager import AsyncCallbackManagerForChainRun
from langchain.utilities.asyncio import asyncio_timeout

# Put the parent directory first on the import path before importing `codeine`.
# NOTE(review): presumably the local `codeine` package lives one level up — confirm.
sys.path.insert(0, "..")
from codeine.chatbot import build_chat_engine, service_context


# DEBUG on the root logger also enables debug records from third-party
# libraries (see the `openai` / `urllib3` / `llama_index` lines in the cell
# output further down), which includes full request payloads.
logging.basicConfig(level=logging.DEBUG)
logger = logging.getLogger(__name__)

# Shared chat engine used by every cell below.
chat_engine = build_chat_engine()

In [23]:
async def intermediate_steps_generator(
    self,
    inputs: dict[str, str],
    run_manager: ty.Optional[AsyncCallbackManagerForChainRun] = None,
) -> ty.AsyncIterator[tuple[ty.Union[AgentAction, AgentFinish], str]]:
    """Run the agent loop and yield every step as soon as it is available.

    Args:
        inputs: Keyword inputs forwarded to the agent on every planning call.
        run_manager: Optional callback manager passed through to
            ``_atake_next_step``.

    Yields:
        ``(AgentAction, observation)`` for each tool invocation, followed by
        ``(AgentFinish, final_text)`` when the run ends with an answer, either
        because the agent finished or because a tool's result is returned
        directly. Nothing further is yielded when the loop stops because
        ``_should_continue`` returned False or the timeout expired.
    """

    logger.debug("Starting generator")

    name_to_tool_map = {tool.name: tool for tool in self.tools}
    color_mapping = get_color_mapping(
        [tool.name for tool in self.tools], excluded_colors=["green"]
    )
    intermediate_steps: list[tuple[AgentAction, str]] = []
    iterations = 0
    time_elapsed = 0.0
    start_time = time.time()

    # The timeout context manager raises on *exit* of the `async with` block,
    # so the handler has to wrap the whole block rather than live inside it.
    # Before Python 3.11 `asyncio.TimeoutError` is a different class from the
    # builtin `TimeoutError`, hence both are caught.
    try:
        async with asyncio_timeout(self.max_execution_time):
            while self._should_continue(iterations, time_elapsed):
                logger.debug(f"Iteration {iterations}")

                next_step_output = await self._atake_next_step(
                    name_to_tool_map,
                    color_mapping,
                    inputs,
                    intermediate_steps,
                    run_manager=run_manager,
                )
                if isinstance(next_step_output, AgentFinish):
                    # Yield the final answer
                    final_answer = next_step_output.return_values["output"]
                    yield next_step_output, final_answer
                    logger.debug("Agent finished")
                    break

                intermediate_steps.extend(next_step_output)

                # Yield the latest intermediate step(s) before deciding whether
                # to stop, so the last tool call is never silently dropped.
                for step_output in next_step_output:
                    logger.debug(f"Yielding step: {step_output}")
                    yield step_output

                if len(next_step_output) == 1:
                    next_step_action = next_step_output[0]
                    tool_return = self._get_tool_return(next_step_action)
                    if tool_return is not None:
                        # The tool's observation is the final answer; signal it
                        # the same way as a regular finish.
                        logger.debug("Tool returned")
                        yield tool_return, next_step_action[1]
                        break

                iterations += 1
                time_elapsed = time.time() - start_time
    except (TimeoutError, asyncio.TimeoutError):
        logger.debug("TimeoutError")
    logger.debug("Generator finished")

AgentExecutor.intermediate_steps_generator = intermediate_steps_generator

In [4]:
async def _acall(
    self,
    inputs: dict[str, str],
    run_manager: ty.Optional[AsyncCallbackManagerForChainRun] = None,
) -> dict[str, str]:
    """Run text through and get agent response.

    Consumes ``intermediate_steps_generator`` and builds the final output
    dict. When the generator yields an ``AgentFinish`` its return values are
    used as the answer; otherwise (iteration/time limit or timeout) the
    agent's stopped response is returned.

    Args:
        inputs: Keyword inputs forwarded to the agent.
        run_manager: Optional callback manager for this chain run.

    Returns:
        The output dict produced by ``self._areturn``.
    """
    intermediate_steps: list[tuple[AgentAction, str]] = []

    async for action, observation in self.intermediate_steps_generator(
        inputs, run_manager
    ):
        if isinstance(action, AgentFinish):
            # The finish marker carries the answer; it is not a tool step and
            # must not be recorded as one.
            return await self._areturn(
                action, intermediate_steps, run_manager=run_manager
            )
        intermediate_steps.append((action, observation))

    # The loop ended without a final answer: ask the agent for its
    # early-stopping response and wrap it like any other finish.
    output = self.agent.return_stopped_response(
        self.early_stopping_method, intermediate_steps, **inputs
    )
    return await self._areturn(output, intermediate_steps, run_manager=run_manager)

AgentExecutor._acall = _acall

In [9]:
from langchain.agents import Tool
from langchain.agents.tools import InvalidTool
from langchain.schema import OutputParserException

async def _atake_next_step(
    self,
    name_to_tool_map: dict[str, Tool],
    color_mapping: dict[str, str],
    inputs: dict[str, str],
    intermediate_steps: list[tuple[AgentAction, str]],
    run_manager: ty.Optional[AsyncCallbackManagerForChainRun] = None,
) -> list[tuple[AgentAction, str]] | AgentFinish:
    """Take a single step in the thought-action-observation loop.

    Args:
        name_to_tool_map: Tools available to the agent, keyed by tool name.
        color_mapping: Logging color per tool name.
        inputs: Keyword inputs forwarded to the agent's planner.
        intermediate_steps: Steps taken so far, passed to the planner.
        run_manager: Optional callback manager; child managers are handed to
            the planner and to each tool run.

    Returns:
        The ``AgentFinish`` if the agent decided to stop, otherwise one
        ``(action, observation)`` pair per action the agent requested.

    Raises:
        OutputParserException: If the agent's output cannot be parsed. No
            recovery is implemented here, so the error is logged and re-raised.
    """

    logger.debug("Taking next step")

    try:
        # Call the LLM to see what to do.
        output = await self.agent.aplan(
            intermediate_steps,
            callbacks=run_manager.get_child() if run_manager else None,
            **inputs,
        )
    except OutputParserException:
        # Swallowing the error would leave `output` unbound and fail on the
        # next line with a misleading UnboundLocalError; surface the real one.
        logger.exception("Agent output could not be parsed")
        raise

    logger.debug(f"Agent output: {output}")

    # If the tool chosen is the finishing tool, then we end and return.
    if isinstance(output, AgentFinish):
        return output
    actions: list[AgentAction]
    if isinstance(output, AgentAction):
        actions = [output]
    else:
        actions = output

    async def _aperform_agent_action(
        agent_action: AgentAction,
    ) -> tuple[AgentAction, str]:
        """Run the tool named by ``agent_action`` and return its observation."""
        if run_manager:
            await run_manager.on_agent_action(
                agent_action, verbose=self.verbose, color="green"
            )
        # Otherwise we lookup the tool
        if agent_action.tool in name_to_tool_map:
            tool = name_to_tool_map[agent_action.tool]
            return_direct = tool.return_direct
            color = color_mapping[agent_action.tool]
            tool_run_kwargs = self.agent.tool_run_logging_kwargs()
            if return_direct:
                tool_run_kwargs["llm_prefix"] = ""
            # We then call the tool on the tool input to get an observation
            observation = await tool.arun(
                agent_action.tool_input,
                verbose=self.verbose,
                color=color,
                callbacks=run_manager.get_child() if run_manager else None,
                **tool_run_kwargs,
            )
        else:
            # Unknown tool name: run the placeholder tool so the agent gets an
            # observation back instead of the step failing.
            tool_run_kwargs = self.agent.tool_run_logging_kwargs()
            observation = await InvalidTool().arun(
                agent_action.tool,
                verbose=self.verbose,
                color=None,
                callbacks=run_manager.get_child() if run_manager else None,
                **tool_run_kwargs,
            )
        return agent_action, observation

    # Use asyncio.gather to run multiple tool.arun() calls concurrently
    result = await asyncio.gather(
        *[_aperform_agent_action(agent_action) for agent_action in actions]
    )

    return list(result)


AgentExecutor._atake_next_step = _atake_next_step

In [25]:
# `chat_engine._agent` is the object the patched generator is called on below.
# NOTE(review): presumably an AgentExecutor, since the methods above are patched
# onto that class — confirm against `build_chat_engine`.
chat_engine._agent.return_intermediate_steps = True
# `verbose` is forwarded to the callback and tool calls in `_atake_next_step`.
chat_engine._agent.verbose = False

In [26]:
# WARNING: the repr of this object includes `openai_api_key`; clear or redact
# this cell's output before saving or sharing the notebook.
chat_engine._service_context.llm_predictor.llm

ChatOpenAI(verbose=False, callbacks=None, callback_manager=None, tags=None, client=<class 'openai.api_resources.chat_completion.ChatCompletion'>, model_name='gpt-3.5-turbo', temperature=0.0, model_kwargs={}, openai_api_key='sk-***REDACTED***', openai_api_base='', openai_organization='', openai_proxy='', request_timeout=None, max_retries=6, streaming=False, n=1, max_tokens=None)

In [27]:
inputs = {
    "input": "How does TinyViT work?",
    "chat_history" : []
}

async for ix, step in aioitertools.enumerate(chat_engine._agent.intermediate_steps_generator(
    inputs,
)):
    logging.info("==========================================")
    logging.info(f"Step: {ix}")
    if isinstance(step, str): # agent is finished
        text = step
    else: # agent has a tool to use
        action, text = step
        logging.info(f"Action: {action}")
    logging.info(f"Text: {text}")
    logging.info("==========================================")



DEBUG:__main__:Starting generator
DEBUG:__main__:Iteration 0
DEBUG:__main__:Taking next step
DEBUG:openai:message='Request to OpenAI API' method=post path=https://api.openai.com/v1/chat/completions
DEBUG:openai:api_version=None data='{"messages": [{"role": "system", "content": "Assistant is a large language model trained by OpenAI.\\n\\nAssistant is designed to be able to assist with a wide range of tasks, from answering simple questions to providing in-depth explanations and discussions on a wide range of topics. As a language model, Assistant is able to generate human-like text based on the input it receives, allowing it to engage in natural-sounding conversations and provide responses that are coherent and relevant to the topic at hand.\\n\\nAssistant is constantly learning and improving, and its capabilities are constantly evolving. It is able to process and understand large amounts of text, and can use this knowledge to provide accurate and informative responses to a wide range of

DEBUG:urllib3.connectionpool:https://api.openai.com:443 "POST /v1/chat/completions HTTP/1.1" 200 None
DEBUG:openai:message='OpenAI API response' path=https://api.openai.com/v1/chat/completions processing_ms=2800 request_id=a4d124a50406df217bcea15c21e79357 response_code=200
DEBUG:llama_index.llm_predictor.base:What is TinyViT?

TinyViT is a family of tiny and efficient vision transformers pretrained on large-scale datasets with a proposed fast distillation framework, which transfers knowledge from large pretrained models to small ones. It achieves high accuracy with only 21M parameters and is designed for neural architecture search, tiny transformer design, and model compression projects.
DEBUG:llama_index.indices.response.base_builder:> Initial prompt template: Context information is below. 
---------------------
file_path: TinyViT/README.md
file_name: README.md

# TinyViT: Fast Pretraining Distillation for Small Vision Transformers [![Tweet](https://img.shields.io/twitter/url/http/shi

DEBUG:llama_index.indices.response.base_builder:> Initial response: What is TinyViT?

TinyViT is a family of tiny and efficient vision transformers pretrained on large-scale datasets with a proposed fast distillation framework, which transfers knowledge from large pretrained models to small ones. It achieves high accuracy with only 21M parameters and is designed for neural architecture search, tiny transformer design, and model compression projects.
DEBUG:llama_index.indices.response.refine:> Refine context: depth={self.depth}"


class TinyViT(nn.Module):...
DEBUG:openai:message='Request to OpenAI API' method=post path=https://api.openai.com/v1/chat/completions
DEBUG:openai:api_version=None data='{"messages": [{"role": "user", "content": "TinyViT"}, {"role": "assistant", "content": "What is TinyViT?\\n\\nTinyViT is a family of tiny and efficient vision transformers pretrained on large-scale datasets with a proposed fast distillation framework, which transfers knowledge from large pretr

INFO:openai:message='OpenAI API response' path=https://api.openai.com/v1/chat/completions processing_ms=5881 request_id=39856748fb402b641e9de6897e945fed response_code=200
DEBUG:__main__:Agent output: AgentFinish(return_values={'output': 'TinyViT is a neural network architecture designed for computer vision tasks. It is a family of tiny and efficient vision transformers that are pretrained on large-scale datasets. The architecture has only 21M parameters and is designed for neural architecture search, tiny transformer design, and model compression projects. The TinyViT model has several hyperparameters, including the number of embedding dimensions, depths, number of heads, window sizes, and drop rates. These hyperparameters can be adjusted to optimize the model for specific tasks. Additionally, TinyViT supports a fast distillation framework that transfers knowledge from large pretrained models to small ones.'}, log='{\n    "action": "Final Answer",\n    "action_input": "TinyViT is a neu

In [64]:
# Show the last item bound to `step` by the streaming loop.
# NOTE(review): the saved output below is a bare string about *training*
# TinyViT, while the loop above asks "How does TinyViT work?" and the current
# generator yields tuples — the output looks stale; re-run after a kernel restart.
step

"Training TinyViT, a smaller version of the Vision Transformer (ViT) model, involves a similar process to training the original ViT. The main steps include: 1. Preparing a dataset of images and their corresponding labels. 2. Initializing the TinyViT model with a smaller architecture compared to the original ViT. 3. Feeding the images through the model and computing the loss based on the model's predictions and the true labels. 4. Updating the model's weights using an optimization algorithm, such as Adam or SGD, to minimize the loss. 5. Repeating steps 3 and 4 for multiple epochs until the model converges or reaches a satisfactory performance level. The primary difference between TinyViT and the original ViT is the model's size, which makes TinyViT more efficient and faster to train, while still maintaining competitive performance on various computer vision tasks."