<a href="https://colab.research.google.com/github/ResByte/llm-notebooks/blob/main/notebooks/03_Agents_llama_index.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

# LLM Agents

Using llama-cpp-python, load an existing Mistral-7B model. The model is downloaded from Hugging Face (HF) and is in GGUF format (the updated successor to the GGML format).

## Installations

In [1]:
!CMAKE_ARGS="-DLLAMA_CUBLAS=on" pip install -q llama-cpp-python

[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m9.4/9.4 MB[0m [31m31.1 MB/s[0m eta [36m0:00:00[0m
[?25h  Installing build dependencies ... [?25l[?25hdone
  Getting requirements to build wheel ... [?25l[?25hdone
  Installing backend dependencies ... [?25l[?25hdone
  Preparing metadata (pyproject.toml) ... [?25l[?25hdone
  Building wheel for llama-cpp-python (pyproject.toml) ... [?25l[?25hdone


In [2]:
!pip install -q llama-index

[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m15.8/15.8 MB[0m [31m72.4 MB/s[0m eta [36m0:00:00[0m
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m143.0/143.0 kB[0m [31m17.8 MB/s[0m eta [36m0:00:00[0m
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m75.9/75.9 kB[0m [31m10.2 MB/s[0m eta [36m0:00:00[0m
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m225.4/225.4 kB[0m [31m26.0 MB/s[0m eta [36m0:00:00[0m
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m2.0/2.0 MB[0m [31m78.2 MB/s[0m eta [36m0:00:00[0m
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m76.9/76.9 kB[0m [31m9.9 MB/s[0m eta [36m0:00:00[0m
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m58.3/58.3 kB[0m [31m7.2 MB/s[0m eta [36m0:00:00[0m
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m49.4/49.4 kB[0m [31m6.3 MB/s[0m eta [36m0:00:00[0m
[?25h[31mERROR: pip's dependenc

In [6]:
!pip install -q transformers

## Setup Model

In [3]:
# Direct-download URL of the Q4_K_M-quantized GGUF file of
# Mistral-7B-Instruct-v0.1, hosted in TheBloke's Hugging Face repository.
model_url = "https://huggingface.co/TheBloke/Mistral-7B-Instruct-v0.1-GGUF/resolve/main/mistral-7b-instruct-v0.1.Q4_K_M.gguf?download=true"

In [4]:
from llama_index.llms import LlamaCPP
from llama_index.llms.llama_utils import (
    messages_to_prompt,
    completion_to_prompt,
)

In [7]:
from llama_index import set_global_tokenizer
from transformers import AutoTokenizer

In [8]:
# Register the `encode` method of the Hugging Face tokenizer for
# mistralai/Mistral-7B-Instruct-v0.1 as llama-index's global tokenizer.
# NOTE(review): presumably so llama-index counts tokens the same way the
# Mistral model does — confirm against the llama-index docs.
set_global_tokenizer(
    AutoTokenizer.from_pretrained("mistralai/Mistral-7B-Instruct-v0.1").encode
)

tokenizer_config.json:   0%|          | 0.00/1.47k [00:00<?, ?B/s]

tokenizer.model:   0%|          | 0.00/493k [00:00<?, ?B/s]

tokenizer.json:   0%|          | 0.00/1.80M [00:00<?, ?B/s]

special_tokens_map.json:   0%|          | 0.00/72.0 [00:00<?, ?B/s]

In [26]:
# for api: https://llama-cpp-python.readthedocs.io/en/latest/api-reference/
# Build the llama-index LlamaCPP wrapper for the GGUF model referenced by
# `model_url`; the resulting `llm` is used by every agent below.
llm = LlamaCPP(
    model_url=model_url,  # remote model file; no local path is given (model_path=None)
    model_path=None,
    temperature=0.1,  # low temperature for near-deterministic answers
    max_new_tokens=512,
    context_window=3900,  # NOTE(review): presumably set below the model's real limit for headroom — confirm
    generate_kwargs={},
    model_kwargs={'n_gpu_layers':-1},  # NOTE(review): -1 presumably offloads all layers to the GPU — confirm in llama-cpp-python docs
    messages_to_prompt=messages_to_prompt,
    completion_to_prompt=completion_to_prompt,
    verbose=True,
)

AVX = 1 | AVX_VNNI = 0 | AVX2 = 1 | AVX512 = 1 | AVX512_VBMI = 0 | AVX512_VNNI = 0 | FMA = 1 | NEON = 0 | ARM_FMA = 0 | F16C = 1 | FP16_VA = 0 | WASM_SIMD = 0 | BLAS = 1 | SSE3 = 1 | SSSE3 = 1 | VSX = 0 | 


In [27]:
response = llm.complete("Hello! Can you tell me a poem about cats and dogs?")

In [28]:
print(response.text)

 Sure, here's a short poem about cats and dogs:

Cats and dogs, they both have fur,
But cats are more independent, that's for sure.
Dogs love to play and run around,
While cats prefer to lounge and be found.

Cats are known for their grace and poise,
While dogs are always ready to please.
Both animals make great companions,
And they bring joy to our lives in different ways.

So whether you prefer cats or dogs,
Just remember to give them lots of love and logs.


In [12]:
import nest_asyncio
nest_asyncio.apply()

## Agent Build
This will be a Reasoning and Acting (ReAct) agent.

In [13]:
from llama_index.agent import ReActAgent

In [14]:
# define calculation tool for our agent
# The docstring is deliberate: FunctionTool.from_defaults uses it as the tool
# description shown to the LLM, so keep it short and accurate.
def multiply(a: int, b: int) -> int:
    """Multiply two integers and return the resulting integer."""
    return a * b

# The docstring is deliberate: FunctionTool.from_defaults uses it as the tool
# description shown to the LLM, so keep it short and accurate.
def add(a: int, b: int) -> int:
    """Add two integers and return the resulting integer."""
    return a + b

In [15]:
from llama_index.tools import BaseTool, FunctionTool

# Wrap the plain Python functions as FunctionTool objects so they can be
# handed to an agent as callable tools.
add_tool = FunctionTool.from_defaults(fn=add)
multiply_tool = FunctionTool.from_defaults(fn=multiply)

In [29]:
agent = ReActAgent.from_tools([multiply_tool, add_tool], llm=llm, verbose=True)

In [30]:
agent.chat("What is 2123 * 215123")

Llama.generate: prefix-match hit


[1;3;38;5;200mThought: I need to use a tool to help me answer the question.
Action: multiply
Action Input: {'a': 2123, 'b': 215123}
[0m[1;3;34mObservation: 456706129
[0m

Llama.generate: prefix-match hit


[1;3;38;5;200mThought: I can answer without using any more tools.
Response: 456706129
[0m

AgentChatResponse(response='456706129', sources=[ToolOutput(content='456706129', tool_name='multiply', raw_input={'args': (), 'kwargs': {'a': 2123, 'b': 215123}}, raw_output=456706129)], source_nodes=[])

In [32]:
agent.chat("What is (121 * 3) + 42 ")

Llama.generate: prefix-match hit


[1;3;38;5;200mThought: (Implicit) I can answer without any more tools!
Response:  189
[0m

AgentChatResponse(response=' 189', sources=[], source_nodes=[])

In [24]:
agent.chat("What is (1200 * 3) + (4*3000)")

Llama.generate: prefix-match hit


[1;3;38;5;200mThought: (Implicit) I can answer without any more tools!
Response:    3960 
[0m

AgentChatResponse(response='   3960 ', sources=[], source_nodes=[])

In [25]:
agent.chat("This is wrong")

Llama.generate: prefix-match hit


[1;3;38;5;200mThought: (Implicit) I can answer without any more tools!
Response:  I apologize for the mistake. Can you please provide me with the correct answer?
[0m

AgentChatResponse(response=' I apologize for the mistake. Can you please provide me with the correct answer?', sources=[], source_nodes=[])

## Custom Agent

In [12]:
from typing import Sequence, List
from llama_index.llms  import ChatMessage

In [30]:
class CustomAgent:
    """Minimal hand-rolled chat agent that lets an LLM call registered tools.

    The agent keeps a running chat history, advertises its tools to the LLM in
    OpenAI tool format, executes any tool calls found in the LLM reply and
    feeds each tool result back to the LLM.
    """
    # NOTE(review): in the recorded runs with the LlamaCPP backend no
    # "Calling function: ..." line is ever printed, which suggests that backend
    # never returns `tool_calls` — confirm before relying on tool execution.

    def __init__(
            self,
            llm,
            tools=None,
            chat_history=None
            ):
        """Create the agent.

        Args:
            llm: Object exposing ``chat(messages, **kwargs)`` whose result has
                a ``.message`` attribute.
            tools: Optional iterable of tools. Each tool must expose
                ``metadata.name`` and ``metadata.to_openai_tool()`` and be
                callable with keyword arguments. Defaults to no tools.
            chat_history: Optional list of earlier messages. When given, the
                list is used as-is (not copied), so messages appended during
                ``chat`` are visible to the caller. Defaults to a fresh list.
        """
        self._llm = llm
        # `None` sentinels instead of `[]` defaults: a mutable default list is
        # created once and would be shared (and appended to) by every agent
        # constructed without an explicit history.
        self._tools = {
            tool.metadata.name: tool
            for tool in (tools if tools is not None else ())
        }
        self._chat_history = chat_history if chat_history is not None else []

    def reset(self):
        """Forget the conversation by replacing the history with a new empty list."""
        self._chat_history = []

    def chat(self, message:str):
        """Send a user message and return the content of the final LLM reply.

        The user message and every LLM/tool message produced along the way are
        appended to the agent's chat history.

        Args:
            message: Text of the user's message.

        Returns:
            The ``content`` of the last message generated by the LLM.
        """
        chat_history = self._chat_history
        chat_history.append(ChatMessage(role='user', content=message))

        # Tool specifications handed to the LLM with the first request.
        tools = [
            tool.metadata.to_openai_tool() for tool in self._tools.values()
            ]

        generated_message = self._llm.chat(chat_history, tools=tools).message
        chat_history.append(generated_message)

        tool_calls = generated_message.additional_kwargs.get("tool_calls", None)
        if tool_calls is not None:
            for tool_call in tool_calls:
                # Run the requested tool, record its result, then ask the LLM
                # to continue with that result in the history.
                function_message = self._call_function(tool_call)
                chat_history.append(function_message)
                generated_message = self._llm.chat(chat_history).message
                chat_history.append(generated_message)
        return generated_message.content

    def _call_function(self, tool_call):
        """Execute one tool call and wrap its output as a ``tool`` chat message.

        Args:
            tool_call: Mapping with an ``'id'`` key and a ``'function'`` mapping
                holding the tool ``'name'`` and its JSON-encoded ``'arguments'``.

        Returns:
            A ``ChatMessage`` with role ``'tool'`` whose content is ``str()`` of
            the tool's output.

        Raises:
            KeyError: If the requested tool name is not registered.
            json.JSONDecodeError: If ``'arguments'`` is not valid JSON.
        """
        # Imported here because the notebook never imports `json` elsewhere;
        # without it the first tool call fails with NameError.
        import json

        id_ = tool_call['id']
        function_call = tool_call['function']
        tool = self._tools[function_call['name']]
        output = tool(**json.loads(function_call['arguments']))
        print(f"Calling function: {function_call['name']}")
        return ChatMessage(
            name=function_call['name'],
            content = str(output),
            role='tool',
            additional_kwargs={
                'tool_call_id': id_,
                'name': function_call['name']
            }
        )


In [31]:
agent = CustomAgent(llm=llm, tools=[multiply_tool, add_tool])

In [32]:
agent.chat('Hi')

Llama.generate: prefix-match hit


' Hello! How can I assist you today?'

In [33]:
agent.chat("What is 2123 * 215123")

Llama.generate: prefix-match hit


' The result of multiplying 2123 by 215123 is 470689199.'

In [28]:
agent.chat("The answer is wrong")

Llama.generate: prefix-match hit


' I apologize for the mistake in my previous response. The correct answer to your question is:\n\nThe result of multiplying 2123 by 215123 is 46089749.'

In [34]:
agent.chat("What is (121 * 3) + 42?")

Llama.generate: prefix-match hit


' The expression (121 \\* 3) + 42 evaluates to 363 + 42, which equals 405.'