<a href="https://colab.research.google.com/github/ResByte/llm-notebooks/blob/main/notebooks/03_Agents_llama_index.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

# LLM Agents

Using llama-cpp-python load an existing mistral-7b model. This model is downloaded from HF and is in gguf format(updated format from ggml)

In [1]:
!CMAKE_ARGS="-DLLAMA_CUBLAS=on" pip install -q llama-cpp-python

[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m8.8/8.8 MB[0m [31m22.7 MB/s[0m eta [36m0:00:00[0m
[?25h  Installing build dependencies ... [?25l[?25hdone
  Getting requirements to build wheel ... [?25l[?25hdone
  Installing backend dependencies ... [?25l[?25hdone
  Preparing metadata (pyproject.toml) ... [?25l[?25hdone
  Building wheel for llama-cpp-python (pyproject.toml) ... [?25l[?25hdone


In [2]:
!pip install -q llama-index

[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m15.8/15.8 MB[0m [31m26.4 MB/s[0m eta [36m0:00:00[0m
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m143.0/143.0 kB[0m [31m18.2 MB/s[0m eta [36m0:00:00[0m
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m75.9/75.9 kB[0m [31m10.5 MB/s[0m eta [36m0:00:00[0m
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m225.4/225.4 kB[0m [31m24.2 MB/s[0m eta [36m0:00:00[0m
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m2.0/2.0 MB[0m [31m28.8 MB/s[0m eta [36m0:00:00[0m
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m76.9/76.9 kB[0m [31m7.9 MB/s[0m eta [36m0:00:00[0m
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m58.3/58.3 kB[0m [31m5.6 MB/s[0m eta [36m0:00:00[0m
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m49.4/49.4 kB[0m [31m5.2 MB/s[0m eta [36m0:00:00[0m
[?25h[31mERROR: pip's dependenc

Model is present in the HF hub.

In [3]:
model_url = "https://huggingface.co/TheBloke/Mistral-7B-Instruct-v0.1-GGUF/resolve/main/mistral-7b-instruct-v0.1.Q4_K_M.gguf?download=true"

In [4]:
from llama_index.llms import LlamaCPP
from llama_index.llms.llama_utils import (
    messages_to_prompt,
    completion_to_prompt,
)

In [5]:
llm = LlamaCPP(
    model_url=model_url,
    model_path=None,
    temperature=0.1,
    max_new_tokens=256,
    context_window=3900,
    generate_kwargs={},
    model_kwargs={'n_gpu_layers':1},
    messages_to_prompt=messages_to_prompt,
    completion_to_prompt=completion_to_prompt,
    verbose=True,
)

Downloading url https://huggingface.co/TheBloke/Mistral-7B-Instruct-v0.1-GGUF/resolve/main/mistral-7b-instruct-v0.1.Q4_K_M.gguf?download=true to path /tmp/llama_index/models/mistral-7b-instruct-v0.1.Q4_K_M.gguf?download=true
total size (MB): 4368.44


4167it [00:30, 135.98it/s]                          
AVX = 1 | AVX2 = 1 | AVX512 = 0 | AVX512_VBMI = 0 | AVX512_VNNI = 0 | FMA = 1 | NEON = 0 | ARM_FMA = 0 | F16C = 1 | FP16_VA = 0 | WASM_SIMD = 0 | BLAS = 1 | SSE3 = 1 | SSSE3 = 1 | VSX = 0 | 


In [6]:
response = llm.complete("Hello! Can you tell me a poem about cats and dogs?")

In [7]:
print(response.text)

 Sure, here's a short poem about cats and dogs:

Cats and dogs, they both have fur,
But cats are more independent, that's for sure.
Dogs love to play and run around,
While cats prefer to lounge and be found.

Cats are known for their graceful leaps,
And dogs are great at fetching sticks and balls.
Both animals have unique personalities,
And make wonderful companions for us all.

So whether you prefer cats or dogs,
Just remember to give them lots of love and hugs.


In [8]:
import nest_asyncio
nest_asyncio.apply()

In [9]:
# define calculation tool for our agent
def multiply(a: int, b: int)->int:
    return a * b

def add(a: int, b: int)->int:
    return a + b

In [15]:
from llama_index.tools import BaseTool, FunctionTool

add_tool = FunctionTool.from_defaults(fn=add)
multiply_tool = FunctionTool.from_defaults(fn=multiply)

In [12]:
from typing import Sequence, List
from llama_index.llms  import ChatMessage

In [30]:
class CustomAgent:
    def __init__(
            self,
            llm,
            tools=[],
            chat_history=[]
            ):
        self._llm = llm
        self._tools = {tool.metadata.name: tool for tool in tools}
        self._chat_history = chat_history

    def reset(self):
        self._chat_history = []

    def chat(self, message:str):
        chat_history = self._chat_history
        chat_history.append(ChatMessage(role='user', content=message))

        tools = [
            tool.metadata.to_openai_tool() for _, tool in self._tools.items()
            ]

        generated_message = self._llm.chat(chat_history, tools=tools).message
        additional_kwargs = generated_message.additional_kwargs
        chat_history.append(generated_message)

        tool_calls = generated_message.additional_kwargs.get("tool_calls", None)
        if tool_calls is not None:
            for tool_call in tool_calls:
                function_message = self._call_function(tool_call)
                chat_history.append(function_message)
                generated_message = self._llm.chat(chat_history).message
                chat_history.append(generated_message)
        return generated_message.content

    def _call_function(self, tool_call):
        id_ = tool_call['id']
        function_call = tool_call['function']
        tool = self._tools[function_call['name']]
        output = tool(**json.loads(function_call['arguments']))
        print(f"Calling function: {function_call['name']}")
        return ChatMessage(
            name=function_call['name'],
            content = str(output),
            role='tool',
            additional_kwargs={
                'tool_call_id': id_,
                'name': function_call['name']
            }
        )


In [31]:
agent = CustomAgent(llm=llm, tools=[multiply_tool, add_tool])

In [32]:
agent.chat('Hi')

Llama.generate: prefix-match hit


' Hello! How can I assist you today?'

In [33]:
agent.chat("What is 2123 * 215123")

Llama.generate: prefix-match hit


' The result of multiplying 2123 by 215123 is 470689199.'

In [28]:
agent.chat("The answer is wrong")

Llama.generate: prefix-match hit


' I apologize for the mistake in my previous response. The correct answer to your question is:\n\nThe result of multiplying 2123 by 215123 is 46089749.'

In [34]:
agent.chat("What is (121 * 3) + 42?")

Llama.generate: prefix-match hit


' The expression (121 \\* 3) + 42 evaluates to 363 + 42, which equals 405.'