## Setup Environment

In [1]:
# %%capture
# %pip install langchain
# %pip install langchain-hub
# %pip install langchain-community langchain-huggingface
# %pip install huggingface_hub transformers
# %pip install sentence_transformers==2.2.2
# %pip install chromadb faiss accelerate
# %pip install -U bitsandbytes
# %pip install tiktoken python-dotenv
# %pip install faiss-gpu
# %pip install InstructorEmbedding docarray
# %pip install langchain_experimental
# %pip install wikipedia
# %pip install numexpr

## Modules

In [2]:
# llm modules
from transformers import (
    AutoModelForCausalLM,
    AutoTokenizer,
    BitsAndBytesConfig,
    pipeline
)
from transformers import GenerationConfig
from langchain_huggingface.llms import HuggingFacePipeline
from langchain_huggingface import ChatHuggingFace, HuggingFaceEndpoint

# prompt modules
from langchain.prompts import PromptTemplate
from langchain.prompts.chat import ChatPromptTemplate
from langchain.prompts.chat import HumanMessagePromptTemplate, SystemMessagePromptTemplate
from langchain_core.output_parsers.string import StrOutputParser
from langchain.output_parsers import StructuredOutputParser, ResponseSchema
from langchain.prompts import ChatPromptTemplate, HumanMessagePromptTemplate
from langchain.schema import SystemMessage

import warnings

# Silence every Python warning for the rest of the session to keep cell output compact.
# NOTE(review): this is a blanket filter, so deprecation and configuration
# warnings raised by later cells are hidden as well.
warnings.filterwarnings("ignore")

  from .autonotebook import tqdm as notebook_tqdm


## Prompt Template

In [3]:
# ReAct-style chat prompt for the tool-using agent, wrapped in Llama-2 [INST] tags.
# The system message lists the available tools and the JSON action format; the
# human message carries the question followed by the agent's running scratchpad.
prompt = ChatPromptTemplate(
    input_variables=['agent_scratchpad', 'input'],
    messages=[
        SystemMessagePromptTemplate(
            prompt=PromptTemplate(
                input_variables=[],
                template=(
                    '<s>[INST]Answer the following questions as best you can. You have access to the following tools:\n\n'
                    'wikipedia - A wrapper around Wikipedia. Useful for general questions about people, places, companies, facts, historical events, or other subjects. '
                    'Input should be a search query.\n'
                    'Calculator - Useful for math-related questions.\n\n'
                    'The way you use the tools is by specifying a json blob.\n'
                    'Specifically, this json should have a `action` key (with the name of the tool to use) and a `action_input` key (with the input to the tool going here).\n\n'
                    'The only values that should be in the "action" field are: wikipedia, Calculator\n\n'
                    # The trailing space keeps this sentence from running into the
                    # next literal ("...multiple actions.Here is an example...").
                    'The $JSON_BLOB should only contain a SINGLE action, do NOT return a list of multiple actions. '
                    'Here is an example of a valid $JSON_BLOB:\n\n'
                    # Doubled braces are template escapes and render as literal { }.
                    '```\n{{\n  "action": $TOOL_NAME,\n  "action_input": $INPUT\n}}\n```\n\n'
                    'ALWAYS use the following format:\n\n'
                    'Question: the input question you must answer\n'
                    'Thought: you should always think about what to do\n'
                    'Action:\n```\n$JSON_BLOB\n```\n'
                    'Observation: the result of the action\n... (this Thought/Action/Observation can repeat N times)\n'
                    'Thought: I now know the final answer\n'
                    'Final Answer: the final answer to the original input question\n\n'
                    'Begin! Reminder to always use the exact characters `Final Answer` when responding.'
                )
            )
        ),
        HumanMessagePromptTemplate(
            prompt=PromptTemplate(
                input_variables=['agent_scratchpad', 'input'],
                # [/INST] closes the instruction block opened in the system message.
                template='{input}\n\n{agent_scratchpad}[/INST]'
            )
        )
    ]
)

In [4]:
# Echo the prompt object so the notebook shows its repr.
prompt

ChatPromptTemplate(input_variables=['agent_scratchpad', 'input'], messages=[SystemMessagePromptTemplate(prompt=PromptTemplate(input_variables=[], template='<s>[INST]Answer the following questions as best you can. You have access to the following tools:\n\nwikipedia - A wrapper around Wikipedia. Useful for general questions about people, places, companies, facts, historical events, or other subjects. Input should be a search query.\nCalculator - Useful for math-related questions.\n\nThe way you use the tools is by specifying a json blob.\nSpecifically, this json should have a `action` key (with the name of the tool to use) and a `action_input` key (with the input to the tool going here).\n\nThe only values that should be in the "action" field are: wikipedia, Calculator\n\nThe $JSON_BLOB should only contain a SINGLE action, do NOT return a list of multiple actions.Here is an example of a valid $JSON_BLOB:\n\n```\n{{\n  "action": $TOOL_NAME,\n  "action_input": $INPUT\n}}\n```\n\nALWAYS us

## LLM

In [5]:
model_name = "minkhantycc/Llama-2-7b-chat-finetune-quantized"
# model_name = "TinyLlama/TinyLlama-1.1B-Chat-v1.0"
device_map = {"": 0}  # the "" key maps the whole model to device index 0

# bitsandbytes config: 4-bit NF4 weights, float16 compute, no double quantization
bnb_config = BitsAndBytesConfig(
    load_in_4bit=True,
    bnb_4bit_quant_type="nf4",
    bnb_4bit_compute_dtype="float16",
    bnb_4bit_use_double_quant=False,
)

# base model, quantized at load time with the config above
base_model = AutoModelForCausalLM.from_pretrained(
    model_name,
    quantization_config=bnb_config,
    device_map=device_map
)
# NOTE(review): disabling the KV cache is usually a fine-tuning setting;
# confirm it is intended for this inference-only notebook.
base_model.config.use_cache = False

# Tokenizer for the same checkpoint; the EOS token doubles as the padding token.
tokenizer = AutoTokenizer.from_pretrained(
    model_name,
    trust_remote_code=True
)
tokenizer.pad_token = tokenizer.eos_token
tokenizer.padding_side = "right"

# Deterministic (greedy) decoding. `temperature` is intentionally not set:
# it only applies when do_sample=True, and passing it together with
# do_sample=False makes GenerationConfig validation complain.
base_model.generation_config = GenerationConfig(
    max_new_tokens=256,
    repetition_penalty=1.15,
    do_sample=False,
    eos_token_id=tokenizer.eos_token_id,
    pad_token_id=tokenizer.eos_token_id,
)

# Text-generation pipeline; return_full_text=False yields only the newly
# generated continuation, without echoing the prompt.
# NOTE(review): `device_map` is passed again here although the model object
# was already loaded with it above - confirm whether it is still needed.
pipe = pipeline(
    task="text-generation",
    model=base_model,
    tokenizer=tokenizer,
    device_map=device_map,
    return_full_text=False
)

Loading checkpoint shards: 100%|██████████| 2/2 [00:12<00:00,  6.18s/it]


In [6]:
# LangChain LLM wrapper around the transformers pipeline, with stop sequences
# bound so every call made through `hf_pipe` receives the same `stop` list.
hf_pipe = HuggingFacePipeline(pipeline=pipe).bind(
    stop=[
        "\nObservation:",
        "\n\tObservation:",
        "Question:",
        "</s>",
        "\n\n",
    ]
)

## Built-in Tools

In [7]:
from langchain_experimental.agents.agent_toolkits import create_python_agent
from langchain.agents import load_tools, initialize_agent
from langchain.agents import AgentType
from langchain_experimental.tools.python.tool import PythonREPLTool
from langchain_experimental.utilities.python import PythonREPL

In [8]:
# Built-in tools: the "llm-math" calculator (driven by hf_pipe) and Wikipedia search
tools = load_tools(["llm-math", "wikipedia"], llm=hf_pipe)

# Chat-style zero-shot ReAct agent over those tools; parsing errors are handled
# by the executor instead of being raised.
agent = initialize_agent(
    tools=tools,
    llm=hf_pipe,
    agent=AgentType.CHAT_ZERO_SHOT_REACT_DESCRIPTION,
    handle_parsing_errors=True,
    verbose=True,
)
# Echo the prompt the agent was constructed with
agent.agent.llm_chain.prompt

ChatPromptTemplate(input_variables=['agent_scratchpad', 'input'], messages=[SystemMessagePromptTemplate(prompt=PromptTemplate(input_variables=[], template='Answer the following questions as best you can. You have access to the following tools:\n\nCalculator: Useful for when you need to answer questions about math.\nwikipedia: A wrapper around Wikipedia. Useful for when you need to answer general questions about people, places, companies, facts, historical events, or other subjects. Input should be a search query.\n\nThe way you use the tools is by specifying a json blob.\nSpecifically, this json should have a `action` key (with the name of the tool to use) and a `action_input` key (with the input to the tool going here).\n\nThe only values that should be in the "action" field are: Calculator, wikipedia\n\nThe $JSON_BLOB should only contain a SINGLE action, do NOT return a list of multiple actions. Here is an example of a valid $JSON_BLOB:\n\n```\n{{\n  "action": $TOOL_NAME,\n  "action_

In [9]:
# Replace the agent's built-in prompt with the custom `prompt` defined earlier
# in this notebook (the one wrapped in [INST] tags), then echo it so the
# notebook shows which template is now in use.
agent.agent.llm_chain.prompt = prompt
agent.agent.llm_chain.prompt

ChatPromptTemplate(input_variables=['agent_scratchpad', 'input'], messages=[SystemMessagePromptTemplate(prompt=PromptTemplate(input_variables=[], template='<s>[INST]Answer the following questions as best you can. You have access to the following tools:\n\nwikipedia - A wrapper around Wikipedia. Useful for general questions about people, places, companies, facts, historical events, or other subjects. Input should be a search query.\nCalculator - Useful for math-related questions.\n\nThe way you use the tools is by specifying a json blob.\nSpecifically, this json should have a `action` key (with the name of the tool to use) and a `action_input` key (with the input to the tool going here).\n\nThe only values that should be in the "action" field are: wikipedia, Calculator\n\nThe $JSON_BLOB should only contain a SINGLE action, do NOT return a list of multiple actions.Here is an example of a valid $JSON_BLOB:\n\n```\n{{\n  "action": $TOOL_NAME,\n  "action_input": $INPUT\n}}\n```\n\nALWAYS us

In [10]:
import langchain

# math agent test
# Debug tracing is enabled only for this single call. The finally clause turns
# it off again even when the agent raises, so a failure here does not leave
# verbose tracing switched on for the following cells.
langchain.debug = True
try:
    result = agent("What is the 25% of 300?")
finally:
    langchain.debug = False
result

[32;1m[1;3m[chain/start][0m [1m[chain:AgentExecutor] Entering Chain run with input:
[0m{
  "input": "What is the 25% of 300?"
}
[32;1m[1;3m[chain/start][0m [1m[chain:AgentExecutor > chain:LLMChain] Entering Chain run with input:
[0m{
  "input": "What is the 25% of 300?",
  "agent_scratchpad": "",
  "stop": [
    "Observation:"
  ]
}
[32;1m[1;3m[llm/start][0m [1m[chain:AgentExecutor > chain:LLMChain > llm:HuggingFacePipeline] Entering LLM run with input:
[0m{
  "prompts": [
    "System: <s>[INST]Answer the following questions as best you can. You have access to the following tools:\n\nwikipedia - A wrapper around Wikipedia. Useful for general questions about people, places, companies, facts, historical events, or other subjects. Input should be a search query.\nCalculator - Useful for math-related questions.\n\nThe way you use the tools is by specifying a json blob.\nSpecifically, this json should have a `action` key (with the name of the tool to use) and a `action_input`

{'input': 'What is the 25% of 300?', 'output': '75'}

In [11]:
# wikipedia agent test
# The question is assembled from adjacent string literals so the spacing
# between the pieces is explicit (a backslash continuation previously glued
# "(CMU)" directly onto "what").
question = (
    "Tom M. Mitchell is an American computer scientist "
    "and the Founders University Professor at Carnegie Mellon University (CMU) "
    "what book did he write?"
)
result = agent(question)
result



[1m> Entering new AgentExecutor chain...[0m
[32;1m[1;3mCould not parse LLM output:  The book written by Tom M. Mitchell is titled "Artificial Intelligence: A Modern Approach". [0m
Observation: Invalid or incomplete response
Thought:[32;1m[1;3mCould not parse LLM output: I am unable to determine which book Tom M. Mitchell wrote without more information. Could you provide me with additional context or details regarding his work? [0m
Observation: Invalid or incomplete response
Thought:[32;1m[1;3mCould not parse LLM output: I cannot determine which book Tom M. Mitchell wrote without more information. Please provide me with additional context or details regarding his work so I may better assist you. [0m
Observation: Invalid or incomplete response
Thought:[32;1m[1;3mTom M. Mitchell is an American computer scientist and the Founders University Professor at Carnegie Mellon University (CMU). He has written several books on artificial intelligence, including "Artificial Intelligen

{'input': 'Tom M. Mitchell is an American computer scientist and the Founders University Professor at Carnegie Mellon University (CMU)what book did he write?',
 'output': 'Artificial Intelligence: A Modern Approach'}

## Python Agent

In [None]:
# Python agent: a zero-shot ReAct agent whose single tool is a Python REPL.
# This rebinds `agent`, replacing the tools agent created earlier.
agent = create_python_agent(
    llm=hf_pipe,
    tool=PythonREPLTool(),
    agent_type=AgentType.ZERO_SHOT_REACT_DESCRIPTION,
    agent_executor_kwargs={"handle_parsing_errors": True},
    verbose=True,
)

# Echo the prompt the agent was constructed with
agent.agent.llm_chain.prompt

In [42]:
# Custom ReAct prompt for the Python agent, wrapped in Llama-2 [INST]/<<SYS>> tags.
# `tool_name` and `tool` are pre-filled partial variables; `input` and
# `agent_scratchpad` are supplied at run time.
python_prompt = PromptTemplate(
    input_variables=['agent_scratchpad', 'input'],
    partial_variables={
        # Description only: the template itself prepends "{tool_name} - ", so
        # repeating the name here would print it twice.
        'tool': "A Python shell. Use this to execute python commands. Input should be a valid python command. If you want to see the output of a value, you should print it out with `print(...)`.",
        # Must match the name the REPL tool registers under, because the agent
        # looks the tool up by the exact string the model emits after "Action:".
        'tool_name': 'Python_REPL'
    },
    template=(
        '<s>[INST]<<SYS>>You are an agent designed to write and execute python code to answer questions.\n'
        'You have access to {tool_name}, which you can use to execute python code.\n'
        'If you get an error, debug your code and try again.\n'
        'Only use the output of your code to answer the question. \n'
        'You might know the answer without running any code, but you should still run the code to get the answer.\n'
        'If it does not seem like you can write code to answer the question, just return "I don\'t know" as the answer.\n\n\n'
        '{tool_name} - {tool}\n\n'
        'Use the following format:\n\n'
        'Question: the input question you must answer\n'
        'Thought: you should always think about what to do\n'
        # Substitute the real tool name instead of showing the literal text "[tool_name]".
        'Action: the action to take, should be one of [{tool_name}]\n'
        'Action Input: the input to the action\n'
        'Observation: the result of the action\n... (this Thought/Action/Action Input/Observation can repeat N times)\n'
        'Thought: I now know the final answer\n'
        'Final Answer: the final answer to the original input question\n\n'
        'Begin!\n\n<</SYS>>'
        'Question: {input}\nThought:{agent_scratchpad}[/INST]'
    )
)

In [None]:
# The custom prompt override is currently disabled; uncomment the next line to
# make the Python agent use `python_prompt` instead of its built-in prompt.
# agent.agent.llm_chain.prompt = python_prompt
agent.agent.llm_chain.prompt

In [None]:
# input to python
customer_list = [
    ["Harrison", "Chase"],
    ["Lang", "Chain"],
    ["Dolly", "Too"],
    ["Elle", "Elem"],
    ["Geoff","Fusion"],
    ["Trance","Former"],
    ["Jen","Ayai"]
]

# The instruction is built from adjacent literals so no source indentation
# leaks into the prompt text. Debug tracing is enabled only for this call and
# is switched off in `finally` even if the agent raises.
langchain.debug = True
try:
    result = agent.run(
        "Sort these customers by last name and then first name "
        f"and print the output: {customer_list}"
    )
finally:
    langchain.debug = False
result