## Setup Environment

In [1]:
# %%capture
# %pip install langchain
# %pip install langchain-hub
# %pip install langchain-community langchain-huggingface
# %pip install huggingface_hub transformers
# %pip install sentence_transformers==2.2.2
# %pip install chromadb faiss accelerate
# %pip install -U bitsandbytes
# %pip install tiktoken python-dotenv
# %pip install faiss-gpu
# %pip install InstructorEmbedding docarray
# %pip install langchain_experimental
# %pip install wikipedia
# %pip install numexpr

## Import modules

In [1]:
# llm modules
from transformers import (
    AutoModelForCausalLM,
    AutoTokenizer,
    BitsAndBytesConfig,
    pipeline
)
from transformers import GenerationConfig
from langchain_huggingface.llms import HuggingFacePipeline
from langchain_huggingface import ChatHuggingFace, HuggingFaceEndpoint

# prompt modules
from langchain.prompts import PromptTemplate
from langchain.prompts.chat import ChatPromptTemplate
from langchain.prompts.chat import HumanMessagePromptTemplate, SystemMessagePromptTemplate
from langchain_core.output_parsers.string import StrOutputParser
from langchain.output_parsers import StructuredOutputParser, ResponseSchema
from langchain.prompts import ChatPromptTemplate, HumanMessagePromptTemplate
from langchain.schema import SystemMessage

import warnings

warnings.filterwarnings("ignore")

## Load Llama Quantized Model

In [2]:
model_name = "minkhantycc/Llama-2-7b-chat-finetune-quantized"
# model_name = "TinyLlama/TinyLlama-1.1B-Chat-v1.0"
device_map = {"": 0}  # map the whole model ("" = root module) to device 0

# bitsandbytes config: 4-bit NF4 weights, float16 compute, no double quantization
bnb_config = BitsAndBytesConfig(
    load_in_4bit=True,
    bnb_4bit_quant_type="nf4",
    bnb_4bit_compute_dtype="float16",
    bnb_4bit_use_double_quant=False,
)

# base model
base_model = AutoModelForCausalLM.from_pretrained(
    model_name,
    quantization_config=bnb_config,
    device_map=device_map
)
# This notebook only runs inference, so keep the key/value cache enabled.
# (`use_cache = False` is a fine-tuning setting and only slows generation down.)
base_model.config.use_cache = True
base_model.config.pretraining_tp = 1

# Tokenizer for the same checkpoint.
# NOTE(review): trust_remote_code=True allows code shipped in the model repo to run;
# confirm it is actually required for this checkpoint.
tokenizer = AutoTokenizer.from_pretrained(
    model_name,
    trust_remote_code=True
)
# The tokenizer has no dedicated pad token here, so EOS is reused for padding.
tokenizer.pad_token = tokenizer.eos_token
# Decoder-only models must be left-padded for generation so that new tokens
# directly follow the prompt when several prompts are batched together.
tokenizer.padding_side = "left"

# Greedy decoding (do_sample=False). No `temperature` is set: it is ignored when
# sampling is disabled and only triggers a configuration warning.
base_model.generation_config = GenerationConfig(
    max_new_tokens = 256,
    repetition_penalty = 1.15,
    do_sample = False,
    eos_token_id = tokenizer.eos_token_id,
    pad_token_id = tokenizer.eos_token_id,
)

# Text-generation pipeline; return_full_text=False returns only the newly
# generated text, without echoing the prompt.
pipe = pipeline(
    task="text-generation",
    model=base_model,
    tokenizer=tokenizer,
    device_map=device_map,
    return_full_text=False
)

Loading checkpoint shards:   0%|          | 0/2 [00:00<?, ?it/s]

## Create A Prompt Template

In [3]:
from langchain.prompts import ChatPromptTemplate, SystemMessagePromptTemplate, HumanMessagePromptTemplate, PromptTemplate

# Tool catalogue substituted for {tools} in the system message (one tool per line).
tools_description = (
    'wikipedia - A wrapper around Wikipedia. Useful for general questions about people, places, companies, facts, historical events, or other subjects. '
    'Input should be a search query.\n'
    'Calculator - Useful for math-related questions.'
)

# System instructions: describe the tools, the JSON action format and the
# Question / Thought / Action / Observation / Final Answer layout.
# Doubled braces ({{ }}) are literal braces in the rendered prompt.
system_template = (
    '<s>[INST]Answer the following questions as best you can. You have access to the following tools:\n\n'
    '{tools}\n\n'
    'Use the tools by specifying a JSON blob with an `action` key (tool name) and an `action_input` key (input to the tool).\n\n'
    'The "action" field should only contain: {tool_names}.\n\n'
    'The $JSON_BLOB should only contain a SINGLE action. Do NOT return a list of actions. Here is an example:\n\n'
    '```\n{{\n  "action": "$TOOL_NAME",\n  "action_input": "$INPUT"\n}}\n```\n\n'
    'ALWAYS follow this format:\n\n'
    'Question: the input question you must answer\n'
    'Thought: consider what action to take\n'
    'Action:\n'
    '```\n$JSON_BLOB\n```\n'
    'Observation: the result of the action\n'
    '... (repeat Thought/Action/Observation as needed)\n'
    'Thought: I now know the final answer\n'
    'Final Answer: the final answer to the original input question\n\n'
    'Begin! Always use the exact phrase `Final Answer` in your response.'
)

# User turn: the question, then the scratchpad, then the closing [/INST] tag.
human_template = '{input}\n\n{agent_scratchpad}[/INST]'

system_message = SystemMessagePromptTemplate(
    prompt=PromptTemplate(
        input_variables=['tool_names', 'tools'],
        template=system_template,
    )
)
human_message = HumanMessagePromptTemplate(
    prompt=PromptTemplate(
        input_variables=['input', 'agent_scratchpad'],
        template=human_template,
    )
)

# Only `input` has to be supplied when formatting; the remaining variables are
# pre-filled through partial_variables.
prompt = ChatPromptTemplate(
    input_variables=['input'],
    partial_variables={
        'agent_scratchpad': "",
        'tools': tools_description,
        'tool_names': 'wikipedia, Calculator',
    },
    messages=[system_message, human_message],
)


## Create A Custom Pipeline

In [4]:
from pydantic import BaseModel
class CustomHuggingFacePipeline(HuggingFacePipeline):
    """HuggingFacePipeline that wraps each prompt in ``prompt_template`` before generation.

    When ``prompt_template`` is set, every incoming prompt is substituted for
    the template's ``input`` variable and the rendered text is what gets sent
    to the underlying pipeline. When it is ``None`` the prompts are forwarded
    unchanged.
    """

    # Optional chat template applied to every prompt; None disables wrapping.
    prompt_template: ChatPromptTemplate = None

    def _generate(self, inputs, **kwargs):
        """Render each prompt through the template and delegate to the parent.

        Args:
            inputs: The prompts to complete. The base class passes a list of
                strings; a single string is also accepted and treated as a
                one-element list.
            **kwargs: Forwarded unchanged to ``HuggingFacePipeline._generate``
                (e.g. ``stop``, ``run_manager``).

        Returns:
            Whatever ``HuggingFacePipeline._generate`` returns for the
            rendered prompts.
        """
        if isinstance(inputs, str):
            inputs = [inputs]

        # Compare against None explicitly: the template's truthiness follows
        # its length, not whether it was provided.
        if self.prompt_template is not None:
            # Format every prompt on its own. Formatting the whole list at once
            # would embed the list's repr in the prompt and collapse the batch
            # into a single generation.
            inputs = [self.prompt_template.format(input=text) for text in inputs]

        return super()._generate(prompts=list(inputs), **kwargs)

In [5]:
# LangChain LLM: the transformers pipeline wrapped with the chat prompt template
hf_pipe = CustomHuggingFacePipeline(
    pipeline=pipe,
    prompt_template=prompt,
)

## Test Agent

In [6]:
from langchain_experimental.agents.agent_toolkits import create_python_agent
from langchain.agents import load_tools, initialize_agent
from langchain.agents import AgentType
from langchain_experimental.tools.python.tool import PythonREPLTool
from langchain_experimental.utilities.python import PythonREPL

In [7]:
## math and wikipedia tools
tools = load_tools(["llm-math", "wikipedia"], llm=hf_pipe)

# initialize agent
agent = initialize_agent(
    tools,
    hf_pipe,
    agent=AgentType.ZERO_SHOT_REACT_DESCRIPTION,
    verbose=True,
    handle_parsing_errors=True
)

import langchain
from langchain.globals import set_debug

# math agent test
# Turn on debug tracing for this one call only; the finally block restores the
# flag even if the agent raises, so later cells are not flooded with traces.
set_debug(True)
try:
    result = agent.invoke({"input": "What is the 25% of 300?"})
finally:
    set_debug(False)
result

[32;1m[1;3m[chain/start][0m [1m[chain:AgentExecutor] Entering Chain run with input:
[0m{
  "input": "What is the 25% of 300?"
}
[32;1m[1;3m[chain/start][0m [1m[chain:AgentExecutor > chain:LLMChain] Entering Chain run with input:
[0m{
  "input": "What is the 25% of 300?",
  "agent_scratchpad": "",
  "stop": [
    "\nObservation:",
    "\n\tObservation:"
  ]
}
[32;1m[1;3m[llm/start][0m [1m[chain:AgentExecutor > chain:LLMChain > llm:CustomHuggingFacePipeline] Entering LLM run with input:
[0m{
  "prompts": [
    "Answer the following questions as best you can. You have access to the following tools:\n\nCalculator(*args: Any, callbacks: Union[List[langchain_core.callbacks.base.BaseCallbackHandler], langchain_core.callbacks.base.BaseCallbackManager, NoneType] = None, tags: Optional[List[str]] = None, metadata: Optional[Dict[str, Any]] = None, **kwargs: Any) -> Any - Useful for when you need to answer questions about math.\nwikipedia - A wrapper around Wikipedia. Useful for whe

In [8]:
# wikipedia agent test
# Adjacent string literals are concatenated; each piece ends with an explicit
# space so words are not glued together across line breaks.
question = (
    "Tom M. Mitchell is an American computer scientist "
    "and the Founders University Professor at Carnegie Mellon University (CMU) "
    "what book did he write?"
)
result = agent.invoke({"input": question})
result



[1m> Entering new AgentExecutor chain...[0m
[32;1m[1;3m Final Answer: The book written by Tom M. Mitchell is "Artificial Intelligence: A Modern Approach".[0m

[1m> Finished chain.[0m


{'input': 'Tom M. Mitchell is an American computer scientist and the Founders University Professor at Carnegie Mellon University (CMU)what book did he write?',
 'output': 'The book written by Tom M. Mitchell is "Artificial Intelligence: A Modern Approach".'}

In [9]:
# python agent
# NOTE: PythonREPLTool executes model-generated Python code in this kernel;
# only run this with prompts and models you trust.
agent = create_python_agent(
    hf_pipe,
    tool=PythonREPLTool(),
    verbose=True
)

# input to python
customer_list = [
    ["Harrison", "Chase"],
    ["Lang", "Chain"],
    ["Dolly", "Too"],
    ["Elle", "Elem"],
    ["Geoff","Fusion"],
    ["Trance","Former"],
    ["Jen","Ayai"]
]

# Build the instruction from adjacent literals so no source indentation leaks
# into the prompt text.
sort_request = (
    "Sort these customers by last name and then first name "
    f"and print the output: {customer_list}"
)

# invoke() returns a dict; show only the final answer string.
agent.invoke({"input": sort_request})["output"]



[1m> Entering new AgentExecutor chain...[0m
[32;1m[1;3m Final Answer: The customers are sorted alphabetically according to their last names followed by their first names.

Customers:

1. Harrison Chase
2. Lang Chain
3. Dolly Too
4. Elle Elem
5. Geoff Fusion
6. Trance Former
7. Jen Ayai[0m

[1m> Finished chain.[0m


'The customers are sorted alphabetically according to their last names followed by their first names.\n\nCustomers:\n\n1. Harrison Chase\n2. Lang Chain\n3. Dolly Too\n4. Elle Elem\n5. Geoff Fusion\n6. Trance Former\n7. Jen Ayai'