In [None]:
# llm modules
from transformers import (
    AutoModelForCausalLM,
    AutoTokenizer,
    BitsAndBytesConfig,
    pipeline
)
from transformers import GenerationConfig
from langchain_huggingface.llms import HuggingFacePipeline
from langchain_huggingface import ChatHuggingFace, HuggingFaceEndpoint

# prompt modules
from langchain.prompts import PromptTemplate
from langchain.prompts.chat import ChatPromptTemplate
from langchain.prompts.chat import HumanMessagePromptTemplate, SystemMessagePromptTemplate
from langchain_core.output_parsers.string import StrOutputParser
from langchain.output_parsers import StructuredOutputParser, ResponseSchema
from langchain.prompts import ChatPromptTemplate, HumanMessagePromptTemplate
from langchain.schema import SystemMessage

import warnings

warnings.filterwarnings("ignore")

In [None]:
from langchain_experimental.agents.agent_toolkits import create_python_agent
from langchain.agents import load_tools, initialize_agent
from langchain.agents import AgentType
from langchain_experimental.tools.python.tool import PythonREPLTool
from langchain_experimental.utilities.python import PythonREPL

In [None]:
# Load a 4-bit quantized chat model and expose it as a transformers
# text-generation pipeline (`pipe`) for the LangChain wrappers below.

# model_name = "minkhantycc/Llama-2-7b-chat-finetune-quantized"
model_name = "TinyLlama/TinyLlama-1.1B-Chat-v1.0"
# The empty-string key addresses the whole model, i.e. load everything on device 0.
device_map = {"": 0}

# bnb config: 4-bit NF4 weights, float16 compute, no nested (double) quantization
bnb_config = BitsAndBytesConfig(
    load_in_4bit=True,
    bnb_4bit_quant_type="nf4",
    bnb_4bit_compute_dtype="float16",
    bnb_4bit_use_double_quant=False,
)

# base model
base_model = AutoModelForCausalLM.from_pretrained(
    model_name,
    quantization_config=bnb_config,
    device_map=device_map
)
# NOTE(review): disabling the KV cache is normally a fine-tuning setting;
# confirm it is really wanted for inference-only use in this notebook.
base_model.config.use_cache = False
# base_model.config.pretraining_tp = 1

# tokenizer: reuse EOS as the padding token
tokenizer = AutoTokenizer.from_pretrained(
    model_name,
    trust_remote_code=True
)
tokenizer.pad_token = tokenizer.eos_token
tokenizer.padding_side = "right"

# Greedy decoding (do_sample=False). `temperature` is intentionally not set:
# it only applies to sampling and is reported as an inconsistent setting by
# transformers when combined with do_sample=False.
base_model.generation_config = GenerationConfig(
    max_new_tokens=256,
    repetition_penalty=1.15,
    do_sample=False,
    eos_token_id=tokenizer.eos_token_id,
    pad_token_id=tokenizer.eos_token_id,
)

# pipeline: return only the newly generated text, not the echoed prompt
pipe = pipeline(
    task="text-generation",
    model=base_model,
    tokenizer=tokenizer,
    device_map=device_map,
    return_full_text=False
)

# llm
# hf_pipe = CustomHuggingFacePipeline(pipe, prompt_template=prompt)

In [None]:
from langchain.prompts import ChatPromptTemplate, SystemMessagePromptTemplate, HumanMessagePromptTemplate, PromptTemplate

# Custom ReAct-style agent prompt written with TinyLlama chat markers
# (<|system|>, <|user|>, <|assistant|>, </s>).
#
# `agent_scratchpad` must be a real input variable, not a partial: the agent's
# LLMChain only forwards the keys listed in `input_variables`, so pinning the
# scratchpad to "" as a partial would hide every previous
# Thought/Action/Observation from the model on each step.
#
# NOTE(review): the template places `</s>` after almost every instruction line;
# confirm this matches the chat format the model was trained on.
prompt = ChatPromptTemplate(
    input_variables=['input', 'agent_scratchpad'],
    partial_variables={
        # Tool descriptions/names are fixed text for the wikipedia + Calculator agent.
        'tools': (
            'wikipedia - A wrapper around Wikipedia. Useful for general questions about people, places, companies, facts, historical events, or other subjects. '
            'Input should be a search query.\n'
            'Calculator - Useful for math-related questions.'
        ),
        'tool_names': 'wikipedia, Calculator'
    },
    messages=[
        # System turn: tool list, JSON action format and the ReAct loop layout.
        SystemMessagePromptTemplate(
            prompt=PromptTemplate(
                input_variables=['tool_names', 'tools'],
                template=(
                    '<|system|>Answer the following questions as best you can. You have access to the following tools:</s>\n\n'
                    '{tools}\n\n'
                    'Use the tools by specifying a JSON blob with an `action` key (tool name) and an `action_input` key (input to the tool).</s>\n\n'
                    'The "action" field should only contain: {tool_names}</s>.\n\n'
                    'The $JSON_BLOB should only contain a SINGLE action. Do NOT return a list of actions. Here is an example:</s>\n\n'
                    '```\n{{\n  "action": "$TOOL_NAME",\n  "action_input": "$INPUT"\n}}\n```\n\n'
                    'REMEMBER: ALWAYS follow this format:</s>\n\n'
                    'Question: the input question you must answer</s>\n'
                    'Thought: consider what action to take</s>\n'
                    'Action:</s>\n'
                    '```\n$JSON_BLOB\n```\n'
                    'Observation: the result of the action</s>\n'
                    '... (repeat Thought/Action/Observation as needed)</s>\n'
                    'Thought: I now know the final answer</s>\n'
                    'Final Answer: the final answer to the original input question</s>\n\n'
                    'Begin! '
                    'REMEMBER: Always use the exact phrase `Final Answer` in your response.</s>'
                    '<|user|>'
                )
            )
        ),
        # User turn: the question followed by the agent's work so far.
        HumanMessagePromptTemplate(
            prompt=PromptTemplate(
                input_variables=['input', 'agent_scratchpad'],
                template='{input}\n\n{agent_scratchpad}</s><|assistant|>'
            )
        )
    ]
)



In [None]:
from typing import Optional

from pydantic import BaseModel


class CustomHuggingFacePipeline(HuggingFacePipeline):
    """HuggingFacePipeline that optionally wraps every prompt in a chat template.

    When ``prompt_template`` is set, each incoming prompt is substituted into
    the template as ``input`` before generation, and the formatted text is
    appended to ``log.txt`` for inspection. The template must therefore be
    fully formattable from ``input`` alone (any other variables have to be
    supplied as partial variables).
    """

    # Template applied to each raw prompt; None disables the wrapping.
    prompt_template: Optional[ChatPromptTemplate] = None

    def _generate(self, inputs, **kwargs):
        """Format the prompts (if a template is set) and delegate to the base class.

        Args:
            inputs: A list of prompt strings, as passed by the base LLM
                machinery. A single string is also accepted and treated as a
                one-element list.
            **kwargs: Forwarded unchanged to ``HuggingFacePipeline._generate``
                (e.g. ``stop``, ``run_manager``).

        Returns:
            Whatever ``HuggingFacePipeline._generate`` returns for the
            (possibly re-formatted) prompts.
        """
        # The base class hands over a list of prompts; never nest it again.
        prompts = [inputs] if isinstance(inputs, str) else list(inputs)

        # Compare against None explicitly rather than relying on truthiness.
        if self.prompt_template is not None:
            # Reformat each prompt individually using the prompt template.
            prompts = [self.prompt_template.format(input=text) for text in prompts]
            with open("log.txt", "a", encoding="utf-8") as f:
                for text in prompts:
                    f.write(text + "\n")

        return super()._generate(prompts=prompts, **kwargs)

In [None]:
# llm
# Wrap the transformers text-generation pipeline (`pipe`) in LangChain's
# HuggingFacePipeline; `hf_pipe` is the LLM handed to the tools and agents below.
hf_pipe = HuggingFacePipeline(pipeline=pipe)

## Agent Testing 1

In [None]:
# from transformers import Tool, load_tool, ReactCodeAgent
# from transformers.agents.llm_engine import HfEngine
# from langchain.agents import load_tools

# # create engine
# # engine = TransformersEngine(hf_pipe)
# engine = HfEngine(model="meta-llama/Meta-Llama-3-8B-Instruct")

# # agent
# langchain_tools = load_tools(['llm-math', 'wikipedia'], llm=hf_pipe)
# math_tool = Tool.from_langchain(langchain_tools[0])
# wiki_tool = Tool.from_langchain(langchain_tools[1])

# # agent
# agent = ReactCodeAgent(tools=[math_tool, wiki_tool], llm_engine=engine)

# agent.run("What is 25% of 300?")

## Agents Testing

In [None]:
import langchain
from langchain.globals import set_debug

# Alternative prompt pieces for `agent_kwargs` (currently unused, see the
# commented-out block in `initialize_agent` below).
PREFIX = """<|system|>Answer the following questions as best you can. You have access to the following tools:"""
FORMAT_INSTRUCTIONS = """The output should be a markdown code snippet formatted in the following schema, including the leading and trailing "```" and "```":\n

Question: the input question you must answer
Thought: you should always think about what to do
Action: the action to take, should be one of [{tool_names}]
Action Input: the input to the action
Observation: the result of the action
... (this Thought/Action/Action Input/Observation can repeat N times)
Thought: I now know the final answer
Final Answer: the final answer to the original input question"""
SUFFIX = """Begin!</s>

<|user|>
Question: {input} </s>
Thought:{agent_scratchpad}</s>
<|assistant|>"""

## math and wikipedia tools
tools = load_tools(["llm-math", "wikipedia"], llm=hf_pipe)

# initialize agent
agent = initialize_agent(
    tools=tools,
    llm=hf_pipe,
    agent=AgentType.CHAT_ZERO_SHOT_REACT_DESCRIPTION,
    verbose=True,
    handle_parsing_errors=True,
    # agent_kwargs={
    #     'prefix': PREFIX,
    #     'format_instructions': FORMAT_INSTRUCTIONS,
    #     'suffix': SUFFIX
    # }
)

# Swap the agent's built-in prompt for the custom chat-format `prompt`
# defined in an earlier cell.
agent.agent.llm_chain.prompt = prompt

# math agent test
# Debug tracing is switched on only for this call; the `finally` guarantees it
# is switched off again even if the agent raises, so later cells are not
# flooded with trace output.
set_debug(True)
try:
    result = agent.invoke({"input": "What is the 25% of 300?"})
finally:
    set_debug(False)
result

In [None]:
# wikipedia agent test
# Built from adjacent string literals so the spacing between sentences is
# explicit (backslash continuations previously glued "(CMU)" to "what").
question = (
    "Tom M. Mitchell is an American computer scientist "
    "and the Founders University Professor at Carnegie Mellon University (CMU). "
    "What book did he write?"
)
result = agent.invoke({"input": question})
result

In [None]:
from langchain.globals import set_debug

# python agent
python_tool = PythonREPLTool()
agent = create_python_agent(
    hf_pipe.bind(stop=["Human:"]),
    tool=python_tool,
    verbose=True,
    # agent_type=AgentType.ZERO_SHOT_REACT_DESCRIPTION,
    agent_executor_kwargs={"handle_parsing_errors": True},
    # agent_kwargs={
    #     'prefix': PREFIX,
    #     'format_instructions': FORMAT_INSTRUCTIONS,
    #     'suffix': SUFFIX
    # }
)

# Reuse the custom chat-format prompt, but describe the tool this agent
# actually has; the shared `prompt` advertises wikipedia/Calculator, which this
# executor cannot run.
# NOTE(review): the prompt asks for a JSON action blob; presumably the default
# python agent parses "Action:/Action Input:" text instead — confirm the output
# parser matches, otherwise tool calls will only surface as parsing errors.
agent.agent.llm_chain.prompt = prompt.partial(
    tools=f"{python_tool.name} - {python_tool.description}",
    tool_names=python_tool.name,
)

# input to python
customer_list = [
    ["Harrison", "Chase"],
    ["Lang", "Chain"],
    ["Dolly", "Too"],
    ["Elle", "Elem"],
    ["Geoff", "Fusion"],
    ["Trance", "Former"],
    ["Jen", "Ayai"],
]

# Single-line question: no stray indentation from line continuations.
sort_request = (
    "Sort these customers by last name and then first name "
    f"and print the output: {customer_list}"
)

# Enable debug tracing for this run only and always switch it back off.
set_debug(True)
try:
    python_result = agent.invoke({"input": sort_request})
finally:
    set_debug(False)
python_result