## Setup Environment

In [None]:
# %%capture
# Optional dependency install. Every line in this cell is commented out, so
# running it as-is does nothing; uncomment the lines (keeping the %%capture
# magic as the first line of the cell) to install into the kernel environment.
# %pip install langchain
# %pip install langchain-hub
# %pip install langchain-community langchain-huggingface
# %pip install huggingface_hub transformers
# %pip install sentence_transformers==2.2.2
# %pip install chromadb faiss accelerate
# %pip install -U bitsandbytes
# %pip install tiktoken python-dotenv
# %pip install faiss-gpu
# %pip install InstructorEmbedding docarray
# %pip install langchain_experimental
# %pip install wikipedia
# %pip install numexpr

## Modules

In [None]:
# llm modules
from transformers import (
    AutoModelForCausalLM,
    AutoTokenizer,
    BitsAndBytesConfig,
    pipeline
)
from transformers import GenerationConfig
from langchain_huggingface.llms import HuggingFacePipeline

# chain
from langchain.chains.conversation.base import ConversationChain
from langchain.chains.llm import LLMChain

# prompt modules
from langchain.prompts import PromptTemplate
from langchain.prompts.chat import ChatPromptTemplate
from langchain.prompts.chat import HumanMessagePromptTemplate, SystemMessagePromptTemplate
from langchain_core.output_parsers.string import StrOutputParser
from langchain.output_parsers import StructuredOutputParser, ResponseSchema
from langchain.prompts import ChatPromptTemplate, HumanMessagePromptTemplate
from langchain.schema import SystemMessage

# base modules
import os
import torch
import warnings

warnings.filterwarnings("ignore")

## LLM

In [None]:
# ReAct-style instruction text, assembled line by line. The text opens with a
# <|system|> marker and closes with </s>, and carries two f-string style
# placeholders: {input} (the question) and {agent_scratchpad} (prior steps).
prompt_template = "\n".join(
    [
        "<|system|>Answer the following questions as best you can. You have access to the following tools:",
        "",
        "wikipedia: A wrapper around Wikipedia. Useful for when you need to answer general questions about people, places, companies, facts, historical events, or other subjects. Input should be a search query.",
        "Calculator: Useful for when you need to answer questions about math.",
        "",
        "Use the following format:",
        "",
        "Question: the input question you must answer",
        "Thought: you should always think about what to do",
        "Action: the action to take, should be one of [wikipedia, Calculator]",
        "Action Input: the input to the action",
        "Observation: the result of the action",
        "... (this Thought/Action/Action Input/Observation can repeat N times)",
        "Thought: I now know the final answer",
        "Final Answer: the final answer to the original input question",
        "",
        "Begin!",
        "",
        "Question: {input}",
        "{agent_scratchpad}</s>",
    ]
) + "\n"  # the original literal ends with a newline after </s>

In [None]:
# Load the chat model in 4-bit, attach a tokenizer and generation settings, and
# expose the result to LangChain as `hf_pipe`.
# model_name = "minkhantycc/Llama-2-7b-chat-finetune-quantized"
model_name = "TinyLlama/TinyLlama-1.1B-Chat-v1.0"
# The empty-string key maps the whole model onto device index 0.
device_map = {"": 0}

# bnb config: 4-bit NF4 weights, float16 compute, no double quantization
bnb_config = BitsAndBytesConfig(
    load_in_4bit=True,
    bnb_4bit_quant_type="nf4",
    bnb_4bit_compute_dtype="float16",
    bnb_4bit_use_double_quant=False,
)

# base model
base_model = AutoModelForCausalLM.from_pretrained(
    model_name,
    quantization_config=bnb_config,
    device_map=device_map
)
# NOTE(review): these two settings look like leftovers from a fine-tuning
# script; confirm they are still wanted for an inference-only notebook.
base_model.config.use_cache = False
base_model.config.pretraining_tp = 1

# Tokenizer for the same checkpoint; the EOS token doubles as the pad token.
tokenizer = AutoTokenizer.from_pretrained(
    model_name,
    trust_remote_code=True
)
tokenizer.pad_token = tokenizer.eos_token
tokenizer.padding_side = "right"

# Greedy decoding (do_sample=False). `temperature` is deliberately not set:
# it is only consulted when sampling, and combining it with do_sample=False
# makes GenerationConfig validation complain without changing the output.
base_model.generation_config = GenerationConfig(
    max_new_tokens=256,
    repetition_penalty=1.15,
    do_sample=False,
    eos_token_id=tokenizer.eos_token_id,
    pad_token_id=tokenizer.eos_token_id,
)

# pipeline: return only the newly generated text, not the echoed prompt
pipe = pipeline(
    task="text-generation",
    model=base_model,
    tokenizer=tokenizer,
    device_map=device_map,
    return_full_text=False
)

# llm: LangChain wrapper around the transformers pipeline
hf_pipe = HuggingFacePipeline(pipeline=pipe)

In [None]:
# Chat prompt for calling the model directly through a chain.
# `prompt_template` contains the {input} and {agent_scratchpad} placeholders,
# so it has to be wrapped in a SystemMessagePromptTemplate. A plain
# SystemMessage is passed through verbatim and would send the literal braces
# to the model, leaving the `agent_scratchpad` partial with nothing to fill.
prompt = ChatPromptTemplate.from_messages(
    [
        SystemMessagePromptTemplate.from_template(prompt_template),
        HumanMessagePromptTemplate.from_template("{input}"),
    ]
).partial(agent_scratchpad="")  # empty scratchpad: only `input` is left to supply

# NOTE: despite its name, `llm` is an LLMChain (prompt + model), not a bare model.
llm = LLMChain(
    llm=hf_pipe,
    prompt=prompt
)

## Built-in Tools

In [None]:
from langchain_experimental.agents.agent_toolkits import create_python_agent
from langchain.agents import load_tools, initialize_agent
from langchain.agents import AgentType
from langchain_experimental.tools.python.tool import PythonREPLTool
from langchain_experimental.utilities.python import PythonREPL

In [None]:
# Built-in tools: "llm-math" (uses `hf_pipe` itself) and "wikipedia".
tools = load_tools(
    ["llm-math", "wikipedia"],
    llm=hf_pipe,
)

# Agent over those tools, driven by the same pipeline-backed LLM. Steps are
# printed as they run, and unparsable model output is handled rather than raised.
agent = initialize_agent(
    tools=tools,
    llm=hf_pipe,
    agent=AgentType.CHAT_ZERO_SHOT_REACT_DESCRIPTION,
    handle_parsing_errors=True,
    verbose=True,
)

In [None]:
import langchain  # kept so the `langchain` name stays available to other cells
from langchain.globals import set_debug

# math agent test, run with LangChain's global debug tracing switched on.
# set_debug() is the supported way to toggle the flag; the try/finally makes
# sure tracing is switched off again even if the agent call raises.
set_debug(True)
try:
    agent("What is the 25% of 300?")
finally:
    set_debug(False)

In [None]:
# wikipedia agent test
# The question is built from adjacent string literals; each fragment ends with
# a space so the pieces do not run together (the backslash-continued original
# produced "(CMU)what book did he write?").
question = (
    "Tom M. Mitchell is an American computer scientist "
    "and the Founders University Professor at Carnegie Mellon University (CMU) "
    "what book did he write?"
)
result = agent(question)

In [None]:
# python agent
# create_python_agent expects a language model, so it is given `hf_pipe` (the
# same model the tool agent uses), not the `llm` LLMChain wrapper.
# SECURITY: PythonREPLTool executes whatever code the model writes, in this
# kernel. Only run this cell in a disposable/sandboxed environment.
agent = create_python_agent(
    hf_pipe,
    tool=PythonREPLTool(),
    verbose=True
)

# input to python
customer_list = [
    ["Harrison", "Chase"],
    ["Lang", "Chain"],
    ["Dolly", "Too"],
    ["Elle", "Elem"],
    ["Geoff","Fusion"],
    ["Trance","Former"],
    ["Jen","Ayai"]
]

# Ask the agent to write and run the sorting code; the list is interpolated
# into the prompt via the f-string.
agent.run(
    f"""<s>[INST]<<SYS>>Sort these custormers by last name and then first name \
    and print the output: {customer_list}<</SYS>>[/INST]"""
)