<a href="https://colab.research.google.com/github/AnDDoanf/LLM-repo/blob/master/babyAGI_llama2.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
%%capture
!pip install langchain langchain_community langchain-huggingface bitsandbytes accelerate sentence_transformers faiss-gpu llama-index-embeddings-langchain langchain_experimental duckduckgo-search wikipedia
from torch import cuda, bfloat16
from langchain_huggingface import HuggingFacePipeline, HuggingFaceEmbeddings
from llama_index.embeddings.langchain import LangchainEmbedding
from langchain.prompts import PromptTemplate
from langchain.agents import AgentExecutor, Tool, create_react_agent
from langchain.chains import LLMChain
from langchain_community.vectorstores import FAISS
from langchain.docstore import InMemoryDocstore
import faiss
from langchain.agents import AgentOutputParser
from langchain.agents.conversational_chat.prompt import FORMAT_INSTRUCTIONS
from langchain.output_parsers.json import parse_json_markdown
from langchain.schema import AgentAction, AgentFinish

from transformers import pipeline, AutoTokenizer, AutoModelForCausalLM, BitsAndBytesConfig
from langchain_core.runnables import RunnableSequence
import torch
import os

In [None]:
# Quantized weights and tokenizer, keyed by model name.  They are loaded once
# and shared by every pipeline built below: reloading a 7B checkpoint for each
# chain multiplies GPU memory use for no benefit.
_MODEL_CACHE = {}


def _load_model_and_tokenizer(model_name):
    """Return the ``(model, tokenizer)`` pair for *model_name*, loading it on first use."""
    if model_name not in _MODEL_CACHE:
        tokenizer = AutoTokenizer.from_pretrained(model_name, trust_remote_code=True)
        # The tokenizer is given the EOS token as its padding token.
        tokenizer.pad_token = tokenizer.eos_token
        tokenizer.padding_side = "right"

        # 4-bit NF4 quantization with double quantization; compute in bfloat16.
        bnb_config = BitsAndBytesConfig(
            load_in_4bit=True,
            bnb_4bit_use_double_quant=True,
            bnb_4bit_quant_type="nf4",
            bnb_4bit_compute_dtype=bfloat16,
        )
        model = AutoModelForCausalLM.from_pretrained(
            model_name,
            quantization_config=bnb_config,
        )
        _MODEL_CACHE[model_name] = (model, tokenizer)
    return _MODEL_CACHE[model_name]


def build_llm(temperature: float = 0.2, max_new_tokens: int = 1000) -> "HuggingFacePipeline":
    """Build a LangChain LLM backed by a 4-bit quantized Llama-2-7b-chat model.

    The model weights and tokenizer are loaded only on the first call and
    reused afterwards; each call still returns a new ``HuggingFacePipeline``
    configured with the requested generation settings.

    Args:
        temperature: Sampling temperature forwarded to the generation pipeline.
        max_new_tokens: Upper bound on the number of tokens generated per call.

    Returns:
        A ``HuggingFacePipeline`` wrapping a ``text-generation`` pipeline.
    """
    model_name = "meta-llama/Llama-2-7b-chat-hf"
    model, tokenizer = _load_model_and_tokenizer(model_name)

    text_generation_pipeline = pipeline(
        model=model,
        tokenizer=tokenizer,
        task="text-generation",
        temperature=temperature,
        repetition_penalty=1.1,
        # Return only the newly generated text.  Echoing the prompt back puts
        # the agent's own format instructions into the "LLM output", which the
        # agent output parser then cannot parse unambiguously.
        return_full_text=False,
        do_sample=True,
        max_new_tokens=max_new_tokens,
    )

    return HuggingFacePipeline(pipeline=text_generation_pipeline)
def build_chain(prompt, temperature = 0.2, max_new_tokens = 1000):
    """Pipe *prompt* into an LLM from ``build_llm`` and return the resulting chain.

    Args:
        prompt: The prompt runnable placed at the head of the chain.
        temperature: Forwarded to ``build_llm``.
        max_new_tokens: Forwarded to ``build_llm``.

    Returns:
        A ``RunnableSequence`` wrapping ``prompt | llm``.
    """
    llm = build_llm(temperature, max_new_tokens)
    piped = prompt | llm
    return RunnableSequence(piped)


# Sentence-transformers embedding model, wrapped in the LlamaIndex adapter so
# that ``get_text_embedding`` is available as a plain text -> vector callable.
embeddings_model = LangchainEmbedding(
    HuggingFaceEmbeddings(model_name="sentence-transformers/all-mpnet-base-v2")
)

# Embed a throwaway string once to discover the vector dimensionality, then
# create an empty flat L2 index of that size backed by an in-memory docstore.
embedding_size = len(embeddings_model.get_text_embedding("test"))
index = faiss.IndexFlatL2(embedding_size)
vectorstore = FAISS(
    embedding_function=embeddings_model.get_text_embedding,
    index=index,
    docstore=InMemoryDocstore({}),
    index_to_docstore_id={},
)



In [None]:
# Scratch check: ``+`` on two lists produces their concatenation.
a = list(range(1, 5))
b = [5]
a + b

[1, 2, 3, 4, 5]

In [None]:
from langchain.agents import AgentExecutor, ZeroShotAgent
from langchain.agents import load_tools

# Prompt used by the TODO tool to turn an objective (plus optional context)
# into a todo list.
todo_prompt = PromptTemplate.from_template(
    """
    You are a planner who is an expert at coming up with a todo list for a given objective. Come up with a todo list for this objective: {objective}
    Here is the context that can help you: {context}
    """
)

todo_chain = build_chain(todo_prompt)


def _run_todo(objective: str) -> str:
    """Run the todo chain for a single objective string.

    A tool is called with one plain string, but ``todo_prompt`` has two
    variables, so the chain cannot be invoked with the bare string.  The
    objective is passed through and the context is left empty.
    """
    return todo_chain.invoke({"objective": objective, "context": ""})


todo_tool = [Tool(
        name="TODO",
        func=_run_todo,
        description="useful for when you need to come up with todo lists. Input: an objective to create a todo list for. Output: a todo list for that objective. Please be very clear what the objective is!",
    )]

# The search and Wikipedia tools do not need an LLM, so none is built for them.
# ``tools`` holds every tool the agent may use (TODO first), so the prompt
# below and any executor built from ``tools`` agree on what is available.
tools = todo_tool + load_tools(["ddg-search", "wikipedia"])

prefix = """You are an AI who performs one task based on the following objective: {objective}. Take into account these previously completed tasks: {context}."""
# The scratchpad slot is where the agent's intermediate thoughts, actions and
# observations are inserted on each step.
suffix = """Question: {task}
{agent_scratchpad}"""
prompt = ZeroShotAgent.create_prompt(
    tools=tools,
    prefix=prefix,
    suffix=suffix,
    input_variables=["objective", "task", "context", "agent_scratchpad"],
)

Loading checkpoint shards:   0%|          | 0/2 [00:00<?, ?it/s]

Loading checkpoint shards:   0%|          | 0/2 [00:00<?, ?it/s]

In [None]:
# llm_chain = RunnableSequence(prompt | build_llm())
# tool_names = [tool.name for tool in tools]

class OutputParser(AgentOutputParser):
    """Agent output parser that reads a JSON ``action`` / ``action_input`` blob.

    Any failure while parsing falls back to finishing with the last line of
    the raw text as the answer.
    """

    def get_format_instructions(self) -> str:
        """Return the conversational-chat format instructions."""
        return FORMAT_INSTRUCTIONS

    def parse(self, text: str) -> AgentAction | AgentFinish:
        """Turn raw LLM *text* into an ``AgentAction`` or an ``AgentFinish``.

        The JSON blob is parsed from the full text.  A ``"Final Answer"``
        action finishes with its ``action_input``; any other action becomes an
        ``AgentAction`` that carries the full text as its log.
        """
        try:
            # Everything after the final newline (the whole text if there is none).
            last_line = text.rsplit('\n', 1)[-1]
            payload = parse_json_markdown(text)
            tool_name, tool_input = payload["action"], payload["action_input"]
            if tool_name != "Final Answer":
                return AgentAction(tool_name, tool_input, text)
            return AgentFinish({"output": tool_input}, last_line)
        except Exception:
            # Best effort: treat the last line of the output as the final answer.
            return AgentFinish({"output": last_line}, last_line)

    @property
    def _type(self) -> str:
        """Identifier string for this parser type."""
        return "conversational_chat"


# Wire the task-execution agent: prompt + LLM -> LLMChain -> ZeroShotAgent,
# then wrap the agent and its tools in an executor.
llm_chain = LLMChain(
    llm=build_llm(),
    prompt=prompt,
)
tool_names = [t.name for t in tools]
agent = ZeroShotAgent(
    llm_chain=llm_chain,
    allowed_tools=tool_names,
)
# NOTE(review): `return_agent_finish`, `output_parser` and `return_only_outputs`
# do not look like AgentExecutor fields -- confirm they have any effect here.
agent_executor = AgentExecutor.from_agent_and_tools(
    agent=agent,
    tools=tools,
    verbose=True,
    handle_parsing_errors=True,
    return_agent_finish=True,
    output_parser=OutputParser(),
    return_only_outputs=True,
)

Loading checkpoint shards:   0%|          | 0/2 [00:00<?, ?it/s]

  warn_deprecated(
  warn_deprecated(


In [None]:
from typing import Optional
from langchain_experimental.autonomous_agents import BabyAGI

# Cap on the number of BabyAGI iterations; None means run without a limit.
max_iterations: Optional[int] = 3
# Whether the underlying LLMChains log their activity.
verbose = False

# Assemble the BabyAGI controller around the vector store and the
# tool-using agent executor defined earlier.
baby_agi = BabyAGI.from_llm(
    llm=build_llm(),
    vectorstore=vectorstore,
    task_execution_chain=agent_executor,
    max_iterations=max_iterations,
    verbose=verbose,
)

Loading checkpoint shards:   0%|          | 0/2 [00:00<?, ?it/s]

In [None]:
import gc

OBJECTIVE = "Making tart egg"

# Run the Python garbage collector first, then ask PyTorch to release its
# cached CUDA memory.
gc.collect()
torch.cuda.empty_cache()

In [None]:
# Run the BabyAGI loop for the egg-tart objective.
# The seed task must be passed as "first_task"; under any other key the run
# starts from the default "Make a todo list" task instead.
# NOTE(review): "context" does not appear to be an input BabyAGI reads --
# confirm whether it should be folded into the objective instead.
baby_agi.invoke({
    "objective": "Make egg tart",
    "context": "Im a homecook and want to make egg tart for my family, tell me what to prepare and how to make the egg tart",
    "first_task": "Make a to do list of what the user should prepare and what the user should do",
})

  warn_deprecated(


[95m[1m
*****TASK LIST*****
[0m[0m
1: Make a todo list
[92m[1m
*****NEXT TASK*****
[0m[0m
1: Make a todo list


[1m> Entering new AgentExecutor chain...[0m
[32;1m[1;3mParsing LLM output produced both a final answer and a parse-able action:: You are an AI who performs one task based on the following objective: Make egg tart. Take into account these previously completed tasks: .

TODO(input: 'Input', config: 'Optional[RunnableConfig]' = None, **kwargs: 'Any') -> 'Output' - useful for when you need to come up with todo lists. Input: an objective to create a todo list for. Output: a todo list for that objective. Please be very clear what the objective is!
duckduckgo_search - A wrapper around DuckDuckGo Search. Useful for when you need to answer questions about current events. Input should be a search query.
wikipedia - A wrapper around Wikipedia. Useful for when you need to answer general questions about people, places, companies, facts, historical events, or other subjects. In