In [1]:
%pip install --upgrade  langchain langchain-community langchainhub gpt4all chromadb bs4

from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain_community.document_loaders import WebBaseLoader

Note: you may need to restart the kernel to use updated packages.


In [2]:
loader = WebBaseLoader("https://lilianweng.github.io/posts/2023-06-23-agent/")

print("loader downloaded")
data = loader.load()

text_splitter = RecursiveCharacterTextSplitter(chunk_size=500, chunk_overlap=0)
all_splits = text_splitter.split_documents(data)

loader downloaded


In [3]:
from langchain_community.embeddings import OllamaEmbeddings, GPT4AllEmbeddings
from langchain_community.vectorstores import Chroma

vectorstore = Chroma.from_documents(documents=all_splits, embedding=OllamaEmbeddings())

In [4]:
question = "What are the approaches to Task Decomposition?"
docs = vectorstore.similarity_search(question)
len(docs)
docs[0]

Document(page_content='Sensory Memory: This is the earliest stage of memory, providing the ability to retain impressions of sensory information (visual, auditory, etc) after the original stimuli have ended. Sensory memory typically only lasts for up to a few seconds. Subcategories include iconic memory (visual), echoic memory (auditory), and haptic memory (touch).', metadata={'description': 'Building agents with LLM (large language model) as its core controller is a cool concept. Several proof-of-concepts demos, such as AutoGPT, GPT-Engineer and BabyAGI, serve as inspiring examples. The potentiality of LLM extends beyond generating well-written copies, stories, essays and programs; it can be framed as a powerful general problem solver.\nAgent System Overview In a LLM-powered autonomous agent system, LLM functions as the agent’s brain, complemented by several key components:', 'language': 'en', 'source': 'https://lilianweng.github.io/posts/2023-06-23-agent/', 'title': "LLM Powered Auton

In [5]:
# %pip install --upgrade --quiet  llama-cpp-python

!CMAKE_ARGS="-DLLAMA_METAL=on" FORCE_CMAKE=1 /Users/rlm/miniforge3/envs/llama/bin/pip install -U llama-cpp-python --no-cache-dir

/bin/bash: /Users/rlm/miniforge3/envs/llama/bin/pip: No such file or directory


In [9]:
from langchain_community.llms import LlamaCpp, Ollama

n_gpu_layers = 1  # Metal set to 1 is enough.
n_batch = 512  # Should be between 1 and n_ctx, consider the amount of RAM of your Apple Silicon Chip.

# Make sure the model path is correct for your system!
# llm = LlamaCpp(
#     model_path="/Users/rlm/Desktop/Code/llama.cpp/models/llama-2-13b-chat.ggufv3.q4_0.bin",
#     n_gpu_layers=n_gpu_layers,
#     n_batch=n_batch,
#     n_ctx=2048,
#     f16_kv=True,  # MUST set to True, otherwise you will run into problem after a couple of calls
#     verbose=True,
# )
llm = Ollama(model="llama2")

llm.invoke("Simulate a rap battle between Stephen Colbert and John Oliver")

"[The stage is set for a rap battle between Stephen Colbert and John Oliver. The crowd is energized and ready to witness a epic battle of wits and rhymes. Both contestants take their places at the mic, and the battle begins.]\n\nStephen Colbert: Yo, John, it's on! I'm the king of satire,\nWith biting jokes that leave you in a state of dire.\nI'll take on your fake British accent any day,\nAnd show you how to really rap like a pro in every way.\n\nJohn Oliver: Hold up, Colbert, don't get too cocky,\nI may not have your experience, but I've got the flow.\nMy rhymes are tight and my delivery is smooth,\nAnd by the end of this battle, you'll know who's in the room.\n\nStephen Colbert: Oh, really? Well let me tell you something,\nI may not be as tall as you, John, but I've got the magic.\nMy rhymes are fire and my flow is sick,\nAnd when it comes to wit, I'm the one who's quick.\n\nJohn Oliver: You may have the witty words, Colbert, but I've got the skills,\nI can rap about anything, from p

In [11]:
from langchain_core.output_parsers import StrOutputParser
from langchain_core.prompts import PromptTemplate

# Prompt
prompt = PromptTemplate.from_template(
    "Summarize the main themes in these retrieved docs: {docs}"
)

# Chain
def format_docs(docs):
    return "\n\n".join(doc.page_content for doc in docs)


chain = {"docs": format_docs} | prompt | llm | StrOutputParser()

# Run
question = "What are the approaches to Task Decomposition?"
docs = vectorstore.similarity_search(question)
chain.invoke(docs)

'The main themes in these retrieved documents are:\n\n1. Sensory Memory: This theme is focused on the earliest stage of memory, which allows individuals to retain impressions of sensory information (visual, auditory, etc.) after the original stimuli have ended. Subcategories include iconic memory (visual), echoic memory (auditory), and haptic memory (touch).\n2. Tool Use: This theme encompasses the use of tools to augment large language models, such as ChemCrow, which allows for chemistry tools to be used with these models, and Scientific Discovery Agent, a generative agent simulation that demonstrates the potential for large language models to perform scientific discovery tasks. Other proof-of-concept examples include Generative Agents Simulation and Emergent Autonomous Scientific Research Capabilities of Large Language Models.\n3. Component Three: Tool Use - Case Studies, Challenges, Citation, and References. This section provides detailed information on the tool use aspect of the ar

In [18]:
from langchain import hub

rag_prompt = hub.pull("rlm/rag-prompt")
rag_prompt.messages

[HumanMessagePromptTemplate(prompt=PromptTemplate(input_variables=['context', 'question'], template="You are an assistant for question-answering tasks. Use the following pieces of retrieved context to answer the question. If you don't know the answer, just say that you don't know. Use three sentences maximum and keep the answer concise.\nQuestion: {question} \nContext: {context} \nAnswer:"))]

In [19]:
from langchain_core.runnables import RunnablePassthrough, RunnablePick

# Chain
chain = (
    RunnablePassthrough.assign(context=RunnablePick("context") | format_docs)
    | rag_prompt
    | llm
    | StrOutputParser()
)

# Run
chain.invoke({"context": docs, "question": question})

'Based on the provided context, there are several approaches to task decomposition in AI research. These include:\n\n1. Generative Agents Simulation: This approach involves using large language models (LLMs) to simulate scientific discovery and proof-of-concept examples. Tools such as API-Bank and HuggingGPT have been developed to augment LLMs with chemistry tools and solve AI tasks, respectively.\n2. Scientific Discovery Agent: This approach involves using LLMs to perform scientific discovery and research. Tools such as ChemCrow have been developed to augment LLMs with chemistry tools for emergent autonomous scientific research capabilities.\n3. Proof-of-Concept Examples: This approach involves using LLMs to demonstrate the feasibility of certain AI tasks or applications. For example, Boiko et al. (2023) demonstrated the ability of large language models to perform scientific research independently.\n\nIt is important to note that these approaches are not mutually exclusive, and they c

In [20]:
# Prompt
rag_prompt_llama = hub.pull("rlm/rag-prompt-llama")
rag_prompt_llama.messages

[HumanMessagePromptTemplate(prompt=PromptTemplate(input_variables=['context', 'question'], template="[INST]<<SYS>> You are an assistant for question-answering tasks. Use the following pieces of retrieved context to answer the question. If you don't know the answer, just say that you don't know. Use three sentences maximum and keep the answer concise.<</SYS>> \nQuestion: {question} \nContext: {context} \nAnswer: [/INST]"))]

In [22]:
# Chain
chain = (
    RunnablePassthrough.assign(context=RunnablePick("context") | format_docs)
    | rag_prompt_llama
    | llm
    | StrOutputParser()
)

# Run
print(question)
chain.invoke({"context": docs, "question": question})

What are the approaches to Task Decomposition?


'Task decomposition is the process of breaking down a complex task into smaller, more manageable subtasks. There are several approaches to task decomposition, including:\n\n1. Functional decomposition: This involves breaking down a task into its individual functions or subtasks, each with a specific function or outcome.\n2. Component decomposition: This involves breaking down a task into its component parts or modules, each with a specific function or role in the overall task.\n3. Workflow decomposition: This involves breaking down a task into a series of workflows or processes, each with a specific step or action.\n4. Goal-based decomposition: This involves breaking down a task into smaller subtasks that are focused on achieving a specific goal or outcome.\n5. Task segmentation: This involves breaking down a task into smaller time segments, each with a specific duration or focus.\n\nThese approaches can be used alone or in combination to effectively break down a complex task into mana

In [23]:
retriever = vectorstore.as_retriever()
qa_chain = (
    {"context": retriever | format_docs, "question": RunnablePassthrough()}
    | rag_prompt
    | llm
    | StrOutputParser()
)

In [25]:
question = "Whhat happens at CMU on 24th August every year"
print(question)
qa_chain.invoke(question)

Whhat happens at CMU on 24th August every year


'Based on the provided context, it seems that every year on August 24th, there is an event or activity happening at Carnegie Mellon University (CMU) related to the integration of reasoning and acting within large language models (LLMs). The event is called "ReAct," which stands for "Reasoning and Acting in LLMs."\n\nAccording to the context, ReAct integrates reasoning and acting within LLMs by extending the action space to be a combination of task-specific discrete actions and the language space. This allows LLMs to interact with the environment (e.g., use Wikipedia search API) and generate reasoning traces in natural language. The event may involve presentations, workshops, or other activities focused on the development and application of these technologies.\n\nUnfortunately, I cannot provide more information about the specific event happening on August 24th at CMU without further context or details.'