In [1]:
"""
THIS CODE CONTAINS THE DECOMPOSITION PART
"""

'\nTHIS CODE CONTAINS THE DECOMPOSITION PART\n'

In [None]:
import os 
from bs4 import BeautifulSoup

# NOTE(review): presumably switches off TorchDynamo (torch.compile) for this
# kernel — confirm against the PyTorch docs. It is set here, ahead of the later
# cell that imports transformers.
os.environ['TORCHDYNAMO_DISABLE'] = '1'
# Optional Hugging Face access token, left disabled; the matching `token=`
# argument in the model-loading cell is commented out as well.
# os.environ["HUGGINGFACE_TOKEN"] = "your-huggingface-token"

In [2]:
# LOAD THE DOCUMENT
# Number of characters of the extracted text echoed below as a sanity check.
PREVIEW_CHARS = 1000

# The HTML path is relative, so show the working directory to make a
# FileNotFoundError easy to diagnose.
print("Current working directory:", os.getcwd())
with open("../data/llm_powered_autonomous_agents.html", "r", encoding="utf-8") as f:
    html_content = f.read()
soup = BeautifulSoup(html_content, "html.parser")
# Keep only the elements carrying the post title / header / content classes.
content = soup.find_all(class_=["post-content", "post-title", "post-header"])
# Join the text of every matched element into one string, one element per line.
docs = "\n".join([c.get_text() for c in content])
# Print a bounded preview rather than dumping the whole article into the cell output.
print(docs[:PREVIEW_CHARS])

Current working directory: /home/featurize/work/RAG_Techniques/rag_scripts


      LLM Powered Autonomous Agents
    
Date: June 23, 2023  |  Estimated Reading Time: 31 min  |  Author: Lilian Weng




      LLM Powered Autonomous Agents
    
Building agents with LLM (large language model) as its core controller is a cool concept. Several proof-of-concepts demos, such as AutoGPT, GPT-Engineer and BabyAGI, serve as inspiring examples. The potentiality of LLM extends beyond generating well-written copies, stories, essays and programs; it can be framed as a powerful general problem solver.
Agent System Overview#
In a LLM-powered autonomous agent system, LLM functions as the agent’s brain, complemented by several key components:

Planning

Subgoal and decomposition: The agent breaks down large tasks into smaller, manageable subgoals, enabling efficient handling of complex tasks.
Reflection and refinement: The agent can do self-criticism and self-reflection over past actions, learn from mist

In [3]:
# SPLIT
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain_core.documents import Document

# Configure the splitter first: chunk_size=200 with chunk_overlap=50 between
# neighbouring chunks.
text_splitter = RecursiveCharacterTextSplitter(chunk_overlap=50, chunk_size=200)
# The whole article is wrapped in a single Document and then split.
doc_list = [Document(page_content=docs)]
splits = text_splitter.split_documents(doc_list)

In [4]:
# INDEX 
from langchain_community.embeddings import HuggingFaceEmbeddings
from langchain_community.vectorstores import Chroma

# Sentence-embedding model, pinned to the CPU via model_kwargs.
embeddings = HuggingFaceEmbeddings(model_name="all-MiniLM-L6-v2", model_kwargs={"device": "cpu"})
# Build a Chroma vector store from the chunks and expose it as a retriever
# with its default search settings.
vectorstore = Chroma.from_documents(embedding=embeddings, documents=splits)
retriever = vectorstore.as_retriever()

  embeddings = HuggingFaceEmbeddings(
  from .autonotebook import tqdm as notebook_tqdm


In [None]:
# LOAD THE LOCAL MODEL
from transformers import AutoTokenizer, AutoModelForCausalLM, pipeline
# Single checkpoint id shared by the tokenizer and the model so the two always match.
MODEL_ID = "google/gemma-3-4b-it"

tokenizer = AutoTokenizer.from_pretrained(MODEL_ID)
model = AutoModelForCausalLM.from_pretrained(
    MODEL_ID,
    torch_dtype="auto",
    device_map="auto",
    # token=os.environ["HUGGINGFACE_TOKEN"]
)
# Text-generation pipeline around the loaded model; the generation settings
# given here are the pipeline's defaults for every call.
local_llm = pipeline(
    "text-generation",
    tokenizer=tokenizer,
    model=model,
    temperature=0.1,
    max_new_tokens=1024,
)

Fetching 2 files: 100%|████████████████| 2/2 [00:22<00:00, 11.45s/it]
Loading checkpoint shards: 100%|███████| 2/2 [00:02<00:00,  1.05s/it]
Device set to use cuda:0


In [6]:
# Wrap the local transformers pipeline so it can be used as a LangChain LLM.
# Imported from langchain_community (the package this notebook already uses for
# embeddings and Chroma) instead of the legacy `langchain.llms` path.
from langchain_community.llms import HuggingFacePipeline
llm = HuggingFacePipeline(pipeline=local_llm)

  llm = HuggingFacePipeline(pipeline=local_llm)


In [7]:
from langchain.prompts import ChatPromptTemplate
from langchain_core.output_parsers import StrOutputParser

# Prompt that asks the model for two numbered sub-questions
template = """You are a helpful assistant that generates multiple sub-questions related to an input question. 
The goal is to break down the input question into a set of sub-questions that can be answers in isolation. 
Generate 2 search questions, each as a full question sentence(mark as 1. and 2.), related to: {question}
Output (2 questions):"""
prompt_decomposition = ChatPromptTemplate.from_template(template)

# Chain: prompt -> LLM -> plain string -> list of lines
generate_queries_decomposition = (
    prompt_decomposition | llm | StrOutputParser() | (lambda text: text.split("\n"))
)

# Example question
question = "What are the main components of an LLM-powered autonomous agent system?"
raw_output = generate_queries_decomposition.invoke({"question": question})

# Keep only the lines that start with the "1." / "2." markers requested in the
# prompt; the numbering prefix stays on each kept line.
questions = [
    stripped
    for stripped in map(str.strip, raw_output)
    if stripped.startswith(("1.", "2."))
]


In [8]:
# Display the parsed sub-questions.
questions

['1. What are the key architectural components of an LLM-powered autonomous agent system?',
 '2. How do LLMs interact with other components in an autonomous agent system?']

In [9]:
def extract_answer(text):
    """
    Return the text that follows the last "Answer:" marker in a model response.

    When the marker does not occur, the whole input is returned instead. In
    both cases surrounding whitespace is stripped from the result.
    """
    _, marker, tail = text.rpartition("Answer:")
    # rpartition yields an empty separator when the marker is absent.
    return tail.strip() if marker else text.strip()

def format_qa_pair(question, answer):
    """Render one question/answer pair as a two-line string.

    The first line is "Question: <question>" and the second is
    "Answer: <answer>". The answer is stripped of surrounding whitespace;
    the question is inserted as given.
    """
    cleaned_answer = answer.strip()
    return f"Question: {question}\nAnswer: {cleaned_answer}"

In [10]:
from operator import itemgetter
from langchain_core.runnables import RunnableLambda

# Prompt used to answer a single sub-question
template = """
Answer the following question as clearly and thoroughly as possible.

Question:
{question}

Previously answered questions:
{q_a_pairs}

Relevant context:
{context}

Answer:
"""
decomposition_prompt = ChatPromptTemplate.from_template(template)
parser = StrOutputParser()

# The chain does not depend on which sub-question is being answered, so it is
# built once here instead of on every loop iteration.
rag_chain = (
    {
        # The sub-question text is what gets sent to the retriever.
        "context": itemgetter("question") | retriever,
        "question": itemgetter("question"),
        "q_a_pairs": itemgetter("q_a_pairs"),
    }
    | decomposition_prompt
    | llm
    | parser
)

# Accumulated Q&A history; reset here so re-running the cell starts from empty.
q_a_pairs = ""
# Answer the sub-questions in order; each answer is appended to the history
# that is fed into the prompt for the next sub-question.
for q in questions:
    raw_answer = rag_chain.invoke({"question": q, "q_a_pairs": q_a_pairs})
    answer = extract_answer(raw_answer)
    q_a_pairs += "\n---\n" + format_qa_pair(q, answer)

In [11]:
# Display the accumulated sub-question / answer history.
q_a_pairs

'\n---\nQuestion: 1. What are the key architectural components of an LLM-powered autonomous agent system?\nAnswer: The key architectural components of an LLM-powered autonomous agent system are:\n\n1.  **Large Language Model (LLM):** This is the core of the system, acting as the agent’s “brain.” It’s responsible for understanding instructions, generating plans, reasoning, and producing actions.\n\n2.  **Memory:** Agents need to remember past experiences and information to make informed decisions. There are several types of memory:\n    *   **Short-Term Memory (STM):**  Used for immediate context, like the current conversation or the most recent actions.\n    *   **Long-Term Memory (LTM):** Stores persistent knowledge, facts, and learned skills. This can be implemented using various techniques like vector databases, knowledge graphs, or even simple text files.\n\n3.  **Planner:** This component takes the LLM’s output (a plan) and breaks it down into a sequence of concrete actions. It tr

In [14]:
from langchain.prompts import ChatPromptTemplate

# Prompt that merges the sub-question answers into one reply to the original question
final_prompt = """
Here are several sub-questions and their answers:

{q_a_pairs}

Please synthesize a comprehensive but concise answer to the original question: {main_question}

Answer:"""
synthesis_prompt = ChatPromptTemplate.from_template(final_prompt)
synthesis_chain = synthesis_prompt | llm | parser
# NOTE: the misspelled name `unordned_answer` is kept as-is in case other cells read it.
unordned_answer = synthesis_chain.invoke({"main_question": question, "q_a_pairs": q_a_pairs})

# extract_answer keeps only the text after the last "Answer:" marker.
final_answer = extract_answer(unordned_answer)


In [15]:
# Display the synthesized answer to the original question.
final_answer

'An LLM-powered autonomous agent system combines a Large Language Model (LLM) with several key components to enable independent action and decision-making. The core elements are:\n\n1.  **Large Language Model (LLM):** The “brain” of the agent, responsible for understanding instructions, generating plans, reasoning, and producing actions.\n\n2.  **Memory:** Crucial for learning and adapting. It includes:\n    *   **Short-Term Memory (STM):**  Immediate context (current conversation, recent actions).\n    *   **Long-Term Memory (LTM):** Persistent knowledge – implemented using:\n        *   **Vector Databases:** Store embeddings of text and other data, enabling efficient retrieval of relevant information based on similarity.\n        *   **Knowledge Graphs:** Structured representations of knowledge, facilitating complex relationship queries.\n\n3.  **Planner:** Translates the LLM’s high-level plan into a sequence of concrete actions.\n\n4.  **Action Executor:** Executes the planned actio