In [None]:
import getpass
import os

import bs4
import tiktoken
from langchain import hub
from langchain.chat_models import ChatOpenAI
from langchain.document_loaders import WebBaseLoader
from langchain.embeddings.huggingface import HuggingFaceEmbeddings
from langchain.llms import HuggingFacePipeline
from langchain.prompts import ChatPromptTemplate
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain.vectorstores import Chroma
from langchain_core.output_parsers import StrOutputParser
from langchain_core.runnables import RunnablePassthrough
from transformers import pipeline

In [None]:
# Configure LangSmith tracing and collect the API credentials.
# Secrets must never be written into the notebook itself: a value that is
# already exported in the environment is reused, otherwise the user is
# prompted for it (getpass does not echo the input into the cell output).
# NOTE(review): any key that was previously committed in this cell should be
# treated as compromised and rotated.
os.environ['LANGCHAIN_TRACING_V2'] = 'true'
os.environ['LANGCHAIN_ENDPOINT'] = 'https://api.smith.langchain.com'

for secret_name in ('LANGCHAIN_API_KEY', 'HUGGINGFACE_API_KEY'):
    # An empty string counts as "not set" so a blank export still prompts.
    if not os.environ.get(secret_name):
        os.environ[secret_name] = getpass.getpass(f"Enter {secret_name}: ")

In [None]:
#### INDEXING ####
# Download the blog post; only elements carrying one of the listed CSS
# classes are handed to the HTML parser.
article_urls = ("https://lilianweng.github.io/posts/2023-06-23-agent/",)
content_filter = bs4.SoupStrainer(
    class_=("post-content", "post-title", "post-header")
)
loader = WebBaseLoader(
    web_paths=article_urls,
    bs_kwargs={"parse_only": content_filter},
)
docs = loader.load()

In [None]:
# Cut the loaded pages into overlapping chunks.
# Only the first two documents are split, to keep the initial run small.
text_splitter = RecursiveCharacterTextSplitter(chunk_overlap=100, chunk_size=500)
initial_docs = docs[:2]
splits = text_splitter.split_documents(initial_docs)

# Hugging Face sentence-transformers model that embeds each chunk
model_name = "sentence-transformers/all-MiniLM-L6-v2"
embedding_function = HuggingFaceEmbeddings(model_name=model_name)

# Embed the chunks and index them in a Chroma vector store
vectorstore = Chroma.from_documents(
    embedding=embedding_function,
    documents=splits,
)

# Expose the store as a retriever that returns the single best match (k=1)
retriever = vectorstore.as_retriever(search_kwargs=dict(k=1))
print("Retriever created successfully!")

  embedding_function = HuggingFaceEmbeddings(model_name=model_name)


Retriever created successfully!


In [None]:
### RETRIEVAL ###
# Toy document/query pair used by the token-counting example below
document: str = "My favorite pet is a cat."
question: str = "What kinds of pets do I like?"

# Token counter backed by tiktoken
def num_tokens_from_string(string: str, encoding_name: str) -> int:
    """Count the tokens that `string` produces under a tiktoken encoding.

    Args:
        string: Text to tokenize.
        encoding_name: Name of the tiktoken encoding to look up.

    Returns:
        The length of the encoded token sequence.
    """
    return len(tiktoken.get_encoding(encoding_name).encode(string))

# Count the tokens of the sample question under the cl100k_base encoding
tokens = num_tokens_from_string(string=question, encoding_name="cl100k_base")
print(f"Number of tokens in the question: {tokens}")

Number of tokens in the question: 8


In [None]:
### GENERATION ###
# The query is defined once so retrieval and generation cannot drift apart.
generation_question = "What is Task Decomposition?"

# Retrieve relevant documents based on the query. The retriever is a Runnable,
# so `invoke` is used instead of the deprecated `get_relevant_documents`.
retrieved_docs = retriever.invoke(generation_question)
if not retrieved_docs:
    # Fail with a clear message instead of an IndexError further down.
    raise ValueError("The retriever returned no documents for the query.")

# Prompt template for the LLM
template = """Answer the question based only on the following context:
{context}

Question: {question}
"""
prompt = ChatPromptTemplate.from_template(template)

# Initialize the Hugging Face model for text generation.
# `max_new_tokens` limits only the generated continuation. The previous
# `max_length` limit also counted the prompt tokens, so a long retrieved
# chunk could leave little or no room for an answer.
generation_pipeline = pipeline(
    "text-generation",
    model="gpt2",
    max_new_tokens=128,
    pad_token_id=50256,  # pad with the end-of-sequence token id
)
llm = HuggingFacePipeline(pipeline=generation_pipeline)

# Create a chain combining the prompt and LLM
chain = prompt | llm

# Run the chain with the top retrieved document and the question
response = chain.invoke({
    "context": retrieved_docs[0].page_content,
    "question": generation_question,
})
print(response)

Hardware accelerator e.g. GPU is available in the environment, but no `device` argument is passed to the `Pipeline` object. Model will be on CPU.
Truncation was not explicitly activated but `max_length` is provided a specific value, please use `truncation=True` to explicitly truncate examples to max length. Defaulting to 'longest_first' truncation strategy. If you encode pairs of sequences (GLUE-style) with the tokenizer you can select this strategy more precisely by providing a specific strategy to `truncation`.


Human: Answer the question based only on the following context:
Task decomposition can be done (1) by LLM with simple prompting like "Steps for XYZ.\n1.", "What are the subgoals for achieving XYZ?", (2) by using task-specific instructions; e.g. "Write a story outline." for writing a novel, or (3) with human inputs.

Question: What is Task Decomposition?

Answer:

Task decomposition is the transformation of tasks into tasks which they do, e.g. using task-specific instructions (for example: when a task in memory completes, it would complete the task that was executed earlier in the process in order to get to the next point in task). The term task-convergence is typically used from the perspective of human programmers (e.g. "Where do I start?") In human programming, every task in memory is equivalent to a task


In [None]:
### RAG CHAIN ###
# Pull a RAG prompt template from LangChain's hub (Optional).
# It is kept for reference only; the chain below uses the local `prompt`.
prompt_hub_rag = hub.pull("rlm/rag-prompt")


def format_docs(docs):
    """Join the text of retrieved documents into a single context string.

    Args:
        docs: Iterable of objects exposing a ``page_content`` string.

    Returns:
        The page contents separated by blank lines ("" for no documents).
    """
    return "\n\n".join(doc.page_content for doc in docs)


# Create a RAG chain combining retriever, prompt, and LLM.
# The retriever output is piped through `format_docs` so the prompt receives
# plain text rather than the repr of a list of Document objects.
rag_chain = (
    {"context": retriever | format_docs, "question": RunnablePassthrough()}
    | prompt
    | llm
    | StrOutputParser()
)

# Run the RAG chain
rag_response = rag_chain.invoke("What is Task Decomposition?")
print(rag_response)

  prompt = loads(json.dumps(prompt_object.manifest))


Human: Answer the question based only on the following context:
[Document(metadata={'source': 'https://lilianweng.github.io/posts/2023-06-23-agent/'}, page_content='Task decomposition can be done (1) by LLM with simple prompting like "Steps for XYZ.\\n1.", "What are the subgoals for achieving XYZ?", (2) by using task-specific instructions; e.g. "Write a story outline." for writing a novel, or (3) with human inputs.')]

Question: What is Task Decomposition?

Why are task decompositions performed? (1)

The reason task decompositions are implemented (for each task) is that every time a task is completed, the processor writes to the local file system (that's what a file can do) the steps, and this will automatically be removed. (2
