In [1]:
# Split: load the source web page and cut it into chunks for embedding.
from langchain.document_loaders import WebBaseLoader
from langchain.text_splitter import RecursiveCharacterTextSplitter

# Load the page at this URL; the loaded documents are kept in `data`.
loader = WebBaseLoader("https://lilianweng.github.io/posts/2023-06-23-agent/")
data = loader.load()

# Splitter configured with chunk_size=500 and no overlap between chunks.
text_splitter = RecursiveCharacterTextSplitter(
    chunk_overlap=0,
    chunk_size=500,
)
all_splits = text_splitter.split_documents(data)

In [2]:
# Embedding: index every chunk from `all_splits` in a Chroma vector store.
from langchain.embeddings import GPT4AllEmbeddings
from langchain.vectorstores import Chroma

# Build the store in one call, handing it the chunks and the embedding model.
vectorstore = Chroma.from_documents(
    documents=all_splits,
    embedding=GPT4AllEmbeddings(),
)

Found model file at  /home/zk/.cache/gpt4all/ggml-all-MiniLM-L6-v2-f16.bin


In [3]:
# Instantiate the large language model from a local GPT4All weights file.
# NOTE(review): the output recorded for this cell reports "./models/..." while
# the path below is "../models/..." -- confirm which location is the right one.
from langchain.llms import GPT4All

llm = GPT4All(
    model="../models/nous-hermes-13b.ggmlv3.q4_0.bin",
    max_tokens=2048,
    verbose=True,
    allow_download=True,
)

Found model file at  ./models/nous-hermes-13b.ggmlv3.q4_0.bin


llama.cpp: loading model from ./models/nous-hermes-13b.ggmlv3.q4_0.bin
llama_model_load_internal: format     = ggjt v3 (latest)
llama_model_load_internal: n_vocab    = 32001
llama_model_load_internal: n_ctx      = 2048
llama_model_load_internal: n_embd     = 5120
llama_model_load_internal: n_mult     = 256
llama_model_load_internal: n_head     = 40
llama_model_load_internal: n_layer    = 40
llama_model_load_internal: n_rot      = 128
llama_model_load_internal: ftype      = 2 (mostly Q4_0)
llama_model_load_internal: n_ff       = 13824
llama_model_load_internal: n_parts    = 1
llama_model_load_internal: model size = 13B
llama_model_load_internal: ggml ctx size =    0.09 MB
llama_model_load_internal: mem required  = 9031.71 MB (+ 1608.00 MB per state)
llama_new_context_with_model: kv self size  = 1600.00 MB


## LLMChain

In [4]:
from langchain import PromptTemplate, LLMChain

# Prompt: a single {docs} slot that receives the text of the retrieved chunks.
prompt = PromptTemplate.from_template(
    "Summarize the main themes in these retrieved docs: {docs}"
)

# Chain: the prompt above followed by the local LLM.
llm_chain = LLMChain(llm=llm, prompt=prompt)

# Run: fetch the chunks most similar to the question.
question = "What are the approaches to Task Decomposition?"
docs = vectorstore.similarity_search(question)

# Pass only the text of each chunk, keyed by the prompt variable name.
# Handing the chain the Document list itself fills {docs} with the Python repr
# of that list (metadata included) instead of readable text for the model.
docs_text = "\n\n".join(doc.page_content for doc in docs)
llm_chain_result = llm_chain({"docs": docs_text})

# Output: the generated summary is stored under the "text" key.
llm_chain_result["text"]

''

## load_qa_chain

In [5]:
from langchain.chains.question_answering import load_qa_chain

# Prompt: custom QA instructions with {context} and {question} placeholders.
template = """Use the following pieces of context to answer the question at the end. 
If you don't know the answer, just say that you don't know, don't try to make up an answer. 
Keep the answer as concise as possible. 
Always say "thanks for asking!" at the end of the answer. 
{context}
Question: {question}
Helpful Answer:"""
QA_CHAIN_PROMPT = PromptTemplate(
    template=template,
    input_variables=["context", "question"],
)

# Chain: a "stuff"-type QA chain driven by the custom prompt.
qa_chain = load_qa_chain(
    llm,
    chain_type="stuff",
    prompt=QA_CHAIN_PROMPT,
)

# Retrieve the chunks most similar to the question from the vector store.
question = "What are the approaches to Task Decomposition?"
docs = vectorstore.similarity_search(question)

# Run the chain on the retrieved chunks, asking for the outputs only.
qa_inputs = {"input_documents": docs, "question": question}
qa_chain_result = qa_chain(qa_inputs, return_only_outputs=True)

print(qa_chain_result['output_text'])

 There are three main approaches to task decomposition: (1) using language model prompts with simple instructions like "Steps for XYZ.\n1.", (2) using task-specific instructions, such as "Write a story outline." for writing a novel, or (3) combining human inputs. However, challenges remain in long-term planning and adjusting plans when faced with unexpected errors, making LLMs less robust compared to humans who learn from trial and error during execution.


## RetrievalQA

In [6]:
from langchain.chains import RetrievalQA

# Use the vector store as the retriever, so the chain fetches its own context
# instead of being handed pre-retrieved documents.
retriever = vectorstore.as_retriever()

# Same custom QA prompt as before, forwarded through chain_type_kwargs.
retrieval_qa = RetrievalQA.from_chain_type(
    llm=llm,
    retriever=retriever,
    chain_type_kwargs={"prompt": QA_CHAIN_PROMPT},
)

# Ask the question; the returned dict is displayed as the cell's output.
retrieval_qa({"query": question})

{'query': 'What are the approaches to Task Decomposition?',
 'result': ' There are three main approaches to task decomposition: (1) using language model prompts with simple instructions like "Steps for XYZ.\\n1.", (2) using task-specific instructions, such as "Write a story outline." for writing a novel, or (3) combining human inputs. However, challenges remain in long-term planning and adjusting plans when faced with unexpected errors, making LLMs less robust compared to humans who learn from trial and error during execution.'}