Load the Hugging Face API token

In [1]:
import os
# Read the Hugging Face API token from a key file kept outside the notebook,
# so the secret itself is never hardcoded in a cell.
# The `with` block guarantees the file handle is closed after reading.
with open("../credentials/huggingface.key.txt") as key_file:
    # Remove every newline and space so stray whitespace in the key file
    # cannot end up inside the token.
    HF_API_TOKEN = key_file.read().replace("\n", "").replace(" ", "")

# Export the token through the environment as well.
# NOTE(review): presumably this is how wrappers created later without an
# explicit token argument pick it up — confirm against the library docs.
os.environ["HUGGINGFACEHUB_API_TOKEN"] = HF_API_TOKEN

Set up the Inference API for Flan-UL2 and Flan-T5-XXL

In [None]:
from huggingface_hub.inference_api import InferenceApi

# Inference API handle for Flan-UL2 (20B), authenticated with the loaded token.
inference_flan_ul2 = InferenceApi(
    repo_id="google/flan-ul2",
    token=HF_API_TOKEN,
)

# Inference API handle for Flan-T5-XXL (11B), authenticated with the same token.
inference_flan_t5_xxl = InferenceApi(
    repo_id="google/flan-t5-xxl",
    token=HF_API_TOKEN,
)

## Question Answering from Context

In [2]:
from langchain.document_loaders import UnstructuredFileLoader

In [3]:
# Sample text 1: installation instructions for the `unstructured` package and
# the system dependencies it lists.
doc1 = """
Install the Python SDK with pip install "unstructured[local-inference]" - If you do not need to process PDFs or images, you can run pip install unstructured
Install the following system dependencies if they are not already available on your system. Depending on what document types you're parsing, you may not need all of these.
libmagic-dev (filetype detection)
poppler-utils (images and PDFs)
tesseract-ocr (images and PDFs)
libreoffice (MS Office docs)
If you are parsing PDFs, run the following to install the detectron2 model, which unstructured uses for layout detection:
pip install "detectron2@git+https://github.com/facebookresearch/detectron2.git@e2ce8dc#egg=detectron2"
"""

# Sample text 2: a short note on stateless Chains/Agents and the "Memory" concept.
doc2 = """
By default, Chains and Agents are stateless, meaning that they treat each incoming query independently.
In some applications (chatbots being a GREAT example) it is highly important to remember previous interactions, both at a short term but also at a long term level.
The concept of "Memory" exists to do exactly that.
"""

# Sample text 3: Chroma collection naming restrictions. The questions asked
# later in the notebook about "name" rules target this text.
doc3 = """
Chroma lets you manage collections of embeddings, using the collection primitive.

Creating, inspecting, and deleting Collections
Chroma uses collection names in the url, so there are a few restrictions on naming them:

The length of the name must be between 3 and 63 characters.
The name must start and end with a lowercase letter or a digit, and it can contain dots, dashes, and underscores in between.
The name must not contain two consecutive dots.
The name must not be a valid IP address.
"""

# Combine the three sample texts into one document, joined by "\n\n".
doc = "\n\n".join((doc1, doc2, doc3))

# Write the combined text to disk so it can be loaded back as a file.
with open("doc.txt", "w") as out_file:
    out_file.write(doc)

In [4]:
# Load the text file written by the previous cell; `docs` holds whatever the
# loader returns (the next cell displays it as a list of Document objects).
loader = UnstructuredFileLoader("doc.txt")
docs = loader.load()

In [5]:
# Display the loaded documents to inspect what the loader produced.
docs

[Document(page_content='Install the Python SDK with pip install "unstructured[local-inference]" - If you do not need to process PDFs or images, you can run pip install unstructured Install the following system dependencies if they are not already available on your system. Depending on what document types you\'re parsing, you may not need all of these. libmagic-dev (filetype detection) poppler-utils (images and PDFs) tesseract-ocr (images and PDFs) libreoffice (MS Office docs) If you are parsing PDFs, run the following to install the detectron2 model, which unstructured uses for layout detection: pip install "detectron2@git+https://github.com/facebookresearch/detectron2.git@e2ce8dc#egg=detectron2"\n\nBy default, Chains and Agents are stateless, meaning that they treat each incoming query independently. In some applications (chatbots being a GREAT example) it is highly important to remember previous interactions, both at a short term but also at a long term level. The concept of "Memory"

In [6]:
from langchain.llms import HuggingFaceHub
from langchain.embeddings import HuggingFaceHubEmbeddings
from langchain.vectorstores import Chroma
from langchain.text_splitter import CharacterTextSplitter
from langchain.chains.qa_with_sources import load_qa_with_sources_chain
from langchain.chains import VectorDBQA

In [7]:
# LLM wrapper around google/flan-ul2 with a low sampling temperature and a
# cap of 256 newly generated tokens per call.
flan_ul2 = HuggingFaceHub(
    repo_id="google/flan-ul2",
    model_kwargs={"temperature": 0.1, "max_new_tokens": 256},
)

  from .autonotebook import tqdm as notebook_tqdm


Perform a document similarity search over embeddings generated with the Hugging Face Hub and stored in a Chroma vector store

In [10]:
# Split the loaded document's text into chunks.
# NOTE: the recorded output warns that some chunks (680 and 318 characters)
# exceed the requested chunk_size of 150.
# NOTE: `docs` must still be the loader output here. Later query cells rebind
# `docs` to similarity-search results, so run this cell before them.
text_splitter = CharacterTextSplitter(chunk_size=150, chunk_overlap=0)
texts = text_splitter.split_text(docs[0].page_content)

# Embed every chunk and index it in Chroma, tagging each chunk with its
# position (as a string) under the "source" metadata key.
embeddings = HuggingFaceHubEmbeddings()
docsearch = Chroma.from_texts(
    texts,
    embeddings,
    metadatas=[{"source": str(idx)} for idx, _ in enumerate(texts)],
)

Created a chunk of size 680, which is longer than the specified 150
Created a chunk of size 318, which is longer than the specified 150
You're using a different task than the one specified in the repository. Be sure to know what you're doing :)
Using embedded DuckDB without persistence: data will be transient


In [13]:
# Build a question-answering-with-sources chain on top of the Flan-UL2 LLM,
# using the "refine" chain type.
chain = load_qa_with_sources_chain(
    flan_ul2,
    chain_type="refine",
)

In [14]:
%%time
# Fetch the indexed chunks that match the question, then run the QA chain
# over them; the chain's result is the cell output.
question = "A name cannot be ________?"
docs = docsearch.similarity_search(question)
chain(dict(input_documents=docs, question=question))

CPU times: user 251 ms, sys: 898 µs, total: 252 ms
Wall time: 5.8 s


{'input_documents': [Document(page_content='The length of the name must be between 3 and 63 characters. The name must start and end with a lowercase letter or a digit, and it can contain dots, dashes, and underscores in between. The name must not contain two consecutive dots. The name must not be a valid IP address.', metadata={'source': '4'}),
  Document(page_content='Creating, inspecting, and deleting Collections Chroma uses collection names in the url, so there are a few restrictions on naming them:', metadata={'source': '3'}),
  Document(page_content='By default, Chains and Agents are stateless, meaning that they treat each incoming query independently. In some applications (chatbots being a GREAT example) it is highly important to remember previous interactions, both at a short term but also at a long term level. The concept of "Memory" exists to do exactly that.', metadata={'source': '1'}),
  Document(page_content='Chroma lets you manage collections of embeddings, using the colle

> "What are the rules for a name?"

In [15]:
%%time
# Fetch the indexed chunks that match the question, then run the QA chain
# over them; the chain's result is the cell output.
question = "What is the detectron repo?"
docs = docsearch.similarity_search(question)
chain(dict(input_documents=docs, question=question))

CPU times: user 256 ms, sys: 9.01 ms, total: 265 ms
Wall time: 5.88 s


{'input_documents': [Document(page_content='Creating, inspecting, and deleting Collections Chroma uses collection names in the url, so there are a few restrictions on naming them:', metadata={'source': '3'}),
  Document(page_content='Install the Python SDK with pip install "unstructured[local-inference]" - If you do not need to process PDFs or images, you can run pip install unstructured Install the following system dependencies if they are not already available on your system. Depending on what document types you\'re parsing, you may not need all of these. libmagic-dev (filetype detection) poppler-utils (images and PDFs) tesseract-ocr (images and PDFs) libreoffice (MS Office docs) If you are parsing PDFs, run the following to install the detectron2 model, which unstructured uses for layout detection: pip install "detectron2@git+https://github.com/facebookresearch/detectron2.git@e2ce8dc#egg=detectron2"', metadata={'source': '0'}),
  Document(page_content='Chroma lets you manage collect

In [None]:
%%time
# Fetch the indexed chunks that match the question, then run the QA chain
# over them; the chain's result is the cell output.
question = "What are the rules for a name?"
docs = docsearch.similarity_search(question)
chain(dict(input_documents=docs, question=question))

# Prompts

In [17]:
from langchain.prompts import PromptTemplate

# Custom prompt text. It exposes two placeholders: {question} for the user's
# question and {summaries} for the retrieved document text.
template = """
Given the following extracted parts of a long document and a question, create a final answer.
If you don't know the answer, just say that you "don't know". Don't try to make up an answer.

Respond in English and Order and fix typos.

QUESTION: {question}
=========
{summaries}
=========

FINAL ANSWER IN English."""

# Wrap the text in a PromptTemplate, declaring the two placeholders it uses.
Prompt = PromptTemplate(
    input_variables=["summaries", "question"],
    template=template,
)

In [19]:
# Rebuild the QA-with-sources chain, this time with the "stuff" chain type
# and the custom prompt defined above.
chain = load_qa_with_sources_chain(
    flan_ul2,
    chain_type="stuff",
    prompt=Prompt,
)

In [20]:
%%time
# Ask a question the indexed text cannot answer, to see how the chain
# responds: fetch matching chunks, then run the QA chain over them.
question = "What is today's date?"
docs = docsearch.similarity_search(question)
chain(dict(input_documents=docs, question=question))

CPU times: user 102 ms, sys: 96 µs, total: 102 ms
Wall time: 2.6 s


{'input_documents': [Document(page_content='The length of the name must be between 3 and 63 characters. The name must start and end with a lowercase letter or a digit, and it can contain dots, dashes, and underscores in between. The name must not contain two consecutive dots. The name must not be a valid IP address.', metadata={'source': '4'}),
  Document(page_content='By default, Chains and Agents are stateless, meaning that they treat each incoming query independently. In some applications (chatbots being a GREAT example) it is highly important to remember previous interactions, both at a short term but also at a long term level. The concept of "Memory" exists to do exactly that.', metadata={'source': '1'}),
  Document(page_content='Creating, inspecting, and deleting Collections Chroma uses collection names in the url, so there are a few restrictions on naming them:', metadata={'source': '3'}),
  Document(page_content='Install the Python SDK with pip install "unstructured[local-infer