In [1]:
! pip install langchain openai chromadb tiktoken pypdf panel

Collecting langchain
  Downloading langchain-0.0.352-py3-none-any.whl.metadata (13 kB)
Collecting openai
  Downloading openai-1.6.0-py3-none-any.whl.metadata (17 kB)
Collecting chromadb
  Downloading chromadb-0.4.21-py3-none-any.whl.metadata (7.3 kB)
Collecting tiktoken
  Downloading tiktoken-0.5.2-cp311-cp311-macosx_10_9_x86_64.whl.metadata (6.6 kB)
Collecting pypdf
  Downloading pypdf-3.17.3-py3-none-any.whl.metadata (7.5 kB)
Collecting panel
  Downloading panel-1.3.6-py2.py3-none-any.whl.metadata (24 kB)
Collecting PyYAML>=5.3 (from langchain)
  Downloading PyYAML-6.0.1-cp311-cp311-macosx_10_9_x86_64.whl.metadata (2.1 kB)
Collecting SQLAlchemy<3,>=1.4 (from langchain)
  Downloading SQLAlchemy-2.0.23-cp311-cp311-macosx_10_9_x86_64.whl.metadata (9.6 kB)
Collecting aiohttp<4.0.0,>=3.8.3 (from langchain)
  Downloading aiohttp-3.9.1-cp311-cp311-macosx_10_9_x86_64.whl.metadata (7.4 kB)
Collecting dataclasses-json<0.7,>=0.5.7 (from langchain)
  Downloading dataclasses_json-0.6.3-py3-none-a

In [20]:
from langchain.chains import RetrievalQA
from langchain.llms import OpenAI
from langchain.document_loaders import TextLoader
from langchain.document_loaders import PyPDFLoader
from langchain.indexes import VectorstoreIndexCreator
from langchain.text_splitter import CharacterTextSplitter
from langchain.embeddings import OpenAIEmbeddings
from langchain.vectorstores import Chroma
from langchain.chains import ConversationalRetrievalChain


In [23]:
import getpass
import os

# Ask for the key only when it is missing (or empty) so that "Restart & Run All"
# does not block on input in an environment that already provides it.
# getpass hides the typed value, keeping the key out of the saved notebook.
if not os.environ.get("OPENAI_API_KEY"):
    os.environ["OPENAI_API_KEY"] = getpass.getpass("OpenAI API key: ")

In [30]:
# read the PDF from disk; the recorded output shows one Document per page,
# each carrying 'source' and 'page' metadata
pdf_path = "sample.pdf"
loader = PyPDFLoader(pdf_path)
documents = loader.load()
documents

[Document(page_content=' A Simple PDF File \n This is a small demonstration .pdf file - \n just for use in the Virtual Mechanics tutorials. More text. And more \n text. And more text. And more text. And more text. \n And more text. And more text. And more text. And more text. And more \n text. And more text. Boring, zzzzz. And more text. And more text. And \n more text. And more text. And more text. And more text. And more text. \n And more text. And more text. \n And more text. And more text. And more text. And more text. And more \n text. And more text. And more text. Even more. Continued on page 2 ...', metadata={'source': 'sample.pdf', 'page': 0}),
 Document(page_content=' Simple PDF File 2 \n ...continued from page 1. Yet more text. And more text. And more text. \n And more text. And more text. And more text. And more text. And more \n text. Oh, how boring typing this stuff. But not as boring as watching \n paint dry. And more text. And more text. And more text. And more text. \n 

In [31]:
# cut the loaded pages into chunks for embedding
# (chunk_size=1000, no overlap between neighbouring chunks)
text_splitter = CharacterTextSplitter(
    chunk_overlap=0,
    chunk_size=1000,
)
texts = text_splitter.split_documents(documents)
texts

[Document(page_content='A Simple PDF File \n This is a small demonstration .pdf file - \n just for use in the Virtual Mechanics tutorials. More text. And more \n text. And more text. And more text. And more text. \n And more text. And more text. And more text. And more text. And more \n text. And more text. Boring, zzzzz. And more text. And more text. And \n more text. And more text. And more text. And more text. And more text. \n And more text. And more text. \n And more text. And more text. And more text. And more text. And more \n text. And more text. And more text. Even more. Continued on page 2 ...', metadata={'source': 'sample.pdf', 'page': 0}),
 Document(page_content='Simple PDF File 2 \n ...continued from page 1. Yet more text. And more text. And more text. \n And more text. And more text. And more text. And more text. And more \n text. Oh, how boring typing this stuff. But not as boring as watching \n paint dry. And more text. And more text. And more text. And more text. \n Bo

In [34]:
# select which embeddings we want to use
embeddings = OpenAIEmbeddings()
# give every chunk a stable id: the in-process Chroma collection outlives this
# cell, so without ids a re-run would add the same chunks again and a k=2
# search could return the same page twice. With fixed ids the entries are
# written under the same keys on every run instead of piling up.
chunk_ids = [
    f"{chunk.metadata.get('source', 'document')}-{position}"
    for position, chunk in enumerate(texts)
]
# create the vectorstore to use as the index
db = Chroma.from_documents(texts, embeddings, ids=chunk_ids)
# expose this index in a retriever interface (top 2 most similar chunks)
retriever = db.as_retriever(search_type="similarity", search_kwargs={"k": 2})
db


<langchain_community.vectorstores.chroma.Chroma at 0x11897bd50>

In [35]:

# build a conversational question-answering chain on top of the retriever
qa = ConversationalRetrievalChain.from_llm(llm=OpenAI(), retriever=retriever)

# first turn: nothing has been said yet, so the history starts empty
chat_history = []
query = "Give me a summary of the document"
first_turn_inputs = {"question": query, "chat_history": chat_history}
result = qa(first_turn_inputs)

In [36]:
# show only the answer text from the chain's result
result["answer"]


' The document is a small demonstration PDF file for use in Virtual Mechanics tutorials. It includes multiple repetitions of the text "More text" and the phrase "And more text," as well as a statement about the text being boring. It also mentions that the document continues on to page 2.'

In [26]:
# second turn: hand the previous (question, answer) pair to the chain as history.
# NOTE: this reads `query` and `result` and then reassigns both, so running the
# cell a second time builds the history from the follow-up turn instead.
previous_turn = (query, result["answer"])
chat_history = [previous_turn]
query = "What is this number divided by 2?"
followup_inputs = {"question": query, "chat_history": chat_history}
result = qa(followup_inputs)

In [27]:
# inspect the (question, answer) history that was passed with the follow-up question
chat_history

[('what is the total number of AI publications?', " I don't know.")]

In [28]:
# show the answer text for the most recent question
result["answer"]

" I don't know."