In [3]:
!pip install -U langchain-community



In [4]:
!pip install chromadb



In [5]:
!pip install pinecone



In [6]:
from langchain.document_loaders import UnstructuredPDFLoader, OnlinePDFLoader, PyPDFLoader, TextLoader
from langchain.text_splitter import RecursiveCharacterTextSplitter

In [7]:
from langchain.vectorstores import Chroma, Pinecone
from langchain.embeddings.openai import OpenAIEmbeddings
import pinecone

In [8]:
from google.colab import userdata

In [9]:
# Load the menu as plain text. The encoding is pinned to UTF-8 so decoding does not
# depend on the platform's locale default (a mismatch corrupts non-ASCII characters
# such as en dashes).
# NOTE(review): absolute Colab path — the file must be uploaded to /content first.
loader = TextLoader("/content/menu_card.txt", encoding="utf-8")

In [10]:
# Read the file through the loader. The result is an indexable collection whose
# items expose `.page_content` (used by the summary prints that follow).
data = loader.load()

In [11]:
# Sanity-check the load: document count, size of the first document, short preview.
print(f'You have {len(data)} document(s) in your data')
first_doc_text = data[0].page_content
print(f'There are {len(first_doc_text)} characters in your sample document')
preview = first_doc_text[:200]
print(f'Here is a sample: {preview}')

You have 1 document(s) in your data
There are 4236 characters in your sample document
Here is a sample: Restaurant Name: Gourmet Haven

Menu:

Item: Espresso Coffee
Ingredients: Finely ground coffee beans, water
Taste: Strong, bold, slightly bitter
Price: $2.50
Cooking Process: Hot water is forced throu


In [12]:
# Chunking parameters, in characters: maximum chunk length and the overlap
# shared between neighbouring chunks.
CHUNK_SIZE = 500
CHUNK_OVERLAP = 50

text_splitter = RecursiveCharacterTextSplitter(
    chunk_size=CHUNK_SIZE,
    chunk_overlap=CHUNK_OVERLAP,
)
# Split the loaded document(s) into smaller pieces for embedding.
texts = text_splitter.split_documents(data)

In [13]:
# Report how many chunks the splitter produced.
chunk_count = len(texts)
print(f'Now you have {chunk_count} documents')

Now you have 11 documents


In [14]:
# Fetch the OpenAI key via Colab's `userdata` helper rather than hardcoding it,
# then build the embedding client that the vector store uses.
# NOTE(review): the recorded output echoes this constructor line, which looks like
# the tail of a deprecation warning — confirm against the installed LangChain version.
OPENAI_API_KEY = userdata.get('OPENAI_API_KEY')
embeddings = OpenAIEmbeddings(openai_api_key=OPENAI_API_KEY)

  embeddings = OpenAIEmbeddings(openai_api_key=OPENAI_API_KEY)


In [15]:
# Build a Chroma vector store from the chunks using the embedding client.
# No persist directory is passed here, so the index is rebuilt every time this cell runs.
vectorstore = Chroma.from_documents(texts, embeddings)

In [16]:
# Retrieve the chunks most similar to the question; the number of hits is left
# at the library default because no `k` is passed.
query = "Do you have coffee?"
docs = vectorstore.similarity_search(query)

In [17]:
# Dump the text of every retrieved chunk, leaving a blank line after each one.
for doc in docs:
    print(doc.page_content, end="\n\n")

Restaurant Name: Gourmet Haven

Menu:

Item: Espresso Coffee
Ingredients: Finely ground coffee beans, water
Taste: Strong, bold, slightly bitter
Price: $2.50
Cooking Process: Hot water is forced through finely ground coffee under high pressure to create a concentrated shot.

Item: Cold Brew Coffee
Ingredients: Coarsely ground coffee, cold water
Taste: Smooth, less acidic, mildly sweet
Price: $4.00
Cooking Process: Coffee grounds are steeped in cold water for 12–24 hours and then filtered.

Item: Americano Coffee
Ingredients: Espresso, hot water
Taste: Mild, smooth, slightly bitter
Price: $2.75
Cooking Process: Espresso is diluted with hot water to create a lighter coffee drink.

Item: Latte Coffee
Ingredients: Espresso, steamed milk, milk foam
Taste: Smooth, creamy with mild coffee flavor
Price: $3.75
Cooking Process: A shot of espresso is mixed with steamed milk and topped with a light layer of foam.

Item: Cappuccino Coffee
Ingredients: Espresso, steamed milk, thick milk foam
Taste

In [18]:
from langchain.chat_models import ChatOpenAI
from langchain.chains.question_answering import load_qa_chain

In [19]:
# Chat model with temperature=0 (least random sampling), wrapped in a "stuff" QA
# chain: every retrieved document is placed into a single prompt with the question.
# NOTE(review): the recorded output below links to LangChain migration guides for
# the stuff/map_reduce/refine/map_rerank chains, which indicates `load_qa_chain`
# is deprecated — plan a migration.
llm = ChatOpenAI(temperature=0, openai_api_key=OPENAI_API_KEY)
chain = load_qa_chain(llm, chain_type="stuff")

  llm = ChatOpenAI(temperature=0, openai_api_key=OPENAI_API_KEY)
stuff: https://python.langchain.com/docs/versions/migrating_chains/stuff_docs_chain
map_reduce: https://python.langchain.com/docs/versions/migrating_chains/map_reduce_chain
refine: https://python.langchain.com/docs/versions/migrating_chains/refine_chain
map_rerank: https://python.langchain.com/docs/versions/migrating_chains/map_rerank_docs_chain

See also guides on retrieval and question-answering here: https://python.langchain.com/docs/how_to/#qa-with-rag
  chain = load_qa_chain(llm, chain_type="stuff")


In [20]:
# Same question and retrieval as the earlier coffee search cell, repeated so that
# `query` and `docs` are set immediately before the QA chain is invoked.
query = "Do you have coffee?"
docs = vectorstore.similarity_search(query)

In [21]:
# Answer the question from the retrieved chunks.
# `Chain.run` is deprecated; `invoke` takes the inputs as a dict and returns a dict,
# from which "output_text" is the answer string that `run` used to return.
chain.invoke({"input_documents": docs, "question": query})["output_text"]

  chain.run(input_documents=docs, question=query)


'Yes, we have a variety of coffee options at Gourmet Haven, including Espresso Coffee, Cold Brew Coffee, Americano Coffee, Latte Coffee, and Cappuccino Coffee.'

In [22]:
# New question: overwrite `query` and `docs` with the chunks most similar to it
# (number of hits left at the library default).
query = "What is the price of latte coffee?"
docs = vectorstore.similarity_search(query)

In [23]:
# Answer the current `query` from the current `docs`.
# `Chain.run` is deprecated; `invoke` returns a dict whose "output_text" entry is
# the answer string that `run` used to return.
chain.invoke({"input_documents": docs, "question": query})["output_text"]

'The price of Latte Coffee at Gourmet Haven is $3.75.'

In [24]:
# Retrieve context for the question and answer it in one cell.
# NOTE(review): "cheapest" is an aggregate over all pizzas, but the model only sees
# the chunks returned by the similarity search — verify that every pizza entry is
# among the hits before trusting the answer.
query = "What is the cheapest pizza?"
docs = vectorstore.similarity_search(query)
# `Chain.run` is deprecated; `invoke` returns a dict whose "output_text" entry is
# the answer string that `run` used to return.
chain.invoke({"input_documents": docs, "question": query})["output_text"]

'The Margherita Pizza is the cheapest pizza priced at $8.99.'