## Installation

In [None]:
!pip install -U langchain-community


In [None]:
!pip install chromadb

In [None]:
!pip install pinecone

## Import Modules

In [6]:
from langchain.document_loaders import UnstructuredPDFLoader, OnlinePDFLoader, PyPDFLoader, TextLoader
from langchain.text_splitter import RecursiveCharacterTextSplitter

In [7]:
from langchain.vectorstores import Chroma, Pinecone
from langchain.embeddings.openai import OpenAIEmbeddings
import pinecone

In [8]:
from google.colab import userdata

## Load Data from text file

In [9]:
# Point a plain-text loader at the menu file stored in the Colab workspace.
loader = TextLoader(file_path="/content/menu_card.txt")

In [10]:
# Read the file through the loader. The cells below index `data` and take its
# length, and read `.page_content` from its first element.
data = loader.load()

In [None]:
# Sanity-check the load: number of documents, size of the first one, and a
# 200-character preview of its text.
print(f"You have {len(data)} document(s) in your data")
sample_text = data[0].page_content
print(f"There are {len(sample_text)} characters in your sample document")
print(f"Here is a sample: {sample_text[:200]}")

## Split data into chunks

In [12]:
# Break the loaded document(s) into chunks of at most 500 characters, with a
# 50-character overlap between neighbouring chunks.
text_splitter = RecursiveCharacterTextSplitter(
    chunk_size=500,
    chunk_overlap=50,
)
texts = text_splitter.split_documents(documents=data)

In [None]:
# Report how many chunks the splitter produced.
chunk_count = len(texts)
print(f"Now you have {chunk_count} documents")

## Using the OpenAI Embedding Model

In [None]:
# Pull the OpenAI key from the Colab user-data store rather than hard-coding
# it in the notebook, then build the embedding client with it.
OPENAI_API_KEY = userdata.get("OPENAI_API_KEY")
embeddings = OpenAIEmbeddings(
    openai_api_key=OPENAI_API_KEY,
)

In [15]:
# Build a Chroma vector store from the text chunks, using the OpenAI
# embedding client to vectorise them.
vectorstore = Chroma.from_documents(
    documents=texts,
    embedding=embeddings,
)

In [16]:
# Retrieve the chunks that are most similar to the question.
query = "Do you have coffee?"
docs = vectorstore.similarity_search(query=query)

In [None]:
# Show the text of every retrieved chunk, separated by a blank line.
for doc in docs:
    print(f"{doc.page_content}\n")

## Import the LangChain QA Chain

In [18]:
from langchain.chat_models import ChatOpenAI
from langchain.chains.question_answering import load_qa_chain

In [None]:
# Chat model used to answer questions; temperature=0 keeps sampling
# randomness to a minimum.
llm = ChatOpenAI(
    temperature=0,
    openai_api_key=OPENAI_API_KEY,
)
# Question-answering chain of type "stuff" built on top of that model.
chain = load_qa_chain(llm=llm, chain_type="stuff")

In [20]:
# Set the question and fetch the matching chunks that the QA chain will
# receive as context.
query = "Do you have coffee?"
docs = vectorstore.similarity_search(query=query)

In [None]:
# Answer the question from the retrieved chunks. `Chain.run` is deprecated in
# LangChain 0.1+, so call `invoke` and pick out the answer text, which keeps
# the cell output a plain string as before.
chain.invoke({"input_documents": docs, "question": query})["output_text"]

## Extract data using Similarity search

In [None]:
# Ask a more specific question and fetch the chunks most similar to it.
query = "What is the price of latte coffee?"
docs = vectorstore.similarity_search(query=query)


In [None]:
# Answer the current `query` using `docs` as context. `Chain.run` is
# deprecated in LangChain 0.1+, so use `invoke` and return only the answer
# text so the cell still displays a plain string.
chain.invoke({"input_documents": docs, "question": query})["output_text"]

## Pass context and question and get result

In [None]:
# End-to-end example in one cell: set the question, retrieve the matching
# chunks, and let the QA chain answer from them.
query = "What is the cheapest pizza?"
docs = vectorstore.similarity_search(query)
# `Chain.run` is deprecated in LangChain 0.1+; `invoke` takes the inputs as a
# dict and returns a dict, from which only the answer text is displayed.
chain.invoke({"input_documents": docs, "question": query})["output_text"]