## Installation

In [None]:
!pip install langchain-groq
!pip install -U langchain-community
!pip install chromadb
!pip install pinecone

## Import Module

In [3]:
from langchain.document_loaders import UnstructuredPDFLoader, OnlinePDFLoader, PyPDFLoader, TextLoader
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain.vectorstores import Chroma, Pinecone
import pinecone
from google.colab import userdata
from langchain_groq import ChatGroq
from langchain.embeddings import HuggingFaceEmbeddings
from langchain.vectorstores import Chroma
from langchain.chains.question_answering import load_qa_chain

## Load data from text file

In [None]:
# Read the menu card from disk; load() returns a list whose items expose
# their text via `.page_content`.
loader = TextLoader("/content/menu_card.txt")
data = loader.load()

# Sanity check: number of documents, size of the first one, and a short preview.
sample_text = data[0].page_content
print(f'You have {len(data)} document(s) in your data')
print(f'There are {len(sample_text)} characters in your sample document')
print(f'Here is a sample: {sample_text[:200]}')

## Split data into chunks

In [5]:
# Break the loaded document(s) into smaller pieces for retrieval:
# chunk_size=500 with chunk_overlap=50 between neighbouring chunks.
text_splitter = RecursiveCharacterTextSplitter(
    chunk_size=500,
    chunk_overlap=50,
)
texts = text_splitter.split_documents(data)

In [None]:
# Report how many chunks the splitter produced.
chunk_count = len(texts)
print(f'Now you have {chunk_count} documents')

## Using a Hugging Face embedding model

In [None]:
# Embedding model used to turn each chunk into a vector.
# NOTE(review): "trust_remote_code": True lets code shipped in the model
# repository run locally; keep it only for repositories you trust.
embeddings = HuggingFaceEmbeddings(
    model_name="jinaai/jina-embeddings-v2-small-en",  # 512-dimensional embeddings
    model_kwargs={"trust_remote_code": True}
)


# `texts` must be a list of Document objects (as produced by split_documents);
# raw strings would go through Chroma.from_texts instead.
vectorstore2 = Chroma.from_documents(texts, embeddings)

## Using Groq ChatGroq

In [8]:
# Read the Groq key from Colab's user secrets so it never appears in the notebook.
groq_api_key = userdata.get('GROQ_API_KEY')

# Chat model that will answer questions over the retrieved chunks.
llm = ChatGroq(
    api_key=groq_api_key,
    model="llama-3.1-8b-instant",
    temperature=0,
    max_tokens=None,
    timeout=None,
    max_retries=2,
)

## Creating Chain

In [None]:
# Build a question-answering chain on top of the Groq model, using the
# "stuff" chain type.
chain = load_qa_chain(llm=llm, chain_type="stuff")

## Pass the context with a question and get the result

In [None]:
# Retrieve the chunks most similar to the question, then let the QA chain
# answer from them.
query = "Do you have coffee?"
docs = vectorstore2.similarity_search(query)
# `Chain.run` is deprecated in LangChain; `invoke` takes the inputs as a single
# dict and returns a dict of outputs, with the answer under "output_text".
chain.invoke({"input_documents": docs, "question": query})["output_text"]

In [None]:
# Retrieve the chunks most similar to the question, then let the QA chain
# answer from them.
query = "What is the cheapest pizza?"
docs = vectorstore2.similarity_search(query)
# `Chain.run` is deprecated in LangChain; `invoke` takes the inputs as a single
# dict and returns a dict of outputs, with the answer under "output_text".
chain.invoke({"input_documents": docs, "question": query})["output_text"]