## LangChain: Q&A over Documents

In [1]:
import warnings
warnings.filterwarnings('ignore')

In [2]:
from IPython.display import display
from IPython.display import Markdown
import textwrap

def to_markdown(text):
  text = text.replace('•', '  *')
  return Markdown(textwrap.indent(text, '> ', predicate=lambda _: True))

In [3]:
from dotenv import load_dotenv

load_dotenv()

True

In [4]:
import os

api_key = os.getenv("GOOGLE_API_KEY")

In [5]:
# Gemini model
from langchain_google_genai import GoogleGenerativeAI

from langchain_community.document_loaders.csv_loader import CSVLoader # CSV file loader
from langchain_google_genai import GoogleGenerativeAIEmbeddings  # Gemini embedding model
from langchain.chains import RetrievalQA # for retrieval
from langchain.vectorstores import DocArrayInMemorySearch # vector store/database
from langchain.indexes import VectorstoreIndexCreator # indexing for vector stores

In [6]:
file = "datasets\sample_data.csv"
loader = CSVLoader(file_path=file)

In [7]:
loader.load()[0]

Document(metadata={'source': 'datasets\\sample_data.csv', 'row': 0}, page_content="Text: Habiendo probado un par de otras marcas de galletas sÃ¡ndwich sin gluten, estas son las mejores del grupo.Son crujientes y fieles a la textura de las otras galletas 'reales' que no son sin gluten.Algunos podrÃ\xadan pensar que el relleno las hace un poco demasiado dulces,\\ \npero para mÃ\xad eso solo significa que he satisfecho mi gusto por lo dulce mÃ¡s rÃ¡pido.La versiÃ³n de chocolate de Glutino es igual de buena y tiene un verdadero sabor a 'chocolate',\\ \nalgo que no estÃ¡ presente en las otras marcas sin gluten disponibles.\nScore: 5")

In [8]:
# !pip install docarray

In [27]:
embeddings = GoogleGenerativeAIEmbeddings(model="models/embedding-001")

index = VectorstoreIndexCreator(
    vectorstore_cls=DocArrayInMemorySearch,
    embedding=embeddings
).from_loaders([loader])

In [28]:
query ="Please list all the languages according to the reviews \
in a table in markdown and summarize each one."

In [29]:
model = GoogleGenerativeAI(temperature=1, model="gemini-1.5-flash")

response = index.query(query, llm=model)

In [30]:
to_markdown(response)

> | Language | Summary |
> |---|---|
> | Dutch | A positive review praising the product's taste, ease of use, and packaging design. The reviewer felt the taste was refreshing, not too sweet, and mixed well without any residue.  |
> | Italian | A somewhat negative review. The reviewer found the product's taste to be muddy and not as expected, despite it being advertised as a company favorite. |
> | Spanish | A positive review of gluten-free sandwich cookies. The reviewer found them crunchy, similar in texture to non-gluten-free versions, and praised the chocolate flavor.  While acknowledging the filling might be too sweet for some, the reviewer enjoyed it. |


In [31]:
# How embeddings works?
embed = embedding.embed_query("Hello, my name is Charan")

In [32]:
print("Total embeddings size:", len(embed))
print("Embedding samples:", embed[:4])

Total embeddings size: 768
Embedding samples: [0.008325748145580292, -0.013554812408983707, -0.03769350051879883, -0.006321687716990709]


In [33]:
docs = loader.load()

docs[0]

Document(metadata={'source': 'datasets\\sample_data.csv', 'row': 0}, page_content="Text: Habiendo probado un par de otras marcas de galletas sÃ¡ndwich sin gluten, estas son las mejores del grupo.Son crujientes y fieles a la textura de las otras galletas 'reales' que no son sin gluten.Algunos podrÃ\xadan pensar que el relleno las hace un poco demasiado dulces,\\ \npero para mÃ\xad eso solo significa que he satisfecho mi gusto por lo dulce mÃ¡s rÃ¡pido.La versiÃ³n de chocolate de Glutino es igual de buena y tiene un verdadero sabor a 'chocolate',\\ \nalgo que no estÃ¡ presente en las otras marcas sin gluten disponibles.\nScore: 5")

In [35]:
db = DocArrayInMemorySearch.from_documents(
    docs,
    embeddings
)

In [41]:
retriever = db.as_retriever()

In [69]:
from langchain_google_genai import ChatGoogleGenerativeAI

chat_model = ChatGoogleGenerativeAI(model="gemini-1.5-pro")

In [70]:
qa_stuff = RetrievalQA.from_chain_type(
    llm=chat_model,
    chain_type="stuff",
    retriever=retriever,
    verbose=True
)

In [74]:
query = "Please find the negative review, then mention what language it is in and the given score."

In [75]:
response = qa_stuff.invoke(query)



[1m> Entering new RetrievalQA chain...[0m

[1m> Finished chain.[0m


In [76]:
response

{'query': 'Please find the negative review, then mention what language it is in and the given score.',
 'result': 'The negative review is: "Un po\' meno di quanto mi aspettassi. Tende ad avere un sapore fangoso,non quello che mi aspettavo dato che hanno detto che era il preferito dell\'azienda."\n\nIt is written in Italian and has a score of 3.\n'}

In [49]:
type(response)

dict

In [78]:
to_markdown(response['result'])

> The negative review is: "Un po' meno di quanto mi aspettassi. Tende ad avere un sapore fangoso,non quello che mi aspettavo dato che hanno detto che era il preferito dell'azienda."
> 
> It is written in Italian and has a score of 3.
