In [19]:
import os
from pprint import pprint
from dotenv import load_dotenv
load_dotenv()

from langchain.chains import RetrievalQA
from langchain_groq.chat_models import ChatGroq
from langchain.document_loaders import CSVLoader
from langchain.vectorstores import DocArrayInMemorySearch
from langchain.indexes import VectorstoreIndexCreator
from langchain.embeddings import HuggingFaceEmbeddings
from langchain.evaluation.qa import QAGenerateChain
from langchain.evaluation.qa import QAEvalChain

embedding_model = HuggingFaceEmbeddings(model_name="all-MiniLM-L6-v2")
llm = ChatGroq(api_key=os.environ["GROQ_API_KEY"], model="openai/gpt-oss-120b", temperature=0.9)

'(ReadTimeoutError("HTTPSConnectionPool(host='huggingface.co', port=443): Read timed out. (read timeout=10)"), '(Request ID: 33163448-c3c2-4817-8a98-546d7f09dc5f)')' thrown while requesting HEAD https://huggingface.co/sentence-transformers/all-MiniLM-L6-v2/resolve/main/./modules.json
Retrying in 1s [Retry 1/5].


In [20]:
file = "products_2000.csv"
loader = CSVLoader(file_path=file)
data = loader.load()
index = VectorstoreIndexCreator(embedding=embedding_model, vectorstore_cls=DocArrayInMemorySearch).from_loaders([loader])

qa = RetrievalQA.from_chain_type(
    llm=llm, chain_type="stuff", verbose=True,
    retriever=index.vectorstore.as_retriever(),
    chain_type_kwargs={
        "document_separator": "<<<<>>>>"
    }
)

In [21]:
pprint(data[10].__dict__, width=140)

{'id': None,
 'metadata': {'row': 10, 'source': 'products_2000.csv'},
 'page_content': 'product_id: 2000011\n'
                 'sku: NIK-00011\n'
                 'title: Nike Footwear Max 994\n'
                 'slug: nike-footwear-max-994\n'
                 'brand: Nike\n'
                 'category: Footwear / Activewear\n'
                 'subcategory: Footwear\n'
                 'price: 178.97\n'
                 'discounted_price: 128.64\n'
                 'currency: USD\n'
                 'description: Nike Footwear Max 994: High-quality Footwear / Activewear product by Nike. Comfortable.\n'
                 'image_url: https://picsum.photos/seed/nike-footwear-max-994/1200/1200\n'
                 'image_filename: nike-footwear-max-994.jpg\n'
                 'available_countries: UK,CA\n'
                 'stock: 274\n'
                 'is_available: True\n'
                 'tags: outdoor,running,leather\n'
                 'gender: Unisex\n'
                 'color: G

In [22]:
examples = [
    {
        "query": "How many Nike Max footwears are there?",
        "answer": "Five"
    },
    {
        "query": "How many Apple watches are there?",
        "answer": "22"
    }
]

In [23]:
example_gen_chain = QAGenerateChain.from_llm(llm)
new_examples = example_gen_chain.apply_and_parse(
    [{"doc": t} for t in data[:5]]
)
new_examples[0]

  new_examples = example_gen_chain.apply_and_parse(


{'qa_pairs': {'query': 'What is the discounted price of the Nike Footwear Max 410, and what percentage discount does this represent compared to its original price?',
  'answer': 'The discounted price is **$208.71 USD**. The original price is $304.37, so the discount amount is $304.37\u202f−\u202f$208.71\u202f=\u202f$95.66. This corresponds to a discount of **approximately 31.45%** off the original price.'}}

In [24]:
examples +=new_examples

qa.run(examples[0]["query"])



[1m> Entering new RetrievalQA chain...[0m

[1m> Finished chain.[0m


'There are **4** Nike “Max” footwear products listed in the provided data.'

In [25]:
import langchain
langchain.debug = True

qa.run(examples[0]["query"])

[32;1m[1;3m[chain/start][0m [1m[chain:RetrievalQA] Entering Chain run with input:
[0m{
  "query": "How many Nike Max footwears are there?"
}
[32;1m[1;3m[chain/start][0m [1m[chain:RetrievalQA > chain:StuffDocumentsChain] Entering Chain run with input:
[0m[inputs]
[32;1m[1;3m[chain/start][0m [1m[chain:RetrievalQA > chain:StuffDocumentsChain > chain:LLMChain] Entering Chain run with input:
[0m{
  "question": "How many Nike Max footwears are there?",
  "context": "product_id: 2000010\nsku: NIK-00010\ntitle: Nike Footwear Max 8424\nslug: nike-footwear-max-8424\nbrand: Nike\ncategory: Footwear / Activewear\nsubcategory: Footwear\nprice: 118.06\ndiscounted_price: 98.48\ncurrency: USD\ndescription: Nike Footwear Max 8424: High-quality Footwear / Activewear product by Nike. Durable.\nimage_url: https://picsum.photos/seed/nike-footwear-max-8424/1200/1200\nimage_filename: nike-footwear-max-8424.jpg\navailable_countries: AU,FR\nstock: 295\nis_available: True\ntags: urban,premium,fit

'There are **4** Nike\u202fFootwear\u202fMax products in the provided data.'

In [26]:
langchain.debug = False

predictions = qa.apply(examples[0]["query"])



[1m> Entering new RetrievalQA chain...[0m

[1m> Finished chain.[0m


[1m> Entering new RetrievalQA chain...[0m

[1m> Finished chain.[0m


[1m> Entering new RetrievalQA chain...[0m

[1m> Finished chain.[0m


[1m> Entering new RetrievalQA chain...[0m

[1m> Finished chain.[0m


[1m> Entering new RetrievalQA chain...[0m

[1m> Finished chain.[0m


[1m> Entering new RetrievalQA chain...[0m

[1m> Finished chain.[0m


[1m> Entering new RetrievalQA chain...[0m

[1m> Finished chain.[0m


[1m> Entering new RetrievalQA chain...[0m

[1m> Finished chain.[0m


[1m> Entering new RetrievalQA chain...[0m

[1m> Finished chain.[0m


[1m> Entering new RetrievalQA chain...[0m

[1m> Finished chain.[0m


[1m> Entering new RetrievalQA chain...[0m

[1m> Finished chain.[0m


[1m> Entering new RetrievalQA chain...[0m

[1m> Finished chain.[0m


[1m> Entering new RetrievalQA chain...[0m

[1m> Finished chain.[0m


[1m> Entering new RetrievalQA chain...[0m

[1m

In [None]:
llm = ChatGroq(api_key=os.environ["GROQ_API_KEY"], model="openai/gpt-oss-120b", temperature=0)
eval_chain = QAEvalChain.from_llm(llm)

graded_outputs = eval_chain.evaluate(examples, predictions)

# for i, eg in enumerate(examples):
#     print(f"Example {i}")
#     print(f"Example {predictions[i]}")
#     print("Question: ", predictions[i]["query"])
#     # print("Real Answer: ", predictions[i]["answer"])
#     print("Predicted Answer: ", predictions[i]["result"])
#     # print("Predicted Grade: ", predictions[i]["text"])
#     print()