# Evaluating the Answers

In [28]:
import openai
import os
from dotenv import load_dotenv, find_dotenv
import numpy as np
from IPython.display import display, Markdown, HTML

import urllib
from PyPDF2 import PdfReader
from langchain.document_loaders import UnstructuredHTMLLoader, BSHTMLLoader, PyPDFLoader, CSVLoader

from langchain.text_splitter import CharacterTextSplitter

from langchain.embeddings import OpenAIEmbeddings, HuggingFaceEmbeddings
from sentence_transformers import SentenceTransformer

from langchain.docstore.document import Document
from langchain.vectorstores import DocArrayInMemorySearch, FAISS, Chroma
from langchain.indexes import VectorstoreIndexCreator

from langchain.llms import OpenAI
from langchain.chat_models import ChatOpenAI 
from langchain.chains import RetrievalQA

from langchain.callbacks import get_openai_callback

import spacy
# Load the spaCy pipeline named 'en_core_web_sm' (presumably it must already be installed in this environment — confirm in setup notes)
nlp = spacy.load('en_core_web_sm')

In [2]:
# Load and set API key
# find_dotenv() locates a .env file and load_dotenv() reads it; the return value is discarded via `_`.
_ = load_dotenv(find_dotenv())
# Indexing os.environ raises KeyError if OPENAI_API_KEY is not set.
openai.api_key = os.environ['OPENAI_API_KEY']

# Model name passed to the ChatOpenAI constructors in later cells.
model_name="gpt-3.5-turbo"

**Load the CSV**

In [29]:
# Product catalog CSV, relative to the notebook's working directory
file_path = 'data/myntra_products_catalog.csv'

# Keep the loader around (the index cell reuses it) and also materialize
# the documents so individual rows can be inspected.
loader = CSVLoader(file_path=file_path)
data = loader.load()

**Create the QA Chain**

In [30]:
# Build a vector index over the documents produced by `loader`,
# backed by the DocArrayInMemorySearch vector store.
index_creator = VectorstoreIndexCreator(vectorstore_cls=DocArrayInMemorySearch)
index = index_creator.from_loaders([loader])

# Sanity-check retrieval: show the five documents closest to a sample query
index.vectorstore.similarity_search(query="men cargo pants", k=5)

[Document(page_content='ProductID: 10001131\nProductName: MANGO Women Black Slim Fit Solid Cargos\nProductBrand: MANGO\nGender: Women\nPrice (INR): 1716\nNumImages: 7\nDescription: Black solid mid-rise cargos, has a waistband with belt loops, button closure, zip fly, and six pockets\nPrimaryColor: Black', metadata={'source': 'data/myntra_products_catalog.csv', 'row': 782}),
 Document(page_content='ProductID: 10149831\nProductName: HERE&NOW Men Brown Regular Fit Solid Cargos\nProductBrand: HERE&NOW\nGender: Men\nPrice (INR): 1259\nNumImages: 5\nDescription: Brown solid mid-rise trousers, button closure, and 6 pockets\nPrimaryColor: Brown', metadata={'source': 'data/myntra_products_catalog.csv', 'row': 5517}),
 Document(page_content='ProductID: 10247579\nProductName: beevee Men Brown & Black Straight Fit Camouflage Printed Cargos\nProductBrand: beevee\nGender: Men\nPrice (INR): 900\nNumImages: 5\nDescription: Brown and Black mid-rise camouflage print trousers, button and drawstring closu

In [31]:
# Chat model with temperature 0.0 for the question-answering chain
llm = ChatOpenAI(model=model_name, temperature=0.0)

# Retriever view over the vector store built above
retriever = index.vectorstore.as_retriever()

# "stuff" chain: retrieved documents are joined with the separator below
# and passed to the LLM together with the question.
qa = RetrievalQA.from_chain_type(
    llm=llm,
    chain_type="stuff",
    retriever=retriever,
    chain_type_kwargs={"document_separator": "<<<<>>>>>"},
    verbose=True,
)

## Generate examples

By generating examples of questions and their answers, we can evaluate the model's responses.
Generating examples manually is time-consuming, which is why we will also generate them using the LLM itself.

**Manually generated examples**

In [41]:
# Inspect the text of the first loaded document; the hand-written example below is based on it
data[0].page_content

'ProductID: 10017413\nProductName: DKNY Unisex Black & Grey Printed Medium Trolley Bag\nProductBrand: DKNY\nGender: Unisex\nPrice (INR): 11745\nNumImages: 7\nDescription: Black and grey printed medium trolley bag, secured with a TSA lockOne handle on the top and one on the side, has a trolley with a retractable handle on the top and four corner mounted inline skate wheelsOne main zip compartment, zip lining, two compression straps with click clasps, one zip compartment on the flap with three zip pocketsWarranty: 5 yearsWarranty provided by Brand Owner / Manufacturer\nPrimaryColor: Black'

In [45]:
# Hand-written question/answer pair based on the first catalog row (data[0]).
# The query is assembled from adjacent string literals: a backslash
# continuation inside a single literal would embed the next line's
# indentation in the query text that is sent to the retriever and the LLM.
examples = [
    {
        "query": (
            "Does the DKNY Unisex Black & Grey Printed Medium Trolley Bag "
            "have a TSA lockOne handle?"
        ),
        "answer": "Yes"
    },
]

**LLM-generated examples**

In [68]:
from langchain.evaluation.qa import QAGenerateChain

# Chain that asks the LLM to write a question/answer pair for a document
example_gen_chain = QAGenerateChain.from_llm(ChatOpenAI(model=model_name))

# One generated pair per document, using the first five loaded documents
gen_inputs = [{"doc": doc} for doc in data[:5]]
new_examples = example_gen_chain.apply_and_parse(gen_inputs)

# Show a source document followed by the pair generated from it
display(data[0], new_examples[0])



Document(page_content='ProductID: 10017413\nProductName: DKNY Unisex Black & Grey Printed Medium Trolley Bag\nProductBrand: DKNY\nGender: Unisex\nPrice (INR): 11745\nNumImages: 7\nDescription: Black and grey printed medium trolley bag, secured with a TSA lockOne handle on the top and one on the side, has a trolley with a retractable handle on the top and four corner mounted inline skate wheelsOne main zip compartment, zip lining, two compression straps with click clasps, one zip compartment on the flap with three zip pocketsWarranty: 5 yearsWarranty provided by Brand Owner / Manufacturer\nPrimaryColor: Black', metadata={'source': 'data/myntra_products_catalog.csv', 'row': 0})

{'qa_pairs': {'query': 'What is the primary color of the DKNY Unisex Black & Grey Printed Medium Trolley Bag?',
  'answer': 'The primary color of the DKNY Unisex Black & Grey Printed Medium Trolley Bag is black.'}}

## Evaluate the LLM

Again, while we can evaluate the LLM manually, it is time-consuming. We will use the LLM to evaluate itself. LLM-assigned grades are not infallible, so spot-check them against the printed answers.

**Manual evaluation**

In [43]:
import langchain
# Turn on LangChain's global debug flag so the next chain run prints its intermediate steps
langchain.debug = True

In [46]:
# Run the QA chain on the hand-written query; compare the reply with examples[0]["answer"] by eye
qa.run(examples[0]["query"])

[32;1m[1;3m[chain/start][0m [1m[1:chain:RetrievalQA] Entering Chain run with input:
[0m{
  "query": "Does the DKNY Unisex Black & Grey Printed Medium Trolley Bag         have a TSA lockOne handle?"
}
[32;1m[1;3m[chain/start][0m [1m[1:chain:RetrievalQA > 3:chain:StuffDocumentsChain] Entering Chain run with input:
[0m[inputs]
[32;1m[1;3m[chain/start][0m [1m[1:chain:RetrievalQA > 3:chain:StuffDocumentsChain > 4:chain:LLMChain] Entering Chain run with input:
[0m{
  "question": "Does the DKNY Unisex Black & Grey Printed Medium Trolley Bag         have a TSA lockOne handle?",
  "context": "ProductID: 10017413\nProductName: DKNY Unisex Black & Grey Printed Medium Trolley Bag\nProductBrand: DKNY\nGender: Unisex\nPrice (INR): 11745\nNumImages: 7\nDescription: Black and grey printed medium trolley bag, secured with a TSA lockOne handle on the top and one on the side, has a trolley with a retractable handle on the top and four corner mounted inline skate wheelsOne main zip compartm

'Yes, the DKNY Unisex Black & Grey Printed Medium Trolley Bag has a TSA lock and one handle.'

In [47]:
# Switch the debug flag back off so later chain runs do not print full traces
langchain.debug = False

**LLM-assisted evaluation**

In [69]:
# Extract query and answer from the qa_pairs key
new_examples = [ex['qa_pairs'] for ex in new_examples]
new_examples[0]

{'query': 'What is the primary color of the DKNY Unisex Black & Grey Printed Medium Trolley Bag?',
 'answer': 'The primary color of the DKNY Unisex Black & Grey Printed Medium Trolley Bag is black.'}

In [70]:
from langchain.evaluation.qa import QAEvalChain

# Run the QA chain on every generated example
predictions = qa.apply(new_examples)

# Grading chain, driven by a temperature-0 chat model
eval_llm = ChatOpenAI(model=model_name, temperature=0)
eval_chain = QAEvalChain.from_llm(eval_llm)

# Grade each prediction against its example, then peek at the first grade
graded_outputs = eval_chain.evaluate(new_examples, predictions)
graded_outputs[0]



[1m> Entering new RetrievalQA chain...[0m

[1m> Finished chain.[0m


[1m> Entering new RetrievalQA chain...[0m

[1m> Finished chain.[0m


[1m> Entering new RetrievalQA chain...[0m

[1m> Finished chain.[0m


[1m> Entering new RetrievalQA chain...[0m

[1m> Finished chain.[0m


[1m> Entering new RetrievalQA chain...[0m

[1m> Finished chain.[0m


{'results': 'CORRECT'}

In [73]:
# Print each question with its reference answer, the chain's prediction and
# the grade from the evaluation chain. Each entry of `predictions` carries the
# example's 'query' and 'answer' keys alongside the chain's 'result', so the
# two parallel lists are iterated directly instead of indexing by position.
for i, (prediction, grade) in enumerate(zip(predictions, graded_outputs)):
    print(f"Example {i}:")
    print("Question: " + prediction['query'])
    print("Real Answer: " + prediction['answer'])
    print("Predicted Answer: " + prediction['result'])
    print("Predicted Grade: " + grade['results'])
    print()

Example 0:
Question: What is the primary color of the DKNY Unisex Black & Grey Printed Medium Trolley Bag?
Real Answer: The primary color of the DKNY Unisex Black & Grey Printed Medium Trolley Bag is black.
Predicted Answer: The primary color of the DKNY Unisex Black & Grey Printed Medium Trolley Bag is black.
Predicted Grade: CORRECT

Example 1:
Question: What is the product name and brand mentioned in the document?
Real Answer: The product name is "EthnoVogue Women Beige & Grey Made to Measure Custom Made Kurta Set with Jacket" and the brand is "EthnoVogue".
Predicted Answer: I'm sorry, but I don't have access to the specific document you are referring to. Can you please provide more information or context?
Predicted Grade: CORRECT

Example 2:
Question: What is the product ID of the SPYKAR Women Pink Alexa Super Skinny Fit High-Rise Clean Look Stretchable Cropped Jeans?
Real Answer: The product ID of the SPYKAR Women Pink Alexa Super Skinny Fit High-Rise Clean Look Stretchable Croppe