# LangChain: Evaluation

## Outline:

* Example generation
* Manual evaluation (and debugging)
* LLM-assisted evaluation

In [1]:
import os

from dotenv import load_dotenv, find_dotenv
# Locate a .env file with find_dotenv() and load its variables into the environment.
# Binding the return value to `_` keeps the notebook from echoing it as the cell output.
# NOTE(review): presumably this provides the OpenAI credentials used by ChatOpenAI below — confirm.
_ = load_dotenv(find_dotenv()) # read local .env file

## Create our Q&A application

In [3]:
from langchain.chains import RetrievalQA
from langchain.chat_models import ChatOpenAI
from langchain.document_loaders import CSVLoader , PyMuPDFLoader
from langchain.indexes import VectorstoreIndexCreator
from langchain.vectorstores import DocArrayInMemorySearch

In [10]:
# PDF service report that serves as the knowledge source for the Q&A chain.
path = 'input/Wartsilla/SWR-000349841_Check of engine automation system alarms_External.pdf'

# PyMuPDFLoader reads the PDF at `path`; the same loader is reused to build the index.
loader = PyMuPDFLoader(path)
# `data` is the list of loaded documents; later cells index it and slice it for example generation.
# NOTE(review): appears to hold one Document per PDF page (see the `page` metadata) — confirm.
data = loader.load()
# NOTE(review): `pages` is not referenced by any later visible cell — confirm it is still needed.
pages = loader.load_and_split()

In [5]:
# Build a vector-store index over the loader's documents, backed by DocArrayInMemorySearch.
index_creator = VectorstoreIndexCreator(vectorstore_cls=DocArrayInMemorySearch)
index = index_creator.from_loaders([loader])

In [6]:
# Chat model used to answer questions; temperature 0.0 minimises sampling randomness.
llm = ChatOpenAI(temperature=0.0)

# Retriever over the vector store created by the index cell.
retriever = index.vectorstore.as_retriever()

# Retrieval-augmented QA chain using the "stuff" chain type; the custom
# separator string is passed through to the chain via chain_type_kwargs.
qa = RetrievalQA.from_chain_type(
    llm=llm,
    chain_type="stuff",
    retriever=retriever,
    chain_type_kwargs={"document_separator": "<<<<>>>>>"},
    verbose=True,
)

In [13]:
# Inspect the second loaded document (index 1) to check its page content and metadata.
# NOTE(review): the recorded output of this cell contains a personal email address —
# clear or redact the output before sharing the notebook.
data[1]

Document(page_content='© Wärtsilä 2022\nTable of contents\nDisclaimer\n3\nOrder\n3\nScope of work\n4\nSpare parts\n4\nEngine details and running hours\n4\n1 Background\n5\n2 Description of work\n5\n3 Recommendations\n8\n4 Site safety\n8\n5 Calibrated measuring devices utilized\n8\n6 Conclusions\n8\nAttachments\n8\nOffline copy downloaded from Wärtsilä Technical Knowledge Base 27 Apr 2023 by jacob.abonde_external@wartsila.com , \n', metadata={'source': 'input/Wartsilla/SWR-000349841_Check of engine automation system alarms_External.pdf', 'file_path': 'input/Wartsilla/SWR-000349841_Check of engine automation system alarms_External.pdf', 'page': 1, 'total_pages': 8, 'format': 'PDF 1.7', 'title': '', 'author': '', 'subject': '', 'keywords': '', 'creator': 'Telerik RadRichTextBox', 'producer': 'pdf-lib (https://github.com/Hopding/pdf-lib)', 'creationDate': "D:20220815080008+02'00'", 'modDate': 'D:20230427113753Z', 'trapped': ''})

### LLM-Generated examples

Generate question/answer (Q/A) pairs from the loaded document pages using an LLM.

In [7]:
from langchain.evaluation.qa import QAGenerateChain

In [8]:
# A separate chat model with default settings drives the example generator.
example_llm = ChatOpenAI()
example_gen_chain = QAGenerateChain.from_llm(example_llm)

In [14]:
# One input dict per document, limited to the first five loaded documents.
doc_inputs = [{"doc": doc} for doc in data[:5]]

# Ask the generation chain for a parsed question/answer pair per input.
new_examples = example_gen_chain.apply_and_parse(doc_inputs)

Retrying langchain.chat_models.openai.ChatOpenAI.completion_with_retry.<locals>._completion_with_retry in 1.0 seconds as it raised APIConnectionError: Error communicating with OpenAI: ('Connection aborted.', RemoteDisconnected('Remote end closed connection without response')).


In [15]:
# Start the evaluation set from a fresh copy of the LLM-generated examples.
examples = list(new_examples)

In [16]:
# Display the generated examples; each entry is a dict with 'query' and 'answer' keys.
examples

[{'query': 'Who performed and reported the Check of engine automation system alarms on the TROLLFJORD?',
  'answer': 'Jari Tegelberg performed and reported the Check of engine automation system alarms on the TROLLFJORD.'},
 {'query': 'What is the date and time when the offline copy of the document was downloaded from Wärtsilä Technical Knowledge Base?',
  'answer': 'The offline copy of the document was downloaded from Wärtsilä Technical Knowledge Base on April 27, 2023, at 11:37:53 UTC.'},
 {'query': 'What is the purpose of the disclaimer included in the Service Work Report document?',
  'answer': "The disclaimer included in the Service Work Report document states that no part of the publication may be reproduced or copied without prior written permission of the copyright holder, and that Wärtsilä assumes no responsibility for the correctness, errors, or omissions of information contained in the publication. It also states that any actions taken by the owner/operator as a result of the

In [None]:
# Smoke-test the QA chain on the first generated question.
first_query = examples[0]["query"]
qa.run(first_query)



[1m> Entering new RetrievalQA chain...[0m


## Manual Evaluation

In [1]:
import langchain
# Switch on LangChain's global debug flag before re-running the chain for manual inspection.
langchain.debug = True

In [2]:
# Re-run the first example with debug mode enabled.
# NOTE(review): depends on `qa` and `examples` from earlier cells; the recorded NameError
# indicates the kernel was restarted without re-running them — run the notebook top to bottom.
qa.run(examples[0]["query"])

NameError: name 'qa' is not defined

In [3]:
# Turn debug mode back off so the following evaluation cells are not run with it enabled.
langchain.debug = False

## LLM-assisted evaluation

In [4]:
# Run the QA chain over every example; the report cell below reads the 'query',
# 'answer' and 'result' keys from each returned prediction.
# NOTE(review): requires `qa` and `examples` from earlier cells (the recorded NameError
# comes from a restarted kernel).
predictions = qa.apply(examples)

NameError: name 'qa' is not defined

In [5]:
from langchain.evaluation.qa import QAEvalChain

In [6]:
# Chat model at temperature 0 used as the grader.
# NOTE(review): this rebinds `llm`, which the QA-chain cell already defined with temperature 0.0.
# NOTE(review): the recorded NameError shows ChatOpenAI was not imported in the restarted
# kernel — the import cell near the top must be run first.
llm = ChatOpenAI(temperature=0)
# Evaluation chain that grades predicted answers against the reference answers.
eval_chain = QAEvalChain.from_llm(llm)

NameError: name 'ChatOpenAI' is not defined

In [None]:
# Grade each prediction against its reference example; the result is indexed
# in parallel with `examples` and `predictions` by the report cell below.
graded_outputs = eval_chain.evaluate(examples, predictions)

In [None]:
# Report, for every example, the question, the reference answer, the chain's
# answer, and the grade assigned by the LLM evaluator.

# The three sequences are consumed in parallel, so fail loudly if they drift apart
# (e.g. a cell was re-run out of order) instead of mislabelling or truncating results.
assert len(examples) == len(predictions) == len(graded_outputs), (
    "examples, predictions and graded_outputs must have the same length"
)

for i, (prediction, graded) in enumerate(zip(predictions, graded_outputs)):
    # The evaluator's output key differs across LangChain releases: older
    # versions use 'text', newer ones 'results'. Prefer 'results' when present
    # and otherwise fall back to the original 'text' lookup.
    grade_key = "results" if "results" in graded else "text"

    print(f"Example {i}:")
    print(f"Question: {prediction['query']}")
    print(f"Real Answer: {prediction['answer']}")
    print(f"Predicted Answer: {prediction['result']}")
    print(f"Predicted Grade: {graded[grade_key]}")
    print()