# LangChain: Evaluation¶
### Outline:
* Example generation
* Manual evaluation (and debuging)
* LLM-assisted evaluation

In [1]:
import os

from dotenv import load_dotenv, find_dotenv
_ = load_dotenv(find_dotenv()) # read local .env file

# Create our QandA application

In [2]:
from langchain.chains import RetrievalQA
from langchain.chat_models import ChatOpenAI
from langchain.document_loaders import CSVLoader
from langchain.indexes import VectorstoreIndexCreator
from langchain.vectorstores import DocArrayInMemorySearch

In [5]:
file = 'GlobalPowerPlantDB_USonly.csv'
loader = CSVLoader(file_path=file)
data = loader.load()

In [6]:
index = VectorstoreIndexCreator(
    vectorstore_cls=DocArrayInMemorySearch
).from_loaders([loader])

In [7]:
llm = ChatOpenAI(temperature = 0.0)
qa = RetrievalQA.from_chain_type(
    llm=llm, 
    chain_type="stuff", 
    retriever=index.vectorstore.as_retriever(), 
    verbose=True,
    chain_type_kwargs = {
        "document_separator": "<<<<>>>>>"
    }
)

# Coming up with test datapoints

In [9]:
data[30]

Document(page_content='country: USA\ncountry_long: United States of America\nname: 491 E 48th Street\ngppd_idnr: USA0007268\ncapacity_mw: 161.7\nlatitude: 42.7553\nlongitude: -86.0853\nprimary_fuel: Gas\nother_fuel1: Oil\nother_fuel2: \nother_fuel3: \ncommissioning_year: 1996.131107\nowner: City of Holland\nsource: U.S. Energy Information Administration\nurl: http://www.eia.gov/electricity/data/browser/\ngeolocation_source: U.S. Energy Information Administration\nwepp_id: 22235\nyear_of_capacity_data: 2019\ngeneration_gwh_2013: 14.308\ngeneration_gwh_2014: 14.985\ngeneration_gwh_2015: 0\ngeneration_gwh_2016: 14.034\ngeneration_gwh_2017: 0\ngeneration_gwh_2018: 50.703\ngeneration_gwh_2019: 21.103\ngeneration_data_source: U.S. Energy Information Administration\nestimated_generation_gwh_2013: \nestimated_generation_gwh_2014: \nestimated_generation_gwh_2015: \nestimated_generation_gwh_2016: \nestimated_generation_gwh_2017: 570.23\nestimated_generation_note_2013: NO-ESTIMATION\nestimated_ge

# Hard-coded examples

In [37]:
examples = [
    {
        "query": "How many solar plants in the USA?",
        "answer": "3283"
    },
    {
        "query": "What states are they located in?" ,
        "answer": "California, Texas, Nevada"
    }
]

# LLM-Generated examples

In [38]:
# Four boxes below generate Q&A pair to evaluate model
from langchain.evaluation.qa import QAGenerateChain


In [39]:
example_gen_chain = QAGenerateChain.from_llm(ChatOpenAI())

In [40]:
new_examples = example_gen_chain.apply_and_parse(
    [{"doc": t} for t in data[:5]]
)

[32;1m[1;3m[chain/start][0m [1m[1:chain:QAGenerateChain] Entering Chain run with input:
[0m[inputs]
[32;1m[1;3m[llm/start][0m [1m[1:chain:QAGenerateChain > 2:llm:ChatOpenAI] Entering LLM run with input:
[0m{
  "prompts": [
    "Human: You are a teacher coming up with questions to ask on a quiz. \nGiven the following document, please generate a question and answer based on that document.\n\nExample Format:\n<Begin Document>\n...\n<End Document>\nQUESTION: question here\nANSWER: answer here\n\nThese questions should be detailed and be based explicitly on information in the document. Begin!\n\n<Begin Document>\npage_content='country: USA\\ncountry_long: United States of America\\nname: 100 Brook Hill Drive Solar\\ngppd_idnr: USA0063292\\ncapacity_mw: 2\\nlatitude: 41.093\\nlongitude: -73.9828\\nprimary_fuel: Solar\\nother_fuel1: \\nother_fuel2: \\nother_fuel3: \\ncommissioning_year: 2016\\nowner: Diamond Properties\\nsource: U.S. Energy Information Administration\\nurl: http://w

[36;1m[1;3m[llm/end][0m [1m[1:chain:QAGenerateChain > 2:llm:ChatOpenAI] [14.32s] Exiting LLM run with output:
[0m{
  "generations": [
    [
      {
        "text": "QUESTION: What is the name of the solar power plant and who is the owner?\nANSWER: The name of the solar power plant is 100 Brook Hill Drive Solar and the owner is Diamond Properties.",
        "generation_info": null,
        "message": {
          "content": "QUESTION: What is the name of the solar power plant and who is the owner?\nANSWER: The name of the solar power plant is 100 Brook Hill Drive Solar and the owner is Diamond Properties.",
          "additional_kwargs": {},
          "example": false
        }
      }
    ],
    [
      {
        "text": "QUESTION: What is the primary fuel of the power plant with gppd_idnr USA0062660?\nANSWER: The primary fuel of the power plant with gppd_idnr USA0062660 is solar.",
        "generation_info": null,
        "message": {
          "content": "QUESTION: What is the pr

In [41]:
new_examples[0]

{'query': 'What is the name of the solar power plant and who is the owner?',
 'answer': 'The name of the solar power plant is 100 Brook Hill Drive Solar and the owner is Diamond Properties.'}

In [43]:
examples += new_examples

In [44]:
qa.run(examples[0]["query"])

[32;1m[1;3m[chain/start][0m [1m[1:chain:RetrievalQA] Entering Chain run with input:
[0m{
  "query": "How many solar plants in the USA?"
}
[32;1m[1;3m[chain/start][0m [1m[1:chain:RetrievalQA > 2:chain:StuffDocumentsChain] Entering Chain run with input:
[0m[inputs]
[32;1m[1;3m[chain/start][0m [1m[1:chain:RetrievalQA > 2:chain:StuffDocumentsChain > 3:chain:LLMChain] Entering Chain run with input:
[0m{
  "question": "How many solar plants in the USA?",
  "context": "country: USA\ncountry_long: United States of America\nname: Camp Solar Plant\ngppd_idnr: USA0058893\ncapacity_mw: 2.4\nlatitude: 32.9647\nlongitude: -84.5439\nprimary_fuel: Solar\nother_fuel1: \nother_fuel2: \nother_fuel3: \ncommissioning_year: 2014\nowner: Axium US Solar Holdings LLC\nsource: U.S. Energy Information Administration\nurl: http://www.eia.gov/electricity/data/browser/\ngeolocation_source: U.S. Energy Information Administration\nwepp_id: 67792\nyear_of_capacity_data: 2019\ngeneration_gwh_2013: 0.179\

[36;1m[1;3m[llm/end][0m [1m[1:chain:RetrievalQA > 2:chain:StuffDocumentsChain > 3:chain:LLMChain > 4:llm:ChatOpenAI] [1.92s] Exiting LLM run with output:
[0m{
  "generations": [
    [
      {
        "text": "I'm sorry, I don't have that information. The context provided only includes information about four specific solar plants in the USA.",
        "generation_info": null,
        "message": {
          "content": "I'm sorry, I don't have that information. The context provided only includes information about four specific solar plants in the USA.",
          "additional_kwargs": {},
          "example": false
        }
      }
    ]
  ],
  "llm_output": {
    "token_usage": {
      "prompt_tokens": 1320,
      "completion_tokens": 26,
      "total_tokens": 1346
    },
    "model_name": "gpt-3.5-turbo"
  },
  "run": null
}
[36;1m[1;3m[chain/end][0m [1m[1:chain:RetrievalQA > 2:chain:StuffDocumentsChain > 3:chain:LLMChain] [1.92s] Exiting Chain run with output:
[0m{
  "text": 

"I'm sorry, I don't have that information. The context provided only includes information about four specific solar plants in the USA."

# Manual Evaluation

In [45]:
import langchain
langchain.debug = True

In [46]:
qa.run(examples[0]["query"])

[32;1m[1;3m[chain/start][0m [1m[1:chain:RetrievalQA] Entering Chain run with input:
[0m{
  "query": "How many solar plants in the USA?"
}
[32;1m[1;3m[chain/start][0m [1m[1:chain:RetrievalQA > 2:chain:StuffDocumentsChain] Entering Chain run with input:
[0m[inputs]
[32;1m[1;3m[chain/start][0m [1m[1:chain:RetrievalQA > 2:chain:StuffDocumentsChain > 3:chain:LLMChain] Entering Chain run with input:
[0m{
  "question": "How many solar plants in the USA?",
  "context": "country: USA\ncountry_long: United States of America\nname: Camp Solar Plant\ngppd_idnr: USA0058893\ncapacity_mw: 2.4\nlatitude: 32.9647\nlongitude: -84.5439\nprimary_fuel: Solar\nother_fuel1: \nother_fuel2: \nother_fuel3: \ncommissioning_year: 2014\nowner: Axium US Solar Holdings LLC\nsource: U.S. Energy Information Administration\nurl: http://www.eia.gov/electricity/data/browser/\ngeolocation_source: U.S. Energy Information Administration\nwepp_id: 67792\nyear_of_capacity_data: 2019\ngeneration_gwh_2013: 0.179\

[36;1m[1;3m[llm/end][0m [1m[1:chain:RetrievalQA > 2:chain:StuffDocumentsChain > 3:chain:LLMChain > 4:llm:ChatOpenAI] [1.63s] Exiting LLM run with output:
[0m{
  "generations": [
    [
      {
        "text": "I'm sorry, I don't have that information. The context provided only includes information about four specific solar plants in the USA.",
        "generation_info": null,
        "message": {
          "content": "I'm sorry, I don't have that information. The context provided only includes information about four specific solar plants in the USA.",
          "additional_kwargs": {},
          "example": false
        }
      }
    ]
  ],
  "llm_output": {
    "token_usage": {
      "prompt_tokens": 1320,
      "completion_tokens": 26,
      "total_tokens": 1346
    },
    "model_name": "gpt-3.5-turbo"
  },
  "run": null
}
[36;1m[1;3m[chain/end][0m [1m[1:chain:RetrievalQA > 2:chain:StuffDocumentsChain > 3:chain:LLMChain] [1.63s] Exiting Chain run with output:
[0m{
  "text": 

"I'm sorry, I don't have that information. The context provided only includes information about four specific solar plants in the USA."

In [47]:
langchain.debug = False

In [48]:
predictions = qa.apply(examples)



[1m> Entering new RetrievalQA chain...[0m

[1m> Finished chain.[0m


[1m> Entering new RetrievalQA chain...[0m

[1m> Finished chain.[0m


[1m> Entering new RetrievalQA chain...[0m

[1m> Finished chain.[0m


[1m> Entering new RetrievalQA chain...[0m

[1m> Finished chain.[0m


[1m> Entering new RetrievalQA chain...[0m

[1m> Finished chain.[0m


[1m> Entering new RetrievalQA chain...[0m

[1m> Finished chain.[0m


[1m> Entering new RetrievalQA chain...[0m

[1m> Finished chain.[0m


[1m> Entering new RetrievalQA chain...[0m

[1m> Finished chain.[0m


[1m> Entering new RetrievalQA chain...[0m

[1m> Finished chain.[0m


[1m> Entering new RetrievalQA chain...[0m

[1m> Finished chain.[0m


[1m> Entering new RetrievalQA chain...[0m

[1m> Finished chain.[0m


[1m> Entering new RetrievalQA chain...[0m

[1m> Finished chain.[0m


In [49]:

from langchain.evaluation.qa import QAEvalChain

In [50]:
# Creat above chain with language model
llm = ChatOpenAI(temperature=0)
eval_chain = QAEvalChain.from_llm(llm)

In [51]:
graded_outputs = eval_chain.evaluate(examples, predictions)

In [52]:
# All below are output by the language mdoel
for i, eg in enumerate(examples):
    print(f"Example {i}:")
    print("Question: " + predictions[i]['query'])
    print("Real Answer: " + predictions[i]['answer'])
    print("Predicted Answer: " + predictions[i]['result'])
    print("Predicted Grade: " + graded_outputs[i]['text'])
    print()

Example 0:
Question: How many solar plants in the USA?
Real Answer: 3283
Predicted Answer: I'm sorry, I don't have that information. The context provided only includes information about four specific solar plants in the USA.
Predicted Grade: INCORRECT

Example 1:
Question: What states are they located in?
Real Answer: California, Texas, Nevada
Predicted Answer: I'm sorry, I don't have enough information to answer your question. Could you please specify which power plant you are referring to?
Predicted Grade: CORRECT

Example 2:
Question: What is the name of the solar power plant and who is the owner?
Real Answer: The name of the solar power plant is 100 Brook Hill Drive Solar and the owner is Diamond Properties.
Predicted Answer: For the Royal Wine Corp Solar Power Plant, the name of the plant is Royal Wine Corp Solar Power Plant and the owner is Royal Wine Corp.

For the Sun City Project LLC, the name of the plant is Sun City Project LLC and the owner is Avenal Solar Holdings LLC.

Fo

In [None]:
# What to do if predicted answers incorrect?
# Try to access the UI that tracks what is going on 
#  under the hood (from langchain plus)
# --generate flywheel of datapoints to learn from
