# LangChain: Evaluation
**목차**
- 1. Q&A App 생성
  - 1-1. 하드코딩 정답 생성
- 2. 수동 평가
- 3. LLM-assisted 평가


## Outline:

* Example generation
* Manual evaluation (and debugging)
* LLM-assisted evaluation

### 1. Q&A App 생성

In [None]:
import os

from dotenv import load_dotenv, find_dotenv
_ = load_dotenv(find_dotenv()) # read local .env file

from langchain.chains import RetrievalQA
from langchain.chat_models import ChatOpenAI
from langchain.document_loaders import CSVLoader
from langchain.indexes import VectorstoreIndexCreator
from langchain.vectorstores import DocArrayInMemorySearch

# LLM이 생성한 QnA 예시
from langchain.evaluation.qa import QAGenerateChain

# LLM-assisted 평가
from langchain.evaluation.qa import QAEvalChain

In [None]:
# Load the catalog CSV into a list of documents.
file = 'OutdoorClothingCatalog_1000.csv'
loader = CSVLoader(file_path=file, encoding='utf-8')
data = loader.load()

# Build the index on top of an in-memory vector store.
# NOTE (original author): this uses `text-davinci-003`, which was deprecated in 24.01.
index_creator = VectorstoreIndexCreator(vectorstore_cls=DocArrayInMemorySearch)
index = index_creator.from_loaders([loader])

llm = ChatOpenAI(temperature=0.0)

# Retrieval QA chain of type "stuff"; chain_type_kwargs passes a custom
# document separator through to the underlying chain.
retriever = index.vectorstore.as_retriever()
stuff_kwargs = {"document_separator": "<<<<>>>>>"}
qa = RetrievalQA.from_chain_type(
    llm=llm,
    chain_type="stuff",
    retriever=retriever,
    verbose=True,
    chain_type_kwargs=stuff_kwargs,
)

In [None]:
# Print two of the loaded documents (positions 10 and 11) for a quick look.
for sample_idx in (10, 11):
    print(data[sample_idx])

### 1-1. 하드코딩 정답 생성

In [None]:
# Hand-written question/answer pairs used as ground truth.
# Adjacent string literals are used instead of a backslash continuation inside
# the literal: the continuation embedded the next line's indentation (a run of
# spaces) in the middle of each query.
examples = [
    {
        "query": (
            "Do the Cozy Comfort Pullover Set "
            "have side pockets?"
        ),
        "answer": "Yes",
    },
    {
        "query": (
            "What collection is the Ultra-Lofty "
            "850 Stretch Down Hooded Jacket from?"
        ),
        "answer": "The DownTek collection",
    },
]

# Ask an LLM to generate additional question/answer pairs from the first
# five loaded documents.
example_gen_chain = QAGenerateChain.from_llm(ChatOpenAI())

new_examples = example_gen_chain.apply_and_parse(
    [{"doc": t} for t in data[:5]]
)

print(new_examples[0])  # one LLM-generated question/answer example

# Depending on the installed LangChain version, each parsed item is either
# {"query": ..., "answer": ...} or that dict nested under a "qa_pairs" key.
# Unwrap the nested form so every example exposes "query"/"answer" at the top
# level, which is what the indexing below (and the later evaluation) expects.
examples += [item.get("qa_pairs", item) for item in new_examples]

qa.run(examples[0]["query"])

### 2. 수동 평가

In [None]:
import langchain

# Turn on LangChain's debug flag for the single run below.
langchain.debug = True
try:
    qa.run(examples[0]["query"])
finally:
    # Always switch debug mode back off, even if the run raises, so the flag
    # does not stay enabled for every later cell in the session.
    langchain.debug = False

### 3. LLM-assisted 평가

In [None]:
# Run the QA chain over every example; each prediction is indexed below by
# "query", "answer" and "result".
predictions = qa.apply(examples)

# Grade the predictions against the reference answers with an LLM.
llm = ChatOpenAI(temperature=0)
eval_chain = QAEvalChain.from_llm(llm)

graded_outputs = eval_chain.evaluate(examples, predictions)

# Walk predictions and grades in lockstep instead of indexing by position.
for i, (prediction, graded) in enumerate(zip(predictions, graded_outputs)):
    # The grade is stored under "results" in newer LangChain releases and
    # under "text" in older ones; accept either.
    grade = graded["results"] if "results" in graded else graded["text"]
    print(f"Example {i}:")
    print(f"Question: {prediction['query']}")
    print(f"Real Answer: {prediction['answer']}")
    print(f"Predicted Answer: {prediction['result']}")
    print(f"Predicted Grade: {grade}")
    print()

print(graded_outputs[0])

## LangChain evaluation platform
The LangChain evaluation platform, LangChain Plus, can be accessed at <https://www.langchain.plus/>.  
Use the invite code `lang_learners_2023`