# 4.5.1 召回环节评估

In [None]:
from llama_index.evaluation import generate_question_context_pairs
from llama_index import VectorStoreIndex, SimpleDirectoryReader, ServiceContext,OpenAIEmbedding
from llama_index.node_parser import SentenceSplitter
from llama_index.llms import OpenAI
import os
from llama_index.evaluation import RetrieverEvaluator
import pandas as pd 

os.environ["OPENAI_API_BASE"] = "xxx"
os.environ["OPENAI_API_KEY"] = "xxx"
llm = OpenAI(model="gpt-35-turbo")
# 文本加载与切块，data文件夹中存放txt文件
documents = SimpleDirectoryReader("./data/").load_data()
node_parser = SentenceSplitter(chunk_size=512)
nodes = node_parser.get_nodes_from_documents(documents)
# 设置文本块id
for idx, node in enumerate(nodes):
    node.id_ = f"node_{idx}"
# 构建向量数据库
embed_model = OpenAIEmbedding()
service_context = ServiceContext.from_defaults(embed_model=embed_model)
vector_index = VectorStoreIndex(nodes, service_context=service_context)
retriever = vector_index.as_retriever(similarity_top_k=2)
# 根据原始文本块生成问题
qa_dataset = generate_question_context_pairs(
    nodes, llm=llm, num_questions_per_chunk=2
)
# 定义评估器
retriever_evaluator = RetrieverEvaluator.from_metric_names(
    ["mrr", "hit_rate"], retriever=retriever
)
# 在所有生成的问题上评估召回效果
eval_results = await retriever_evaluator.aevaluate_dataset(qa_dataset)
result = []
for eval_result in eval_results:
    metric_dict = eval_result.metric_vals_dict
    result.append(metric_dict)
result_df = pd.DataFrame(result)
hit_rate = result_df["hit_rate"].mean()
mrr = result_df["mrr"].mean()
print("hit_rate:", hit_rate)
print("mrr:", mrr)

# 4.5.2 模型回答评估

In [None]:
from llama_index.evaluation import (
    DatasetGenerator,
    QueryResponseDataset,
)
from llama_index import ServiceContext
from llama_index.node_parser import SentenceSplitter
from llama_index.llms import OpenAI
import os
from llama_index import SimpleDirectoryReader
os.environ["OPENAI_API_BASE"] = "xxx"
os.environ["OPENAI_API_KEY"] = "xxx"

eval_service_context = ServiceContext.from_defaults(
    llm=OpenAI(model="gpt-35-turbo")
)
# 文本加载与切块，data文件夹中存放txt文件
documents = SimpleDirectoryReader("./data/").load_data()
node_parser = SentenceSplitter(chunk_size=512)
nodes = node_parser.get_nodes_from_documents(documents)
# 定义问答数据生成器
dataset_generator = DatasetGenerator(
    nodes[:5],
    service_context=eval_service_context,
    show_progress=True,
    num_questions_per_chunk=1,
)
# 生成问答对
eval_dataset = await dataset_generator.agenerate_dataset_from_nodes(num=6)
# 保存问答对
eval_dataset.save_json("data/qa_dataset.json")
# 加载问答对
eval_dataset = QueryResponseDataset.from_json("data/qa_dataset.json")

In [None]:
from llama_index import VectorStoreIndex, SimpleDirectoryReader, ServiceContext
import pandas as pd
import os
from ragas.metrics import faithfulness,answer_relevancy,context_relevancy
from ragas.llama_index import evaluate
import nest_asyncio
# Answer-stage evaluation, step 2: score a query engine with ragas on a small
# hand-written set of questions and reference answers.
nest_asyncio.apply()
# NOTE: the "xxx" values are placeholders for your own API base URL and key.
os.environ["OPENAI_API_BASE"] = "xxx"
os.environ["OPENAI_API_KEY"] = "xxx"

# Questions to ask the query engine.
eval_questions = [
    "What is the population of New York City as of 2020?",
    "Which borough of New York City has the highest population?",
    "What is the economic significance of New York City?",
    "How did New York City get its name?",
    "What is the significance of the Statue of Liberty in New York City?",
]

# Reference answers, positionally aligned with eval_questions.
# The first two are deliberately wrong.
eval_answers = [
    "8,804,000",  # incorrect answer
    "Queens",  # incorrect answer
    "New York City's economic significance is vast, as it serves as the global financial capital, housing Wall Street and major financial institutions. Its diverse economy spans technology, media, healthcare, education, and more, making it resilient to economic fluctuations. NYC is a hub for international business, attracting global companies, and boasts a large, skilled labor force. Its real estate market, tourism, cultural industries, and educational institutions further fuel its economic prowess. The city's transportation network and global influence amplify its impact on the world stage, solidifying its status as a vital economic player and cultural epicenter.",
    "New York City got its name when it came under British control in 1664. King Charles II of England granted the lands to his brother, the Duke of York, who named the city New York in his own honor.",
    "The Statue of Liberty in New York City holds great significance as a symbol of the United States and its ideals of liberty and peace. It greeted millions of immigrants who arrived in the U.S. by ship in the late 19th and early 20th centuries, representing hope and freedom for those seeking a better life. It has since become an iconic landmark and a global symbol of cultural diversity and freedom.",
]
# Wrap each reference answer in its own single-element list.
eval_answers = [[answer] for answer in eval_answers]

# Index the NYC Wikipedia documents (chunk size 512) and expose a query engine.
documents = SimpleDirectoryReader("./data/nyc_wikipedia/").load_data()
vector_index = VectorStoreIndex.from_documents(
    documents,
    service_context=ServiceContext.from_defaults(chunk_size=512),
)
query_engine = vector_index.as_query_engine()

# Metrics to compute, then run the evaluation and show the scores.
metrics = [
    faithfulness,
    answer_relevancy,
    context_relevancy,
]
result = evaluate(query_engine, metrics, eval_questions, eval_answers)
print(result)