# 包导入

In [1]:
from pydantic import BaseModel,Field
from typing import Optional,List
from langchain_core.output_parsers import PydanticOutputParser
from langchain_core.prompts import ChatPromptTemplate,MessagesPlaceholder
from langgraph.checkpoint.memory import MemorySaver
from langgraph.graph import StateGraph, START ,END
from langgraph.graph.message import add_messages
from typing import Annotated
from typing_extensions import TypedDict

# LLM定义

In [13]:
from langchain_ollama import ChatOllama
from langchain_community.llms import VLLMOpenAI
import os
# Ollama endpoint, set before the models below are constructed.
# NOTE(review): assumed to be picked up by the Ollama client behind ChatOllama — confirm;
# passing `base_url=` to ChatOllama is the explicit alternative.
os.environ["OLLAMA_HOST"]="http://localhost:11450"

# Text model: used for answer generation (`qa_chain`) and as the ragas judge.
llm = ChatOllama(
    model="qwen2.5",
    temperature=0,
    # `max_token` is not a documented ChatOllama field, so the original setting
    # was not applied; `num_predict` is the documented cap on generated tokens.
    num_predict=300000,
    # other params...
)

# Second model handle (a vision model tag, judging by its name); no cell in the
# visible part of this notebook uses it.
llm_multimodal = ChatOllama(
    model="llama3.2-vision:90b",
    temperature=0,
    # other params...
)

# 文档读取

In [11]:
# ArxivLoader is imported from langchain_community (already a dependency of this
# notebook); `langchain.document_loaders` is a deprecated alias for the same loader.
from langchain_community.document_loaders import ArxivLoader

# Query arXiv for "1706.03762" and load at most one document.
# Later cells index the result as a sequence (`doc[0]`) and pass it to the text splitter.
doc = ArxivLoader(query="1706.03762", load_max_docs=1).load()

In [7]:
# Raw text of the first loaded document (`doc` is produced by the ArxivLoader cell).
doc_content = doc[0].page_content

# 读取CSV数据

In [4]:
import pandas as pd
# Evaluation set loaded from CSV; later cells read its "question" and "answer" columns.
qa_data = pd.read_csv("../test_dataset_generation/qa_data.csv")

# 向量数据库构建

In [14]:
# Chroma is imported from langchain_community (already a dependency of this
# notebook); `langchain.vectorstores` is a deprecated alias for the same class.
from langchain_community.vectorstores import Chroma
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain_ollama import OllamaEmbeddings

# Embedding model used to index the chunks.
embeddings = OllamaEmbeddings(model="nomic-embed-text:latest")

# Split the loaded paper into chunks (chunk_size=500, splitter defaults otherwise).
text_splitter = RecursiveCharacterTextSplitter(chunk_size=500)
docs = text_splitter.split_documents(doc)

# Embed every chunk and index it in a Chroma vector store.
vectorstore = Chroma.from_documents(docs, embeddings)

  relevant_docs = base_retriever.get_relevant_documents("What is Retrieval Augmented Generation?")


In [33]:
# Retriever over the Chroma index, configured to return 2 chunks per query.
retriever = vectorstore.as_retriever(search_kwargs={"k" : 2})

# Smoke-test query. `invoke` replaces the deprecated `get_relevant_documents`
# and matches how the retriever is called in the evaluation loop.
relevant_docs = retriever.invoke("What is the transformer?")

In [59]:
# PromptTemplate stays importable from this cell, but is taken from langchain_core
# (already used by this notebook) instead of the deprecated top-level `langchain` export.
from langchain_core.prompts import ChatPromptTemplate, PromptTemplate
from langchain_core.output_parsers import StrOutputParser

# RAG prompt; `{query}` and `{context}` are supplied by whoever invokes `qa_chain`.
template = """You are an assistant for question-answering tasks. 
Use the following pieces of retrieved context to answer the question. 
If you don't know the answer, just say that you don't know. 

Question: {query} 

Context: {context} 

Answer:
"""

# Pipeline: fill the prompt -> call the chat model -> parse the reply to a plain string.
prompt = ChatPromptTemplate.from_template(template)
qa_chain = prompt | llm | StrOutputParser()

# 评估数据构建

In [36]:
# Parallel Python lists: the evaluation questions and their reference answers,
# taken from the "question" and "answer" columns of the CSV frame.
questions = qa_data.loc[:, "question"].tolist()
ground_truths = qa_data.loc[:, "answer"].tolist()

In [62]:
from ragas import EvaluationDataset
# Evaluation records, one dict per question, appended by the loop later in this cell.
dataset = []

def format_docs(relevant_docs):
    """Join the ``page_content`` of each retrieved document into one string.

    Args:
        relevant_docs: Iterable of objects exposing a ``page_content`` attribute.

    Returns:
        The page contents in input order, separated by newline characters
        (an empty string when there are no documents).
    """
    page_texts = [retrieved.page_content for retrieved in relevant_docs]
    return "\n".join(page_texts)

# Run the RAG pipeline once per (question, reference answer) pair and store the
# four fields that are handed to EvaluationDataset.from_list afterwards.
for query, reference in zip(questions, ground_truths):
    # Retrieve supporting chunks, then generate an answer grounded in them.
    relevant_docs = retriever.invoke(query)
    response = qa_chain.invoke(
        dict(context=format_docs(relevant_docs), query=query)
    )
    # Every field is coerced to str before it is recorded.
    dataset.append(
        dict(
            user_input=str(query),
            retrieved_contexts=[str(rdoc.page_content) for rdoc in relevant_docs],
            response=str(response),
            reference=str(reference),
        )
    )

# Convert the list of record dicts into the dataset object consumed by ragas.
evaluation_dataset = EvaluationDataset.from_list(dataset)

# 开始测试

In [64]:
from ragas import evaluate
from ragas.llms import LangchainLLMWrapper
from ragas.metrics import LLMContextRecall, Faithfulness, FactualCorrectness

# Wrap the LangChain chat model so it can be passed to ragas as the evaluator LLM.
evaluator_llm = LangchainLLMWrapper(llm)

# Score the dataset with three metrics: context recall, faithfulness, factual correctness.
# NOTE(review): the saved run log shows an OutputParserException raised in one job,
# so the reported scores may not cover every sample — confirm before quoting them.
result = evaluate(
    dataset=evaluation_dataset,
    llm=evaluator_llm,
    metrics=[LLMContextRecall(), Faithfulness(), FactualCorrectness()],
)

# Bare expression so the notebook displays the evaluation result.
result

Evaluating:  24%|██████████████████████████████████████                                                                                                                       | 67/276 [02:35<07:46,  2.23s/it]Exception raised in Job[59]: OutputParserException(Invalid json output: {"claims": ["Dot-product attention is faster than additive attention.", "Dot-product attention is more space-efficient than additive attention.", "Dot-product attention can be implemented using highly optimized matrix multiplication code.", "The implementation advantage of dot-product attention leads to significant performance benefits.", "Significant performance benefits are especially noticeable as the dimensionality \(d_k\) increases."]}
For troubleshooting, visit: https://python.langchain.com/docs/troubleshooting/errors/OUTPUT_PARSING_FAILURE )
Evaluating:  33%|███████████████████████████████████████████████████▊                                                                                                 

{'context_recall': 0.8022, 'faithfulness': 0.8733, 'factual_correctness(mode=f1)': 0.4983}