In [1]:
import os
from dotenv import load_dotenv
# Load environment variables from the .env file one directory above this notebook.
# NOTE(review): presumably this supplies the API key(s) the NVIDIA clients below need — confirm.
load_dotenv('../.env')

True

![image.png](../imgs/Screenshot%202024-07-30%20at%202.47.57 PM.png)

In [2]:
from langchain_community.document_loaders import TextLoader
from langchain.indexes import VectorstoreIndexCreator
from langchain.chains import RetrievalQA
from langchain_nvidia_ai_endpoints import ChatNVIDIA
from langchain_nvidia_ai_endpoints import NVIDIAEmbeddings
from langchain.vectorstores import FAISS
from langchain_community.document_loaders import WikipediaLoader


# --- Load source documents ---------------------------------------------------
# Fetch Wikipedia pages for the New York City query; load_max_docs caps the
# number of pages requested.
docs = WikipediaLoader(query="New_York_City", load_max_docs=2).load()

# --- Embed and index ---------------------------------------------------------
# NVIDIA AI Foundation embedding model.
# NOTE(review): truncate="END" presumably trims over-long inputs at the end
# instead of rejecting them, so only the beginning of each page would be
# embedded — confirm, and consider chunking the pages before indexing.
embedding_model = NVIDIAEmbeddings(model="NV-Embed-QA", truncate="END")

# In-memory FAISS index built directly from the loaded documents.
vector_store_from_document = FAISS.from_documents(docs, embedding=embedding_model)

# Retriever with default search settings over that index.
sem_retriever = vector_store_from_document.as_retriever()

# --- Question-answering chain ------------------------------------------------
llm = ChatNVIDIA(model="meta/llama-3.1-8b-instruct")
qa_chain = RetrievalQA.from_chain_type(
    llm,
    retriever=sem_retriever,
    # Keep the retrieved documents in the chain output; the evaluation cells
    # further down work with the `source_documents` entry.
    return_source_documents=True,
)



In [3]:
# Smoke test: ask the chain a single free-form question.
question = "How did New York City get its name?"

# Use .invoke() rather than calling the chain object directly: the direct call
# is deprecated and emits a deprecation warning in the cell output.
result = qa_chain.invoke({"query": question})

# Show only the generated answer text.
result["result"]

  warn_deprecated(


'New York City was named in honor of the Duke of York (later King James II of England), who was the eldest brother of King Charles II. In 1664, the English took control of the colony from the Dutch, and the city was renamed New York in 1664, along with the surrounding colony. This was in recognition of the Duke of York, who was granted the lands by his brother, King Charles II. The name "New York" has been associated with the city ever since.'

In [4]:
# Evaluation set: questions and reference answers are paired by position.
eval_questions = [
    "What is the population of New York City as of 2020?",
]

eval_answers = [
    "8,804,190",
]

# Fail fast if the two lists drift apart when more cases are added; otherwise
# extra answers would be silently ignored.
assert len(eval_questions) == len(eval_answers), (
    "eval_questions and eval_answers must have the same length"
)

# One record per question. The reference answer is wrapped in a list.
# NOTE(review): ragas releases that expose `ragas.langchain` appear to read the
# plural key `ground_truths` — confirm the key name against the installed version.
examples = [
    {"query": query_text, "ground_truth": [reference]}
    for query_text, reference in zip(eval_questions, eval_answers)
]

In [5]:
# Ask the first evaluation question on its own (query text only).
# .invoke() replaces the deprecated direct call on the chain object.
result = qa_chain.invoke({"query": eval_questions[0]})
result["result"]

'According to the United States Census Bureau, the population of New York City as of 2020 was approximately 8,804,190 people. This makes New York City the most populous city in the United States.'

In [6]:
# Run the full example record (query plus reference answer) through the chain.
# .invoke() replaces the deprecated direct call on the chain object.
# NOTE(review): presumably the extra reference-answer key is passed through into
# `result`, which is what lets the evaluator cells below see it — confirm.
result = qa_chain.invoke(examples[0])
result["result"]

In [None]:
# NOTE: disabled earlier draft, kept for reference only. It imports the evaluator
# from `ragas.langchain` and uses `context_relevancy`, whereas the live cell below
# imports from `ragas.langchain.evalchain` and uses `context_precision`.
# from ragas.metrics import faithfulness, answer_relevancy, context_relevancy, context_recall
# from ragas.langchain import RagasEvaluatorChain

# # make eval chains
# eval_chains = {
#     m.name: RagasEvaluatorChain(metric=m) 
#     for m in [faithfulness, answer_relevancy, context_relevancy, context_recall]
# }

  from .autonotebook import tqdm as notebook_tqdm


In [None]:
from ragas.langchain.evalchain import RagasEvaluatorChain
from ragas.metrics import (
    faithfulness,
    answer_relevancy,
    context_precision,
    context_recall,
)

# Create one evaluator chain per metric that the cells below call.
faithfulness_chain = RagasEvaluatorChain(metric=faithfulness)

# context_recall_chain is used by later cells, so it has to be created here;
# with this line commented out those cells fail with NameError on a fresh kernel.
context_recall_chain = RagasEvaluatorChain(metric=context_recall)

# Not built because none of the visible cells call them; uncomment to enable.
# answer_rel_chain = RagasEvaluatorChain(metric=answer_relevancy)
# context_rel_chain = RagasEvaluatorChain(metric=context_precision)

  from .autonotebook import tqdm as notebook_tqdm


In [None]:
# Recheck the result that we are going to validate. `result` holds whatever the
# most recently executed qa_chain cell assigned; when the notebook is run top to
# bottom that is the answer for `examples[0]`.
result

In [None]:
# Score how well the generated answer is supported by the retrieved documents.
# .invoke() is used instead of calling the chain object directly, which is the
# deprecated calling style for LangChain chains.
eval_result = faithfulness_chain.invoke(result)
eval_result["faithfulness_score"]

A high `faithfulness_score` means that the answer is highly consistent with the source documents.

In [None]:
# Negative control for faithfulness: replace the answer with text unrelated to
# the retrieved documents and score it again.
fake_result = result.copy()  # shallow copy suffices: only a top-level key is replaced
fake_result["result"] = "we are the champions"
fake_faithfulness = faithfulness_chain.invoke(fake_result)

# Context recall for the real, unmodified result.
eval_result = context_recall_chain.invoke(result)

# Only the last expression of a cell is rendered, so report both scores
# together instead of losing the first one.
{
    "faithfulness_score (fake answer)": fake_faithfulness["faithfulness_score"],
    "context_recall_score (real result)": eval_result["context_recall_score"],
}

In [None]:
from langchain.schema import Document

# Negative control for context recall: replace the retrieved documents with a
# single document unrelated to the question and score it again.
fake_result = result.copy()  # shallow copy suffices: only a top-level key is replaced
fake_result["source_documents"] = [Document(page_content="I love christmas")]

# .invoke() is used instead of the deprecated direct call on the chain object.
eval_result = context_recall_chain.invoke(fake_result)
eval_result["context_recall_score"]

In [None]:
# run the queries as a batch for efficiency
# (every record in `examples` goes through the QA chain in one call)
predictions = qa_chain.batch(examples)

# evaluate faithfulness for each example/prediction pair
# NOTE(review): presumably returns one score entry per example — confirm.
print("evaluating...")
r = faithfulness_chain.evaluate(examples, predictions)
r

In [None]:
# evaluate context recall
# Reuses `examples` and the `predictions` produced by the batch cell above;
# note that `r` is rebound here, replacing the faithfulness results.
print("evaluating...")
r = context_recall_chain.evaluate(examples, predictions)
r