# Faithfulness Evaluator

In [1]:
import os

from dotenv import load_dotenv

load_dotenv()

OPENAI_API_KEY = os.getenv("OPENAI_API_KEY")



In [2]:
os.environ["OPENAI_API_KEY"] = OPENAI_API_KEY

In [3]:
# attach to the same event-loop
import nest_asyncio

nest_asyncio.apply()

In [4]:
from llama_index.core import (
    VectorStoreIndex,
    SimpleDirectoryReader,
    Response,
)
from llama_index.llms.openai import OpenAI
from llama_index.core.evaluation import FaithfulnessEvaluator
from llama_index.core.node_parser import SentenceSplitter
import pandas as pd

pd.set_option("display.max_colwidth", 0)

In [5]:
# gpt-4
gpt4 = OpenAI(temperature=0, model="gpt-4")

evaluator_gpt4 = FaithfulnessEvaluator(llm=gpt4)

The data is extracted from the [New York City](https://en.wikipedia.org/wiki/New_York_City) wikipedia page

In [6]:
documents = SimpleDirectoryReader("data/").load_data()

print(documents)

[Document(id_='c90d49be-5e57-4888-b069-0c9f0cc5a412', embedding=None, metadata={'page_label': '1', 'file_name': 'NY.pdf', 'file_path': '/Users/chris/Desktop/7980/CS7980_Capstone_RBCMuseum/eval/data/NY.pdf', 'file_type': 'application/pdf', 'file_size': 7353589, 'creation_date': '2024-10-08', 'last_modified_date': '2024-10-08'}, excluded_embed_metadata_keys=['file_name', 'file_type', 'file_size', 'creation_date', 'last_modified_date', 'last_accessed_date'], excluded_llm_metadata_keys=['file_name', 'file_type', 'file_size', 'creation_date', 'last_modified_date', 'last_accessed_date'], relationships={}, text="New York\nCity\nMidtown Manhattan with the Empire State\nBuilding (center) and Lower Manhattan with\nOne WTC (background)\nUN\nheadquarters\nStatue of\nLiberty\nTimes Square\nUnisphere\n Central Park\n Brooklyn\nBridge\nVerrazzano-Narrows\nBridge\nBronx Zoo\nFlag\nSeal\nWordmark\nNew York City\nNew York, often called New York City[b] or NYC , is the\nmost populous city in the United S

In [7]:
# create vector index
splitter = SentenceSplitter(chunk_size=512)
vector_index = VectorStoreIndex.from_documents(
    documents, transformations=[splitter]
)

In [8]:
from llama_index.core.evaluation import EvaluationResult


# define jupyter display function
def display_eval_df(response: Response, eval_result: EvaluationResult) -> None:
    if response.source_nodes == []:
        print("no response!")
        return
    eval_df = pd.DataFrame(
        {
            "Response": str(response),
            "Source": response.source_nodes[0].node.text[:1000] + "...",
            "Evaluation Result": "Pass" if eval_result.passing else "Fail",
            "Reasoning": eval_result.feedback,
        },
        index=[0],
    )
    eval_df = eval_df.style.set_properties(
        **{
            "inline-size": "600px",
            "overflow-wrap": "break-word",
        },
        subset=["Response", "Source"]
    )
    display(eval_df)

In [9]:
query_engine = vector_index.as_query_engine()
response_vector = query_engine.query("How did New York City get its name?")
eval_result = evaluator_gpt4.evaluate_response(response=response_vector)

In [10]:
display_eval_df(response_vector, eval_result)

Unnamed: 0,Response,Source,Evaluation Result,Reasoning
0,"New York City got its name from the English Duke of York, who later became King James II of England.","University of Oklahoma Press . p. 28. ISBN 978-0-8061-8965-9 . 39. Rankin, Rebecca B.; Rodgers, Cleveland (1948). New York: The W orld's Capital City , Its Development and Contributions to Progress (https://archive.org/details/in.ernet.dli.2015.226262) . Harper . 40. WPA Writer's Project (2004). A Maritime History of New York (https://books.google.com/books?id=o08K8jlMI -IC). Going Coastal Productions. p. 246. ISBN 0-9729803-1-8 . 41. Lankevich, George J. (2002). New York City: A Short History (https://archive.org/details/newyorkcity00geor) . NYU Press . p. 2 (https://archive.org/details/newyorkcity00geor/page/2) . ISBN 978-0-8147-5186-2 . 42. ""The Hudson River"" (http://www .newnetherlandinstitute.org/history-and-heritage/digital-exhibitions/a-tour-of- new-netherland/hudson-river/) . New Netherland Institute . Retrieved July 10, 2016 . 43. Roberts, Sam (October 2, 2012). ""Honoring a V ery Early New Yorker"" (http://cityroom.blogs.nytimes.com/20 12/10/02/honoring-a-very-early-new...",Fail,NO


### Benchmark on Generated Question

- It can automatically generate questions from the documents

In [11]:
from llama_index.core.evaluation import DatasetGenerator

question_generator = DatasetGenerator.from_documents(documents)
eval_questions = question_generator.generate_questions_from_nodes(5)

eval_questions

  return cls(
  return QueryResponseDataset(queries=queries, responses=responses_dict)


['What is the population of New York City as of 2023?',
 'How many square miles does New York City cover?',
 'What are the five boroughs that make up New York City?',
 'What is New York City known for being a global center of?',
 'How many languages are spoken in New York City?']

In [38]:
import asyncio


def evaluate_query_engine(query_engine, questions):
    c = [query_engine.aquery(q) for q in questions]
    results = asyncio.run(asyncio.gather(*c))
    print("finished query")

    total_correct = 0
    for r in results:
        # evaluate with gpt 4
        eval_result = (
            1 if evaluator_gpt4.evaluate_response(response=r).passing else 0
        )
        total_correct += eval_result

    return total_correct, len(results)

In [39]:
vector_query_engine = vector_index.as_query_engine()
correct, total = evaluate_query_engine(vector_query_engine, eval_questions[:5])

print(f"score: {correct}/{total}")

finished query
score: 5/5
