In [1]:
import os

from dotenv import load_dotenv
load_dotenv()

import nest_asyncio
nest_asyncio.apply()

# Read the OpenAI key from the process environment and fail fast if it is
# absent. os.getenv returns None for an unset variable, and os.environ only
# accepts str values, so the assignment below would otherwise die with an
# opaque "str expected, not NoneType" TypeError.
OPENAI_API_KEY = os.getenv("OPENAI_API_KEY")
if not OPENAI_API_KEY:
    raise RuntimeError(
        "OPENAI_API_KEY is not set. Define it in your .env file or shell "
        "environment before running this notebook."
    )
os.environ["OPENAI_API_KEY"] = OPENAI_API_KEY

In [27]:
from llama_index.core import SimpleDirectoryReader

# Load the source material for the RAG index from the local data/Trump/ folder.
documents = SimpleDirectoryReader(input_dir="data/Trump/").load_data()

In [28]:
from llama_index.core import VectorStoreIndex

# Build a vector index over the loaded documents ...
index = VectorStoreIndex.from_documents(documents=documents)

# ... and expose it as the query engine used as the RAG application below.
rag_application = index.as_query_engine()

In [32]:
from deepeval.integrations.llama_index import DeepEvalFaithfulnessEvaluator

# Question sent to the RAG application for the faithfulness check
user_input = "when is assassination attempt on Trump?"

# Metric wrapper; created up front, it does not depend on the query result
evaluator = DeepEvalFaithfulnessEvaluator()

# The LlamaIndex response object carries both the answer string and the
# retrieved nodes, so it is handed to the evaluator unchanged
response_object = rag_application.query(user_input)

evaluation_result = evaluator.evaluate_response(query=user_input, response=response_object)
print(evaluation_result)

Output()

query='when is assassination attempt on Trump?' contexts=None response='The assassination attempt on Former President Donald Trump occurred on a Saturday night.' passing=True feedback='The score is 1.00 because there are no contradictions, indicating the actual output is perfectly aligned with the retrieval context. Great job!' score=1.0 pairwise_source=None invalid_result=False invalid_reason=None


In [33]:
# Display the answer text stored on the most recently computed evaluation_result.
evaluation_result.response

'The assassination attempt on Former President Donald Trump occurred on a Saturday night.'

In [34]:
# Display the metric's explanation for the score on the most recently computed evaluation_result.
evaluation_result.feedback

'The score is 1.00 because there are no contradictions, indicating the actual output is perfectly aligned with the retrieval context. Great job!'

In [35]:
from deepeval.integrations.llama_index import DeepEvalAnswerRelevancyEvaluator

# Question sent to the RAG application for the answer-relevancy check
user_input = "what is the date assassination attempt on Trump?"

# Metric wrapper; created up front, it does not depend on the query result
evaluator = DeepEvalAnswerRelevancyEvaluator()

# The LlamaIndex response object carries both the answer string and the
# retrieved nodes, so it is handed to the evaluator unchanged
response_object = rag_application.query(user_input)

evaluation_result = evaluator.evaluate_response(query=user_input, response=response_object)
print(evaluation_result)

Output()

query='what is the date assassination attempt on Trump?' contexts=None response='The assassination attempt on Donald Trump took place on a Saturday night.' passing=True feedback='The score is 1.00 because the response is completely relevant with no irrelevant statements. Great job on staying focused and on point!' score=1.0 pairwise_source=None invalid_result=False invalid_reason=None


In [36]:
# Display the metric's explanation for the score on the most recently computed evaluation_result.
evaluation_result.feedback

'The score is 1.00 because the response is completely relevant with no irrelevant statements. Great job on staying focused and on point!'

In [37]:
from deepeval.integrations.llama_index import DeepEvalContextualRelevancyEvaluator

# Question sent to the RAG application for the contextual-relevancy check
user_input = "what is the date assassination attempt on Trump?"

# Metric wrapper; created up front, it does not depend on the query result
evaluator = DeepEvalContextualRelevancyEvaluator()

# The LlamaIndex response object carries both the answer string and the
# retrieved nodes, so it is handed to the evaluator unchanged
response_object = rag_application.query(user_input)

evaluation_result = evaluator.evaluate_response(query=user_input, response=response_object)
print(evaluation_result)

Output()

query='what is the date assassination attempt on Trump?' contexts=None response='The assassination attempt on Donald Trump took place on a Saturday night.' passing=False feedback='The score is 0.00 because the context lacks the specific date information requested, despite detailing other aspects of the assassination attempt on Trump.' score=0.0 pairwise_source=None invalid_result=False invalid_reason=None


In [38]:
# Display the metric's explanation for the score on the most recently computed evaluation_result.
evaluation_result.feedback

'The score is 0.00 because the context lacks the specific date information requested, despite detailing other aspects of the assassination attempt on Trump.'