# Metrics

### LLM-based metrics

All LLM-based metrics in ragas inherit from the `MetricWithLLM` class. These metrics expect an LLM object to be set before scoring.

In [2]:
import os
from dotenv import load_dotenv

# Load variables from a local .env file (if one exists) into the process
# environment so the API key does not have to be hardcoded in the notebook.
load_dotenv()

# Fail early with an actionable message. Assigning None to os.environ raises
# an opaque "TypeError: str expected, not NoneType" when the key is missing.
groq_api_key = os.getenv("GROQ_API_KEY")
if groq_api_key is None:
    raise RuntimeError(
        "GROQ_API_KEY is not set. Export it in your shell or add it to a .env file."
    )
os.environ["GROQ_API_KEY"] = groq_api_key

In [3]:
from langchain_groq import ChatGroq

# Chat model used as the evaluator LLM by the LLM-based metrics in this notebook.
# NOTE(review): confirm that this model id is still served by the provider.
llm = ChatGroq(
    model="llama3-8b-8192",
)

In [3]:
from ragas.llms import LangchainLLMWrapper
from ragas.metrics import FactualCorrectness

# ragas metrics expect a ragas LLM object, so the LangChain chat model is
# wrapped first (the same wrapping the Context Precision cells use).
scorer = FactualCorrectness(llm=LangchainLLMWrapper(llm))

### Non-LLM-based metrics

These metrics rely on traditional methods to evaluate the performance of the AI application, such as string similarity, BLEU score, etc. Because they do not use an LLM as a judge, these metrics are known to have a lower correlation with human evaluation.

In [12]:
from ragas import SingleTurnSample, EvaluationDataset

# A single question/answer interaction: the user's question, the generated
# answer, the context passages that were retrieved, and the expected answer.
sample = SingleTurnSample(
    user_input="What is the capital of Germany?",
    response="The capital of Germany is Berlin.",
    retrieved_contexts=["Berlin is the capital and largest city of Germany."],
    reference="Berlin",
)

In [None]:
## SingleTurn Metrics
from ragas.metrics import FactualCorrectness

scorer = FactualCorrectness()
await scorer.single_turn_ascore(sample)

In [None]:
##MultiTurn Metrics
from ragas.metrics import AgentGoalAccuracyWithoutReference
from ragas import MultiTurnSample

scorer = AgentGoalAccuracyWithoutReference()
await scorer.multi_turn_ascore(sample)


# Context Precision

- It is a metric that measures the proportion of relevant chunks in the retrieved_contexts.
- It is calculated as the mean of the precision@k for each chunk in the context.
- Precision@k is the ratio of the number of relevant chunks among the top k retrieved chunks to k, the total number of chunks retrieved up to rank k.

## LLM Based Context Precision

In [6]:
# Context Precision without referen
from ragas import SingleTurnSample
from ragas.llms import LangchainLLMWrapper
from ragas.metrics import LLMContextPrecisionWithoutReference

evaluator_llm = LangchainLLMWrapper(llm)
context_precision = LLMContextPrecisionWithoutReference(llm=evaluator_llm)

sample = SingleTurnSample(
    user_input="Where is the Eiffel Tower located?",
    response="The Eiffel Tower is located in Paris.",
    retrieved_contexts=["The Eiffel Tower is located in Paris."], 
)

await context_precision.single_turn_ascore(sample)

0.9999999999

In [7]:
# Context Precision with reference
from ragas.metrics import LLMContextPrecisionWithReference

ref_context_precision = LLMContextPrecisionWithReference(llm= evaluator_llm)

sample_2 = SingleTurnSample(
    user_input="Where is the Eiffel Tower located?",
    reference="The Eiffel Tower is located in Paris.",
    retrieved_contexts=["The Eiffel Tower is located in Paris."],
)

await ref_context_precision.single_turn_ascore(sample_2)

0.9999999999

## Non LLM Based Context Precision

In [8]:
from ragas.metrics import NonLLMContextPrecisionWithReference

NonLLM_context_precision = NonLLMContextPrecisionWithReference()

sample_3 = SingleTurnSample(
    retrieved_contexts=["The Eiffel Tower is located in Paris."], 
    reference_contexts=["Paris is the capital of France.", "The Eiffel Tower is one of the most famous landmarks in Paris."]
)

await NonLLM_context_precision.single_turn_ascore(sample_3)

0.9999999999