In [3]:
from dotenv import load_dotenv

# Load settings from a .env file before anything reads the environment.
# Presumably this is where the OPENROUTER_* values fetched via os.getenv in
# the setup cell come from -- confirm against the project's .env.
# Kept as a bare last expression so the notebook displays the call's return
# value (True in the recorded output).
load_dotenv()

True

In [4]:
from openai import AsyncOpenAI
import os
from ragas.llms import llm_factory

# Setup LLM
# Read the OpenRouter settings up front and fail fast when either is missing
# or empty. os.getenv returns None for an unset variable, and passing None to
# the OpenAI client makes it fall back to its own defaults rather than
# OpenRouter, which then surfaces later as a confusing auth/model error.
openrouter_base_url = os.getenv('OPENROUTER_BASE_URL')
openrouter_api_key = os.getenv('OPENROUTER_API_KEY')
if not openrouter_base_url or not openrouter_api_key:
    raise RuntimeError(
        "OPENROUTER_BASE_URL and OPENROUTER_API_KEY must both be set "
        "(e.g. in the .env file) before creating the client"
    )

# Async OpenAI-compatible client pointed at the OpenRouter endpoint.
client = AsyncOpenAI(
    base_url=openrouter_base_url,
    api_key=openrouter_api_key,
)

# Create the LLM wrapper shared by the metric cells - use the OpenRouter
# model path ("provider/model"), e.g. "anthropic/claude-3-haiku" also works.
llm = llm_factory("openai/gpt-4o-mini", client=client)

### Context Precision - Experiment

In [None]:
import os
from ragas.metrics.collections import ContextPrecision

# Create metric
scorer = ContextPrecision(llm=llm)

# Evaluate
result = await scorer.ascore(
    user_input="Where is the Eiffel Tower located?",
    reference="The Eiffel Tower is located in Paris.",
    retrieved_contexts=[
        "The Brandenburg Gate is located in Berlin.", 
        "The Eiffel Tower is located in Paris."
    ]
)
print(f"Context Precision Score: {result.value}")

  from .autonotebook import tqdm as notebook_tqdm


Context Precision Score: 0.49999999995


### Context Recall - Experiment

In [5]:
from ragas.metrics.collections import ContextRecall

# Create metric
scorer = ContextRecall(llm=llm)

# Evaluate
result = await scorer.ascore(
    user_input="Where is the Eiffel Tower located?",
    retrieved_contexts=["Paris is the capital of France."],
    reference="The Eiffel Tower is located in Paris."
)
print(f"Context Recall Score: {result.value}")

Context Recall Score: 1.0


### Answer Relevancy - Experiment

In [None]:
from ragas.embeddings.base import embedding_factory
from ragas.metrics.collections import AnswerRelevancy

embeddings = embedding_factory("openai", model="text-embedding-3-small", client=client)

# Create metric
scorer = AnswerRelevancy(llm=llm, embeddings=embeddings)

# Evaluate
result = await scorer.ascore(
    user_input="When was the first super bowl?",
    response="Sunset was amazing!"
)
print(f"Answer Relevancy Score: {result.value}")

Answer Relevancy Score: 0.15604801501637802


### Faithfulness - Experiment

In [None]:
from ragas.metrics.collections import Faithfulness


# Create metric
scorer = Faithfulness(llm=llm)

# Evaluate
result = await scorer.ascore(
    user_input="When was the first super bowl?",
    response="The first superbowl was held on Jan 15, 1968",
    retrieved_contexts=[
        "The First AFLâ€“NFL World Championship Game was an American football game played on January 15, 1967, at the Los Angeles Memorial Coliseum in Los Angeles."
    ]
)
print(f"Faithfulness Score: {result.value}")

Faithfulness Score: 0.0
