In [6]:
import os

from typing_extensions import override

import secret_key
from judgeval.tracer import Tracer
from google import genai

gen_ai_client = genai.client.Client(api_key=secret_key.GEMINI_API_KEY)

In [7]:
judgment = Tracer(project_name="my_project", api_key=secret_key.JUDGMENT_API_KEY, organization_id=secret_key.JUDGMENT_ORG_ID)

@judgment.observe(span_type="tool")
def format_question(question: str) -> str:
    # dummy tool
    return f"Question : {question}"


@judgment.observe(span_type="function")
def run_agent(prompt: str) -> str:
    task = format_question(prompt)
    response = gen_ai_client.models.generate_content(model="gemini-2.5-pro", contents=task)
    return response.text


In [8]:
print(run_agent("What is the capital of the United States?"))

The capital of the United States is **Washington, D.C.**

"D.C." stands for the **District of Columbia**, a federal district created specifically to be the seat of the U.S. government.


In [9]:
from judgeval import JudgmentClient
from judgeval.data import Example
from judgeval.scorers import FaithfulnessScorer
client = JudgmentClient(api_key=secret_key.JUDGMENT_API_KEY, organization_id=secret_key.JUDGMENT_ORG_ID)
task = "What is the capital of the United States?"
example = Example(
    input=task,
    actual_output=run_agent(task),  # e.g. "The capital of the U.S. is Washington, D.C."
    retrieval_context=["Washington D.C. was founded in 1790 and became the capital of the U.S."],
)
scorer = FaithfulnessScorer(threshold=0.5)
client.assert_test(
    examples=[example],
    scorers=[scorer],
    model="gemini-2.5-pro",
)

2025-07-18 15:46:10 - judgeval - INFO - Successfully initialized JudgmentClient!


                    

AssertionError: [{'failed_scorers': [ScorerDataJudgmentType(name='Faithfulness', threshold=0.5, success=False, score=None, reason='', strict_mode=False, evaluation_model='gemini-2.5-pro', error="'exc_info' is an invalid keyword argument for print()", additional_metadata=None)]}]

In [13]:
from judgeval.scorers import AnswerCorrectnessScorer

client = JudgmentClient(api_key=secret_key.JUDGMENT_API_KEY, organization_id=secret_key.JUDGMENT_ORG_ID)
task = "What is the capital of the United States?"
example = Example(
    input=task,
    actual_output=run_agent(task),  # e.g. "The capital of the U.S. is Washington, D.C."
    retrieval_context=["Washington D.C. was founded in 1790 and became the capital of the U.S."],
)
scorer = AnswerCorrectnessScorer(threshold=0.5)
client.assert_test(
    examples=[example],
    scorers=[scorer],
    model="gemini-2.5-pro",
    override=True
)

SystemExit: 0

  warn("To exit: use 'exit', 'quit', or Ctrl-D.", stacklevel=1)
