# Example 1

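This notebook introduces the basic `contextcheck` workflow. It first explains the building blocks of a test scenario written in YAML (configuration, steps, and assertions), then loads a complete scenario from a file, runs it with an `Executor`, and inspects the result of each step.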

In [None]:
# TODO: Create another test scenario where each type of assertion would be included
# TODO: For this, config should also be updated
# TODO: Talk about sub components separately (step1, step2, assertion1, assertion2 etc.)

## Installation

In [13]:
# %pip install contextcheck
# %pip install devtools

## Imports

In [40]:
from contextcheck import TestScenario
from contextcheck.executors.executor import Executor # NOTE RB: Maybe Executor should be at the most outer layer for import
from devtools import pprint # Needed for pydantic models pretty formatting
import yaml

## Scenario creation

### Explain config

In [43]:
# Define the configuration section of a scenario in YAML.
# The `endpoint_under_test` entry defines the tested endpoint.
# The list of available kinds and models is available under: [here comes the link]
# TODO: replace the placeholder above with the actual documentation link.
# Lines starting with `#` inside the YAML text are YAML comments, so the
# optional `model` entry below is not part of the parsed result.
yaml_from_string = yaml.safe_load("""
config:
   endpoint_under_test:
      kind: openai
      # Optional model name
      # model: gpt-4o-mini
""")
# Display the parsed configuration as the cell output
yaml_from_string

{'config': {'endpoint_under_test': {'kind': 'openai'}}}

### Explain steps

In [45]:
# Each test scenario consists of at least one testing step.
# Each step can be defined by its name (optional), request and asserts (optional).
# `request` is a message sent to an LLM.
# `asserts` is a list of assertions evaluated on the LLM response.
yaml_from_string = yaml.safe_load("""
steps:
   - name: Check capital of Poland
     request: 'What is the capital city of Poland?'
     asserts:
        - '"Warsaw" in response.message'
        - 'response.stats.conn_duration < 3'
""")
# Display the parsed steps as the cell output
yaml_from_string

{'steps': ['Write success in the response',
  {'name': 'Check capital of Poland',
   'request': 'What is the capital city of Poland?',
   'asserts': ['"Warsaw" in response.message',
    'response.stats.conn_duration < 3']}]}

#### Explain step (general schema)

#### Explain assertions

##### Explain llm assertions

In [46]:
# LLM metrics use a separate LLM to assess the response of the LLM under test.
# For this, an `eval_endpoint` entry should be added to the config section to define the evaluation endpoint.

##### Explain eval assertions

In [None]:
# Eval assertions use Python's built-in `eval` to evaluate user-written expressions

##### Explain deterministic assertions

## Final scenario

In [26]:
# Once the test scenario file is ready, it can be loaded into a TestScenario object.
# The path is relative, so it depends on the directory the notebook is run from.
test_scenario_file_path = "../tests/scenario_openai.yaml"
test_scenario = TestScenario.from_yaml(file_path=test_scenario_file_path)

In [27]:
# Inspect the structure of the loaded test_scenario before it is run.
# In the recorded output every step still has `response=None` and `result=None`.
pprint(test_scenario)

TestScenario(
    steps=[
        TestStep(
            name='Write success in the response',
            request=RequestBase(
                message='Write success in the response',
            ),
            response=None,
            asserts=[],
            result=None,
        ),
        TestStep(
            name='Check capital of Poland',
            request=RequestBase(
                message='What is the capital city of Poland?',
            ),
            response=None,
            asserts=[
                AssertionEval(
                    result=None,
                    eval='"Warsaw" in response.message',
                ),
                AssertionEval(
                    result=None,
                    eval='response.stats.conn_duration < 3',
                ),
            ],
            result=None,
        ),
        TestStep(
            name='Send hello',
            request=RequestBase(
                message='Hello!',
            ),
            response=None,

In [31]:
# Instantiate the executor that runs the test scenario
executor = Executor(test_scenario=test_scenario)

In [32]:
# Run the test scenario.
# The value returned by `run_all()` is displayed as the cell output (`False` in the recorded run).
# NOTE(review): presumably this is the overall pass/fail status of the scenario - confirm against Executor.run_all.
executor.run_all()

[32m2024-09-16 14:05:03.044[0m | [1mINFO    [0m | [36mcontextcheck.executors.executor[0m:[36mrun_all[0m:[36m41[0m - [1mRunning scenario[0m
[32m2024-09-16 14:05:03.045[0m | [1mINFO    [0m | [36mcontextcheck.interfaces.interface[0m:[36m__call__[0m:[36m11[0m - [1mname='Write success in the response' request=RequestBase(message='Write success in the response') response=None asserts=[] result=None[0m
[32m2024-09-16 14:05:03.046[0m | [1mINFO    [0m | [36mcontextcheck.interfaces.interface[0m:[36m__call__[0m:[36m11[0m - [1mmessage='Write success in the response'[0m
[32m2024-09-16 14:05:03.656[0m | [1mINFO    [0m | [36mcontextcheck.interfaces.interface[0m:[36m__call__[0m:[36m11[0m - [1mmessage='Success! How can I assist you further?' stats=ResponseStats(tokens_request=12, tokens_response=9, tokens_total=21, conn_start_time=18873.231008109, conn_end_time=18873.839623668, conn_duration=0.6086155589982809) id='chatcmpl-A84lIXOMMNw5lANRJDlaJXCHgj9ZS' c

False

In [33]:
# NOTE RB: Maybe executor should copy the test scenario
# Inspect the updated test_scenario: the same object that was passed to the
# executor now carries the responses, as the recorded output shows.
pprint(test_scenario)

TestScenario(
    steps=[
        TestStep(
            name='Write success in the response',
            request=RequestBase(
                message='Write success in the response',
            ),
            response=ResponseModel(
                message='Success! How can I assist you further?',
                stats=ResponseStats(
                    tokens_request=12,
                    tokens_response=9,
                    tokens_total=21,
                    conn_start_time=18873.231008109,
                    conn_end_time=18873.839623668,
                    conn_duration=0.6086155589982809,
                ),
                id='chatcmpl-A84lIXOMMNw5lANRJDlaJXCHgj9ZS',
                choices=[
                    {
                        'finish_reason': 'stop',
                        'index': 0,
                        'logprobs': None,
                        'message': {
                            'content': 'Success! How can I assist you further?',
                

In [37]:
# Each test step can be inspected separately to check its result:
# print one line per step with the step's name and its `result` value.
for step in test_scenario.steps:
    print(f"Step name: {step.name}, Result: {step.result}")

Step name: Write success in the response, Result: True
Step name: Check capital of Poland, Result: True
Step name: Send hello, Result: False
