# Dummy Provider Example and High Volume Robustness Testing

This notebook has two purposes: 

- Demonstrate the dummy feedback function provider which behaves like the
  huggingface provider except it does not actually perform any network calls and
  just produces constant results. It can be used to prototype feedback function
  wiring for your apps before invoking potentially slow (to run/to load)
  feedback functions.

- Test out high-volume record and feedback computation. To this end, we use the
  custom app, which is a dummy in the sense that it produces useless answers
  without making any API calls but otherwise behaves similarly to real apps,
  together with the dummy feedback function provider.

In [1]:
%load_ext autoreload
%autoreload 2
from pathlib import Path
import sys

# If running from the github repo: make the directory two levels above the
# current working directory importable (presumably the repo root, so that the
# `examples...` import below resolves -- confirm for your checkout layout).
repo_root = Path.cwd().parent.parent.resolve()
sys.path.append(str(repo_root))


In [2]:
from concurrent.futures import as_completed
from time import sleep

from examples.expositional.end2end_apps.custom_app.custom_app import CustomApp
from tqdm.auto import tqdm

from trulens_eval import Feedback
from trulens_eval import Tru
from trulens_eval.feedback.provider.hugs import Dummy
from trulens_eval.schema.feedback import FeedbackMode
from trulens_eval.tru_custom_app import TruCustomApp
from trulens_eval.utils.threading import TP

# TP instance shared by the parallel cells below (they call `tp.submit`).
tp = TP()

# Dummy provider with every simulated failure probability set to zero.
d = Dummy(
    loading_prob=0.0,
    # we expect requests to have their own timeouts so freeze should never happen
    freeze_prob=0.0,
    error_prob=0.0,
    overloaded_prob=0.0,
    rpm=1000,
    # how much fake data to allocate during requests
    alloc=0,
    delay=10.0,
)

tru = Tru()

# Uncomment to start from an empty database:
# tru.reset_database()

# Last expression of the cell, so its return value is shown as the cell output.
tru.start_dashboard(force=True, _dev=Path.cwd().parent.parent.resolve())

🦑 Tru initialized with db url sqlite:///default.sqlite .
🛑 Secret keys may be written to the database. See the `database_redact_keys` option of Tru` to prevent this.
Force stopping dashboard ...
Starting dashboard ...
Config file already exists. Skipping writing process.
Credentials file already exists. Skipping writing process.


Accordion(children=(VBox(children=(VBox(children=(Label(value='STDOUT'), Output())), VBox(children=(Label(valu…

Dashboard started at http://192.168.5.95:8501 .


<Popen: returncode: None args: ['streamlit', 'run', '--server.headless=True'...>

In [4]:
# Feedback functions backed by the dummy provider `d`.
f_dummy1 = Feedback(d.language_match).on_input_output()
f_dummy2 = Feedback(d.positive_sentiment, name="output sentiment").on_output()
f_dummy3 = Feedback(d.positive_sentiment, name="input sentiment").on_input()

# Create custom app:
ca = CustomApp(delay=0.0, alloc=0)

# Create trulens wrapper.
# NOTE(review): the `feedbacks=` argument is commented out, so the three
# feedback functions defined above are not attached to this app -- confirm
# whether that is intentional before relying on deferred evaluation results.
ta = TruCustomApp(
    ca,
    app_id="customapp",
    # feedbacks=[f_dummy1, f_dummy2, f_dummy3],
    feedback_mode=FeedbackMode.DEFERRED,
)

✅ In language_match, input text1 will be set to __record__.main_input or `Select.RecordInput` .
✅ In language_match, input text2 will be set to __record__.main_output or `Select.RecordOutput` .
✅ In output sentiment, input text will be set to __record__.main_output or `Select.RecordOutput` .
✅ In input sentiment, input text will be set to __record__.main_input or `Select.RecordInput` .


In [5]:
# Sequential app invocation.

for i in tqdm(range(4), desc="invoking app"):
    # Record exactly one app call per iteration.
    with ta as recorder:
        res = ca.respond_to_query(f"hello {i}")

    # The record is fetched after the recording context has exited; `rec` is
    # left holding the record of the final iteration for later cells.
    rec = recorder.get()
    assert rec is not None

invoking app:   0%|          | 0/16 [00:00<?, ?it/s]

In [9]:
# Display the spans of the tracer view of `rec`. `rec` is assigned by the
# sequential app invocation cell, so that cell must have been run first.
rec.as_tracer().spans

{HashableSpanContext(trace_id=0x34f535b304cb9f0113dfb4e80485a0ee, span_id=0x338fd9fecd07153c, trace_flags=0x00, trace_state=[], is_remote=False): SpanRoot(name='root', kind=<SpanKind.INTERNAL: 0>, status=<StatusCode.UNSET: 0>, status_description=None, start_timestamp=1714009336925237000, end_timestamp=None, context=HashableSpanContext(trace_id=0x34f535b304cb9f0113dfb4e80485a0ee, span_id=0x338fd9fecd07153c, trace_flags=0x00, trace_state=[], is_remote=False), events=[], links={}, attributes={'trulens_eval@span_type': 'SpanRoot'}, attributes_metadata={}),
 HashableSpanContext(trace_id=0x34f535b304cb9f0113dfb4e80485a0ee, span_id=0x3ee70ce6eed8c9aa, trace_flags=0x00, trace_state=[], is_remote=False): SpanUntyped(name='retrieve_chunks', kind=<SpanKind.INTERNAL: 0>, status=<StatusCode.UNSET: 0>, status_description=None, start_timestamp=1714009336925276000, end_timestamp=None, context=HashableSpanContext(trace_id=0x34f535b304cb9f0113dfb4e80485a0ee, span_id=0x3ee70ce6eed8c9aa, trace_flags=0x00,

In [None]:
# ta.wait_for_feedback_results()

In [None]:
# Control retries in deferred evaluator. Uncomment to override the retry
# settings before the evaluator is started.
# tru.RETRY_FAILED_SECONDS = 60
# tru.RETRY_RUNNING_SECONDS = 5

# Start the evaluator on the `tru` workspace created above.
# NOTE(review): `restart=True` presumably replaces an evaluator that is already
# running -- confirm against the Tru.start_evaluator documentation.
tru.start_evaluator(restart=True)

In [None]:
# Parallel feedback evaluation.
#
# Submits `num_tests` calls of the dummy provider's `positive_sentiment` to the
# shared pool `tp` and tallies how many complete successfully (`good`) versus
# raise or return an unexpected message (`bad`).

futures = []
num_tests = 10000
good = 0
bad = 0
last_error = None  # most recent exception from a failed task, kept for inspection


def test_feedback(msg):
    """Evaluate `d.positive_sentiment` on `msg`.

    Returns:
        A tuple of the input `msg` and the value returned by
        `d.positive_sentiment(msg)`.
    """
    return msg, d.positive_sentiment(msg)


for i in tqdm(range(num_tests), desc="starting feedback task"):
    futures.append(tp.submit(test_feedback, msg="good"))

prog = tqdm(as_completed(futures), total=num_tests)

for f in prog:
    try:
        res = f.result()
        # Validate before counting: a task whose check fails must only be
        # tallied as bad, never as both good and bad.
        assert res[0] == "good"
    except Exception as e:
        bad += 1
        last_error = e
    else:
        good += 1

    # Progress label format is "<good> / <bad>".
    prog.set_description_str(f"{good} / {bad}")

In [None]:
# Parallel app invocation.

def run_query(q):
    """Invoke the custom app on query `q` while recording with `ta`.

    Asserts that the recorder produced a record, then returns a short status
    string that embeds `q`. The app's own response is not returned.
    """
    with ta as recorder:
        ca.respond_to_query(q)

    # The record is only fetched once the recording context has exited.
    record = recorder.get()
    assert record is not None

    return f"run_query {q} result"

# Keep every future returned by `tp.submit`: if they are discarded, a failure
# inside `run_query` (for example its assertion) is never observed.
app_futures = []

for i in tqdm(range(100), desc="starting app task"):
    print(
        tp.completed_tasks,
        end="\r"
    )
    app_futures.append(tp.submit(run_query, q=f"hello {i}"))

# Wait for all app tasks and tally outcomes; label format is "<good> / <bad>".
app_good = 0
app_bad = 0
app_last_error = None  # most recent exception from a failed task, kept for inspection

app_prog = tqdm(as_completed(app_futures), total=len(app_futures))

for fut in app_prog:
    try:
        fut.result()
    except Exception as e:
        app_bad += 1
        app_last_error = e
    else:
        app_good += 1

    app_prog.set_description_str(f"{app_good} / {app_bad}")