In [None]:
%pip install -Uqqq datasets openinference-instrumentation-openai openai-responses openai tiktoken langchain langchain-openai llama-index llama-index-llms-openai faker mdgen

In [None]:
from base64 import b64encode
from contextlib import ExitStack, contextmanager
from io import BytesIO
from random import choice, randint, random, shuffle
from secrets import token_hex
from time import sleep

import openai
from datasets import load_dataset
from faker import Faker
from langchain_core.messages import AIMessage, HumanMessage
from langchain_openai import ChatOpenAI
from llama_index.core.llms import ChatMessage
from llama_index.llms.openai import OpenAI
from mdgen import MarkdownPostProvider
from openai_responses import OpenAIMock
from openinference.instrumentation.langchain import LangChainInstrumentor
from openinference.instrumentation.llama_index import LlamaIndexInstrumentor
from openinference.instrumentation.openai import OpenAIInstrumentor
from openinference.semconv.trace import OpenInferenceSpanKindValues, SpanAttributes
from opentelemetry.exporter.otlp.proto.grpc.trace_exporter import OTLPSpanExporter
from opentelemetry.sdk.trace import StatusCode, TracerProvider
from opentelemetry.sdk.trace.export import SimpleSpanProcessor
from opentelemetry.sdk.trace.export.in_memory_span_exporter import InMemorySpanExporter
from PIL import Image
from tiktoken import encoding_for_model

# Shared Faker instance; later cells use it for user names, colours and posts.
fake = Faker()
# Register mdgen's provider (presumably what supplies `fake.post(...)` used in
# the vision cell -- confirm against mdgen's documentation).
fake.add_provider(MarkdownPostProvider)

# Download Data

In [None]:
# Pull the "train" split into pandas and keep only the `chosen` conversations
# that contain exactly 10 messages; the simulators below replay every odd
# index of such a conversation as a model response.
df = load_dataset("GitBag/ultrainteract_multiturn_1_iter_processed_harvard")["train"].to_pandas()
convo = df.loc[df["chosen"].map(len).eq(10), "chosen"]

# Tracer Provider

In [None]:
# Spans are first collected in memory (synchronously, via SimpleSpanProcessor)
# rather than being sent straight to the collector.
tracer_provider = TracerProvider()
in_memory_span_exporter = InMemorySpanExporter()
tracer_provider.add_span_processor(SimpleSpanProcessor(in_memory_span_exporter))
# The OTLP exporter is deliberately NOT attached to the provider: export_spans()
# calls it by hand so the finished spans can be shuffled before being sent.
endpoint = "http://127.0.0.1:4317"
otlp_span_exporter = OTLPSpanExporter(endpoint=endpoint)

# Helpers

In [None]:
def gen_session_id():
    """Return a random session identifier of mixed type.

    With equal probability the result is either a 64-character hex string
    or a non-negative integer below one billion.
    """
    if random() < 0.5:
        return token_hex(32)
    return int(abs(random()) * 1_000_000_000)


def gen_user_id():
    """Return a random user identifier of mixed type.

    With equal probability the result is either a Faker-generated user name
    or a non-negative integer below one billion.
    """
    if random() < 0.5:
        return fake.user_name()
    return int(abs(random()) * 1_000_000_000)


def export_spans():
    """Send every collected span to the OTLP endpoint in random order.

    Spans are exported one at a time, with a 10 ms pause after each, so the
    receiver sees them out of order. The in-memory buffer is emptied
    afterwards.
    """
    pending = [*in_memory_span_exporter.get_finished_spans()]
    shuffle(pending)
    for finished_span in pending:
        # One span per request so that arrival order really is shuffled.
        otlp_span_exporter.export([finished_span])
        sleep(0.01)
    in_memory_span_exporter.clear()


def rand_span_kind():
    """Yield one (attribute key, value) pair with a random OpenInference span kind.

    Written as a generator so that ``dict(rand_span_kind())`` builds a
    ready-to-use attributes mapping.
    """
    candidates = list(OpenInferenceSpanKindValues)
    picked = choice(candidates)
    yield (SpanAttributes.OPENINFERENCE_SPAN_KIND, picked.value)


def set_session_id(span, has_session_id, session_id):
    """Maybe tag ``span`` with the session id; return the updated flag.

    If the session id has not been placed yet, it is written to ``span``
    with 10% probability and ``True`` is returned. Otherwise the incoming
    ``has_session_id`` is returned unchanged.
    """
    if has_session_id:
        # Already placed somewhere in this trace; no random draw is made.
        return has_session_id
    if random() >= 0.1:
        return has_session_id
    span.set_attribute(SpanAttributes.SESSION_ID, session_id)
    return True


def set_user_id(span, has_user_id, user_id):
    """Maybe tag ``span`` with the user id; return the updated flag.

    If the user id has not been placed yet, it is written to ``span`` with
    10% probability and ``True`` is returned. Otherwise the incoming
    ``has_user_id`` is returned unchanged.
    """
    if has_user_id:
        # Already placed somewhere in this trace; no random draw is made.
        return has_user_id
    if random() >= 0.1:
        return has_user_id
    span.set_attribute(SpanAttributes.USER_ID, user_id)
    return True


@contextmanager
def trace_tree(session_id, user_id):
    """Wrap the body in a randomly shaped tree of spans.

    Layout of the generated trace:

    * one ``root`` span, kept open until the very end;
    * 0-10 nested ``parent`` spans, each the child of the previous one and
      all current while the body runs;
    * 0-10 already-ended ``sibling`` spans created before the body, and
      another 0-10 created after it, under the innermost current span.

    ``session_id`` and ``user_id`` are each attached to exactly one span:
    every parent/sibling gets a 10% chance (see ``set_session_id`` /
    ``set_user_id``), and the root span receives whichever id was not
    placed. Every parent/sibling also gets a random status, weighted
    towards ``OK``.

    The root span is finalised in a ``finally`` block, so it is ended (and
    carries the ids) even when the wrapped body raises. The trailing
    siblings are skipped in that case.

    Args:
        session_id: value stored under ``SpanAttributes.SESSION_ID``.
        user_id: value stored under ``SpanAttributes.USER_ID``.
    """
    has_session_id = has_user_id = False
    tracer = tracer_provider.get_tracer(__name__)

    def decorate(span):
        # Give the span its chance at carrying the ids, plus a random status.
        nonlocal has_session_id, has_user_id
        has_session_id = set_session_id(span, has_session_id, session_id)
        has_user_id = set_user_id(span, has_user_id, user_id)
        span.set_status(choice([StatusCode.OK] * 10 + list(StatusCode)))

    def emit_siblings():
        # Short-lived spans under whatever span is current at this point.
        for _ in range(randint(0, 10)):
            span = tracer.start_span("sibling", attributes=dict(rand_span_kind()))
            decorate(span)
            span.end()

    root = None
    try:
        with ExitStack() as trace:
            root = trace.enter_context(
                tracer.start_as_current_span(
                    "root",
                    attributes=dict(rand_span_kind()),
                    # Keep root open past the stack so the ids can still be
                    # attached once we know no descendant took them.
                    end_on_exit=False,
                )
            )
            for _ in range(randint(0, 10)):
                decorate(
                    trace.enter_context(
                        tracer.start_as_current_span("parent", attributes=dict(rand_span_kind()))
                    )
                )
            emit_siblings()
            yield
            emit_siblings()
    finally:
        # Runs on success and on failure, so the root span is never left open.
        if root is not None:
            if not has_session_id:
                root.set_attribute(SpanAttributes.SESSION_ID, session_id)
            if not has_user_id:
                root.set_attribute(SpanAttributes.USER_ID, user_id)
            root.end()

# Text Only

## OpenAI

In [None]:
# Number of conversations (one session each) to replay in this cell.
session_count = 5
# A single random user shared by every session in this cell; read as a global
# by simulate_openai.
user_id = gen_user_id()


def simulate_openai(messages):
    """Replay one conversation through the mocked OpenAI chat completions API.

    Every odd-indexed message is treated as the model's reply to the messages
    before it. For each such reply the mock is primed with that message and a
    matching token usage, then the preceding history is sent through the real
    client inside a random ``trace_tree``. All calls share one freshly
    generated session id and the cell-level ``user_id``.

    Args:
        messages: sequence of message mappings; each must have a "content"
            string (assumed to also carry "role", since the items are passed
            to the chat API as-is).
    """
    session_id = gen_session_id()
    client = openai.Client(api_key="sk-")  # dummy key; the mock answers every request
    model = "gpt-4o-mini"
    tokenizer = encoding_for_model(model)
    token_counts = [len(tokenizer.encode(msg["content"])) for msg in messages]
    mock = OpenAIMock()
    with mock.router:
        for reply_idx in range(1, len(messages), 2):
            history = messages[:reply_idx]
            reply = messages[reply_idx]
            prompt_tokens = sum(token_counts[:reply_idx])
            completion_tokens = token_counts[reply_idx]
            mock.chat.completions.create.response = {
                "choices": [{"index": 0, "finish_reason": "stop", "message": reply}],
                "usage": {
                    "prompt_tokens": prompt_tokens,
                    "completion_tokens": completion_tokens,
                    "total_tokens": prompt_tokens + completion_tokens,
                },
            }
            with trace_tree(session_id, user_id):
                client.chat.completions.create(model=model, messages=history)


# Instrument only for the duration of the simulation. The `finally` guarantees
# the global instrumentation is removed even if a conversation fails, so later
# cells do not pick up stray OpenAI spans.
OpenAIInstrumentor().instrument(tracer_provider=tracer_provider)
try:
    convo.sample(session_count).apply(simulate_openai)
finally:
    OpenAIInstrumentor().uninstrument()
export_spans()

## LangChain

In [None]:
# Number of conversations (one session each) to replay in this cell.
session_count = 5
# A single random user shared by every session in this cell; read as a global
# by simulate_langchain.
user_id = gen_user_id()


def simulate_langchain(messages):
    """Replay one conversation through LangChain's ChatOpenAI against the mock.

    Every odd-indexed message is treated as the model's reply to the messages
    before it. For each such reply the mock is primed with that message and a
    matching token usage, then the preceding history (converted to LangChain
    message objects) is sent via ``llm.invoke`` inside a random
    ``trace_tree``. All calls share one freshly generated session id and the
    cell-level ``user_id``.

    Args:
        messages: sequence of mappings with "role" and "content" keys; a role
            of "user" becomes a HumanMessage, anything else an AIMessage.
    """

    def to_langchain(msg):
        # Anything that is not a user turn is replayed as an AI turn.
        message_cls = HumanMessage if msg["role"] == "user" else AIMessage
        return message_cls(msg["content"])

    session_id = gen_session_id()
    model = "gpt-4o-mini"
    tokenizer = encoding_for_model(model)
    token_counts = [len(tokenizer.encode(msg["content"])) for msg in messages]
    llm = ChatOpenAI(model_name=model, openai_api_key="sk-")  # dummy key; mocked
    mock = OpenAIMock()
    with mock.router:
        for reply_idx in range(1, len(messages), 2):
            prompt_tokens = sum(token_counts[:reply_idx])
            completion_tokens = token_counts[reply_idx]
            mock.chat.completions.create.response = {
                "choices": [
                    {"index": 0, "finish_reason": "stop", "message": messages[reply_idx]}
                ],
                "usage": {
                    "prompt_tokens": prompt_tokens,
                    "completion_tokens": completion_tokens,
                    "total_tokens": prompt_tokens + completion_tokens,
                },
            }
            with trace_tree(session_id, user_id):
                llm.invoke([to_langchain(msg) for msg in messages[:reply_idx]])


# Instrument only for the duration of the simulation. The `finally` guarantees
# the global instrumentation is removed even if a conversation fails, so later
# cells do not pick up stray LangChain spans.
LangChainInstrumentor().instrument(tracer_provider=tracer_provider)
try:
    convo.sample(session_count).apply(simulate_langchain)
finally:
    LangChainInstrumentor().uninstrument()
export_spans()

## Llama-Index

In [None]:
# Number of conversations (one session each) to replay in this cell.
session_count = 5
# A single random user shared by every session in this cell; read as a global
# by simulate_llama_index.
user_id = gen_user_id()


def simulate_llama_index(messages):
    """Replay one conversation through LlamaIndex's OpenAI LLM against the mock.

    Every odd-indexed message is treated as the model's reply to the messages
    before it. For each such reply the mock is primed with that message and a
    matching token usage, then the preceding history is sent as a chat
    request inside a random ``trace_tree``. All calls share one freshly
    generated session id and the cell-level ``user_id``.

    Args:
        messages: sequence of mappings with "role" and "content" keys, which
            are expanded directly into ``ChatMessage(**m)``.
    """
    session_id = gen_session_id()
    model = "gpt-4o-mini"
    encoding = encoding_for_model(model)
    counts = [len(encoding.encode(m["content"])) for m in messages]
    # Use the same model the token counts are computed for, as the other
    # simulators do, instead of the library's default model.
    llm = OpenAI(model=model, api_key="sk-")  # dummy key; the mock answers every request
    openai_mock = OpenAIMock()
    with openai_mock.router:
        for i in range(1, len(messages), 2):
            prompt_tokens = sum(counts[:i])
            completion_tokens = counts[i]
            openai_mock.chat.completions.create.response = dict(
                choices=[dict(index=0, finish_reason="stop", message=messages[i])],
                usage=dict(
                    prompt_tokens=prompt_tokens,
                    completion_tokens=completion_tokens,
                    total_tokens=prompt_tokens + completion_tokens,
                ),
            )
            with trace_tree(session_id, user_id):
                # `complete()` expects a prompt string; a message history
                # must go through `chat()`.
                llm.chat([ChatMessage(**m) for m in messages[:i]])


# Instrument only for the duration of the simulation. The `finally` guarantees
# the global instrumentation is removed even if a conversation fails, so later
# cells do not pick up stray LlamaIndex spans.
LlamaIndexInstrumentor().instrument(tracer_provider=tracer_provider)
try:
    convo.sample(session_count).apply(simulate_llama_index)
finally:
    LlamaIndexInstrumentor().uninstrument()
export_spans()

# Vision

## OpenAI

In [None]:
# Number of sessions to simulate in this cell.
session_count = 5
# A single random user shared by every session in this cell; read as a global
# by simulate_openai_vision.
user_id = gen_user_id()


def simulate_openai_vision():
    """Simulate one multi-turn session that mixes text and image inputs.

    Runs 5-20 turns. Each turn adds a fake user message (text only, or text
    plus 3-10 tiny solid-colour PNGs as base64 data URLs, with equal
    probability) and a fake assistant reply. The mock is primed with that
    reply, then the whole history minus the reply is sent through the real
    client inside a random ``trace_tree``. All turns share one freshly
    generated session id and the cell-level ``user_id``.

    Reported usage follows the text simulators: ``prompt_tokens`` covers the
    text of every message sent in the request (earlier replies included),
    ``completion_tokens`` covers only the current reply, and ``total_tokens``
    is their sum. Image payloads are not counted.
    """

    def random_image_part():
        # 5x5 single-colour PNG, inlined as a data URL content part.
        img = Image.new("RGB", (5, 5), fake.color_rgb())
        buffered = BytesIO()
        img.save(buffered, format="PNG")
        url = f"data:image/png;base64,{b64encode(buffered.getvalue()).decode()}"
        return dict(type="image_url", image_url=dict(url=url))

    session_id = gen_session_id()
    client = openai.Client(api_key="sk-")  # dummy key; the mock answers every request
    model = "gpt-4o-mini"
    encoding = encoding_for_model(model)
    openai_mock = OpenAIMock()
    messages = []
    history_tokens = 0  # text tokens of every message from previous turns
    with openai_mock.router:
        for _ in range(randint(5, 20)):
            text = fake.post(size="small")
            if random() < 0.5:
                images = [random_image_part() for _image in range(randint(3, 10))]
                content = [dict(type="text", text=text)] + images
            else:
                content = text
            request = dict(role="user", content=content)
            response = dict(role="assistant", content=fake.post(size="medium"))
            prompt_tokens = history_tokens + len(encoding.encode(text))
            completion_tokens = len(encoding.encode(response["content"]))
            total_tokens = prompt_tokens + completion_tokens
            history_tokens = total_tokens
            messages.extend([request, response])
            openai_mock.chat.completions.create.response = dict(
                choices=[dict(index=0, finish_reason="stop", message=response)],
                # A fresh dict per turn, so no state is shared between responses.
                usage=dict(
                    prompt_tokens=prompt_tokens,
                    completion_tokens=completion_tokens,
                    total_tokens=total_tokens,
                ),
            )
            with trace_tree(session_id, user_id):
                client.chat.completions.create(model=model, messages=messages[:-1])


# Instrument only for the duration of the simulation. The `finally` guarantees
# the global instrumentation is removed even if a session fails part-way.
OpenAIInstrumentor().instrument(tracer_provider=tracer_provider)
try:
    for _ in range(session_count):
        simulate_openai_vision()
finally:
    OpenAIInstrumentor().uninstrument()
export_spans()