In [None]:
%pip install -Uqqq datasets openinference-instrumentation-openai openai-responses openai tiktoken langchain langchain-openai llama-index llama-index-llms-openai faker

In [None]:
from contextlib import ExitStack
from random import randint
from secrets import token_hex

import openai
from datasets import load_dataset
from faker import Faker
from langchain_core.messages import AIMessage, HumanMessage
from langchain_openai import ChatOpenAI
from llama_index.core.llms import ChatMessage
from llama_index.llms.openai import OpenAI
from openai_responses import OpenAIMock
from openinference.instrumentation import using_session, using_user
from openinference.instrumentation.langchain import LangChainInstrumentor
from openinference.instrumentation.llama_index import LlamaIndexInstrumentor
from openinference.instrumentation.openai import OpenAIInstrumentor
from opentelemetry.exporter.otlp.proto.grpc.trace_exporter import OTLPSpanExporter
from opentelemetry.sdk.trace import TracerProvider
from opentelemetry.sdk.trace.export import SimpleSpanProcessor
from tiktoken import encoding_for_model

# Shared Faker instance; the simulation cells call fake.user_name() to invent a user id.
fake = Faker()

# Download Data

In [None]:
# Load the train split into pandas, then keep only the `chosen` conversations
# whose length is exactly 10 entries.
df = load_dataset("GitBag/ultrainteract_multiturn_1_iter_processed_harvard")["train"].to_pandas()
has_ten_entries = df["chosen"].map(len) == 10
convo = df.loc[has_ten_entries, "chosen"]

# Tracer Provider

In [None]:
# Spans recorded through this provider are handed to an OTLP/gRPC exporter
# pointed at port 4317 on localhost.
endpoint = "http://127.0.0.1:4317"
span_exporter = OTLPSpanExporter(endpoint=endpoint)
tracer_provider = TracerProvider()
tracer_provider.add_span_processor(SimpleSpanProcessor(span_exporter))

# Simulate OpenAI

Replay a sampled conversation through a mocked OpenAI client, wrapping each call in fake spans to simulate a trace tree.

In [None]:
sample_size = 1  # how many conversations to replay
user_id = fake.user_name()  # one fake user attached to every replayed session


def simulate_openai(messages):
    """Replay one conversation through a mocked OpenAI chat-completions client.

    For each odd index ``i`` the mock is primed to answer with ``messages[i]``
    and the client is then called with ``messages[:i]`` as the prompt. The
    mocked usage numbers come from tiktoken counts of each message's
    ``content``. The whole replay runs under one random session id and the
    module-level ``user_id``. Each call is wrapped in 1-3 nested "fake span"
    spans, with 0-2 immediately-ended "fake span" spans before and after it.

    Args:
        messages: Sequence of dicts with a ``content`` key; every odd-indexed
            entry is used verbatim as the mocked response message.
            Presumably the entries alternate user/assistant turns -- verify
            against the dataset.
    """
    model = "gpt-4o-mini"
    tokenizer = encoding_for_model(model)
    token_counts = [len(tokenizer.encode(message["content"])) for message in messages]
    client = openai.Client(api_key="sk-")
    mock = OpenAIMock()
    tracer = tracer_provider.get_tracer(__name__)

    def emit_leaf_spans():
        # Start and immediately end 0-2 extra spans.
        for _ in range(randint(0, 2)):
            tracer.start_span("fake span").end()

    with mock.router, using_session(token_hex(32)), using_user(user_id):
        for reply_idx in range(1, len(messages), 2):
            prompt_tokens = sum(token_counts[:reply_idx])
            completion_tokens = token_counts[reply_idx]
            # Prime the mock so the next request is answered with the recorded reply.
            mock.chat.completions.create.response = {
                "choices": [
                    {"index": 0, "finish_reason": "stop", "message": messages[reply_idx]},
                ],
                "usage": {
                    "prompt_tokens": prompt_tokens,
                    "completion_tokens": completion_tokens,
                    "total_tokens": prompt_tokens + completion_tokens,
                },
            }
            with ExitStack() as nesting:
                # A random depth of 1-3 nested spans becomes the ancestry of the call.
                for _ in range(randint(1, 3)):
                    nesting.enter_context(tracer.start_as_current_span("fake span"))
                emit_leaf_spans()
                client.chat.completions.create(model=model, messages=messages[:reply_idx])
                emit_leaf_spans()


# Instrument only for the duration of this replay. The finally clause removes the
# OpenAI hooks even when a replay raises, so they cannot leak into later cells.
OpenAIInstrumentor().instrument(tracer_provider=tracer_provider)
try:
    convo.sample(sample_size).apply(simulate_openai)
finally:
    OpenAIInstrumentor().uninstrument()

# Simulate LangChain

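Replay a sampled conversation through LangChain's `ChatOpenAI` against a mocked OpenAI API, wrapping each call in fake spans to simulate a trace tree.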

In [None]:
sample_size = 1  # how many conversations to replay
user_id = fake.user_name()  # one fake user attached to every replayed session


def simulate_langchain(messages):
    """Replay one conversation through LangChain's ``ChatOpenAI`` with OpenAI mocked.

    For each odd index ``i`` the mock is primed to answer with ``messages[i]``
    and the chat model is invoked with ``messages[:i]`` converted to LangChain
    messages: entries whose ``role`` is ``"user"`` become ``HumanMessage`` and
    all others become ``AIMessage``. Mocked usage numbers come from tiktoken
    counts of each message's ``content``. The replay runs under one random
    session id and the module-level ``user_id``. Each call is wrapped in 1-3
    nested "fake span" spans, with 0-2 immediately-ended "fake span" spans
    before and after it.

    Args:
        messages: Sequence of dicts with ``role`` and ``content`` keys; every
            odd-indexed entry is used verbatim as the mocked response message.
    """
    model = "gpt-4o-mini"
    tokenizer = encoding_for_model(model)
    token_counts = [len(tokenizer.encode(message["content"])) for message in messages]
    llm = ChatOpenAI(model_name=model, openai_api_key="sk-")
    mock = OpenAIMock()
    tracer = tracer_provider.get_tracer(__name__)

    def as_langchain_message(message):
        # Only "user" maps to HumanMessage; any other role is sent as an AIMessage.
        if message["role"] == "user":
            return HumanMessage(message["content"])
        return AIMessage(message["content"])

    def emit_leaf_spans():
        # Start and immediately end 0-2 extra spans.
        for _ in range(randint(0, 2)):
            tracer.start_span("fake span").end()

    with mock.router, using_session(token_hex(32)), using_user(user_id):
        for reply_idx in range(1, len(messages), 2):
            prompt_tokens = sum(token_counts[:reply_idx])
            completion_tokens = token_counts[reply_idx]
            # Prime the mock so the next request is answered with the recorded reply.
            mock.chat.completions.create.response = {
                "choices": [
                    {"index": 0, "finish_reason": "stop", "message": messages[reply_idx]},
                ],
                "usage": {
                    "prompt_tokens": prompt_tokens,
                    "completion_tokens": completion_tokens,
                    "total_tokens": prompt_tokens + completion_tokens,
                },
            }
            with ExitStack() as nesting:
                # A random depth of 1-3 nested spans becomes the ancestry of the call.
                for _ in range(randint(1, 3)):
                    nesting.enter_context(tracer.start_as_current_span("fake span"))
                emit_leaf_spans()
                llm.invoke([as_langchain_message(m) for m in messages[:reply_idx]])
                emit_leaf_spans()

# Instrument only for the duration of this replay. The finally clause removes the
# LangChain hooks even when a replay raises, so they cannot leak into later cells.
LangChainInstrumentor().instrument(tracer_provider=tracer_provider)
try:
    convo.sample(sample_size).apply(simulate_langchain)
finally:
    LangChainInstrumentor().uninstrument()

# Simulate Llama-Index

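Replay a sampled conversation through LlamaIndex's OpenAI LLM against a mocked OpenAI API, wrapping each call in fake spans to simulate a trace tree.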

In [None]:
sample_size = 1  # how many conversations to replay
user_id = fake.user_name()  # one fake user attached to every replayed session


def simulate_llama_index(messages):
    """Replay one conversation through LlamaIndex's OpenAI LLM with OpenAI mocked.

    For each odd index ``i`` the mock is primed to answer with ``messages[i]``
    and the LLM's chat endpoint is called with ``messages[:i]`` converted to
    ``ChatMessage`` objects. Mocked usage numbers come from tiktoken counts of
    each message's ``content``. The replay runs under one random session id
    and the module-level ``user_id``. Each call is wrapped in 1-3 nested
    "fake span" spans, with 0-2 immediately-ended "fake span" spans before and
    after it.

    Args:
        messages: Sequence of dicts whose keys are valid ``ChatMessage``
            keyword arguments and include ``content``; every odd-indexed entry
            is used verbatim as the mocked response message.
    """
    model = "gpt-4o-mini"
    encoding = encoding_for_model(model)
    counts = [len(encoding.encode(m["content"])) for m in messages]
    # Pass the model explicitly so the traced model name matches the encoding
    # used for the token counts instead of falling back to the library default.
    llm = OpenAI(model=model, api_key="sk-")
    openai_mock = OpenAIMock()
    tracer = tracer_provider.get_tracer(__name__)
    with ExitStack() as stack:
        stack.enter_context(openai_mock.router)
        stack.enter_context(using_session(token_hex(32)))
        stack.enter_context(using_user(user_id))
        for i in range(1, len(messages), 2):
            # Prime the mock so the next request is answered with the recorded reply.
            openai_mock.chat.completions.create.response = dict(
                choices=[dict(index=0, finish_reason="stop", message=messages[i])],
                usage=dict(
                    prompt_tokens=sum(counts[:i]),
                    completion_tokens=counts[i],
                    total_tokens=sum(counts[: i + 1]),
                ),
            )
            with ExitStack() as trace:
                # A random depth of 1-3 nested spans becomes the ancestry of the call.
                for _ in range(randint(1, 3)):
                    trace.enter_context(tracer.start_as_current_span("fake span"))
                for _ in range(randint(0, 2)):
                    tracer.start_span("fake span").end()
                # `chat` takes a sequence of ChatMessage; `complete` expects a prompt string.
                llm.chat([ChatMessage(**m) for m in messages[:i]])
                for _ in range(randint(0, 2)):
                    tracer.start_span("fake span").end()

# Instrument only for the duration of this replay. The finally clause removes the
# LlamaIndex hooks even when a replay raises, so they cannot stay active afterwards.
LlamaIndexInstrumentor().instrument(tracer_provider=tracer_provider)
try:
    convo.sample(sample_size).apply(simulate_llama_index)
finally:
    LlamaIndexInstrumentor().uninstrument()