In [None]:
import pandas as pd

# Do not truncate long cell values when rendering dataframes.
pd.options.display.max_colwidth = None

In [None]:
import os

import phoenix as px
from phoenix.db import models
from phoenix.trace.dsl.helpers import get_qa_with_reference, get_retrieved_documents
from phoenix.trace.dsl.query import SpanQuery
from sqlalchemy import create_engine, select
from sqlalchemy.orm import sessionmaker

In [None]:
# Session factories for the two databases under comparison.
#
# The connection URLs can be overridden through the environment (notebook-local variable
# names) so that credentials and machine-specific paths do not have to be edited into the
# notebook. When the variables are unset, the original local defaults are used.
POSTGRES_URL = os.environ.get(
    "PHOENIX_POSTGRES_URL",
    "postgresql+psycopg://localhost:5432/postgres?user=postgres&password=mysecretpassword",
)
SQLITE_URL = os.environ.get(
    "PHOENIX_SQLITE_URL",
    "sqlite:////Users/rogeryang/.phoenix/phoenix.db",
)

# echo=True makes SQLAlchemy log the SQL it emits, which is useful when comparing backends.
PostgresSession = sessionmaker(
    create_engine(POSTGRES_URL, echo=True),
    expire_on_commit=False,
)
SqliteSession = sessionmaker(
    create_engine(SQLITE_URL, echo=True),
    expire_on_commit=False,
)

# latency ms

In [None]:
# Run the same `latency_ms` select against SQLite and then Postgres, printing the scalar
# result of each so the two backends can be compared by eye.
stmt = select(models.Span.latency_ms)
for session_factory in (SqliteSession, PostgresSession):
    with session_factory.begin() as session:
        print(session.scalar(stmt))

In [None]:
# Endpoints passed to `px.Client` in the cells below; all three are on the local machine
# and differ only by port.
_LOCAL_HOST = "http://127.0.0.1"
orig_endpoint = f"{_LOCAL_HOST}:6007"
postgres_endpoint = f"{_LOCAL_HOST}:6006"
sqlite_endpoint = f"{_LOCAL_HOST}:6005"

# get spans dataframe with filter

In [None]:
# Fetch root spans matching one filter from each endpoint, sorting rows and columns so the
# three frames line up for the column-by-column comparison that follows.
filter_condition = "latency_ms > 1000 and 'service' in output.value"


def _root_spans(endpoint):
    """Return the filtered root-span dataframe from `endpoint`, sorted on both axes."""
    spans = px.Client(endpoint=endpoint).get_spans_dataframe(
        filter_condition, root_spans_only=True
    )
    return spans.sort_index().sort_index(axis=1)


df_orig_root_spans = _root_spans(orig_endpoint)
print(f"{df_orig_root_spans.shape=}")
df_postgres_root_spans = _root_spans(postgres_endpoint)
print(f"{df_postgres_root_spans.shape=}")
df_sqlite_root_spans = _root_spans(sqlite_endpoint)
print(f"{df_sqlite_root_spans.shape=}")

# One column index per line, in the order orig, postgres, sqlite.
print(
    df_orig_root_spans.columns,
    df_postgres_root_spans.columns,
    df_sqlite_root_spans.columns,
    sep="\n",
)

In [None]:
# Column-by-column comparison of the filtered root-span frames.
#
# Columns are looked up by label rather than by position: if one backend returns a different
# set of columns, a positional lookup would either compare unrelated columns or raise
# IndexError. A column missing from a backend is reported as False for that comparison.
# Output per line: orig==postgres, orig==sqlite, postgres==sqlite,, column label, position.
for i, column in enumerate(df_orig_root_spans.columns):
    orig_col = df_orig_root_spans[column]
    postgres_col = df_postgres_root_spans.get(column)  # None when the column is absent
    sqlite_col = df_sqlite_root_spans.get(column)
    has_postgres = postgres_col is not None
    has_sqlite = sqlite_col is not None
    orig_vs_postgres = has_postgres and orig_col.equals(postgres_col)
    orig_vs_sqlite = has_sqlite and orig_col.equals(sqlite_col)
    postgres_vs_sqlite = has_postgres and has_sqlite and postgres_col.equals(sqlite_col)
    print(f"{orig_vs_postgres}, {orig_vs_sqlite}, {postgres_vs_sqlite},, {column} {i=}")

# get spans dataframe no filter

In [None]:
# Fetch every span (no filter) from each endpoint, sorted on both axes. One column is
# dropped per backend: `conversation` from the original, and
# `attributes.openinference.span.kind` from the Postgres and SQLite results.
def _all_spans(endpoint, dropped_column):
    """Return all spans from `endpoint`, sorted on both axes, without `dropped_column`."""
    spans = px.Client(endpoint=endpoint).get_spans_dataframe()
    return spans.sort_index().sort_index(axis=1).drop(columns=dropped_column)


df_orig = _all_spans(orig_endpoint, "conversation")
print(f"{df_orig.shape=}")
df_postgres = _all_spans(postgres_endpoint, "attributes.openinference.span.kind")
print(f"{df_postgres.shape=}")
df_sqlite = _all_spans(sqlite_endpoint, "attributes.openinference.span.kind")
print(f"{df_sqlite.shape=}")

# One column index per line, in the order orig, postgres, sqlite.
print(df_orig.columns, df_postgres.columns, df_sqlite.columns, sep="\n")

In [None]:
# Column-by-column comparison of the unfiltered span frames.
#
# Columns are looked up by label rather than by position: if one backend returns a different
# set of columns, a positional lookup would either compare unrelated columns or raise
# IndexError. A column missing from a backend is reported as False for that comparison.
# Output per line: orig==postgres orig==sqlite postgres==sqlite column-label position.
for i, column in enumerate(df_orig.columns):
    orig_col = df_orig[column]
    postgres_col = df_postgres.get(column)  # None when the column is absent
    sqlite_col = df_sqlite.get(column)
    has_postgres = postgres_col is not None
    has_sqlite = sqlite_col is not None
    print(
        f"{has_postgres and orig_col.equals(postgres_col)}",
        f"{has_sqlite and orig_col.equals(sqlite_col)}",
        f"{has_postgres and has_sqlite and postgres_col.equals(sqlite_col)}",
        f"{column}",
        f"{i=}",
    )

# qa with reference

In [None]:
# Run `get_qa_with_reference` against each endpoint, sort rows and columns, then print the
# shapes and the pairwise `equals` results.
qa_orig = get_qa_with_reference(px.Client(endpoint=orig_endpoint)).sort_index().sort_index(axis=1)
qa_postgres = (
    get_qa_with_reference(px.Client(endpoint=postgres_endpoint)).sort_index().sort_index(axis=1)
)
qa_sqlite = (
    get_qa_with_reference(px.Client(endpoint=sqlite_endpoint)).sort_index().sort_index(axis=1)
)
print(f"{qa_orig.shape=}")
print(f"{qa_postgres.shape=}")
print(f"{qa_sqlite.shape=}")
print(f"{qa_orig.equals(qa_postgres)=}")
print(f"{qa_orig.equals(qa_sqlite)=}")
print(f"{qa_postgres.equals(qa_sqlite)=}")
# Display a seeded sample of each result, sorted by index. The sample size is capped at the
# frame length because `sample(5)` raises ValueError on a frame with fewer than 5 rows.
pd.concat(
    [
        frame.sample(min(5, len(frame)), random_state=42)
        for frame in (qa_orig, qa_postgres, qa_sqlite)
    ]
).sort_index()

# get retrieved documents

In [None]:
# Run `get_retrieved_documents` against each endpoint, sort rows and columns, then print
# the shapes and the pairwise `equals` results.
docs_orig = (
    get_retrieved_documents(px.Client(endpoint=orig_endpoint)).sort_index().sort_index(axis=1)
)
docs_postgres = (
    get_retrieved_documents(px.Client(endpoint=postgres_endpoint)).sort_index().sort_index(axis=1)
)
docs_sqlite = (
    get_retrieved_documents(px.Client(endpoint=sqlite_endpoint)).sort_index().sort_index(axis=1)
)
print(f"{docs_orig.shape=}")
print(f"{docs_postgres.shape=}")
print(f"{docs_sqlite.shape=}")
print(f"{docs_orig.equals(docs_postgres)=}")
print(f"{docs_orig.equals(docs_sqlite)=}")
print(f"{docs_postgres.equals(docs_sqlite)=}")
# Display a seeded sample of each result, sorted by index. The sample size is capped at the
# frame length because `sample(5)` raises ValueError on a frame with fewer than 5 rows.
pd.concat(
    [
        frame.sample(min(5, len(frame)), random_state=42)
        for frame in (docs_orig, docs_postgres, docs_sqlite)
    ]
).sort_index()

# select

In [None]:
# SpanQuery: select `trace_id` and `input.value`, with a filter on span kind, parent and
# latency. Run it against each endpoint and compare the row-sorted results.
select_query = (
    SpanQuery()
    .select("trace_id", "input.value")
    .where("span_kind == 'RETRIEVER' and parent_id is not None and latency_ms > 200")
)
df_select_orig = px.Client(endpoint=orig_endpoint).query_spans(select_query).sort_index()
print(f"{df_select_orig.shape=}")
df_select_postgres = px.Client(endpoint=postgres_endpoint).query_spans(select_query).sort_index()
print(f"{df_select_postgres.shape=}")
df_select_sqlite = px.Client(endpoint=sqlite_endpoint).query_spans(select_query).sort_index()
print(f"{df_select_sqlite.shape=}")
print(f"{df_select_orig.equals(df_select_postgres)=}")
print(f"{df_select_orig.equals(df_select_sqlite)=}")
print(f"{df_select_postgres.equals(df_select_sqlite)=}")
# Display a seeded sample of each result, sorted by index. The sample size is capped at the
# frame length because `sample(5)` raises ValueError on a frame with fewer than 5 rows.
pd.concat(
    [
        frame.sample(min(5, len(frame)), random_state=42)
        for frame in (df_select_orig, df_select_postgres, df_select_sqlite)
    ]
).sort_index()

# explode (no select or concat)

In [None]:
# SpanQuery: filter, then explode `retrieval.documents` with `content` and `score` kwargs
# (no select, no concat). Run it against each endpoint and compare the sorted results.
explode_query = (
    SpanQuery()
    .where("span_kind == 'RETRIEVER' and parent_id is not None and latency_ms > 200")
    .explode("retrieval.documents", content="document.content", score="document.score")
)
df_explode_orig = (
    px.Client(endpoint=orig_endpoint).query_spans(explode_query).sort_index().sort_index(axis=1)
)
print(f"{df_explode_orig.shape=}")
df_explode_postgres = (
    px.Client(endpoint=postgres_endpoint).query_spans(explode_query).sort_index().sort_index(axis=1)
)
print(f"{df_explode_postgres.shape=}")
df_explode_sqlite = (
    px.Client(endpoint=sqlite_endpoint).query_spans(explode_query).sort_index().sort_index(axis=1)
)
print(f"{df_explode_sqlite.shape=}")
print(f"{df_explode_orig.equals(df_explode_postgres)=}")
print(f"{df_explode_orig.equals(df_explode_sqlite)=}")
print(f"{df_explode_postgres.equals(df_explode_sqlite)=}")
# Display a seeded sample of each result, sorted by index. The sample size is capped at the
# frame length because `sample(5)` raises ValueError on a frame with fewer than 5 rows.
pd.concat(
    [
        frame.sample(min(5, len(frame)), random_state=42)
        for frame in (df_explode_orig, df_explode_postgres, df_explode_sqlite)
    ]
).sort_index()

# explode with select (no concat)

In [None]:
# SpanQuery: select `trace_id` and `input.value`, filter, then explode `retrieval.documents`
# with `content` and `score` kwargs (no concat). Run it against each endpoint and compare.
explode_query = (
    SpanQuery()
    .select("trace_id", "input.value")
    .where("span_kind == 'RETRIEVER' and parent_id is not None and latency_ms > 200")
    .explode("retrieval.documents", content="document.content", score="document.score")
)
df_explode_orig = (
    px.Client(endpoint=orig_endpoint).query_spans(explode_query).sort_index().sort_index(axis=1)
)
print(f"{df_explode_orig.shape=}")
df_explode_postgres = (
    px.Client(endpoint=postgres_endpoint).query_spans(explode_query).sort_index().sort_index(axis=1)
)
print(f"{df_explode_postgres.shape=}")
df_explode_sqlite = (
    px.Client(endpoint=sqlite_endpoint).query_spans(explode_query).sort_index().sort_index(axis=1)
)
print(f"{df_explode_sqlite.shape=}")
print(f"{df_explode_orig.equals(df_explode_postgres)=}")
print(f"{df_explode_orig.equals(df_explode_sqlite)=}")
print(f"{df_explode_postgres.equals(df_explode_sqlite)=}")
# Display a seeded sample of each result, sorted by index. The sample size is capped at the
# frame length because `sample(5)` raises ValueError on a frame with fewer than 5 rows.
pd.concat(
    [
        frame.sample(min(5, len(frame)), random_state=42)
        for frame in (df_explode_orig, df_explode_postgres, df_explode_sqlite)
    ]
).sort_index()

# explode with concat (no select)

In [None]:
# SpanQuery: filter, explode `retrieval.documents` with a `content` kwarg, and concat the
# same attribute with a `score` kwarg (no select). Run it against each endpoint and compare.
explode_query = (
    SpanQuery()
    .where("span_kind == 'RETRIEVER' and parent_id is not None and latency_ms > 200")
    .explode("retrieval.documents", content="document.content")
    .concat("retrieval.documents", score="document.score")
)
df_explode_orig = (
    px.Client(endpoint=orig_endpoint).query_spans(explode_query).sort_index().sort_index(axis=1)
)
print(f"{df_explode_orig.shape=}")
df_explode_postgres = (
    px.Client(endpoint=postgres_endpoint).query_spans(explode_query).sort_index().sort_index(axis=1)
)
print(f"{df_explode_postgres.shape=}")
df_explode_sqlite = (
    px.Client(endpoint=sqlite_endpoint).query_spans(explode_query).sort_index().sort_index(axis=1)
)
print(f"{df_explode_sqlite.shape=}")
print(f"{df_explode_orig.equals(df_explode_postgres)=}")
print(f"{df_explode_orig.equals(df_explode_sqlite)=}")
print(f"{df_explode_postgres.equals(df_explode_sqlite)=}")
# Display a seeded sample of each result, sorted by index. The sample size is capped at the
# frame length because `sample(5)` raises ValueError on a frame with fewer than 5 rows.
pd.concat(
    [
        frame.sample(min(5, len(frame)), random_state=42)
        for frame in (df_explode_orig, df_explode_postgres, df_explode_sqlite)
    ]
).sort_index()

# explode with concat and select

In [None]:
# SpanQuery: select `trace_id` and `input.value`, filter, explode `retrieval.documents` with
# a `content` kwarg, and concat the same attribute with a `score` kwarg. Run it against each
# endpoint and compare the sorted results.
explode_query = (
    SpanQuery()
    .select("trace_id", "input.value")
    .where("span_kind == 'RETRIEVER' and parent_id is not None and latency_ms > 200")
    .explode("retrieval.documents", content="document.content")
    .concat("retrieval.documents", score="document.score")
)
df_explode_orig = (
    px.Client(endpoint=orig_endpoint).query_spans(explode_query).sort_index().sort_index(axis=1)
)
print(f"{df_explode_orig.shape=}")
df_explode_postgres = (
    px.Client(endpoint=postgres_endpoint).query_spans(explode_query).sort_index().sort_index(axis=1)
)
print(f"{df_explode_postgres.shape=}")
df_explode_sqlite = (
    px.Client(endpoint=sqlite_endpoint).query_spans(explode_query).sort_index().sort_index(axis=1)
)
print(f"{df_explode_sqlite.shape=}")
print(f"{df_explode_orig.equals(df_explode_postgres)=}")
print(f"{df_explode_orig.equals(df_explode_sqlite)=}")
print(f"{df_explode_postgres.equals(df_explode_sqlite)=}")
# Display a seeded sample of each result, sorted by index. The sample size is capped at the
# frame length because `sample(5)` raises ValueError on a frame with fewer than 5 rows.
pd.concat(
    [
        frame.sample(min(5, len(frame)), random_state=42)
        for frame in (df_explode_orig, df_explode_postgres, df_explode_sqlite)
    ]
).sort_index()

# explode with no kwargs and select (no concat)

In [None]:
# SpanQuery: select `trace_id` and `input.value`, filter, then explode `retrieval.documents`
# with no kwargs (no concat). Run it against each endpoint and compare the sorted results.
explode_query = (
    SpanQuery()
    .select("trace_id", "input.value")
    .where("span_kind == 'RETRIEVER' and parent_id is not None and latency_ms > 200")
    .explode("retrieval.documents")
)
df_explode_orig = (
    px.Client(endpoint=orig_endpoint).query_spans(explode_query).sort_index().sort_index(axis=1)
)
print(f"{df_explode_orig.shape=}")
df_explode_postgres = (
    px.Client(endpoint=postgres_endpoint).query_spans(explode_query).sort_index().sort_index(axis=1)
)
print(f"{df_explode_postgres.shape=}")
df_explode_sqlite = (
    px.Client(endpoint=sqlite_endpoint).query_spans(explode_query).sort_index().sort_index(axis=1)
)
print(f"{df_explode_sqlite.shape=}")
print(f"{df_explode_orig.equals(df_explode_postgres)=}")
print(f"{df_explode_orig.equals(df_explode_sqlite)=}")
print(f"{df_explode_postgres.equals(df_explode_sqlite)=}")
# Display a seeded sample of each result, sorted by index. The sample size is capped at the
# frame length because `sample(5)` raises ValueError on a frame with fewer than 5 rows.
pd.concat(
    [
        frame.sample(min(5, len(frame)), random_state=42)
        for frame in (df_explode_orig, df_explode_postgres, df_explode_sqlite)
    ]
).sort_index()

# concat (no select or explode)

In [None]:
# SpanQuery: filter, then concat `retrieval.documents` with `content` and `score` kwargs and
# a custom separator (no select, no explode). Run it against each endpoint and compare.
concat_query = (
    SpanQuery()
    .where("span_kind == 'RETRIEVER' and parent_id is not None and latency_ms > 200")
    .concat("retrieval.documents", content="document.content", score="document.score")
    .with_concat_separator("🌟")
)
df_concat_orig = (
    px.Client(endpoint=orig_endpoint).query_spans(concat_query).sort_index().sort_index(axis=1)
)
print(f"{df_concat_orig.shape=}")
df_concat_postgres = (
    px.Client(endpoint=postgres_endpoint).query_spans(concat_query).sort_index().sort_index(axis=1)
)
print(f"{df_concat_postgres.shape=}")
df_concat_sqlite = (
    px.Client(endpoint=sqlite_endpoint).query_spans(concat_query).sort_index().sort_index(axis=1)
)
print(f"{df_concat_sqlite.shape=}")
print(f"{df_concat_orig.equals(df_concat_postgres)=}")
print(f"{df_concat_orig.equals(df_concat_sqlite)=}")
print(f"{df_concat_postgres.equals(df_concat_sqlite)=}")
# Display a seeded sample of each result, sorted by index. The sample size is capped at the
# frame length because `sample(5)` raises ValueError on a frame with fewer than 5 rows.
pd.concat(
    [
        frame.sample(min(5, len(frame)), random_state=42)
        for frame in (df_concat_orig, df_concat_postgres, df_concat_sqlite)
    ]
).sort_index()

# concat with explode (no select)

In [None]:
# SpanQuery: filter, explode `retrieval.documents` with a `content` kwarg, and concat the
# same attribute with a `score` kwarg and a custom separator (no select). Run it against
# each endpoint and compare the sorted results.
concat_query = (
    SpanQuery()
    .where("span_kind == 'RETRIEVER' and parent_id is not None and latency_ms > 200")
    .explode("retrieval.documents", content="document.content")
    .concat("retrieval.documents", score="document.score")
    .with_concat_separator("🌟")
)
df_concat_orig = (
    px.Client(endpoint=orig_endpoint).query_spans(concat_query).sort_index().sort_index(axis=1)
)
print(f"{df_concat_orig.shape=}")
df_concat_postgres = (
    px.Client(endpoint=postgres_endpoint).query_spans(concat_query).sort_index().sort_index(axis=1)
)
print(f"{df_concat_postgres.shape=}")
df_concat_sqlite = (
    px.Client(endpoint=sqlite_endpoint).query_spans(concat_query).sort_index().sort_index(axis=1)
)
print(f"{df_concat_sqlite.shape=}")
print(f"{df_concat_orig.equals(df_concat_postgres)=}")
print(f"{df_concat_orig.equals(df_concat_sqlite)=}")
print(f"{df_concat_postgres.equals(df_concat_sqlite)=}")
# Display a seeded sample of each result, sorted by index. The sample size is capped at the
# frame length because `sample(5)` raises ValueError on a frame with fewer than 5 rows.
pd.concat(
    [
        frame.sample(min(5, len(frame)), random_state=42)
        for frame in (df_concat_orig, df_concat_postgres, df_concat_sqlite)
    ]
).sort_index()

# concat with select (no explode)

In [None]:
# SpanQuery: select `trace_id` and `input.value`, filter, then concat `retrieval.documents`
# with `content` and `score` kwargs and a custom separator (no explode). Run it against each
# endpoint and compare the sorted results.
concat_query = (
    SpanQuery()
    .select("trace_id", "input.value")
    .where("span_kind == 'RETRIEVER' and parent_id is not None and latency_ms > 200")
    .concat("retrieval.documents", content="document.content", score="document.score")
    .with_concat_separator("🌟")
)
df_concat_orig = (
    px.Client(endpoint=orig_endpoint).query_spans(concat_query).sort_index().sort_index(axis=1)
)
print(f"{df_concat_orig.shape=}")
df_concat_postgres = (
    px.Client(endpoint=postgres_endpoint).query_spans(concat_query).sort_index().sort_index(axis=1)
)
print(f"{df_concat_postgres.shape=}")
df_concat_sqlite = (
    px.Client(endpoint=sqlite_endpoint).query_spans(concat_query).sort_index().sort_index(axis=1)
)
print(f"{df_concat_sqlite.shape=}")
print(f"{df_concat_orig.equals(df_concat_postgres)=}")
print(f"{df_concat_orig.equals(df_concat_sqlite)=}")
print(f"{df_concat_postgres.equals(df_concat_sqlite)=}")
# Display a seeded sample of each result, sorted by index. The sample size is capped at the
# frame length because `sample(5)` raises ValueError on a frame with fewer than 5 rows.
pd.concat(
    [
        frame.sample(min(5, len(frame)), random_state=42)
        for frame in (df_concat_orig, df_concat_postgres, df_concat_sqlite)
    ]
).sort_index()

# concat with select and explode

In [None]:
# SpanQuery: select `trace_id` and `input.value`, filter, explode `retrieval.documents` with
# a `content` kwarg, and concat the same attribute with a `score` kwarg and a custom
# separator. Run it against each endpoint and compare the sorted results.
concat_query = (
    SpanQuery()
    .select("trace_id", "input.value")
    .where("span_kind == 'RETRIEVER' and parent_id is not None and latency_ms > 200")
    .explode("retrieval.documents", content="document.content")
    .concat("retrieval.documents", score="document.score")
    .with_concat_separator("🌟")
)
df_concat_orig = (
    px.Client(endpoint=orig_endpoint).query_spans(concat_query).sort_index().sort_index(axis=1)
)
print(f"{df_concat_orig.shape=}")
df_concat_postgres = (
    px.Client(endpoint=postgres_endpoint).query_spans(concat_query).sort_index().sort_index(axis=1)
)
print(f"{df_concat_postgres.shape=}")
df_concat_sqlite = (
    px.Client(endpoint=sqlite_endpoint).query_spans(concat_query).sort_index().sort_index(axis=1)
)
print(f"{df_concat_sqlite.shape=}")
print(f"{df_concat_orig.equals(df_concat_postgres)=}")
print(f"{df_concat_orig.equals(df_concat_sqlite)=}")
print(f"{df_concat_postgres.equals(df_concat_sqlite)=}")
# Display a seeded sample of each result, sorted by index. The sample size is capped at the
# frame length because `sample(5)` raises ValueError on a frame with fewer than 5 rows.
pd.concat(
    [
        frame.sample(min(5, len(frame)), random_state=42)
        for frame in (df_concat_orig, df_concat_postgres, df_concat_sqlite)
    ]
).sort_index()

# no kwargs concat (no select or explode)

In [None]:
# SpanQuery: filter, then concat `retrieval.documents` with no kwargs and a custom separator
# (no select, no explode). Run it against each endpoint and compare the sorted results.
concat_query = (
    SpanQuery()
    .where("span_kind == 'RETRIEVER' and parent_id is not None and latency_ms > 200")
    .concat("retrieval.documents")
    .with_concat_separator("🌟")
)
df_concat_orig = (
    px.Client(endpoint=orig_endpoint).query_spans(concat_query).sort_index().sort_index(axis=1)
)
print(f"{df_concat_orig.shape=}")
df_concat_postgres = (
    px.Client(endpoint=postgres_endpoint).query_spans(concat_query).sort_index().sort_index(axis=1)
)
print(f"{df_concat_postgres.shape=}")
df_concat_sqlite = (
    px.Client(endpoint=sqlite_endpoint).query_spans(concat_query).sort_index().sort_index(axis=1)
)
print(f"{df_concat_sqlite.shape=}")
print(f"{df_concat_orig.equals(df_concat_postgres)=}")
print(f"{df_concat_orig.equals(df_concat_sqlite)=}")
print(f"{df_concat_postgres.equals(df_concat_sqlite)=}")
# Display a seeded sample of each result, sorted by index. The sample size is capped at the
# frame length because `sample(5)` raises ValueError on a frame with fewer than 5 rows.
pd.concat(
    [
        frame.sample(min(5, len(frame)), random_state=42)
        for frame in (df_concat_orig, df_concat_postgres, df_concat_sqlite)
    ]
).sort_index()

# no kwargs concat with explode (no select)

In [None]:
# SpanQuery: filter, explode `retrieval.documents` with a `content` kwarg, and concat the
# same attribute with no kwargs and a custom separator (no select). Run it against each
# endpoint and compare the sorted results.
concat_query = (
    SpanQuery()
    .where("span_kind == 'RETRIEVER' and parent_id is not None and latency_ms > 200")
    .explode("retrieval.documents", content="document.content")
    .concat("retrieval.documents")
    .with_concat_separator("🌟")
)
df_concat_orig = (
    px.Client(endpoint=orig_endpoint).query_spans(concat_query).sort_index().sort_index(axis=1)
)
print(f"{df_concat_orig.shape=}")
df_concat_postgres = (
    px.Client(endpoint=postgres_endpoint).query_spans(concat_query).sort_index().sort_index(axis=1)
)
print(f"{df_concat_postgres.shape=}")
df_concat_sqlite = (
    px.Client(endpoint=sqlite_endpoint).query_spans(concat_query).sort_index().sort_index(axis=1)
)
print(f"{df_concat_sqlite.shape=}")
print(f"{df_concat_orig.equals(df_concat_postgres)=}")
print(f"{df_concat_orig.equals(df_concat_sqlite)=}")
print(f"{df_concat_postgres.equals(df_concat_sqlite)=}")
# Display a seeded sample of each result, sorted by index. The sample size is capped at the
# frame length because `sample(5)` raises ValueError on a frame with fewer than 5 rows.
pd.concat(
    [
        frame.sample(min(5, len(frame)), random_state=42)
        for frame in (df_concat_orig, df_concat_postgres, df_concat_sqlite)
    ]
).sort_index()

# no kwargs concat with select (no explode)

In [None]:
# SpanQuery: select `trace_id` and `input.value`, filter, then concat `retrieval.documents`
# with no kwargs and a custom separator (no explode). Run it against each endpoint and
# compare the sorted results.
concat_query = (
    SpanQuery()
    .select("trace_id", "input.value")
    .where("span_kind == 'RETRIEVER' and parent_id is not None and latency_ms > 200")
    .concat("retrieval.documents")
    .with_concat_separator("🌟")
)
df_concat_orig = (
    px.Client(endpoint=orig_endpoint).query_spans(concat_query).sort_index().sort_index(axis=1)
)
print(f"{df_concat_orig.shape=}")
df_concat_postgres = (
    px.Client(endpoint=postgres_endpoint).query_spans(concat_query).sort_index().sort_index(axis=1)
)
print(f"{df_concat_postgres.shape=}")
df_concat_sqlite = (
    px.Client(endpoint=sqlite_endpoint).query_spans(concat_query).sort_index().sort_index(axis=1)
)
print(f"{df_concat_sqlite.shape=}")
print(f"{df_concat_orig.equals(df_concat_postgres)=}")
print(f"{df_concat_orig.equals(df_concat_sqlite)=}")
print(f"{df_concat_postgres.equals(df_concat_sqlite)=}")
# Display a seeded sample of each result, sorted by index. The sample size is capped at the
# frame length because `sample(5)` raises ValueError on a frame with fewer than 5 rows.
pd.concat(
    [
        frame.sample(min(5, len(frame)), random_state=42)
        for frame in (df_concat_orig, df_concat_postgres, df_concat_sqlite)
    ]
).sort_index()

# no kwargs concat with select and explode

In [None]:
# SpanQuery: select `trace_id` and `input.value`, filter, explode `retrieval.documents` with
# a `content` kwarg, and concat the same attribute with no kwargs and a custom separator.
# Run it against each endpoint and compare the sorted results.
concat_query = (
    SpanQuery()
    .select("trace_id", "input.value")
    .where("span_kind == 'RETRIEVER' and parent_id is not None and latency_ms > 200")
    .explode("retrieval.documents", content="document.content")
    .concat("retrieval.documents")
    .with_concat_separator("🌟")
)
df_concat_orig = (
    px.Client(endpoint=orig_endpoint).query_spans(concat_query).sort_index().sort_index(axis=1)
)
print(f"{df_concat_orig.shape=}")
df_concat_postgres = (
    px.Client(endpoint=postgres_endpoint).query_spans(concat_query).sort_index().sort_index(axis=1)
)
print(f"{df_concat_postgres.shape=}")
df_concat_sqlite = (
    px.Client(endpoint=sqlite_endpoint).query_spans(concat_query).sort_index().sort_index(axis=1)
)
print(f"{df_concat_sqlite.shape=}")
print(f"{df_concat_orig.equals(df_concat_postgres)=}")
print(f"{df_concat_orig.equals(df_concat_sqlite)=}")
print(f"{df_concat_postgres.equals(df_concat_sqlite)=}")
# Display a seeded sample of each result, sorted by index. The sample size is capped at the
# frame length because `sample(5)` raises ValueError on a frame with fewer than 5 rows.
pd.concat(
    [
        frame.sample(min(5, len(frame)), random_state=42)
        for frame in (df_concat_orig, df_concat_postgres, df_concat_sqlite)
    ]
).sort_index()

# no kwargs concat with no kwargs explode and select

In [None]:
# SpanQuery: select `trace_id` and `input.value`, filter, then both explode and concat
# `retrieval.documents` with no kwargs, using a custom separator. Run it against each
# endpoint and compare the sorted results.
concat_query = (
    SpanQuery()
    .select("trace_id", "input.value")
    .where("span_kind == 'RETRIEVER' and parent_id is not None and latency_ms > 200")
    .explode("retrieval.documents")
    .concat("retrieval.documents")
    .with_concat_separator("🌟")
)
df_concat_orig = (
    px.Client(endpoint=orig_endpoint).query_spans(concat_query).sort_index().sort_index(axis=1)
)
print(f"{df_concat_orig.shape=}")
df_concat_postgres = (
    px.Client(endpoint=postgres_endpoint).query_spans(concat_query).sort_index().sort_index(axis=1)
)
print(f"{df_concat_postgres.shape=}")
df_concat_sqlite = (
    px.Client(endpoint=sqlite_endpoint).query_spans(concat_query).sort_index().sort_index(axis=1)
)
print(f"{df_concat_sqlite.shape=}")
print(f"{df_concat_orig.equals(df_concat_postgres)=}")
print(f"{df_concat_orig.equals(df_concat_sqlite)=}")
print(f"{df_concat_postgres.equals(df_concat_sqlite)=}")
# Display a seeded sample of each result, sorted by index. The sample size is capped at the
# frame length because `sample(5)` raises ValueError on a frame with fewer than 5 rows.
pd.concat(
    [
        frame.sample(min(5, len(frame)), random_state=42)
        for frame in (df_concat_orig, df_concat_postgres, df_concat_sqlite)
    ]
).sort_index()

# no kwargs concat with no kwargs explode (no select)

In [None]:
# SpanQuery: filter, then both explode and concat `retrieval.documents` with no kwargs,
# using a custom separator (no select). Run it against each endpoint and compare.
concat_query = (
    SpanQuery()
    .where("span_kind == 'RETRIEVER' and parent_id is not None and latency_ms > 200")
    .explode("retrieval.documents")
    .concat("retrieval.documents")
    .with_concat_separator("🌟")
)
df_concat_orig = (
    px.Client(endpoint=orig_endpoint).query_spans(concat_query).sort_index().sort_index(axis=1)
)
print(f"{df_concat_orig.shape=}")
df_concat_postgres = (
    px.Client(endpoint=postgres_endpoint).query_spans(concat_query).sort_index().sort_index(axis=1)
)
print(f"{df_concat_postgres.shape=}")
df_concat_sqlite = (
    px.Client(endpoint=sqlite_endpoint).query_spans(concat_query).sort_index().sort_index(axis=1)
)
print(f"{df_concat_sqlite.shape=}")
print(f"{df_concat_orig.equals(df_concat_postgres)=}")
print(f"{df_concat_orig.equals(df_concat_sqlite)=}")
print(f"{df_concat_postgres.equals(df_concat_sqlite)=}")
# Display a seeded sample of each result, sorted by index. The sample size is capped at the
# frame length because `sample(5)` raises ValueError on a frame with fewer than 5 rows.
pd.concat(
    [
        frame.sample(min(5, len(frame)), random_state=42)
        for frame in (df_concat_orig, df_concat_postgres, df_concat_sqlite)
    ]
).sort_index()

# concat index by name

In [None]:
# SpanQuery: filter, index by `name`, then concat `retrieval.documents` with no kwargs and
# a custom separator. Run it against each endpoint and compare the sorted results.
concat_query = (
    SpanQuery()
    .where("span_kind == 'RETRIEVER' and parent_id is not None and latency_ms > 200")
    .with_index("name")
    .concat("retrieval.documents")
    .with_concat_separator("🌟")
)
df_concat_orig = (
    px.Client(endpoint=orig_endpoint).query_spans(concat_query).sort_index().sort_index(axis=1)
)
print(f"{df_concat_orig.shape=}")
df_concat_postgres = (
    px.Client(endpoint=postgres_endpoint).query_spans(concat_query).sort_index().sort_index(axis=1)
)
print(f"{df_concat_postgres.shape=}")
df_concat_sqlite = (
    px.Client(endpoint=sqlite_endpoint).query_spans(concat_query).sort_index().sort_index(axis=1)
)
print(f"{df_concat_sqlite.shape=}")
print(f"{df_concat_orig.equals(df_concat_postgres)=}")
print(f"{df_concat_orig.equals(df_concat_sqlite)=}")
print(f"{df_concat_postgres.equals(df_concat_sqlite)=}")
# Display a seeded sample of each result, sorted by index. The sample size is capped at the
# frame length because `sample(5)` raises ValueError on a frame with fewer than 5 rows.
pd.concat(
    [
        frame.sample(min(5, len(frame)), random_state=42)
        for frame in (df_concat_orig, df_concat_postgres, df_concat_sqlite)
    ]
).sort_index()

In [None]:
# SpanQuery: index by `name`, filter, then explode `retrieval.documents` with no kwargs.
# Run it against each endpoint and compare the sorted results.
explode_query = (
    SpanQuery()
    .with_index("name")
    .where("span_kind == 'RETRIEVER' and parent_id is not None and latency_ms > 200")
    .explode("retrieval.documents")
)
df_explode_orig = (
    px.Client(endpoint=orig_endpoint).query_spans(explode_query).sort_index().sort_index(axis=1)
)
print(f"{df_explode_orig.shape=}")
df_explode_postgres = (
    px.Client(endpoint=postgres_endpoint).query_spans(explode_query).sort_index().sort_index(axis=1)
)
print(f"{df_explode_postgres.shape=}")
df_explode_sqlite = (
    px.Client(endpoint=sqlite_endpoint).query_spans(explode_query).sort_index().sort_index(axis=1)
)
print(f"{df_explode_sqlite.shape=}")
print(f"{df_explode_orig.equals(df_explode_postgres)=}")
print(f"{df_explode_orig.equals(df_explode_sqlite)=}")
print(f"{df_explode_postgres.equals(df_explode_sqlite)=}")
# Display a seeded sample of each result, sorted by index. The sample size is capped at the
# frame length because `sample(5)` raises ValueError on a frame with fewer than 5 rows.
pd.concat(
    [
        frame.sample(min(5, len(frame)), random_state=42)
        for frame in (df_explode_orig, df_explode_postgres, df_explode_sqlite)
    ]
).sort_index()

In [None]:
# SpanQuery: filter, index by `trace_id`, then concat `retrieval.documents` with a `score`
# kwarg and a custom separator. Run it against each endpoint and compare the sorted results.
concat_query = (
    SpanQuery()
    .where("span_kind == 'RETRIEVER' and parent_id is not None and latency_ms > 200")
    .with_index("trace_id")
    .concat("retrieval.documents", score="document.score")
    .with_concat_separator("🌟")
)
df_concat_orig = (
    px.Client(endpoint=orig_endpoint).query_spans(concat_query).sort_index().sort_index(axis=1)
)
print(f"{df_concat_orig.shape=}")
df_concat_postgres = (
    px.Client(endpoint=postgres_endpoint).query_spans(concat_query).sort_index().sort_index(axis=1)
)
print(f"{df_concat_postgres.shape=}")
df_concat_sqlite = (
    px.Client(endpoint=sqlite_endpoint).query_spans(concat_query).sort_index().sort_index(axis=1)
)
print(f"{df_concat_sqlite.shape=}")
print(f"{df_concat_orig.equals(df_concat_postgres)=}")
print(f"{df_concat_orig.equals(df_concat_sqlite)=}")
print(f"{df_concat_postgres.equals(df_concat_sqlite)=}")
# Display a seeded sample of each result, sorted by index. The sample size is capped at the
# frame length because `sample(5)` raises ValueError on a frame with fewer than 5 rows.
pd.concat(
    [
        frame.sample(min(5, len(frame)), random_state=42)
        for frame in (df_concat_orig, df_concat_postgres, df_concat_sqlite)
    ]
).sort_index()

In [None]:
# SpanQuery: filter, index by `span_id`, then concat `retrieval.documents` with a `score`
# kwarg and a custom separator. Run it against each endpoint and compare the sorted results.
concat_query = (
    SpanQuery()
    .where("span_kind == 'RETRIEVER' and parent_id is not None and latency_ms > 200")
    .with_index("span_id")
    .concat("retrieval.documents", score="document.score")
    .with_concat_separator("🌟")
)
df_concat_orig = (
    px.Client(endpoint=orig_endpoint).query_spans(concat_query).sort_index().sort_index(axis=1)
)
print(f"{df_concat_orig.shape=}")
df_concat_postgres = (
    px.Client(endpoint=postgres_endpoint).query_spans(concat_query).sort_index().sort_index(axis=1)
)
print(f"{df_concat_postgres.shape=}")
df_concat_sqlite = (
    px.Client(endpoint=sqlite_endpoint).query_spans(concat_query).sort_index().sort_index(axis=1)
)
print(f"{df_concat_sqlite.shape=}")
print(f"{df_concat_orig.equals(df_concat_postgres)=}")
print(f"{df_concat_orig.equals(df_concat_sqlite)=}")
print(f"{df_concat_postgres.equals(df_concat_sqlite)=}")
# Display a seeded sample of each result, sorted by index. The sample size is capped at the
# frame length because `sample(5)` raises ValueError on a frame with fewer than 5 rows.
pd.concat(
    [
        frame.sample(min(5, len(frame)), random_state=42)
        for frame in (df_concat_orig, df_concat_postgres, df_concat_sqlite)
    ]
).sort_index()

# explode embeddings

In [None]:
# SpanQuery: explode `embedding.embeddings` with a `vector` kwarg (no filter, no select).
# Run it against each endpoint and compare the sorted results.
explode_query = SpanQuery().explode("embedding.embeddings", vector="embedding.vector")
df_explode_orig = (
    px.Client(endpoint=orig_endpoint).query_spans(explode_query).sort_index().sort_index(axis=1)
)
print(f"{df_explode_orig.shape=}")
df_explode_postgres = (
    px.Client(endpoint=postgres_endpoint).query_spans(explode_query).sort_index().sort_index(axis=1)
)
print(f"{df_explode_postgres.shape=}")
df_explode_sqlite = (
    px.Client(endpoint=sqlite_endpoint).query_spans(explode_query).sort_index().sort_index(axis=1)
)
print(f"{df_explode_sqlite.shape=}")
print(f"{df_explode_orig.equals(df_explode_postgres)=}")
print(f"{df_explode_orig.equals(df_explode_sqlite)=}")
print(f"{df_explode_postgres.equals(df_explode_sqlite)=}")
# Display a seeded sample of each result, sorted by index. The sample size is capped at the
# frame length because `sample(5)` raises ValueError on a frame with fewer than 5 rows.
pd.concat(
    [
        frame.sample(min(5, len(frame)), random_state=42)
        for frame in (df_explode_orig, df_explode_postgres, df_explode_sqlite)
    ]
).sort_index()