In [None]:
import os
from pathlib import Path
from typing import Optional, Sequence

import pandas as pd
import phoenix as px
from pandas.testing import assert_frame_equal
from phoenix.db import models
from sqlalchemy import and_, create_engine, inspect, select
from sqlalchemy.orm import aliased, sessionmaker
from sqlalchemy.orm.util import AliasedClass
from sqlalchemy.sql.roles import JoinTargetRole

In [None]:
# Database URLs for the two backends being compared. Each can be overridden with an
# environment variable so that credentials and machine-specific paths do not have to be
# edited into the notebook. The defaults target a local PostgreSQL server and the
# phoenix.db file under the current user's ~/.phoenix directory.
POSTGRES_URL = os.environ.get(
    "COMPARE_POSTGRES_URL",
    "postgresql+psycopg://localhost:5432/postgres?user=postgres&password=mysecretpassword",
)
SQLITE_URL = os.environ.get(
    "COMPARE_SQLITE_URL",
    f"sqlite:///{Path.home() / '.phoenix' / 'phoenix.db'}",
)

# echo=True makes each engine log the SQL it emits, so the statements sent to the two
# backends can be compared by eye.
PostgresSession = sessionmaker(
    create_engine(POSTGRES_URL, echo=True),
    expire_on_commit=False,
)
SqliteSession = sessionmaker(
    create_engine(SQLITE_URL, echo=True),
    expire_on_commit=False,
)

In [None]:
# Base URLs of the three locally running servers whose span results are compared below.
# NOTE(review): "original" presumably refers to the pre-database implementation - confirm.
sqlite_endpoint = "http://127.0.0.1:6006"
postgres_endpoint = "http://127.0.0.1:6007"
original_endpoint = "http://127.0.0.1:6008"

In [None]:
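# Reference SQL: spans named 'query' that have an annotation with score > 0.5 and label 'factual'.
# Written with `=` (not `==`) so the statement is valid in both SQLite and PostgreSQL.
# SELECT * FROM spans
# JOIN (
#     SELECT spans.id, sa.score, sa.label FROM spans
#     JOIN span_annotations sa ON spans.id = sa.span_rowid
# ) B ON spans.id = B.id
# WHERE B.score > 0.5 AND B.label = 'factual' AND spans.name = 'query';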

In [None]:
def get_spans_dataframe(
    endpoint: str,
    filter_condition: str,
    columns: Optional[Sequence[str]] = None,
) -> pd.DataFrame:
    """Fetch the spans matching a filter from a Phoenix server in a canonical layout.

    Rows are sorted by index and columns are sorted by name, so that frames fetched
    from different servers can be compared directly.

    Args:
        endpoint: Base URL of the Phoenix server to query.
        filter_condition: Filter expression passed through to
            ``px.Client.get_spans_dataframe``.
        columns: Optional column labels to align the result to. Labels missing from
            the fetched frame become all-NaN columns and extra columns are dropped
            (standard ``DataFrame.reindex`` behavior). When omitted, the fetched
            frame's own columns are used.

    Returns:
        The spans dataframe with sorted index and sorted columns.
    """
    spans_df = px.Client(endpoint=endpoint).get_spans_dataframe(filter_condition).sort_index()
    if columns is None:
        # Use this frame's own columns rather than a notebook-level dataframe: the
        # function must work on a fresh kernel and must not depend on whichever
        # frame happens to be bound to a global name when it is called.
        columns = spans_df.columns
    return spans_df.reindex(sorted(columns), axis=1)

In [None]:
# Run the same filter against all three servers (original, PostgreSQL, SQLite - in that
# order) and keep one dataframe per server.
filter_condition = "evals['Q&A Correctness'].label == 'correct'"
original_df, postgres_df, sqlite_df = (
    get_spans_dataframe(endpoint=server_url, filter_condition=filter_condition)
    for server_url in (original_endpoint, postgres_endpoint, sqlite_endpoint)
)

In [None]:
# Cell-level differences between the original and SQLite-backed results; an empty
# frame means no differing values. compare() requires identically labelled frames.
original_df.compare(sqlite_df, result_names=("original", "sqlite"))

In [None]:
# Display the spans fetched from the PostgreSQL-backed endpoint.
postgres_df

In [None]:
# Cell-level differences between the original and PostgreSQL-backed results; an empty
# frame means no differing values. compare() requires identically labelled frames.
original_df.compare(postgres_df, result_names=("original", "postgres"))

In [None]:
# Span kind, events and parent_id columns of the SQLite-backed result.
# NOTE(review): presumably the columns where the backends were seen to differ - confirm.
sqlite_df[["attributes.openinference.span.kind", "events", "parent_id"]]

In [None]:
# Span kind, events and parent_id columns of the original result.
# NOTE(review): presumably the columns where the backends were seen to differ - confirm.
original_df[["attributes.openinference.span.kind", "events", "parent_id"]]

In [None]:
# How many spans in the original result have 0, 1, 2, ... entries in `events`
# (len() is applied to each cell, so every cell must be a sized object).
original_df["events"].map(len).value_counts()

In [None]:
# Python type of the first `events` cell in the SQLite-backed result
# (raises IndexError if the frame is empty).
type(sqlite_df["events"].iloc[0])

In [None]:
# Single True/False answer: do the original and SQLite-backed frames hold the same data?
original_df.equals(sqlite_df)

In [None]:
# Number of rows in the SQLite-backed result whose parent_id is missing (NaN/None).
sqlite_df["parent_id"].isna().sum()

In [None]:
# Span-kind column of the original result.
original_df["attributes.openinference.span.kind"]

In [None]:
# Column labels of the original result, for comparison with the SQLite-backed result's columns.
original_df.columns

In [None]:
# Column labels of the SQLite-backed result, for comparison with the original result's columns.
sqlite_df.columns

In [None]:
# Hand-written query for the filter evals['Q&A Correctness'].label == 'correct':
# span IDs joined to an annotation with that name and label. No ON clause is given,
# so SQLAlchemy derives the join condition itself.
stmt = select(models.Span.span_id).join(models.SpanAnnotation).where(
    and_(
        models.SpanAnnotation.name == "Q&A Correctness",
        models.SpanAnnotation.label == "correct",
    )
)

# Execute the identical statement on each backend, indexing the result by span_id.
with SqliteSession() as sqlite_session:
    sqlite_df = pd.read_sql(sql=stmt, con=sqlite_session.connection(), index_col="span_id")
with PostgresSession() as postgres_session:
    postgres_df = pd.read_sql(sql=stmt, con=postgres_session.connection(), index_col="span_id")

# The statement has no ORDER BY, so sort both axes before requiring equality.
assert_frame_equal(
    left=sqlite_df.sort_index().sort_index(axis=1),
    right=postgres_df.sort_index().sort_index(axis=1),
)

Filter condition that the following query reproduces by hand:

```
evals["Q&A Correctness"].label == "correct" and evals["Hallucination"].label == "hallucinated"
```

In [None]:
# Span IDs that carry BOTH a "Q&A Correctness" annotation labelled "correct" and a
# "Hallucination" annotation labelled "hallucinated". The annotation table is joined
# twice, once per annotation name, through two aliases.
A = aliased(models.SpanAnnotation, name="first-table")
B = aliased(models.SpanAnnotation)
stmt = (
    select(models.Span.span_id)
    # An explicit onclause replaces the foreign-key condition SQLAlchemy would otherwise
    # infer, so each join has to tie the annotation row to its span itself. Without the
    # span_rowid condition the joins form a cross product and every span matches.
    .join(A, onclause=and_(A.span_rowid == models.Span.id, A.name == "Q&A Correctness"))
    .join(B, onclause=and_(B.span_rowid == models.Span.id, B.name == "Hallucination"))
    .where(A.label == "correct")
    .where(B.label == "hallucinated")
)

# Execute the identical statement on each backend, indexing the result by span_id.
with SqliteSession() as sqlite_session:
    sqlite_df = pd.read_sql(
        stmt,
        sqlite_session.connection(),
        index_col="span_id",
    )
with PostgresSession() as postgres_session:
    postgres_df = pd.read_sql(
        stmt,
        postgres_session.connection(),
        index_col="span_id",
    )

# The statement has no ORDER BY, so sort both axes before requiring equality.
assert_frame_equal(
    sqlite_df.sort_index().sort_index(axis=1),
    postgres_df.sort_index().sort_index(axis=1),
)

In [None]:
# Runtime inspection object for the aliased entity A; `inspect` is imported in the
# notebook's top import cell.
insp = inspect(A)

In [None]:
# Check whether the object returned by aliased() is an AliasedClass instance
# (AliasedClass is imported in the notebook's top import cell).
isinstance(A, AliasedClass)

In [None]:
# Check whether the alias object itself is an instance of SQLAlchemy's JoinTargetRole
# (JoinTargetRole is imported in the notebook's top import cell).
isinstance(A, JoinTargetRole)

In [None]:
# Name reported by the inspection object for alias A.
# NOTE(review): expected to be the name= passed to aliased() ("first-table") - confirm.
insp.name

In [None]:
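# Reference SQL: spans that have both a 'Hallucination' annotation with score 0 and a
# 'Q&A Correctness' annotation labelled 'correct', using conditional aggregation
# instead of one self-join per annotation name.
# NOTE: HAVING refers to the select-list aliases A and B. SQLite accepts this;
# PostgreSQL does not, and needs the MAX(CASE ...) expressions repeated in HAVING.
# SELECT span_annotations.span_rowid,
# MAX(CASE WHEN name = 'Hallucination' AND score = 0 THEN 1 ELSE 0 END) AS A,
# MAX(CASE WHEN name = 'Q&A Correctness' AND label = 'correct' THEN 1 ELSE 0 END) AS B
# FROM span_annotations
# WHERE name IN ('Hallucination', 'Q&A Correctness')
# GROUP BY span_annotations.span_rowid
# HAVING A = 1 AND B = 1
# ORDER BY span_rowid;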