In [None]:
import nest_asyncio
import pandas as pd
import phoenix as px
from phoenix.evals import OpenAIModel, llm_classify
from phoenix.evals.default_templates import (
    HALLUCINATION_PROMPT_RAILS_MAP,
    HALLUCINATION_PROMPT_TEMPLATE,
    QA_PROMPT_RAILS_MAP,
    QA_PROMPT_TEMPLATE,
    RAG_RELEVANCY_PROMPT_RAILS_MAP,
    RAG_RELEVANCY_PROMPT_TEMPLATE,
)
from phoenix.trace.dsl import SpanQuery
from phoenix.trace.dsl.helpers import (
    INPUT,
    IO,
    IS_RETRIEVER,
    IS_ROOT,
    get_qa_with_reference,
    get_retrieved_documents,
)
from phoenix.trace.semantic_conventions import (
    DOCUMENT_CONTENT,
    DOCUMENT_SCORE,
    RETRIEVAL_DOCUMENTS,
)

# Patch asyncio so that an event loop can be re-entered while another is
# already running (a notebook kernel runs its own loop). Presumably required
# by the evaluation calls later in this notebook -- TODO confirm.
nest_asyncio.apply()

In [None]:
import os
from getpass import getpass

import openai

# Take the key from the environment when present; otherwise prompt for it
# without echoing the input.
openai_api_key = os.getenv("OPENAI_API_KEY")
if not openai_api_key:
    openai_api_key = getpass("🔑 Enter your OpenAI API key: ")

# Publish the key both on the openai module and in the process environment.
openai.api_key = openai_api_key
os.environ["OPENAI_API_KEY"] = openai_api_key

In [None]:
# Evaluation model passed to every llm_classify call in this notebook.
model = OpenAIModel(model_name="gpt-3.5-turbo-instruct")
# Invoke the model once with a trivial prompt; presumably a smoke test so that
# a bad key or model name fails here rather than during an evaluation run.
model("hi")

In [None]:
# Load the "llama_index_rag" example traces and launch the Phoenix app with
# them; `session` is the handle queried by the span-query cells below.
ds = px.load_example_traces("llama_index_rag")
session = px.launch_app(trace=ds)

In [None]:
# Fetch the retrieved documents through a Phoenix client; this frame is the
# input to the relevancy evaluation below.
docs_df = get_retrieved_documents(px.Client())
# Preview the first rows.
docs_df.head()

In [None]:
# Classify every row of docs_df with the RAG relevancy template, restricting
# the output to the template's rails and requesting an explanation.
relevancy_rails = list(RAG_RELEVANCY_PROMPT_RAILS_MAP.values())
docs_eval = llm_classify(
    docs_df,
    model,
    RAG_RELEVANCY_PROMPT_TEMPLATE,
    relevancy_rails,
    provide_explanation=True,
)

# Score only the rows that received a label: 1 for "relevant", 0 otherwise.
# Rows with a null label are absent from the right-hand side, so index
# alignment leaves their score missing.
labeled_docs = docs_eval["label"].dropna()
docs_eval["score"] = labeled_docs.eq("relevant").astype(int)
docs_eval.head()

In [None]:
# Fetch the question/answer-with-reference frame through a Phoenix client;
# it feeds both the QA and the hallucination evaluations below.
qa_df = get_qa_with_reference(px.Client())
# Preview the first rows.
qa_df.head()

In [None]:
# Classify every row of qa_df with the QA template, restricting the output to
# the template's rails and requesting an explanation.
qa_rails = list(QA_PROMPT_RAILS_MAP.values())
qa_eval = llm_classify(
    qa_df,
    model,
    QA_PROMPT_TEMPLATE,
    qa_rails,
    provide_explanation=True,
)

# Score only the rows that received a label: 1 for "correct", 0 otherwise.
# Rows with a null label are absent from the right-hand side, so index
# alignment leaves their score missing.
labeled_answers = qa_eval["label"].dropna()
qa_eval["score"] = labeled_answers.eq("correct").astype(int)
qa_eval.head()

In [None]:
# Classify every row of qa_df with the hallucination template, restricting the
# output to the template's rails and requesting an explanation.
hallucination_rails = list(HALLUCINATION_PROMPT_RAILS_MAP.values())
hallucination_eval = llm_classify(
    qa_df,
    model,
    HALLUCINATION_PROMPT_TEMPLATE,
    hallucination_rails,
    provide_explanation=True,
)

# Score only the rows that received a label: 1 for "factual", 0 otherwise.
# Rows with a null label are absent from the right-hand side, so index
# alignment leaves their score missing.
labeled_responses = hallucination_eval["label"].dropna()
hallucination_eval["score"] = labeled_responses.eq("factual").astype(int)
hallucination_eval.head()

In [None]:
# Select the IO fields of spans matching the IS_ROOT filter and display the
# resulting frame.
root_io_query = SpanQuery().select(**IO).where(IS_ROOT)
session.query_spans(root_io_query)

In [None]:
# For spans matching the IS_RETRIEVER filter, select the INPUT fields and
# explode the retrieval documents, exposing each document's content as
# `reference` and its score as `score`.
retriever_docs_query = (
    SpanQuery()
    .select(**INPUT)
    .explode(RETRIEVAL_DOCUMENTS, reference=DOCUMENT_CONTENT, score=DOCUMENT_SCORE)
    .where(IS_RETRIEVER)
)
session.query_spans(retriever_docs_query)

In [None]:
# First query: IO fields of spans matching the IS_ROOT filter.
root_io_query = SpanQuery().select(**IO).where(IS_ROOT)

# Second query: concatenated retrieval-document content exposed as
# `reference`, with `span_id` taken from "parent_id" -- presumably so these
# rows line up with the root spans of the first query; verify.
reference_query = (
    SpanQuery()
    .select(span_id="parent_id")
    .concat(RETRIEVAL_DOCUMENTS, reference=DOCUMENT_CONTENT)
)

# Run both queries in one call, then join the results column-wise, keeping
# only index values present in every frame.
query_frames = session.query_spans(root_io_query, reference_query)
pd.concat(query_frames, axis=1, join="inner")