# A Working Example of Traces + Evaluations

In [None]:
import phoenix as px

# Load the example traces registered under the name "llama_index_rag".
ds = px.load_example_traces("llama_index_rag")

In [None]:
# Convert the loaded traces to a spans dataframe and preview the first rows.
spans_df = ds.to_spans_dataframe()
spans_df.head()

In [None]:
from phoenix.trace.spans_dataframe_utils import SpansDataframeFormats, to_format

# Convert the spans dataframe to the `key_value` format and preview it.
# NOTE(review): this cell was previously described as filtering the traces
# down to just the root spans; no explicit filter is visible here -- confirm
# whether `to_format(..., SpansDataframeFormats.key_value)` does that.

spans_df = to_format(spans_df, SpansDataframeFormats.key_value)
spans_df.head()

In [None]:
import getpass
import os

import openai

# Use the API key from the environment when it is set; otherwise prompt for
# it without echoing the input. `getpass` is imported as a module, so the
# prompt function is `getpass.getpass` -- calling the module object itself
# raises `TypeError: 'module' object is not callable`.
if not (openai_api_key := os.getenv("OPENAI_API_KEY")):
    openai_api_key = getpass.getpass("🔑 Enter your OpenAI API key: ")

# Store the key on the `openai` module and in the process environment.
openai.api_key = openai_api_key
os.environ["OPENAI_API_KEY"] = openai_api_key

In [None]:
from phoenix.experimental.evals import (
    TOXICITY_PROMPT_RAILS_MAP,
    TOXICITY_PROMPT_TEMPLATE_STR,
    OpenAIModel,
    llm_classify,
)

# Run the toxicity classifier over the spans using GPT-4, asking for an
# explanation alongside each label. The `input` column is renamed to `text`
# on a copy passed to the classifier -- presumably because the toxicity
# template reads a `text` column; confirm against the template.
evaluation_results = llm_classify(
    spans_df.rename(columns={"input": "text"}),
    template=TOXICITY_PROMPT_TEMPLATE_STR,
    rails=[*TOXICITY_PROMPT_RAILS_MAP.values()],
    model=OpenAIModel("gpt-4"),
    provide_explanation=True,
    verbose=True,
)

In [None]:
# Preview the first rows of the raw classification results.
evaluation_results.head()

In [None]:
# Convert the labels to 1 (toxic) or 0 (anything else, including missing
# labels). A vectorised comparison replaces the per-row lambda and yields an
# int64 column even when the frame is empty.
# NOTE: this overwrites "label" in place, so re-running the cell would turn
# every value into 0 (the labels are no longer strings after the first run).
evaluation_results["label"] = (evaluation_results["label"] == "toxic").astype("int64")

In [None]:
# Rename the label and explanation columns to their "eval.toxicity" names.
evaluation_results = evaluation_results.rename(
    columns={
        "label": "eval.toxicity",
        "explanation": "eval.toxicity.explanation",
    }
)

In [None]:
# Preview the results with the renamed evaluation columns.
evaluation_results.head()

In [None]:
from phoenix.trace.spans_dataframe_utils import to_trace_evaluations

# Associate the evaluation results with the original spans.
# NOTE(review): presumably the two dataframes are matched by index -- confirm
# against `to_trace_evaluations`.
evaluations = to_trace_evaluations(spans_df, evaluation_results)
# NOTE(review): a comment here previously said unneeded columns are dropped,
# but no drop is performed in this cell; the line below only previews the
# result.
evaluations.head()