# A Working Example of Traces + Evaluations


In [None]:
import phoenix as px

# Load the example traces identified by "llama_index_rag"; `ds` is reused by
# the later cells.
ds = px.load_example_traces("llama_index_rag")

In [None]:
# Keep only the first five rows of the spans dataframe; the following cells
# operate on this `spans_df` subset.
full_spans_df = ds.to_spans_dataframe()
spans_df = full_spans_df.head(5)
spans_df.head()

In [None]:
from phoenix.trace.spans_dataframe_utils import SpansDataframeFormats, to_format

# Reduce the spans dataframe to just span_id, input, and output.
key_value_format = SpansDataframeFormats.key_value
spans_df = to_format(spans_df, key_value_format)
spans_df.head()

In [None]:
import getpass
import os

import openai

# Resolve the OpenAI API key: prefer the OPENAI_API_KEY environment variable
# and fall back to an interactive prompt when it is unset or empty.
if not (openai_api_key := os.getenv("OPENAI_API_KEY")):
    # `getpass` is imported as a module, so the prompt function must be called
    # as `getpass.getpass`; calling the module itself raises TypeError.
    openai_api_key = getpass.getpass("🔑 Enter your OpenAI API key: ")

# Make the key available both on the `openai` module and in the process
# environment.
openai.api_key = openai_api_key
os.environ["OPENAI_API_KEY"] = openai_api_key

In [None]:
from phoenix.experimental.evals import (
    TOXICITY_PROMPT_RAILS_MAP,
    TOXICITY_PROMPT_TEMPLATE_STR,
    OpenAIModel,
    llm_classify,
)

# The spans' "input" column is handed to the classifier under the name "text".
# NOTE(review): presumably this matches the variable used by the toxicity
# template — confirm against the template string.
toxicity_input_df = spans_df.rename(columns={"input": "text"})

# The allowed output labels are the values of the toxicity rails map.
toxicity_rails = list(TOXICITY_PROMPT_RAILS_MAP.values())

eval_classifications_df = llm_classify(
    toxicity_input_df,
    model=OpenAIModel("gpt-4"),
    template=TOXICITY_PROMPT_TEMPLATE_STR,
    rails=toxicity_rails,
    verbose=True,
    provide_explanation=True,
)

In [None]:
# Preview the first rows of the classification results from `llm_classify`.
eval_classifications_df.head()

In [None]:
from phoenix.trace.trace_eval_dataset import binary_classifications_to_trace_eval_dataset

# Build `evaluation_ds` from the spans, the classifications computed for them,
# and the toxicity rails map, labelled with the evaluation name "toxicity".
evaluation_ds = binary_classifications_to_trace_eval_dataset(
    spans_df=spans_df,
    classifications_df=eval_classifications_df,
    rails_map=TOXICITY_PROMPT_RAILS_MAP,
    eval_name="toxicity",
)
# Attach the evaluation to the trace dataset loaded at the top of the notebook.
ds.append_evaluation(evaluation_ds)

In [None]:
# Preview the spans dataframe again now that the evaluation has been appended
# to `ds`.
ds.to_spans_dataframe().head()