<center>
    <p style="text-align:center">
        <img alt="phoenix logo" src="https://storage.googleapis.com/arize-phoenix-assets/assets/phoenix-logo-light.svg" width="200"/>
        <br>
        <a href="https://docs.arize.com/phoenix/">Docs</a>
        |
        <a href="https://github.com/Arize-ai/phoenix">GitHub</a>
        |
        <a href="https://join.slack.com/t/arize-ai/shared_invite/zt-1px8dcmlf-fmThhDFD_V_48oU7ALan4Q">Community</a>
    </p>
</center>
<h1 align="center">Tracing and Evaluating a Haystack Application with Phoenix</h1>

ℹ️ This notebook requires an OpenAI API key and a Phoenix API key.


In [1]:
!pip install -q openinference-instrumentation-haystack haystack-ai arize-phoenix opentelemetry-sdk opentelemetry-exporter-otlp arize-phoenix-otel


[1m[[0m[34;49mnotice[0m[1;39;49m][0m[39;49m A new release of pip is available: [0m[31;49m24.1.2[0m[39;49m -> [0m[32;49m24.2[0m
[1m[[0m[34;49mnotice[0m[1;39;49m][0m[39;49m To update, run: [0m[32;49mpip install --upgrade pip[0m


# Set API Keys

In [None]:
import getpass
import os

# Prompt only for keys that are not already present (and non-empty) in the
# environment. This keeps the cell idempotent: re-running it, or running the
# notebook with the keys exported beforehand, does not ask for the secrets again
# or overwrite them. getpass is used so the typed value is not echoed.
if not os.environ.get("OPENAI_API_KEY"):
    os.environ["OPENAI_API_KEY"] = getpass.getpass("OpenAI API Key:")
if not os.environ.get("PHOENIX_API_KEY"):
    os.environ["PHOENIX_API_KEY"] = getpass.getpass("Phoenix API Key:")

OpenAI API Key:··········
Phoenix API Key:··········


# Connect to Phoenix and Enable Haystack Tracing

In [None]:
import os

from openinference.instrumentation.haystack import HaystackInstrumentor

from phoenix.otel import register

# Read the Phoenix key from the environment; a missing key raises KeyError here.
PHOENIX_API_KEY = os.environ["PHOENIX_API_KEY"]

# The same "api_key=<key>" header value is exported for both the OTLP exporter
# and the Phoenix client, and the collector endpoint is pointed at the hosted
# Phoenix app.
phoenix_auth_header = f"api_key={PHOENIX_API_KEY}"
os.environ.update(
    {
        "OTEL_EXPORTER_OTLP_HEADERS": phoenix_auth_header,
        "PHOENIX_CLIENT_HEADERS": phoenix_auth_header,
        "PHOENIX_COLLECTOR_ENDPOINT": "https://app.phoenix.arize.com",
    }
)

# register() is called without arguments.
# NOTE(review): presumably it picks up the endpoint and headers from the
# environment variables set above -- confirm against the phoenix.otel docs.
tracer_provider = register()

# Instrument Haystack with the OpenInference instrumentor so pipeline runs emit
# traces through the tracer provider created above (dependency check skipped).
haystack_instrumentor = HaystackInstrumentor()
haystack_instrumentor.instrument(tracer_provider=tracer_provider, skip_dep_check=True)

# Set up your Haystack app

In [None]:
import os

from haystack import Document, Pipeline
from haystack.components.builders.prompt_builder import PromptBuilder
from haystack.components.generators import OpenAIGenerator
from haystack.components.retrievers.in_memory import InMemoryBM25Retriever
from haystack.document_stores.in_memory import InMemoryDocumentStore

# Seed an in-memory document store with three one-sentence documents.
seed_texts = (
    "My name is Jean and I live in Paris.",
    "My name is Mark and I live in Berlin.",
    "My name is Giorgio and I live in Rome.",
)
document_store = InMemoryDocumentStore()
document_store.write_documents([Document(content=text) for text in seed_texts])

# Jinja-style prompt: lists the content of every retrieved document, then the
# question, and leaves the answer for the LLM to complete.
prompt_template = """
Given these documents, answer the question.
Documents:
{% for doc in documents %}
    {{ doc.content }}
{% endfor %}
Question: {{question}}
Answer:
"""

# Assemble the RAG pipeline: each component is created and registered under the
# name that the connections (and the run inputs) refer to.
rag_pipeline = Pipeline()

retriever = InMemoryBM25Retriever(document_store=document_store)
rag_pipeline.add_component("retriever", retriever)

prompt_builder = PromptBuilder(template=prompt_template)
rag_pipeline.add_component("prompt_builder", prompt_builder)

llm = OpenAIGenerator(model="gpt-3.5-turbo")
rag_pipeline.add_component("llm", llm)

# Wire retriever -> prompt builder (documents) -> LLM. The second connect call is
# deliberately the last expression so the cell displays the resulting pipeline.
rag_pipeline.connect("retriever", "prompt_builder.documents")
rag_pipeline.connect("prompt_builder", "llm")

<haystack.core.pipeline.pipeline.Pipeline object at 0x78d8b8313e50>
🚅 Components
  - retriever: InMemoryBM25Retriever
  - prompt_builder: PromptBuilder
  - llm: OpenAIGenerator
🛤️ Connections
  - retriever.documents -> prompt_builder.documents (List[Document])
  - prompt_builder.prompt -> llm.prompt (str)

In [None]:
# Run the pipeline on one question. The same question string is fed to the
# retriever (as its query) and to the prompt builder (as the template variable).
question = "Who lives in Paris?"
pipeline_inputs = {
    "retriever": {"query": question},
    "prompt_builder": {"question": question},
}
results = rag_pipeline.run(pipeline_inputs)

# The generator's answers are stored under results["llm"]["replies"].
llm_replies = results["llm"]["replies"]
print(llm_replies)

['Jean lives in Paris.']


# Evaluating Retrieved Docs

In [None]:
import nest_asyncio

import phoenix as px

# Apply the nest_asyncio patch before any evaluations are run.
# NOTE(review): presumably needed because the notebook kernel already has a
# running event loop and the concurrent run_evals calls below use asyncio --
# confirm against the phoenix.evals docs.
nest_asyncio.apply()

In [None]:
from phoenix.session.evaluation import get_retrieved_documents

# Create a single Phoenix client and use it for the query below instead of
# constructing a second, throwaway client.
client = px.Client()

# Fetch the documents returned by the traced retriever spans. As the output
# below shows, rows are keyed by (context.span_id, document_position) and carry
# the retriever input, the document text ("reference") and its retrieval score.
retrieved_documents_df = get_retrieved_documents(client)
retrieved_documents_df.head()



Unnamed: 0_level_0,Unnamed: 1_level_0,context.trace_id,input,reference,document_score
context.span_id,document_position,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
bba44d4539b972a9,0,f7dd5d45af0f0c48c96d197d1763de52,"{""query"": ""Who lives in Paris?"", ""filters"": nu...",My name is Jean and I live in Paris.,1.293454
bba44d4539b972a9,1,f7dd5d45af0f0c48c96d197d1763de52,"{""query"": ""Who lives in Paris?"", ""filters"": nu...",My name is Mark and I live in Berlin.,0.76801
bba44d4539b972a9,2,f7dd5d45af0f0c48c96d197d1763de52,"{""query"": ""Who lives in Paris?"", ""filters"": nu...",My name is Giorgio and I live in Rome.,0.76801


In [None]:
from phoenix.evals import OpenAIModel, RelevanceEvaluator, run_evals

# LLM-as-judge relevance evaluator backed by gpt-4o-mini.
relevance_eval_model = OpenAIModel(model="gpt-4o-mini")
relevance_evaluator = RelevanceEvaluator(relevance_eval_model)

# run_evals is given one evaluator here, and only the first result frame is
# used: the relevance results for the retrieved documents.
relevance_eval_results = run_evals(
    dataframe=retrieved_documents_df,
    evaluators=[relevance_evaluator],
    provide_explanation=True,  # also ask the judge for an explanation
    concurrency=20,
)
retrieved_documents_relevance_df = relevance_eval_results[0]

run_evals |          | 0/3 (0.0%) | ⏳ 00:00<? | ?it/s

In [None]:
# Preview the relevance results: per the output below, each
# (context.span_id, document_position) row has a label, a score and an explanation.
retrieved_documents_relevance_df.head()

Unnamed: 0_level_0,Unnamed: 1_level_0,label,score,explanation
context.span_id,document_position,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
bba44d4539b972a9,0,relevant,1,The question asks who lives in Paris. The refe...
bba44d4539b972a9,1,unrelated,0,"The question asks about who lives in Paris, wh..."
bba44d4539b972a9,2,unrelated,0,"The question asks about who lives in Paris, wh..."


In [None]:
from phoenix.trace import DocumentEvaluations, SpanEvaluations

# Attach the per-document relevance results to the traces in Phoenix under the
# evaluation name "relevance".
phoenix_client = px.Client()
document_relevance_evals = DocumentEvaluations(
    eval_name="relevance",
    dataframe=retrieved_documents_relevance_df,
)
phoenix_client.log_evaluations(document_relevance_evals)



# Evaluate Response

In [None]:
from phoenix.session.evaluation import get_qa_with_reference

# Pull the question/answer pairs from the traces together with the reference
# text they were answered from (columns: input, output, reference -- see the
# output below), keyed by context.span_id.
qa_with_reference_df = get_qa_with_reference(px.Client())

# Show only a preview, like the other cells, so the output stays small once
# more than a handful of traces have been collected.
qa_with_reference_df.head()



Unnamed: 0_level_0,input,output,reference
context.span_id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
16acfe8cec246618,"{""data"": {""retriever"": {""query"": ""Who lives in...","{""llm"": {""replies"": [""Jean lives in Paris.""], ...",My name is Jean and I live in Paris.\n\nMy nam...


In [None]:
from phoenix.evals import (
    HallucinationEvaluator,
    OpenAIModel,
    QAEvaluator,
    run_evals,
)

# Each response evaluator gets its own judge model instance; both use the same
# model name.
RESPONSE_EVAL_MODEL_NAME = "gpt-4-turbo-preview"
qa_eval_model = OpenAIModel(model=RESPONSE_EVAL_MODEL_NAME)
hallucination_eval_model = OpenAIModel(model=RESPONSE_EVAL_MODEL_NAME)

qa_evaluator = QAEvaluator(qa_eval_model)
hallucination_evaluator = HallucinationEvaluator(hallucination_eval_model)

# The results are unpacked in the same order as the evaluators list:
# Q&A correctness first, hallucination second.
response_eval_results = run_evals(
    dataframe=qa_with_reference_df,
    evaluators=[qa_evaluator, hallucination_evaluator],
    provide_explanation=True,  # also ask the judge for an explanation
    concurrency=20,
)
qa_correctness_eval_df, hallucination_eval_df = response_eval_results

run_evals |          | 0/2 (0.0%) | ⏳ 00:00<? | ?it/s

In [None]:
# Log both response-level evaluations to Phoenix in a single call, each under
# its own evaluation name.
phoenix_client = px.Client()
qa_correctness_evals = SpanEvaluations(
    eval_name="Q&A Correctness",
    dataframe=qa_correctness_eval_df,
)
hallucination_evals = SpanEvaluations(
    eval_name="Hallucination",
    dataframe=hallucination_eval_df,
)
phoenix_client.log_evaluations(qa_correctness_evals, hallucination_evals)

