## Setup

Install libraries

In [None]:
!pip install -qq arize-phoenix llama-index "openai>=1" gcsfs nest_asyncio langchain langchain-community cohere llama-index-postprocessor-cohere-rerank

Set up environment variables


In [None]:
import os
from getpass import getpass

# Take each API key from the environment when it is already set (and non-empty);
# otherwise prompt for it without echoing. Either way, write it back to os.environ.
openai_api_key = os.getenv("OPENAI_API_KEY") or getpass("🔑 Enter your OpenAI API key: ")
os.environ["OPENAI_API_KEY"] = openai_api_key

cohere_api_key = os.getenv("COHERE_API_KEY") or getpass("🔑 Enter your Cohere API key: ")
os.environ["COHERE_API_KEY"] = cohere_api_key

## Launch Phoenix and Instrumentation

In [None]:
import phoenix as px

In [None]:
# Launch the Phoenix app and keep the returned session; a later cell calls session.view().
session = px.launch_app()

In [None]:
from openinference.instrumentation.llama_index import LlamaIndexInstrumentor
from opentelemetry.exporter.otlp.proto.http.trace_exporter import OTLPSpanExporter
from opentelemetry.sdk.trace import TracerProvider
from opentelemetry.sdk.trace.export import SimpleSpanProcessor

# Local OTLP-over-HTTP endpoint that spans are exported to.
endpoint = "http://127.0.0.1:6006/v1/traces"

# Build the tracing pipeline step by step: provider -> exporter -> processor.
tracer_provider = TracerProvider()
span_exporter = OTLPSpanExporter(endpoint)
span_processor = SimpleSpanProcessor(span_exporter)
tracer_provider.add_span_processor(span_processor)

# Instrument LlamaIndex so its spans go through the provider configured above.
LlamaIndexInstrumentor().instrument(tracer_provider=tracer_provider)

## Parse Phoenix Documentation into Llama-Index Documents

In [None]:
import json
import logging
import sys
import time

import nest_asyncio

# The nest_asyncio module enables the nesting of asynchronous functions within an
# already running async loop. This is necessary because Jupyter notebooks inherently
# operate in an asynchronous loop; applying the patch lets additional async functions
# run within that existing loop without conflicts.
nest_asyncio.apply()

import pandas as pd

# GitbookLoader is provided by the langchain-community package (installed in the setup
# cell); the legacy `langchain.document_loaders` import path is deprecated in its favor.
from langchain_community.document_loaders import GitbookLoader
from llama_index.core import Document, VectorStoreIndex
from llama_index.llms.openai import OpenAI

Enable Phoenix tracing via `LlamaIndexInstrumentor`. Phoenix uses OpenInference traces - an open-source standard for capturing and storing LLM application traces that enables LLM applications to seamlessly integrate with LLM observability solutions such as Phoenix.

In [None]:
"""
Fetches the Arize documentation from Gitbook and serializes it into LangChain format.
"""


def load_gitbook_docs(docs_url: str):
    """Load the pages of a Gitbook site as LangChain documents.

    A ``GitbookLoader`` is created for ``docs_url`` with ``load_all_paths=True``
    and its ``load()`` result is returned unchanged.

    Args:
        docs_url (str): URL to Gitbook docs.

    Returns:
        List[LangChainDocument]: List of documents in LangChain format.
    """
    return GitbookLoader(docs_url, load_all_paths=True).load()


# Send log records at INFO level and above to stdout so they show up in the cell output.
logging.basicConfig(level=logging.INFO, stream=sys.stdout)

# fetch documentation
docs_url = "https://docs.arize.com/phoenix"
# NOTE(review): this name is not referenced by any other cell visible here — confirm it is still needed.
embedding_model_name = "text-embedding-ada-002"
# Loads the documents through the GitbookLoader-based helper defined above.
docs = load_gitbook_docs(docs_url)

In [None]:
# Re-wrap each loaded LangChain document as a LlamaIndex Document,
# carrying over its page content and metadata.
documents = [
    Document(text=doc.page_content, metadata=doc.metadata) for doc in docs
]

In [None]:
# Inspect the metadata carried over onto the first converted document.
documents[0].metadata

In [None]:
# Turn every Document into a plain dict so the collection can be JSON-encoded
documents_json = [document.to_dict() for document in documents]

# Write the encoded documents (4-space indentation) to a local JSON file
with open("llama_index_documents.json", "w") as file:
    file.write(json.dumps(documents_json, indent=4))

## Set Up VectorStore and Query Engine

In [None]:
from llama_index.core.node_parser import SentenceSplitter
from llama_index.postprocessor.cohere_rerank import CohereRerank

# Define an LLM
llm = OpenAI(model="gpt-4")

# Build index with a chunk_size of 1024
# (the splitter is also given chunk_overlap=250)
splitter = SentenceSplitter(chunk_size=1024, chunk_overlap=250)
# Split the documents into nodes, then build the vector index from those nodes.
nodes = splitter.get_nodes_from_documents(documents)
vector_index = VectorStoreIndex(nodes)

Build a QueryEngine and start querying.

In [None]:
# Cohere reranker used as a node postprocessor, configured with top_n=2.
cohere_api_key = os.environ["COHERE_API_KEY"]
cohere_rerank = CohereRerank(api_key=cohere_api_key, top_n=2)

# Pass the gpt-4 `llm` defined earlier explicitly; without it the query engine
# falls back to the library's default LLM and `llm` is never used.
query_engine = vector_index.as_query_engine(
    llm=llm,
    similarity_top_k=5,
    node_postprocessors=[cohere_rerank],
)

## Import Questions

In [None]:
# Read the evaluation questions from a Parquet file expected in the working directory.
questions_df = pd.read_parquet("PhoenixRAGUseCaseQuestions.parquet")

In [None]:
# Display the loaded questions.
questions_df

## Generate Answers for all of the questions

In [None]:
# Ask the query engine every question in turn and print each question/answer pair
for i, question in questions_df["Prompt/ Question"].items():
    # 30-second pause at row labels 25, 50 and 75
    # (presumably to stay under API rate limits — TODO confirm)
    if i in (25, 50, 75):
        time.sleep(30)
    response_vector = query_engine.query(question)
    print(f"Question: {question}\nAnswer: {response_vector.response}\n")

## Phoenix Evals

In [None]:
from phoenix.session.evaluation import get_retrieved_documents

# Fetch the retrieved documents through a Phoenix client; the result is displayed
# below and later passed to run_evals as the dataframe for the relevance evaluator.
retrieved_documents_df = get_retrieved_documents(px.Client())
retrieved_documents_df

In [None]:
from phoenix.session.evaluation import get_qa_with_reference

# Query through px.Client(), matching every other Phoenix call in this notebook
# (get_retrieved_documents, log_evaluations, get_trace_dataset), instead of
# px.active_session().
queries_df = get_qa_with_reference(px.Client())
queries_df

Let's now use Phoenix's LLM Evals to evaluate the relevance of the retrieved documents with regard to the query. Note that we've turned on `explanations`, which prompts the LLM to explain its reasoning. This can be useful for debugging and for figuring out potential corrective actions.

In [None]:
from phoenix.evals import (
    HallucinationEvaluator,
    OpenAIModel,
    QAEvaluator,
    RelevanceEvaluator,
    run_evals,
)

# A single gpt-4 eval model is shared by all three evaluators.
eval_model = OpenAIModel(model="gpt-4")
relevance_evaluator = RelevanceEvaluator(eval_model)
hallucination_evaluator = HallucinationEvaluator(eval_model)
qa_evaluator = QAEvaluator(eval_model)

# Run the relevance evaluator over the retrieved documents, requesting explanations.
# Only one evaluator is passed here, and only the first (index 0) result is kept.
retrieved_documents_relevance_df = run_evals(
    evaluators=[relevance_evaluator],
    dataframe=retrieved_documents_df,
    provide_explanation=True,
    concurrency=20,
)[0]

In [None]:
# Run the hallucination and QA evaluators over the query/answer dataframe.
# The two results are unpacked in the same order the evaluators are listed.
hallucination_eval_df, qa_eval_df = run_evals(
    dataframe=queries_df,
    evaluators=[hallucination_evaluator, qa_evaluator],
    provide_explanation=True,
    concurrency=20,
)

In [None]:
# Move the current index into regular columns, then index the relevance results
# by "context.span_id" alone.
# NOTE(review): presumably done so the frame can be logged as SpanEvaluations later — confirm.
retrieved_documents_relevance_df = retrieved_documents_relevance_df.reset_index().set_index(
    "context.span_id"
)
retrieved_documents_relevance_df

In [None]:
# Preview the first rows of the hallucination eval results.
hallucination_eval_df.head()

## Log the Evals into Phoenix

In [None]:
from phoenix.trace import SpanEvaluations

# Log the three eval result frames to Phoenix in one call, each wrapped as
# SpanEvaluations under its own eval_name.
px.Client().log_evaluations(
    SpanEvaluations(eval_name="Hallucination", dataframe=hallucination_eval_df),
    SpanEvaluations(eval_name="QA Correctness", dataframe=qa_eval_df),
    SpanEvaluations(eval_name="Retrieval Relevance", dataframe=retrieved_documents_relevance_df),
)

In [None]:
# Open the view of the Phoenix session launched earlier.
session.view()

## Save the Trace and Evals

In [None]:
# os is already imported in the setup cell; importing it again is harmless.
import os

# Specify and Create the Directory for Trace Dataset
directory = "saved_traces_and_evals"
# exist_ok=True makes re-running this cell safe when the directory already exists.
os.makedirs(directory, exist_ok=True)

# Save the Trace Dataset
# The value returned by save() is kept as trace_id
# (presumably an identifier for reloading the saved dataset — TODO confirm).
trace_id = px.Client().get_trace_dataset().save(directory=directory)