In [None]:
# Install with %pip so packages land in the environment of the running kernel
# (a bare `pip` inside %%bash may resolve to a different interpreter).
# openinference-instrumentation-openai and nest_asyncio are imported by later
# cells, so they are installed here as well.
%pip install -qU openai tiktoken pinecone gradio tavily-python python-dotenv
%pip install -qU arize-phoenix-otel arize-phoenix openinference-instrumentation-langchain openinference-instrumentation-openai
%pip install -qU langchain langchain-community langchain-pinecone langchain_openai langgraph nest_asyncio

# Set up Pinecone RAG tool

In [None]:
import os
from typing import Optional

import gradio as gr
from langchain.chains import RetrievalQA
from langchain_core.tools import tool
from langchain_openai import OpenAI
from langchain_openai import OpenAIEmbeddings
from langchain_pinecone import PineconeVectorStore
from pinecone import Pinecone

In [None]:
from dotenv import load_dotenv
# Load settings from a local .env file (python-dotenv) before the cells below
# read PINECONE_KEY, PINECONE_ENV, PINECONE_INDEX, etc. from os.environ.
load_dotenv()

In [None]:
# Embedding model handed to the vector store below.
embeddings = OpenAIEmbeddings()

# Pinecone client and index handle; all connection details come from the environment.
pinecone = Pinecone(
    api_key=os.environ["PINECONE_KEY"],
    environment=os.environ["PINECONE_ENV"],
)
index = pinecone.Index(os.environ["PINECONE_INDEX"])

# Vector store over that index; "text" is the key passed as the store's text_key.
vector_store = PineconeVectorStore(index=index, embedding=embeddings, text_key="text")

# Retrieval QA chain ("stuff" chain type) answering from documents fetched
# through the vector store's retriever.
qa = RetrievalQA.from_chain_type(
    llm=OpenAI(temperature=0.2),
    chain_type="stuff",
    retriever=vector_store.as_retriever(),
)

@tool
def generate_response_phoenix(message: str, history: Optional[list] = None) -> str:
    """Queries the Phoenix docs and returns a response.

    Args:
        message (str): The message to query the Phoenix docs with.
        history (list, optional): The history of the conversation. Not used
            by the lookup; may be omitted.

    Returns:
        str: The response from the Phoenix docs.
    """
    # `history` is kept only for backward compatibility with existing callers;
    # giving it a default means the model no longer has to supply a value for
    # an argument the lookup never reads.
    response = qa.invoke(message)
    return response.get("result")

In [None]:
from typing import Annotated
from typing_extensions import TypedDict
from langgraph.graph.message import AnyMessage, add_messages

class State(TypedDict):
    """State schema passed to StateGraph: a single `messages` channel."""

    # The conversation messages. `add_messages` is attached as Annotated
    # metadata; presumably LangGraph uses it as the reducer that merges new
    # messages into the existing list — confirm against the langgraph docs.
    messages: Annotated[list[AnyMessage], add_messages]

### Tool utility functions

In [None]:
from langchain_core.messages import ToolMessage
from langchain_core.runnables import RunnableLambda

from langgraph.prebuilt import ToolNode


def handle_tool_error(state) -> dict:
    """Build one error ToolMessage per tool call of the last message.

    Args:
        state: Mapping with a "messages" list whose last entry exposes
            `tool_calls`, and optionally an "error" entry holding the failure.

    Returns:
        dict: ``{"messages": [...]}`` with one ToolMessage per tool call, each
        carrying the error's repr and the id of the call it answers.
    """
    failure = state.get("error")
    pending_calls = state["messages"][-1].tool_calls

    replies = []
    for call in pending_calls:
        replies.append(
            ToolMessage(
                content=f"Error: {repr(failure)}\n please fix your mistakes.",
                tool_call_id=call["id"],
            )
        )
    return {"messages": replies}


def create_tool_node_with_fallback(tools: list) -> dict:
    """Wrap the given tools in a ToolNode that falls back to handle_tool_error.

    Args:
        tools: The tools handed to ToolNode.

    Returns:
        Whatever ``ToolNode(tools).with_fallbacks(...)`` returns.
        NOTE(review): that is presumably a runnable rather than a ``dict``, so
        the return annotation looks inaccurate — confirm and correct it.
    """
    tool_node = ToolNode(tools)
    # On failure the exception is handed to the fallback under the "error" key,
    # which is the key handle_tool_error reads.
    on_error = RunnableLambda(handle_tool_error)
    return tool_node.with_fallbacks([on_error], exception_key="error")


def _print_event(event: dict, _printed: set, max_length=1500):
    """Print the newest message of a graph event, at most once per message id.

    Args:
        event: Event mapping; its "dialog_state" and "messages" entries are read.
        _printed: Set of message ids already printed; updated in place.
        max_length: Longest representation printed before it is truncated.
    """
    dialog_state = event.get("dialog_state")
    if dialog_state:
        print("Currently in: ", dialog_state[-1])

    latest = event.get("messages")
    if not latest:
        return
    # A list of messages is reduced to its last entry.
    if isinstance(latest, list):
        latest = latest[-1]
    if latest.id in _printed:
        return

    rendered = latest.pretty_repr(html=True)
    if len(rendered) > max_length:
        rendered = rendered[:max_length] + " ... (truncated)"
    print(rendered)
    _printed.add(latest.id)

### Define the assistant

In [None]:
from langchain_core.prompts import ChatPromptTemplate
from langchain_core.runnables import Runnable, RunnableConfig
from langchain_community.tools.tavily_search import TavilySearchResults
from datetime import datetime


class Assistant:
    """Graph node that calls the LLM runnable and re-prompts on empty replies."""

    def __init__(self, runnable: Runnable):
        """Store the runnable (prompt piped into the tool-bound LLM) to invoke."""
        self.runnable = runnable

    @staticmethod
    def _is_empty(result) -> bool:
        """Return True when `result` has neither tool calls nor usable content."""
        if result.tool_calls:
            return False
        content = result.content
        if not content:
            return True
        if isinstance(content, list):
            first = content[0]
            # Content parts may be dicts (with a "text" entry) or plain strings;
            # calling .get() on a string part would raise AttributeError.
            if isinstance(first, dict):
                return not first.get("text")
            return not first
        return False

    def __call__(self, state: State, config: RunnableConfig):
        """Invoke the runnable until it yields tool calls or real content.

        Args:
            state: Current graph state; its "messages" entry is read.
            config: Run configuration for this node, forwarded to the runnable
                so the nested call runs under the same configuration.

        Returns:
            dict: ``{"messages": result}`` holding the runnable's final result.
        """
        while True:
            result = self.runnable.invoke(state, config)
            if not self._is_empty(result):
                break
            # If the LLM happens to return an empty response, re-prompt it for
            # an actual response. A new dict is built so the caller's state is
            # never mutated.
            messages = state["messages"] + [("user", "Respond with a real output.")]
            state = {**state, "messages": messages}
        return {"messages": result}


from langchain_openai import ChatOpenAI

# Chat model that drives the assistant node.
llm = ChatOpenAI(model="gpt-4o")

primary_assistant_prompt = ChatPromptTemplate.from_messages(
    [
        (
            "system",
            "You are a helpful customer support assistant for Phoenix."
            " Use the provided tools to answer questions about Phoenix. "
            " When searching, be persistent. Expand your query bounds if the first search returns no results. "
            " If a search comes up empty, expand your search before giving up."
            "\nCurrent time: {time}.",
        ),
        ("placeholder", "{messages}"),
    ]
    # Pass the function itself, not its result: a callable partial is evaluated
    # each time the prompt is formatted, so "Current time" stays current
    # instead of being frozen at the moment this cell ran.
).partial(time=datetime.now)

# Tools the model may call: web search plus the Phoenix docs RAG tool.
tools = [
    TavilySearchResults(max_results=1),
    generate_response_phoenix,
]
assistant_runnable = primary_assistant_prompt | llm.bind_tools(tools)

### Build the graph

In [None]:
from langgraph.checkpoint.memory import MemorySaver
from langgraph.graph import END, StateGraph, START
from langgraph.prebuilt import tools_condition

builder = StateGraph(State)

# Nodes: the LLM-backed assistant and the tool executor with its error fallback.
builder.add_node("assistant", Assistant(assistant_runnable))
builder.add_node("tools", create_tool_node_with_fallback(tools))

# Edges: start at the assistant, let tools_condition choose the step after it,
# and always hand tool results back to the assistant.
builder.add_edge(START, "assistant")
builder.add_conditional_edges("assistant", tools_condition)
builder.add_edge("tools", "assistant")

# In-memory checkpointer passed at compile time so the graph can persist its
# state.
memory = MemorySaver()
graph = builder.compile(checkpointer=memory)

In [None]:
from IPython.display import Image, display

try:
    display(Image(graph.get_graph(xray=True).draw_mermaid_png()))
except Exception as exc:
    # Rendering requires some extra dependencies and is optional, so keep the
    # notebook running but say why the picture is missing instead of hiding it.
    print(f"Skipping graph visualization: {exc!r}")

### Wrap the graph in a function for our Gradio interface

In [None]:
def call_agent(message, history):
    """Run the agent graph for one chat turn and return the reply text.

    Each call runs on a fresh checkpoint thread, so the earlier turns supplied
    in `history` are replayed as messages; without that the agent would answer
    every message with no memory of the conversation.

    Args:
        message (str): The new user message.
        history (list): Earlier turns, either as ``[user, assistant]`` pairs or
            as ``{"role": ..., "content": ...}`` dicts. Entries whose content
            is not a non-empty string are skipped.

    Returns:
        The content of the last message in the graph's final state.
    """
    import uuid

    config = {
        "configurable": {
            # Checkpoints are accessed by thread_id
            "thread_id": str(uuid.uuid4()),
        }
    }

    messages = []
    for turn in history or []:
        if isinstance(turn, dict):
            role, content = turn.get("role"), turn.get("content")
            if role in ("user", "assistant") and isinstance(content, str) and content:
                messages.append((role, content))
        elif isinstance(turn, (list, tuple)) and len(turn) == 2:
            for role, content in zip(("user", "assistant"), turn):
                if isinstance(content, str) and content:
                    messages.append((role, content))
    messages.append(("user", message))

    final_state = graph.invoke({"messages": messages}, config, stream_mode="values")

    return final_state.get("messages")[-1].content

## Set up Gradio interface

In [None]:
# Chat UI whose handler is call_agent(message, history).
iface = gr.ChatInterface(
    fn=call_agent,
    description="Ask me anything about Phoenix documentation!",
    title="Phoenix Docs Query Bot",
)

In [None]:
# Start the Gradio app defined above.
iface.launch()

# Add tracing

In [None]:
from phoenix.otel import register

# Phoenix connection settings: the same API key header is used for the OTLP
# exporter and the Phoenix client, plus the collector endpoint.
os.environ.update(
    {
        "OTEL_EXPORTER_OTLP_HEADERS": f"api_key={os.environ['PHOENIX_API_KEY']}",
        "PHOENIX_CLIENT_HEADERS": f"api_key={os.environ['PHOENIX_API_KEY']}",
        "PHOENIX_COLLECTOR_ENDPOINT": "https://app.phoenix.arize.com",
    }
)

tracer_provider = register(project_name="pinecone-rag-agent")

from openinference.instrumentation.langchain import LangChainInstrumentor

# Instrument LangChain with the tracer provider registered above.
LangChainInstrumentor().instrument(
    tracer_provider=tracer_provider,
    skip_dep_check=True,
)

# Evaluate our chatbot

In [None]:
from phoenix.session.evaluation import get_qa_with_reference
import phoenix as px

# Fetch Q&A-with-reference rows for the "pinecone-rag-agent" project, the same
# project name used when tracing was registered.
# NOTE(review): presumably this only returns rows once the chatbot has been
# used and traces exist — confirm.
qa_with_reference_df = get_qa_with_reference(px.Client(), project_name="pinecone-rag-agent")
# Bare last expression so the notebook displays the frame.
qa_with_reference_df

In [None]:
from phoenix.evals import (
    HallucinationEvaluator,
    OpenAIModel,
    QAEvaluator,
    run_evals,
)
import nest_asyncio
# Presumably needed so run_evals can drive its own event loop inside the
# notebook's already-running loop — confirm.
nest_asyncio.apply()

# One evaluator per metric, each with its own gpt-4o judge model.
qa_evaluator = QAEvaluator(OpenAIModel(model="gpt-4o"))
hallucination_evaluator = HallucinationEvaluator(OpenAIModel(model="gpt-4o"))

# The two results are unpacked in the same order as the `evaluators` list.
qa_correctness_eval_df, hallucination_eval_df = run_evals(
    evaluators=[qa_evaluator, hallucination_evaluator],
    dataframe=qa_with_reference_df,
    provide_explanation=True,
    concurrency=20,
)

In [None]:
from phoenix.trace import SpanEvaluations

# Send both evaluation frames back to Phoenix, each under its own eval name.
px.Client().log_evaluations(
    SpanEvaluations(dataframe=qa_correctness_eval_df, eval_name="Q&A Correctness"),
    SpanEvaluations(dataframe=hallucination_eval_df, eval_name="Hallucination"),
    project_name="pinecone-rag-agent"
)

# Experiment with our Agent

### Create test cases

In [None]:
import pandas as pd
import phoenix as px

# Test questions for the experiment: two about Phoenix and four general ones.
df = pd.DataFrame(
    {
        "question": [
            "What options do I have for hosting Phoenix?",
            "How much does Phoenix cost?",
            "What is the latest news on the 2024 election?",
            "How tall is mount everest?",
            "What is the capital of France?",
            "What is the weather in Tokyo?"
        ]
    }
)

# Upload the questions as a Phoenix dataset. Only inputs are provided
# (output_keys is empty), so the dataset carries no expected answers.
# NOTE(review): re-running this cell may fail if a dataset with this name
# already exists — confirm against the Phoenix client docs.
dataset = px.Client().upload_dataset(
    dataframe=df,
    input_keys=["question"],
    output_keys=[],
    dataset_name="agent-eval-questions",
)

### Create evaluations

In [None]:
def hallucination_eval(input, output):
    """Evaluate one experiment output with HallucinationEvaluator.

    Args:
        input: Dataset example input; its "question" entry is read.
        output: Task output; its "reference" entry and the "result" entry of
            its "input" mapping are read.

    Returns:
        The element at index 1 of the evaluator's result (presumably the
        numeric score — confirm against the phoenix.evals docs).
    """
    record = {
        "input": input["question"],
        "reference": output["reference"],
        "output": output["input"]["result"],
    }
    evaluator = HallucinationEvaluator(OpenAIModel(model="gpt-4o"))
    verdict = evaluator.evaluate(record, provide_explanation=True)
    return verdict[1]

def qa_eval(input, output):
    """Evaluate one experiment output with QAEvaluator.

    Args:
        input: Dataset example input; its "question" entry is read.
        output: Task output; its "reference" entry and the "result" entry of
            its "input" mapping are read.

    Returns:
        The element at index 1 of the evaluator's result (presumably the
        numeric score — confirm against the phoenix.evals docs).
    """
    record = {
        "input": input["question"],
        "reference": output["reference"],
        "output": output["input"]["result"],
    }
    # Named `evaluator` so the local does not shadow this function's own name.
    evaluator = QAEvaluator(OpenAIModel(model="gpt-4o"))
    verdict = evaluator.evaluate(record, provide_explanation=True)
    return verdict[1]

### Update our Agent

In [None]:
# Rebind the global `qa` to a chain with temperature=0 (it was 0.2 before).
# generate_response_phoenix and task both look `qa` up at call time, so they
# use this chain from here on; re-running earlier cells restores the old one.
qa = RetrievalQA.from_chain_type(llm=OpenAI(temperature=0), chain_type="stuff", retriever=vector_store.as_retriever())

In [None]:
def task(input):
    """Experiment task: answer one dataset question with the QA chain.

    Args:
        input: Dataset example input; its "question" entry is read.

    Returns:
        dict: ``{"reference": ..., "input": ...}`` where "reference" is the
        page content of the top similarity-search hit (an empty string when
        nothing is retrieved) and "input" is the QA chain's output for the
        question.
    """
    question = input["question"]
    docs = vector_store.similarity_search(question)
    # An empty retrieval must not crash the whole experiment run with an
    # IndexError; fall back to an empty reference instead.
    reference = docs[0].page_content if docs else ""
    return {"reference": reference, "input": qa.invoke(question)}

### Run our experiment

In [None]:
from openinference.instrumentation.openai import OpenAIInstrumentor
# Instrument the OpenAI client as well. Unlike the LangChain instrumentor, no
# tracer_provider is passed here.
# NOTE(review): presumably this relies on the globally registered provider — confirm.
OpenAIInstrumentor().instrument()

In [None]:
from phoenix.experiments import run_experiment

# Run `task` over the uploaded dataset and evaluate the outputs with both evaluators.
run_experiment(dataset, task=task, evaluators=[hallucination_eval, qa_eval])