In [None]:
!pip install -q "arize-phoenix[experimental]==0.0.33rc2" langchain openai chromadb

In [None]:
import json
import os
from getpass import getpass
from urllib.request import urlopen

import openai
import phoenix as px
import getpass
from langchain.chains import RetrievalQA
from langchain.chat_models import ChatOpenAI
from langchain.embeddings import OpenAIEmbeddings
from langchain.vectorstores import Chroma
from phoenix.experimental.callbacks.langchain_tracer import OpenInferenceTracer
from phoenix.trace.trace_dataset import TraceDataset
from tqdm import tqdm

In [None]:
# Resolve the OpenAI API key once: prefer the environment, otherwise prompt for it.
# A missing *or empty* environment variable triggers the prompt.
# NOTE: `getpass` is the module here (`import getpass` comes after
# `from getpass import getpass` in the imports cell and rebinds the name).
openai_api_key = os.environ.get("OPENAI_API_KEY")
if not openai_api_key:
    openai_api_key = getpass.getpass("🔑 Enter your OpenAI API key: ")

# Set both explicitly from the resolved value. Copying `openai.api_key` back into
# os.environ is unsafe: that attribute may still be None when the environment
# variable is set, and assigning None to os.environ raises TypeError.
openai.api_key = openai_api_key
os.environ["OPENAI_API_KEY"] = openai_api_key

In [None]:
# Toggle for Langsmith logging; it only takes effect when LANGCHAIN_API_KEY is set.
log_to_langsmith = True

if log_to_langsmith and os.environ.get("LANGCHAIN_API_KEY"):
    # Environment variables set when Langsmith logging is turned on.
    for env_name, env_value in {
        "LANGCHAIN_TRACING_V2": "true",
        "LANGCHAIN_ENDPOINT": "https://api.smith.langchain.com",
        "LANGCHAIN_PROJECT": "phoenix-develop",
    }.items():
        os.environ[env_name] = env_value
    print("🔑 Langsmith API key found, logging to Langsmith")
else:
    print("💤 No Langsmith API key found, not logging to Langsmith")

In [None]:
# Build the retrieval QA chain over a Chroma vector store persisted on disk.
embeddings = OpenAIEmbeddings(model="text-embedding-ada-002")
vector_store = Chroma(
    # Resolve the store under the current user's home directory instead of a
    # hard-coded macOS-style "/Users/<name>" prefix, so the path is valid on
    # other platforms as well.
    persist_directory=os.path.join(os.path.expanduser("~"), "langchain-chroma-arize-docs"),
    embedding_function=embeddings,
)
chain_type = "refine"  # stuff, refine, map_reduce, and map_rerank
chat_model_name = "gpt-3.5-turbo"
llm = ChatOpenAI(model_name=chat_model_name)
chain = RetrievalQA.from_chain_type(
    llm=llm,
    chain_type=chain_type,
    retriever=vector_store.as_retriever(),
)
# Tracer passed as a callback on each chain run; its span buffer is read afterwards.
tracer = OpenInferenceTracer()

In [None]:
# Download the example queries: a JSONL file with one JSON object per line, of
# which only the "query" field is kept.
# Fetched over HTTPS so the transfer is encrypted and the server is authenticated.
url = "https://storage.googleapis.com/arize-assets/phoenix/datasets/unstructured/llm/context-retrieval/arize_docs_queries.jsonl"
queries = []
with urlopen(url) as response:
    for line in response:
        line = line.decode("utf-8").strip()
        if not line:
            # Tolerate blank lines (e.g. a trailing newline); json.loads("") would raise.
            continue
        data = json.loads(line)
        queries.append(data["query"])
queries

In [None]:
# Run every query through the chain with the tracer attached as a callback;
# tqdm wraps the list to show progress.
query_progress = tqdm(queries)
for query in query_progress:
    chain.run(query, callbacks=[tracer])

In [None]:
# Turn the spans held in the tracer's buffer into a trace dataset ...
buffered_spans = tracer.span_buffer
ds = TraceDataset.from_spans(buffered_spans)
# ... and launch the Phoenix app on it.
px.launch_app(trace=ds)