## 1. Install Dependencies and Import Libraries

Install LlamaIndex and other dependencies.

In [None]:
!pip install -q arize-phoenix gcsfs llama-index tqdm

Import libraries.

In [None]:
import os

import numpy as np
import openai
from tqdm import tqdm
import json
from urllib.request import urlopen
import pandas as pd
import phoenix as px
from phoenix.experimental.callbacks.llama_index_trace_callback_handler import (
    OpenInferenceTraceCallbackHandler,
)
from gcsfs import GCSFileSystem
from IPython.display import YouTubeVideo
from langchain.chat_models import ChatOpenAI
from llama_index import LLMPredictor, ServiceContext, StorageContext, load_index_from_storage
from llama_index.callbacks import CallbackManager
from llama_index.callbacks.open_inference_callback import as_dataframe
from llama_index.embeddings.openai import OpenAIEmbedding
from llama_index.graph_stores.simple import SimpleGraphStore


# Let DataFrame cells show up to 1000 characters before pandas truncates them.
pd.options.display.max_colwidth = 1000

## 2. Configure Your OpenAI API Key

In [None]:
import getpass

# Reuse a key that is already configured (environment variable first, then the
# value held by the openai module). `os.environ.get` is used because indexing
# `os.environ` with a missing key raises KeyError instead of returning None.
openai_api_key = os.environ.get("OPENAI_API_KEY") or openai.api_key
if not openai_api_key:
    # Nothing configured yet: ask interactively without echoing the secret.
    openai_api_key = getpass.getpass("🔑 Enter your OpenAI API key: ")

# Keep both places in sync so that every client used below can find the key.
openai.api_key = openai_api_key
os.environ["OPENAI_API_KEY"] = openai_api_key

## 3. Download Your Knowledge Base

Download your pre-built index from cloud storage and instantiate your storage context.

In [None]:
# Location of the pre-built index inside the bucket, and the filesystem used to read it.
index_path = "arize-assets/phoenix/datasets/unstructured/llm/llama-index/arize-docs/index/"
file_system = GCSFileSystem(project="public-assets-275721")

# Storage context that reads the persisted index through the GCS filesystem.
storage_context = StorageContext.from_defaults(
    graph_store=SimpleGraphStore(),  # prevents unauthorized request to GCS
    persist_dir=index_path,
    fs=file_system,
)

The index loaded above is a pre-built knowledge base consisting of chunks of the Arize documentation.

## 4. Run Your Question-Answering Service

💭 Start a LlamaIndex application from your downloaded index. Use the `OpenInferenceTraceCallbackHandler` to store your data in [OpenInference format](https://github.com/Arize-ai/open-inference-spec), an open standard for capturing and storing AI model inferences that enables production LLM app servers to seamlessly integrate with LLM observability solutions such as Arize and Phoenix.

In [None]:
# Handler registered with the callback manager below; its spans are read back
# later in the notebook via `callback_handler.get_spans()`.
callback_handler = OpenInferenceTraceCallbackHandler()

# Build each component on its own line, then bundle them into the service context.
llm_predictor = LLMPredictor(llm=ChatOpenAI(temperature=0, model_name="gpt-3.5-turbo"))
embed_model = OpenAIEmbedding(model="text-embedding-ada-002")
callback_manager = CallbackManager(handlers=[callback_handler])
service_context = ServiceContext.from_defaults(
    callback_manager=callback_manager,
    embed_model=embed_model,
    llm_predictor=llm_predictor,
)

# Load the persisted index with this service context and expose it as a query engine.
index = load_index_from_storage(storage_context, service_context=service_context)
query_engine = index.as_query_engine()

💭 Ask questions of your question-answering service and view the responses.

In [None]:
# Load queries from GCS - these are commonly asked questions about Arize.
# The file is read as JSON Lines: one JSON object per line, and the "query"
# field of each object is collected.
queries_url = "http://storage.googleapis.com/arize-assets/phoenix/datasets/unstructured/llm/context-retrieval/arize_docs_queries.jsonl"
queries = []
with urlopen(queries_url) as response:
    for raw_line in response:
        record_text = raw_line.decode("utf-8").strip()
        if not record_text:
            # Skip blank lines rather than letting json.loads fail on them.
            continue
        queries.append(json.loads(record_text)["query"])
queries

In [None]:
# Send every query to the query engine while showing a progress bar.
# After the loop, `response` holds the answer to the last query only.
progress = tqdm(queries)
for query in progress:
    response = query_engine.query(query)

In [None]:
from phoenix.trace.span_json_encoder import spans_to_jsonl
from phoenix.trace.utils import json_lines_to_df
from phoenix import TraceDataset  # imported once; the duplicate import from phoenix.trace.trace_dataset was shadowed by this one

# Materialize the spans collected by the callback handler and wrap them in a
# TraceDataset so that they can be handed to the Phoenix app.
spans = list(callback_handler.get_spans())
ds = TraceDataset.from_spans(spans)

# Start the Phoenix app on the collected trace; as the last expression of the
# cell, the returned value is displayed in the notebook.
px.launch_app(trace=ds)

In [None]:
# Dump the contents to a file for safe keeping
from phoenix.trace.span_json_encoder import spans_to_jsonl

# Flip to True to write the collected spans to `trace.jsonl` in the working directory.
export_trace = False
if export_trace:
    # Read the spans through the handler's public `get_spans()` accessor (the
    # same one used to build the TraceDataset) instead of its private tracer.
    with open("trace.jsonl", "w", encoding="utf-8") as f:
        f.write(spans_to_jsonl(list(callback_handler.get_spans())))