## 1. Install Dependencies and Import Libraries

Install Phoenix (with its LlamaIndex integration) and the other dependencies.

In [None]:
!pip install "arize-phoenix[experimental,llama-index]" gcsfs tqdm

Import libraries.

In [None]:
import json
import os
from urllib.request import urlopen

import openai
import pandas as pd
import phoenix as px
from gcsfs import GCSFileSystem
from llama_index import (
    ServiceContext,
    StorageContext,
    load_index_from_storage,
    set_global_handler,
)
from llama_index.embeddings.openai import OpenAIEmbedding
from llama_index.graph_stores.simple import SimpleGraphStore
from llama_index.llms import OpenAI
from llama_index.postprocessor.cohere_rerank import CohereRerank
from tqdm import tqdm

# Raise pandas' per-cell display width limit to 1000 characters.
pd.set_option("display.max_colwidth", 1000)

## 2. Configure Your OpenAI and Cohere API Keys

In [None]:
from getpass import getpass

# OpenAI: take the key from the environment, prompting only when the variable
# is unset or empty, then hand it to the openai module and re-export it.
openai_api_key = os.getenv("OPENAI_API_KEY") or getpass("🔑 Enter your OpenAI API key: ")
openai.api_key = os.environ["OPENAI_API_KEY"] = openai_api_key

# Cohere: same environment-first lookup; the key is only exported to the
# process environment.
cohere_api_key = os.getenv("COHERE_API_KEY") or getpass("🔑 Enter your COHERE API key: ")
os.environ["COHERE_API_KEY"] = cohere_api_key

## 3. Download Your Knowledge Base

Download your pre-built index from cloud storage and instantiate your storage context.

In [None]:
# File-system handle for Google Cloud Storage, tied to the given GCP project.
file_system = GCSFileSystem(project="public-assets-275721")
# Bucket-relative location of the persisted index (used as persist_dir below).
index_path = "arize-assets/phoenix/datasets/unstructured/llm/llama-index/arize-docs/index/"
# Build the storage context from the persisted directory, accessed through the
# GCS file system rather than local disk.
storage_context = StorageContext.from_defaults(
    fs=file_system,
    persist_dir=index_path,
    graph_store=SimpleGraphStore(),  # prevents unauthorized request to GCS
)

The pre-built knowledge base index consists of chunks of the Arize documentation.

## 4. Run Your Question-Answering Service

💭 Start a LlamaIndex application from your downloaded index. Use the `OpenInferenceTraceCallbackHandler` to store your data in [OpenInference format](https://github.com/Arize-ai/open-inference-spec), an open standard for capturing and storing AI model inferences that enables production LLM app servers to seamlessly integrate with LLM observability solutions such as Arize and Phoenix.

In [None]:
# Models used by the application: a deterministic (temperature=0) chat model
# for answer synthesis and an OpenAI embedding model for retrieval.
service_context = ServiceContext.from_defaults(
    # LlamaIndex's OpenAI wrapper selects the model via `model`;
    # `model_name` is not one of its parameters.
    llm=OpenAI(model="gpt-3.5-turbo", temperature=0),
    embed_model=OpenAIEmbedding(model="text-embedding-ada-002"),
)

# Load the persisted index from the storage context, bound to the models above.
index = load_index_from_storage(
    storage_context,
    service_context=service_context,
)

# Rerank retrieved nodes with Cohere and keep only the top two.
# NOTE(review): CohereRerank is given no explicit key here; presumably it reads
# COHERE_API_KEY from the environment — confirm.
reranker = CohereRerank(top_n=2)
query_engine = index.as_query_engine(
    node_postprocessors=[reranker],
)

💭 Ask questions of your question-answering service and view the responses.

In [None]:
# Load queries from GCS - these are commonly asked questions about Arize.
# The file is JSON Lines: one JSON object per line, each with a "query" field.
# Fetch over HTTPS so the download is not sent in plaintext.
queries_url = "https://storage.googleapis.com/arize-assets/phoenix/datasets/unstructured/llm/context-retrieval/arize_docs_queries.jsonl"
queries = []
with urlopen(queries_url) as response:
    for line in response:
        line = line.decode("utf-8").strip()
        if not line:
            # Tolerate blank lines (e.g. a trailing newline at end of file),
            # which json.loads would otherwise reject.
            continue
        data = json.loads(line)
        queries.append(data["query"])
queries

In [None]:
# Start the Phoenix app, then register the "arize_phoenix" handler as
# LlamaIndex's global handler.
# NOTE(review): presumably this makes subsequent queries emit traces to the
# running Phoenix app — confirm against the Phoenix/LlamaIndex docs.
px.launch_app()
set_global_handler("arize_phoenix")

In [None]:
# Send every loaded query through the query engine, showing a tqdm progress bar.
# `response` is overwritten on each iteration, so only the last query's result
# remains bound after the loop.
for query in tqdm(queries):
    response = query_engine.query(query)