In [None]:
import os
import dotenv

# Pull variables from a .env file (if one is found) into the process environment.
dotenv.load_dotenv()

# Fail fast when the credential is missing or set to an empty string.
if os.environ.get("GITHUB_TOKEN", "") == "":
    raise ValueError("GITHUB_TOKEN is not set")

# Re-export the GitHub token and the inference endpoint under the OPENAI_*
# names that the model clients in the next cell read via os.getenv.
os.environ.update(
    {
        "OPENAI_API_KEY": os.environ["GITHUB_TOKEN"],
        "OPENAI_BASE_URL": "https://models.inference.ai.azure.com/",
    }
)

In [None]:
from llama_index.llms.openai import OpenAI
from llama_index.embeddings.openai import OpenAIEmbedding
from llama_index.core import VectorStoreIndex, SimpleDirectoryReader, StorageContext, load_index_from_storage
from llama_index.core import Settings
import os

# Chat model and embedding model, both pointed at the endpoint and credential
# exported through the OPENAI_* environment variables.
llm = OpenAI(
    model="gpt-4o-mini",
    api_key=os.getenv("OPENAI_API_KEY"),
    api_base=os.getenv("OPENAI_BASE_URL"),
)

embed_model = OpenAIEmbedding(
    model="text-embedding-3-small",
    api_key=os.getenv("OPENAI_API_KEY"),
    api_base=os.getenv("OPENAI_BASE_URL"),
)

# Register both models as the LlamaIndex defaults. Without this, `embed_model`
# is never used: index construction and query-time retrieval fall back to the
# library's default embedding model instead of the one configured above.
Settings.llm = llm
Settings.embed_model = embed_model

In [None]:
# Start the Phoenix app from inside the notebook; the call's return value is
# the cell's displayed output.
# NOTE(review): presumably this serves the local trace-viewer UI that the
# instrumentation in the next cell reports to — confirm against Phoenix docs.
import phoenix as px
px.launch_app()

In [None]:
from openinference.instrumentation.llama_index import LlamaIndexInstrumentor
from phoenix.otel import register

# Create the tracer provider via phoenix.otel.register(), then attach the
# LlamaIndex instrumentor to it so LlamaIndex calls are reported through it.
tracer_provider = register()

instrumentor = LlamaIndexInstrumentor()
instrumentor.instrument(tracer_provider=tracer_provider)

In [None]:
# Load the contents of the source folder through SimpleDirectoryReader.
documents = SimpleDirectoryReader(input_dir="../Big Star Collectibles").load_data()

# Build the first vector index with whatever chunking settings are currently
# active, then write its storage context to disk.
index_1 = VectorStoreIndex.from_documents(
    documents=documents,
    insert_batch_size=150,
)
index_1.storage_context.persist(persist_dir="../local_index_1")

In [None]:
# One example solution: change the global chunking settings (size first, then
# overlap) and build a second index from the same documents under them.
# Note that this mutates the shared `Settings` object, so any index built
# after this cell also uses these values.
Settings.chunk_size, Settings.chunk_overlap = 500, 100

index_2 = VectorStoreIndex.from_documents(
    documents=documents,
    insert_batch_size=150,
)
index_2.storage_context.persist(persist_dir="../local_index_2")

In [None]:
# solution
# Build a query engine over `index_2`, the index created with the custom chunk
# settings in the cell above. The cells above define only `index_1` and
# `index_2`; a bare `index` does not exist and raises NameError on a fresh
# kernel. Swap in `index_1` to compare answers against the other index.
query_engine = index_2.as_query_engine(
    llm=llm,
)

# The response object is the last expression, so it renders as the cell output.
query_engine.query("When was Big Star Collectibles Started?")

In [None]:
# Second question sent through the `query_engine` created in the previous
# cell (that cell must run first); the response is shown as the cell output.
query_engine.query("Who started Big Star Collectibles?")