# Azure OpenAI Service - Q&A with semantic answering using LlamaIndex 🦙 (GPT Index)

First, create a file called `.env` in this folder and add the following content, replacing the placeholders with your own values:

```
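AZURE_OPENAI_API_KEY=xxxxxx
AZURE_OPENAI_ENDPOINT=https://xxxxxxx.openai.azure.com/
AZURE_OPENAI_API_VERSION=xxxx-xx-xx
AZURE_OPENAI_DEPLOYMENT=xxxxxx
AZURE_OPENAI_EMBEDDING_DEPLOYMENT=xxxxxx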
```

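Then, make sure all dependencies are installed (`python-dotenv`, `llama-index`, `llama-index-llms-azure-openai`, `llama-index-embeddings-azure-openai`) and run the example below: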

In [None]:
import os
from dotenv import load_dotenv

from llama_index.core import Settings, VectorStoreIndex, SimpleDirectoryReader, PromptHelper
from llama_index.llms.azure_openai import AzureOpenAI
from llama_index.embeddings.azure_openai import AzureOpenAIEmbedding

# Load key/value pairs from the local `.env` file into the process environment.
load_dotenv()


def _require_env(name: str) -> str:
    """Return the value of environment variable *name*, failing fast if unset.

    Raises:
        RuntimeError: If the variable is missing or empty, so that a
            misconfigured `.env` file is reported by name instead of
            surfacing later as an opaque client error.
    """
    value = os.getenv(name)
    if not value:
        raise RuntimeError(
            f"Missing required environment variable {name!r}; "
            "add it to your .env file."
        )
    return value


# Connection settings shared by the LLM and the embedding model.
azure_api_key = _require_env("AZURE_OPENAI_API_KEY")
azure_endpoint = _require_env("AZURE_OPENAI_ENDPOINT")
azure_api_version = _require_env("AZURE_OPENAI_API_VERSION")

# 1. Configure the global LLM and embedding models
Settings.llm = AzureOpenAI(
    model="gpt-4o",
    deployment_name=_require_env("AZURE_OPENAI_DEPLOYMENT"),
    api_key=azure_api_key,
    azure_endpoint=azure_endpoint,
    api_version=azure_api_version,
)
Settings.embed_model = AzureOpenAIEmbedding(
    model="text-embedding-ada-002",
    deployment_name=_require_env("AZURE_OPENAI_EMBEDDING_DEPLOYMENT"),
    api_key=azure_api_key,
    azure_endpoint=azure_endpoint,
    api_version=azure_api_version,
)

# 2. Override the global chunking parameters used when splitting documents
Settings.chunk_size = 1024
Settings.chunk_overlap = 20

# 3. Define an explicit PromptHelper and register it globally.
#    It is set on `Settings` rather than passed to `from_documents`:
#    `prompt_helper` is not a documented parameter of that constructor, so
#    passing it there would not configure anything.
prompt_helper = PromptHelper(
    context_window=3000,
    num_output=256,
    chunk_overlap_ratio=0.1,
    chunk_size_limit=1000,
)
Settings.prompt_helper = prompt_helper

# 4. Load the documents to index
documents = SimpleDirectoryReader("../data/qna/").load_data()

# 5. Build the vector index (models and chunking come from `Settings`)
index = VectorStoreIndex.from_documents(documents)

# 6. Create the query engine and run the prompt
query_engine = index.as_query_engine(verbose=True)
response = query_engine.query("Can you let me knwo about Andaman package.")
print(response)
