LlamaIndex with Azure OpenAI inference API

In [None]:
%pip install python-dotenv  llama-index llama-index-llms-azure-openai llama-index-embeddings-azure-openai

In [1]:
# I will keep credentials in .env file
import os
from dotenv import load_dotenv

# lets check it's working
load_dotenv()
print(os.getenv("AZURE_OPENAI_ENDPOINT"))

https://dyai7248466370.openai.azure.com/


In [2]:
import logging
import sys

logging.basicConfig(
    stream=sys.stdout, level=logging.INFO
)  # logging.DEBUG for more verbose output
logging.getLogger().addHandler(logging.StreamHandler(stream=sys.stdout))

In [3]:
# Create a client to connect to the endpoint:
from llama_index.llms.azure_openai import AzureOpenAI
from llama_index.embeddings.azure_openai import AzureOpenAIEmbedding

# chat completion model
llm = AzureOpenAI(
    model=os.getenv("AZURE_OPENAI_DEPLOYMENT_NAME"),
    deployment_name=os.getenv("AZURE_OPENAI_DEPLOYMENT_NAME"),
    api_key=os.getenv("AZURE_OPENAI_API_KEY"),
    azure_endpoint=os.getenv("AZURE_OPENAI_ENDPOINT"),
    api_version=os.getenv("AZURE_OPENAI_API_VERSION"),
)
# embedding model
embed_model = AzureOpenAIEmbedding(
    model=os.getenv("AZURE_OPENAI_DEPLOYMENT_NAME2"),
    deployment_name=os.getenv("AZURE_OPENAI_DEPLOYMENT_NAME2"),
    api_key=os.getenv("AZURE_OPENAI_API_KEY"),
    azure_endpoint=os.getenv("AZURE_OPENAI_ENDPOINT"),
    api_version=os.getenv("AZURE_OPENAI_API_VERSION"),
)

In [4]:
from llama_index.core import Settings, VectorStoreIndex

Settings.llm = llm
Settings.embed_model = embed_model

In [5]:
from llama_index.core import Document

documents = [
    Document(text="Andriy Shevchenko is a Ukrainian former professional football player."),
    Document(text="Taras Shevchenko's poetry book Kobzar was published 1840."),
    Document(text="Itinerant Ukrainian bard who sang to his own accompaniment, played on a multistringed kobza or bandura, is usualy named kobzar."),
]
index = VectorStoreIndex.from_documents(documents)

INFO:httpx:HTTP Request: POST https://dyai7248466370.openai.azure.com//openai/deployments/text-embedding-3-small/embeddings?api-version=2024-05-01-preview "HTTP/1.1 200 OK"
HTTP Request: POST https://dyai7248466370.openai.azure.com//openai/deployments/text-embedding-3-small/embeddings?api-version=2024-05-01-preview "HTTP/1.1 200 OK"


In [6]:
query_engine = index.as_query_engine()
answer = query_engine.query("What kobzars do?")
print(answer)

INFO:httpx:HTTP Request: POST https://dyai7248466370.openai.azure.com//openai/deployments/text-embedding-3-small/embeddings?api-version=2024-05-01-preview "HTTP/1.1 200 OK"
HTTP Request: POST https://dyai7248466370.openai.azure.com//openai/deployments/text-embedding-3-small/embeddings?api-version=2024-05-01-preview "HTTP/1.1 200 OK"
INFO:httpx:HTTP Request: POST https://dyai7248466370.openai.azure.com//openai/deployments/gpt-35-turbo/chat/completions?api-version=2024-05-01-preview "HTTP/1.1 200 OK"
HTTP Request: POST https://dyai7248466370.openai.azure.com//openai/deployments/gpt-35-turbo/chat/completions?api-version=2024-05-01-preview "HTTP/1.1 200 OK"
Based on the given context information, kobzars are Ukrainian bards who sing to their own accompaniment, usually playing on a multistringed kobza or bandura.
