In [None]:
%pip install llama-index-embeddings-openai
%pip install llama-index-llms-azure-openai

In [None]:
!pip install llama-index

In [None]:
pip install --upgrade llama-index llama-index-embeddings-azure-openai

In [1]:
import os
import json
import openai
from llama_index.llms.azure_openai import AzureOpenAI
from llama_index.embeddings.openai import OpenAIEmbedding, OpenAIEmbeddingModelType
from llama_index.core import VectorStoreIndex, SimpleDirectoryReader
from llama_index.embeddings.azure_openai import AzureOpenAIEmbedding

from dotenv import load_dotenv

In [2]:

# Read a local .env file (if present) into the process environment.
load_dotenv()

# Chat-model settings for Azure OpenAI; each value is None when the variable is unset.
AZURE_OPENAI_MODEL = os.environ.get("AZURE_OPENAI_MODEL")
AZURE_OPENAI_DEPLOYMENT_NAME = os.environ.get("AZURE_OPENAI_DEPLOYMENT_NAME")

# Connection details shared by the LLM and the embedding client.
AZURE_OPENAI_BASE = os.environ.get("AZURE_OPENAI_BASE")
AZURE_OPENAI_KEY = os.environ.get("AZURE_OPENAI_KEY")
AZURE_OPENAI_VERSION = os.environ.get("AZURE_OPENAI_VERSION")

# Embedding-model settings.
AZURE_OPENAI_EMBEDDING_MODEL = os.environ.get("AZURE_OPENAI_EMBEDDING_MODEL")
AZURE_OPENAI_EMBEDDING_MODEL_DEPLOYMENT_NAME = os.environ.get(
    "AZURE_OPENAI_EMBEDDING_MODEL_DEPLOYMENT_NAME"
)

In [3]:
# Echo the embedding model name read from the environment (shows None if the
# variable was not set).
AZURE_OPENAI_EMBEDDING_MODEL

'text-embedding-3-large'

In [4]:
# Chat-completion client for the Azure OpenAI deployment configured in the environment.
llm = AzureOpenAI(
    model=AZURE_OPENAI_MODEL,
    deployment_name=AZURE_OPENAI_DEPLOYMENT_NAME,
    azure_endpoint=AZURE_OPENAI_BASE,
    api_key=AZURE_OPENAI_KEY,
    api_version=AZURE_OPENAI_VERSION,
)

# Embedding client. The model name comes from AZURE_OPENAI_EMBEDDING_MODEL so it
# stays in step with the configured deployment; when that variable is unset we
# fall back to the previously hard-coded text-embedding-3-large.
# NOTE: the Cosmos DB vector policy further down hard-codes 3072 dimensions,
# which matches text-embedding-3-large. Update it if a different model is configured.
embed_model = AzureOpenAIEmbedding(
    model=AZURE_OPENAI_EMBEDDING_MODEL or OpenAIEmbeddingModelType.TEXT_EMBED_3_LARGE,
    deployment_name=AZURE_OPENAI_EMBEDDING_MODEL_DEPLOYMENT_NAME,
    azure_endpoint=AZURE_OPENAI_BASE,
    api_key=AZURE_OPENAI_KEY,
    api_version=AZURE_OPENAI_VERSION,
)

In [5]:
from llama_index.core import Settings

# Register the Azure LLM and embedding client on LlamaIndex's global Settings
# object so later cells do not have to pass them explicitly.
Settings.llm, Settings.embed_model = llm, embed_model

In [7]:
from pathlib import Path

from llama_index.core import SimpleDirectoryReader

# Build the path with pathlib so it resolves on Windows, macOS and Linux alike.
# The previous literal r".\papers\seagrass.pdf" only worked on Windows, because
# backslashes are ordinary filename characters on POSIX systems.
pdf_path = Path("papers") / "seagrass.pdf"

# Load the PDF into LlamaIndex Document objects.
documents = SimpleDirectoryReader(input_files=[pdf_path]).load_data()

print("Document ID:", documents[0].doc_id)

Document ID: 41526f72-92be-43c3-8458-de4360380084


In [10]:
from azure.cosmos import CosmosClient, PartitionKey
from llama_index.vector_stores.azurecosmosnosql import (
    AzureCosmosDBNoSqlVectorSearch,
)
from llama_index.core import StorageContext

# Prerequisite: vector search must be enabled on the Cosmos DB account. If it is
# not, container creation below fails with a BadRequest CosmosHttpResponseError
# ("A Container Vector Policy has been provided, but the capability has not been
# enabled on your account"). See https://aka.ms/CosmosVectorSearch.

# create cosmos client
URI = os.getenv("AZURE_COSMOSDB_URI")
KEY = os.getenv("AZURE_COSMOSDB_KEY")
if not URI or not KEY:
    # Fail fast with an actionable message instead of handing None to the SDK.
    raise RuntimeError(
        "AZURE_COSMOSDB_URI and AZURE_COSMOSDB_KEY must be set "
        "(for example in the .env file) before creating the Cosmos DB client."
    )
client = CosmosClient(URI, credential=KEY)

# Size of the vectors stored in the container; 3072 is the output size of
# text-embedding-3-large. Keep it in sync with the embedding model in use.
EMBEDDING_DIMENSIONS = 3072

# specify vector store properties
# Index every path except the system _etag, plus a quantizedFlat vector index
# on the /embedding field.
indexing_policy = {
    "indexingMode": "consistent",
    "includedPaths": [{"path": "/*"}],
    "excludedPaths": [{"path": '/"_etag"/?'}],
    "vectorIndexes": [{"path": "/embedding", "type": "quantizedFlat"}],
}

# Describes the vectors stored under /embedding: float32, cosine distance.
vector_embedding_policy = {
    "vectorEmbeddings": [
        {
            "path": "/embedding",
            "dataType": "float32",
            "distanceFunction": "cosine",
            "dimensions": EMBEDDING_DIMENSIONS,
        }
    ]
}

# Partition the container on the document id.
partition_key = PartitionKey(path="/id")
cosmos_container_properties_test = {"partition_key": partition_key}
cosmos_database_properties_test = {}

# create vector store
store = AzureCosmosDBNoSqlVectorSearch(
    cosmos_client=client,
    vector_embedding_policy=vector_embedding_policy,
    indexing_policy=indexing_policy,
    cosmos_container_properties=cosmos_container_properties_test,
    cosmos_database_properties=cosmos_database_properties_test,
    create_container=True,
)

# Route index storage through the Cosmos-backed vector store.
storage_context = StorageContext.from_defaults(vector_store=store)

# Build the index from the loaded documents using that storage context.
index = VectorStoreIndex.from_documents(
    documents, storage_context=storage_context
)

CosmosHttpResponseError: (BadRequest) Message: {"Errors":["A Container Vector Policy has been provided, but the capability has not been enabled on your account. Visit [https://aka.ms/CosmosVectorSearch](https://aka.ms/CosmosVectorSearch) for instructions how to enable it."]}
ActivityId: e85c612d-7d9d-40d9-b413-faeb73f8700a, Request URI: /apps/9123a7fd-5225-42af-a2d5-20c920d5bf0e/services/bc5e50fb-5f6a-43b6-9998-e7050dbc3673/partitions/99e0c5f0-69f5-4acb-a9c3-42a2796df645/replicas/134032746019763574p, RequestStats: , SDK: Microsoft.Azure.Documents.Common/2.14.0
Code: BadRequest
Message: Message: {"Errors":["A Container Vector Policy has been provided, but the capability has not been enabled on your account. Visit [https://aka.ms/CosmosVectorSearch](https://aka.ms/CosmosVectorSearch) for instructions how to enable it."]}
ActivityId: e85c612d-7d9d-40d9-b413-faeb73f8700a, Request URI: /apps/9123a7fd-5225-42af-a2d5-20c920d5bf0e/services/bc5e50fb-5f6a-43b6-9998-e7050dbc3673/partitions/99e0c5f0-69f5-4acb-a9c3-42a2796df645/replicas/134032746019763574p, RequestStats: , SDK: Microsoft.Azure.Documents.Common/2.14.0

In [None]:
# Alternative: Simple in-memory vector store (temporary workaround)
# Use this while waiting for Azure Cosmos DB vector search to be enabled.
# Note: this rebinds `storage_context` and `index` from the Cosmos DB cell.

from llama_index.core import VectorStoreIndex, StorageContext
# With the namespaced llama-index packages, SimpleVectorStore is exported from
# llama_index.core.vector_stores; the old llama_index.vector_stores.simple path
# no longer exists and raises ModuleNotFoundError.
from llama_index.core.vector_stores import SimpleVectorStore

# Create a simple in-memory vector store
vector_store = SimpleVectorStore()
storage_context = StorageContext.from_defaults(vector_store=vector_store)

# Create the index with the simple vector store
index = VectorStoreIndex.from_documents(
    documents, storage_context=storage_context
)

print("Index created successfully with simple vector store!")
print(f"Number of documents indexed: {len(documents)}")