## Creating an index in Azure AI Search

In [2]:
from azure.core.credentials import AzureKeyCredential
from azure.search.documents import SearchClient
from azure.search.documents.indexes import SearchIndexClient
from azure.search.documents.indexes.models import SimpleField, SearchableField, SearchFieldDataType, SearchIndex
from dotenv import load_dotenv
import os
load_dotenv()

True

In [3]:
############## Sample documents (stand-ins for real data) ##############
# A real pipeline would fetch these from various sources, or produce them by
# chunking larger documents. Every record carries four keys:
#   id              - unique identifier of the document
#   content         - text content of the document
#   title           - name of the document
#   security_groups - groups that have access to the document; in Azure AI
#                     Search this could be used for access control
# This is a simplified example; in practice, security and metadata handling
# would be more involved.

documents = [
    {
        "id": "1",
        "content": "This is the first dummy chunk of text.",
        "title": "chunk_1.txt",
        "security_groups": ["Owners", "Members"],
    },
    {
        "id": "2",
        "content": "This is the second dummy chunk of text.",
        "title": "EmperorPenguins.txt",
        "security_groups": ["Owners", "Members"],
    },
    {
        "id": "3",
        "content": "This is the third dummy chunk of text.",
        "title": "RedPanda.txt",
        "security_groups": ["Owners"],
    },
]

In [4]:
# Schema of the index: one field definition per key of the sample documents.
fields = [
    # Document key: sortable, but neither filterable nor facetable.
    SimpleField(
        name="id",
        type=SearchFieldDataType.String,
        key=True,
        filterable=False,
        sortable=True,
        facetable=False,
    ),
    # Full-text searchable body, analyzed with the 'en.lucene' analyzer.
    SearchableField(
        name="content",
        type=SearchFieldDataType.String,
        searchable=True,
        analyzer_name='en.lucene',
        filterable=False,
        sortable=False,
        facetable=False,
    ),
    # Title is declared as a SearchableField but with searchable=False.
    SearchableField(
        name="title",
        type=SearchFieldDataType.String,
        searchable=False,
        filterable=False,
    ),
    # Collection of group names; filterable so queries can restrict on it.
    SimpleField(
        name='security_groups',
        type="Collection(Edm.String)",
        filterable=True,
    ),
]

In [5]:
# Name of the index to create, or to update if it already exists.
index_name = "dummy-index"

# Read the connection settings once and fail early with a clear message when
# they are missing, instead of passing None into the SDK clients.
search_endpoint = os.getenv('AI_SEARCH_ENDPOINT')
search_key = os.getenv('AI_SEARCH_KEY')
if not search_endpoint or not search_key:
    raise RuntimeError(
        "AI_SEARCH_ENDPOINT and AI_SEARCH_KEY must be set (for example in the .env file)"
    )

# One credential object shared by both clients.
credential = AzureKeyCredential(search_key)
index_client = SearchIndexClient(endpoint=search_endpoint, credential=credential)
search_client = SearchClient(endpoint=search_endpoint, index_name=index_name, credential=credential)

# Create (or update) the index from the field definitions.
index = SearchIndex(name=index_name, fields=fields)
created_index = index_client.create_or_update_index(index)
print(f'{created_index.name} created')

# Upload might take a while depending on the number and size of documents.
result = search_client.upload_documents(documents)

# upload_documents reports success per document; surface failures instead of
# silently continuing with a partially populated index.
failed_keys = [item.key for item in result if not item.succeeded]
if failed_keys:
    raise RuntimeError(f"Upload failed for document ids: {failed_keys}")

dummy-index created


In [6]:
# In the next notebook we will also generate embeddings for these documents and upload them to the index.

## Fetching documents from an index in Azure AI Search

In [7]:
def fetch_all_from_index(index_name: str) -> list:
    """Fetch the documents of an Azure AI Search index as a list.

    Connection settings are read from the AI_SEARCH_ENDPOINT and
    AI_SEARCH_KEY environment variables.

    Args:
        index_name: Name of the index to query.

    Returns:
        A list with one entry per search result returned for the wildcard
        query "*".
    """
    # Use the client as a context manager so its underlying session is
    # closed once the results have been read.
    with SearchClient(
        index_name=index_name,
        endpoint=os.getenv("AI_SEARCH_ENDPOINT"),
        credential=AzureKeyCredential(os.getenv("AI_SEARCH_KEY")),
    ) as client:
        results = client.search(search_text="*")
        # The result object is iterated lazily, so materialize it while the
        # client is still open.
        return list(results)


In [8]:
index_name = "dummy-index"
# Bind the result to a descriptive name rather than `_`: IPython uses `_` for
# the previous cell output, and assigning to it shadows that behaviour.
indexed_documents = fetch_all_from_index(index_name)
indexed_documents

[{'content': 'This is the third dummy chunk of text.',
  'id': '3',
  'security_groups': ['Owners'],
  'title': 'RedPanda.txt',
  '@search.score': 1.0,
  '@search.reranker_score': None,
  '@search.highlights': None,
  '@search.captions': None},
 {'content': 'This is the second dummy chunk of text.',
  'id': '2',
  'security_groups': ['Owners', 'Members'],
  'title': 'EmperorPenguins.txt',
  '@search.score': 1.0,
  '@search.reranker_score': None,
  '@search.highlights': None,
  '@search.captions': None},
 {'content': 'This is the first dummy chunk of text.',
  'id': '1',
  'security_groups': ['Owners', 'Members'],
  'title': 'chunk_1.txt',
  '@search.score': 1.0,
  '@search.reranker_score': None,
  '@search.highlights': None,
  '@search.captions': None}]

## Deleting an index in Azure AI Search

In [9]:
def delete_index(index_name: str) -> None:
    """Delete an Azure AI Search index and print a confirmation.

    Connection settings are read from the AI_SEARCH_ENDPOINT and
    AI_SEARCH_KEY environment variables.

    Args:
        index_name: Name of the index to delete.
    """
    # Use the client as a context manager so its underlying session is
    # closed after the delete call.
    with SearchIndexClient(
        endpoint=os.getenv('AI_SEARCH_ENDPOINT'),
        credential=AzureKeyCredential(os.getenv('AI_SEARCH_KEY')),
    ) as index_client:
        index_client.delete_index(index_name)
    print(f"{index_name} deleted")


In [10]:
# Remove the demo index created earlier in this notebook.
index_name = "dummy-index"
delete_index(index_name)

dummy-index deleted
