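# Document Permissions in Azure AI Search

This notebook creates a search index with permission filters, indexes a sample file from an ADLS Gen2 container together with its group ACL, and then queries the index with and without the `x-ms-query-source-authorization` header.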

## 1. Load Connections

In [33]:
from dotenv import load_dotenv
from azure.identity import DefaultAzureCredential, get_bearer_token_provider
import os

# Pull settings from the local .env file into the process environment.
load_dotenv(override=True)

# Required: the search service endpoint. A missing variable raises KeyError here.
endpoint = os.environ["AZURE_SEARCH_ENDPOINT"]

# One credential object is shared by every client created in this notebook.
credential = DefaultAzureCredential()

# Optional resource names; each falls back to a sample default when the variable is unset.
index_name = os.environ.get("AZURE_SEARCH_INDEX", "document-permissions-sample")
indexer_name = os.environ.get(
    "AZURE_SEARCH_INDEXER", "document-permissions-sample-indexer"
)
datasource_name = os.environ.get(
    "AZURE_SEARCH_DATASOURCE", "document-permissions-sample-datasource"
)
adls_gen2_account_name = os.environ.get(
    "AZURE_STORAGE_ACCOUNT_NAME", "documentpermissionssample"
)
adls_gen2_container_name = os.environ.get(
    "AZURE_STORAGE_CONTAINER_NAME", "documentpermissionssample"
)

# Required storage settings: the connection string is used when uploading the sample
# file, the resource ID when defining the indexer data source.
adls_gen2_connection_string = os.environ["AZURE_STORAGE_CONNECTION_STRING"]
adls_gen2_resource_id = os.environ["AZURE_STORAGE_RESOURCE_ID"]

# Callable that returns a bearer token for the Azure AI Search scope; it is invoked
# later to populate the query-time authorization argument.
token_provider = get_bearer_token_provider(
    credential, "https://search.azure.com/.default"
)

## 2. Create Index

In [34]:
from azure.search.documents.indexes.models import SearchField, SearchIndex, PermissionFilter, SearchIndexPermissionFilterOption
from azure.search.documents.indexes import SearchIndexClient

# Client used to manage index definitions on the search service.
index_client = SearchIndexClient(endpoint=endpoint, credential=credential)

# Index schema: a key, the searchable content, two permission-filter collections
# (user IDs and group IDs) and the two storage metadata fields.
index_fields = [
    SearchField(name="id", type="Edm.String", key=True, filterable=True, sortable=True),
    SearchField(name="content", type="Edm.String", searchable=True, filterable=False, sortable=False),
    SearchField(
        name="oids",
        type="Collection(Edm.String)",
        filterable=True,
        permission_filter=PermissionFilter.USER_IDS,
    ),
    SearchField(
        name="groups",
        type="Collection(Edm.String)",
        filterable=True,
        permission_filter=PermissionFilter.GROUP_IDS,
    ),
    SearchField(name="metadata_storage_path", type="Edm.String", searchable=True),
    SearchField(name="metadata_storage_name", type="Edm.String", searchable=True),
]

# The permission filter option is switched on at the index level.
index = SearchIndex(
    name=index_name,
    fields=index_fields,
    permission_filter_option=SearchIndexPermissionFilterOption.ENABLED,
)

index_client.create_or_update_index(index=index)
print(f"Index '{index_name}' created with permission filter option enabled.")

Index 'document-permissions-sample' created with permission filter option enabled.


## 3. Create data source

In [35]:
from azure.search.documents.indexes.models import SearchIndexerDataSourceConnection, SearchIndexerDataSourceType, IndexerPermissionOption, SearchIndexerDataContainer, DataSourceCredentials
from azure.search.documents.indexes import SearchIndexerClient
# Client used to manage data sources and indexers on the search service.
indexer_client = SearchIndexerClient(endpoint=endpoint, credential=credential)

# The connection string carries only the storage account's resource ID.
resource_id_connection = f"ResourceId={adls_gen2_resource_id};"
source_container = SearchIndexerDataContainer(name=adls_gen2_container_name)

# NOTE(review): only GROUP_IDS is requested here, while the indexer cell also maps
# "metadata_user_ids" into the "oids" field -- confirm whether USER_IDS should be
# ingested as well.
datasource = SearchIndexerDataSourceConnection(
    name=datasource_name,
    type=SearchIndexerDataSourceType.ADLS_GEN2,
    connection_string=resource_id_connection,
    container=source_container,
    indexer_permission_options=[IndexerPermissionOption.GROUP_IDS],
)

indexer_client.create_or_update_data_source_connection(datasource)
print(f"Datasource '{datasource_name}' created with permission filter option enabled.")

Datasource 'document-permissions-sample-datasource' created with permission filter option enabled.


## 4. Get group id

In [36]:
from msgraph import GraphServiceClient
client = GraphServiceClient(credentials=credential, scopes=["https://graph.microsoft.com/.default"])

groups = await client.me.member_of.get()
group_id = groups.value[0].id 

## 5. Upload sample directory and file

In [37]:
from azure.storage.filedatalake import DataLakeServiceClient

# Connect to the storage account and make sure the sample container exists.
service = DataLakeServiceClient.from_connection_string(
    adls_gen2_connection_string,
    credential=credential,
)
container = service.get_file_system_client(adls_gen2_container_name)
if not container.exists():
    container.create_file_system()

# Create the "data" directory and write the sample file into it.
data_dir_client = container.get_directory_client("data")
data_dir_client.create_directory()
file_client = data_dir_client.create_file("sample.txt")
file_client.upload_data("This is a sample file.", overwrite=True)

# Give the group read/write/execute on the container root and everything below it.
# The call is left as the cell's final expression so its result is displayed.
group_acl = f"group:{group_id}:rwx"
root_dir_client = container.get_directory_client("/")
root_dir_client.update_access_control_recursive(group_acl)


{'counters': {'directories_successful': 2, 'files_successful': 1, 'failure_count': 0}, 'continuation': None}

## 6. Create and run indexer

In [39]:
from azure.search.documents.indexes.models import SearchIndexer, FieldMapping

# (source metadata field, target index field) pairs that copy the ingested
# permission metadata into the index's permission-filter fields.
permission_field_pairs = (
    ("metadata_group_ids", "groups"),
    ("metadata_user_ids", "oids"),
)

# Indexer that connects the data source to the target index.
indexer = SearchIndexer(
    name=indexer_name,
    target_index_name=index_name,
    data_source_name=datasource_name,
    field_mappings=[
        FieldMapping(source_field_name=source, target_field_name=target)
        for source, target in permission_field_pairs
    ],
)

indexer_client.create_or_update_indexer(indexer)
print(f"Indexer '{indexer_name}' created")


Indexer 'document-permissions-sample-indexer' created


## 7. Search sample data using x-ms-query-source-authorization 

In [43]:
from azure.search.documents import SearchClient
# Query client bound to the sample index.
search_client = SearchClient(
    endpoint=endpoint,
    index_name=index_name,
    credential=credential,
)

# Match every document and pass a freshly acquired token as the query source
# authorization, so the query carries the caller's identity.
results = search_client.search(
    search_text="*",
    x_ms_query_source_authorization=token_provider(),
    select="metadata_storage_path,oids,groups",
    order_by="id asc",
)

# Show each hit's storage path along with the user and group IDs stored on it.
for result in results:
    print(f"Path: {result['metadata_storage_path']}, OID: {result['oids']}, Group: {result['groups']}")

Path: https://magotteiadlsgen2.blob.core.windows.net/documentpermissionssample/data/sample.txt, OID: ['none'], Group: ['ec5aece9-33fc-4b2e-abe1-aedf771357a3']


## 8. Search sample data without x-ms-query-source-authorization 

In [44]:
from azure.search.documents import SearchClient
# Query client bound to the sample index.
search_client = SearchClient(
    endpoint=endpoint,
    index_name=index_name,
    credential=credential,
)

# Same query as the previous section, but with no query source authorization token.
# NOTE(review): the saved output below lists the same document as the token-bearing
# query -- confirm that this is the intended demonstration.
results = search_client.search(
    search_text="*",
    x_ms_query_source_authorization=None,
    select="metadata_storage_path,oids,groups",
    order_by="id asc",
)

# Show each hit's storage path along with the user and group IDs stored on it.
for result in results:
    print(f"Path: {result['metadata_storage_path']}, OID: {result['oids']}, Group: {result['groups']}")

Path: https://magotteiadlsgen2.blob.core.windows.net/documentpermissionssample/data/sample.txt, OID: ['none'], Group: ['ec5aece9-33fc-4b2e-abe1-aedf771357a3']
