## Document Permissions in Azure AI Search

## 1. Load Connections

In [None]:
from dotenv import load_dotenv
from azure.identity import DefaultAzureCredential, get_bearer_token_provider
import os

# Read settings from a local .env file. With override=True, values in the
# file replace any that are already set in the process environment.
load_dotenv(override=True)

# Required settings: a missing variable raises KeyError immediately.
endpoint = os.environ["AZURE_SEARCH_ENDPOINT"]
adls_gen2_connection_string = os.environ["AZURE_STORAGE_CONNECTION_STRING"]
adls_gen2_resource_id = os.environ["AZURE_STORAGE_RESOURCE_ID"]

# Optional settings: each falls back to the sample's default name.
index_name = os.getenv("AZURE_SEARCH_INDEX", "document-permissions-sample")
indexer_name = os.getenv("AZURE_SEARCH_INDEXER", "document-permissions-sample-indexer")
datasource_name = os.getenv("AZURE_SEARCH_DATASOURCE", "document-permissions-sample-datasource")
adls_gen2_account_name = os.getenv("AZURE_STORAGE_ACCOUNT_NAME", "documentpermissionssample")
adls_gen2_container_name = os.getenv("AZURE_STORAGE_CONTAINER_NAME", "documentpermissionssample")

# One credential is shared by every client in the notebook; token_provider is
# a callable that returns a bearer token string for the Azure AI Search scope.
credential = DefaultAzureCredential()
token_provider = get_bearer_token_provider(credential, "https://search.azure.com/.default")

## 2. Create Index

In [None]:
from azure.search.documents.indexes.models import SearchField, SearchIndex, PermissionFilter, SearchIndexPermissionFilterOption
from azure.search.documents.indexes import SearchIndexClient

# Index schema: a key, the searchable content, two permission-filter
# collections ("oids" tagged USER_IDS, "groups" tagged GROUP_IDS), and the
# storage path/name metadata fields.
index_fields = [
    SearchField(name="id", type="Edm.String", key=True, filterable=True, sortable=True),
    SearchField(name="content", type="Edm.String", searchable=True, filterable=False, sortable=False),
    SearchField(
        name="oids",
        type="Collection(Edm.String)",
        filterable=True,
        permission_filter=PermissionFilter.USER_IDS,
    ),
    SearchField(
        name="groups",
        type="Collection(Edm.String)",
        filterable=True,
        permission_filter=PermissionFilter.GROUP_IDS,
    ),
    SearchField(name="metadata_storage_path", type="Edm.String", searchable=True),
    SearchField(name="metadata_storage_name", type="Edm.String", searchable=True),
]

# The permission filter option is switched on at the index level.
index = SearchIndex(
    name=index_name,
    fields=index_fields,
    permission_filter_option=SearchIndexPermissionFilterOption.ENABLED,
)

index_client = SearchIndexClient(endpoint=endpoint, credential=credential)
index_client.create_or_update_index(index=index)
print(f"Index '{index_name}' created with permission filter option enabled.")

## 3. Create data source

In [None]:
from azure.search.documents.indexes.models import SearchIndexerDataSourceConnection, SearchIndexerDataSourceType, IndexerPermissionOption, SearchIndexerDataContainer, DataSourceCredentials
from azure.search.documents.indexes import SearchIndexerClient
# The connection string carries only the storage account's resource ID
# (no account key is embedded here).
resource_id_connection = f"ResourceId={adls_gen2_resource_id};"
source_container = SearchIndexerDataContainer(name=adls_gen2_container_name)

# ADLS Gen2 data source that asks the indexer to ingest group IDs alongside
# each document.
datasource = SearchIndexerDataSourceConnection(
    name=datasource_name,
    type=SearchIndexerDataSourceType.ADLS_GEN2,
    connection_string=resource_id_connection,
    container=source_container,
    indexer_permission_options=[IndexerPermissionOption.GROUP_IDS],
)

indexer_client = SearchIndexerClient(endpoint=endpoint, credential=credential)
indexer_client.create_or_update_data_source_connection(datasource)
print(f"Datasource '{datasource_name}' created with permission filter option enabled.")

## 4. Get group IDs

In [None]:
from msgraph import GraphServiceClient
client = GraphServiceClient(credentials=credential, scopes=["https://graph.microsoft.com/.default"])

groups = await client.me.member_of.get()
first_group_id = groups.value[0].id
second_group_id = groups.value[1].id

## 5. Upload sample directory and file

In [None]:
from azure.storage.filedatalake import DataLakeServiceClient
import requests

def _fetch_csv(url):
    """Download *url* and return its body decoded as UTF-8 text.

    Raises:
        requests.HTTPError: on a non-2xx response, so an error page is never
            uploaded in place of the sample data.
        requests.Timeout: if the server does not respond within 30 seconds.
    """
    response = requests.get(url, timeout=30)
    response.raise_for_status()
    return response.content.decode("utf-8")


# Connect to the storage account and make sure the target container exists.
service = DataLakeServiceClient.from_connection_string(adls_gen2_connection_string, credential=credential)
container = service.get_file_system_client(adls_gen2_container_name)
if not container.exists():
    container.create_file_system()

# Both groups get rwx on the root and on state-parks; the per-state
# directories created below are each restricted to a single group.
root_dir_client = container.get_directory_client("/")
state_parks_dir_client = container.get_directory_client("state-parks")
state_parks_dir_client.create_directory()
root_dir_client.update_access_control_recursive(f"group:{first_group_id}:rwx")
root_dir_client.update_access_control_recursive(f"group:{second_group_id}:rwx")

# Oregon data: accessible to the first group only.
# The CSV is downloaded before any file is created, so a failed download does
# not leave an empty blob behind. create_sub_directory() already creates the
# directory, so no separate create_directory() call is needed.
oregon_state_parks_content = _fetch_csv("https://raw.githubusercontent.com/Azure-Samples/azure-search-sample-data/refs/heads/main/state-parks/Oregon/oregon_state_parks.csv")
oregon_dir_client = state_parks_dir_client.create_sub_directory("oregon")
file_client = oregon_dir_client.create_file("oregon_state_parks.csv")
file_client.upload_data(oregon_state_parks_content, overwrite=True)
oregon_dir_client.update_access_control_recursive(f"group:{first_group_id}:rwx")

# Washington data: accessible to the second group only.
washington_state_parks_content = _fetch_csv("https://raw.githubusercontent.com/Azure-Samples/azure-search-sample-data/refs/heads/main/state-parks/Washington/washington_state_parks.csv")
washington_dir_client = state_parks_dir_client.create_sub_directory("washington")
file_client = washington_dir_client.create_file("washington_state_parks.csv")
file_client.upload_data(washington_state_parks_content, overwrite=True)
washington_dir_client.update_access_control_recursive(f"group:{second_group_id}:rwx")

## 6. Create and run indexer

In [None]:
from azure.search.documents.indexes.models import SearchIndexer, FieldMapping

# Route the group/user ID metadata emitted by the data source into the
# index's two permission-filter fields.
permission_field_mappings = [
    FieldMapping(source_field_name="metadata_group_ids", target_field_name="groups"),
    FieldMapping(source_field_name="metadata_user_ids", target_field_name="oids"),
]

# Indexer that connects the data source to the target index.
indexer = SearchIndexer(
    name=indexer_name,
    target_index_name=index_name,
    data_source_name=datasource_name,
    field_mappings=permission_field_mappings,
)

indexer_client.create_or_update_indexer(indexer)
print(f"Indexer '{indexer_name}' created")


## 7. Search sample data using x-ms-query-source-authorization 

In [None]:
from azure.search.documents import SearchClient
# Match-all query that forwards the caller's bearer token through
# x_ms_query_source_authorization.
search_client = SearchClient(endpoint=endpoint, index_name=index_name, credential=credential)

query_options = {
    "search_text": "*",
    "x_ms_query_source_authorization": token_provider(),
    "select": "metadata_storage_path,oids,groups",
    "order_by": "id asc",
}
results = search_client.search(**query_options)

# Print the storage path and permission fields of every returned document.
for result in results:
    print(f"Path: {result['metadata_storage_path']}, OID: {result['oids']}, Group: {result['groups']}")

## 8. Search sample data without x-ms-query-source-authorization 

In [None]:
from azure.search.documents import SearchClient
# Same match-all query, but with no caller token supplied
# (x_ms_query_source_authorization is explicitly None), for comparison with
# the authorized query.
search_client = SearchClient(endpoint=endpoint, index_name=index_name, credential=credential)

query_options = {
    "search_text": "*",
    "x_ms_query_source_authorization": None,
    "select": "metadata_storage_path,oids,groups",
    "order_by": "id asc",
}
results = search_client.search(**query_options)

# Print the storage path and permission fields of every returned document.
for result in results:
    print(f"Path: {result['metadata_storage_path']}, OID: {result['oids']}, Group: {result['groups']}")