## Install Required Python Modules



*`az synapse spark pool update` with `--library-requirements requirements.txt`* - *Concern: it takes a long time (~20 mins) for the command to return a status.*

*In this implementation, a consolidated wheel file is used as a workspace package. Concern: even this takes ~15 mins for the Spark pool to be updated.*

# Import Libraries

In [None]:
# pip install azure-storage-file-datalake
import json
from collections import defaultdict
from datetime import datetime
from notebookutils import mssparkutils
from adlsaccess.synadlsaccess import ADLSOps

## Define Parameters to Enable connection to Storage

In [None]:

# Storage account name - expected to be supplied by the pipeline.
storage_acct = ""

# Key Vault linked service and secret name - these rarely change.
keyvault_ls_name = "Ls_NYCTaxi_KeyVault"
storage_key_name = "datalakeKey"

# ADLS helper and the timestamp it reports; both are reused by later cells.
adls_ops = ADLSOps(storage_acct, keyvault_ls_name, storage_key_name)
current_ts = adls_ops.get_current_ts()

# Values derived from secrets: the account key is read through the Key Vault
# linked service and then embedded in the storage connection string.
storage_access_key = mssparkutils.credentials.getSecretWithLS(
    keyvault_ls_name, storage_key_name
)
storage_acct_connection = ";".join(
    [
        "DefaultEndpointsProtocol=https",
        f"AccountName={storage_acct}",
        f"AccountKey={storage_access_key}",
        "EndpointSuffix=core.windows.net",
    ]
)


## Read Config from ADLS

In [None]:
# Create the ADLS helper, pass it the storage account key, and load the
# configuration that the later cells work from.
# NOTE(review): an ADLSOps instance with the same arguments is already created
# in the parameters cell; this rebinds `adls_ops` to a fresh one - confirm
# whether the second construction is actually needed.
adls_ops = ADLSOps(storage_acct, keyvault_ls_name, storage_key_name)
# Presumably authenticates the helper against the storage account - confirm
# against the ADLSOps implementation.
adls_ops.initialize_storage_account(storage_access_key)
# `config` is consumed by the ACL evaluation cell and written back to ADLS by
# the final cell.
config = adls_ops.read_config_from_adls()

## Evaluate overall ACLs needed in short form

In [None]:

# Evaluate the ACL permissions requested by the config for this run.
# NOTE(review): `adls_ops.ad_set` (read below) is presumably populated by this
# call - confirm against the ADLSOps implementation.
ad_perms = adls_ops.evaluate_ad_acl_perms(config, current_ts)

# Gather list of ADs and their ids - ids are needed for granting ACLs
# One Option - Requires APP ID - https://github.com/AzureAD/microsoft-authentication-library-for-python
# for now reading from Vault
try:
    ad_map = {
        ad: mssparkutils.credentials.getSecretWithLS(keyvault_ls_name, ad)
        for ad in adls_ops.ad_set
    }
except Exception as e:
    # The previous handler appended to `config_check_errors`, a name that is
    # not defined in this notebook, so a failed lookup surfaced as a NameError
    # that hid the real cause - and `ad_map` was left unbound for the print
    # below. The ACL grants cannot proceed without the ids, so fail here with
    # the intended message and keep the original exception as the cause.
    raise RuntimeError(
        f"No ID returned for given Microsoft Entra ID name. error is {e}"
    ) from e

print(ad_map)

## Grant ACLs Recursively

In [None]:
# Apply the grants in a fixed order - parent-folder ACLs first, then the AD
# ACLs - passing the same permissions and name-to-id map to both.
for apply_acls in (adls_ops.update_parent_folder_acls, adls_ops.update_ad_acls):
    apply_acls(ad_perms, ad_map)

# Finally, let the helper run its configuration-error check.
adls_ops.check_config_errors()


## Update Config file with latest run time
- Rename the existing config file with a timestamp suffix, then create a new config file that reflects the `lastUpdatedDatalake` timestamp

In [None]:
# Tip: mssparkutils.fs.help() lists the available file-system helpers.

# Full abfss URI of the live config file; the backup sits next to it with the
# run timestamp appended to the file name.
source_config = (
    f"abfss://{adls_ops.config_container}@{storage_acct}.dfs.core.windows.net"
    f"{adls_ops.config_file_path}{adls_ops.config_file_name}"
)
backup_config = f"{source_config}_{current_ts}"

# Serialize the in-memory config; values json cannot encode natively are
# written via str().
config_payload = json.dumps(config, indent=2, default=str)

# Move the current file aside as the backup, then write the new config in its
# original location.
mssparkutils.fs.mv(source_config, backup_config, overwrite=True)
mssparkutils.fs.put(source_config, config_payload, overwrite=True)
