### Mount ADLS storage

In [0]:
# List the secret scopes available through dbutils; the scope name is needed below.
dbutils.secrets.listScopes()

[SecretScope(name='azkvdbwscope')]

In [0]:
# Names of the Databricks secret scope and the Azure storage account used by the
# cells below.
databricks_scope_name = "azkvdbwscope"
storage_account_name = "stacdataterraprojprod01"


# List the secret keys stored in the scope: these key names are what we pass as the
# second argument to dbutils.secrets.get().
dbutils.secrets.list(databricks_scope_name)

[SecretMetadata(key='databricksappclientid'),
 SecretMetadata(key='databricksappsecret'),
 SecretMetadata(key='tenantid')]

In [0]:
# Fetch the OAuth credentials (client id, tenant id, client secret) from the secret
# scope, one lookup per key, in that order.
client_id, tenant_id, client_secret = (
    dbutils.secrets.get(databricks_scope_name, secret_key)
    for secret_key in ("databricksappclientid", "tenantid", "databricksappsecret")
)

In [0]:
# To access our data in Azure Storage securely we use OAuth 2.0 (client credentials)
# with Azure Active Directory. This dictionary holds the required filesystem settings
# and is passed as `extra_configs` when the containers are mounted.
configs = {
    "fs.azure.account.auth.type": "OAuth",
    "fs.azure.account.oauth.provider.type": "org.apache.hadoop.fs.azurebfs.oauth2.ClientCredsTokenProvider",
    # The secret values are used as-is, so no f-string wrapper is needed.
    "fs.azure.account.oauth2.client.id": client_id,
    "fs.azure.account.oauth2.client.secret": client_secret,
    # Token endpoint of the tenant that owns the application registration.
    "fs.azure.account.oauth2.client.endpoint": f"https://login.microsoftonline.com/{tenant_id}/oauth2/token",
}


In [0]:
def mount_adls(container_name: str) -> None:
    """Mount a storage container under ``/mnt/<storage account>/<container>``.

    The call is idempotent: when the mount point already appears in
    ``dbutils.fs.mounts()`` nothing is mounted and a notice is printed instead.

    Relies on the notebook-level ``storage_account_name`` and ``configs``
    variables and on the Databricks-provided ``dbutils`` object.

    Args:
        container_name: Name of the container inside the storage account.

    Raises:
        ValueError: If ``container_name`` is not a non-empty string.
    """
    # Fail fast: an empty name would otherwise produce a malformed abfss:// source
    # ("abfss://@<account>...") and a mount point without a container segment.
    if not isinstance(container_name, str) or not container_name.strip():
        raise ValueError("container_name must be a non-empty string")

    mount_point = f"/mnt/{storage_account_name}/{container_name}"

    # Check if directory is already mounted
    if any(mount.mountPoint == mount_point for mount in dbutils.fs.mounts()):
        print(f"Directory {mount_point} is already mounted.")
        return

    dbutils.fs.mount(
        source=f"abfss://{container_name}@{storage_account_name}.dfs.core.windows.net/",
        mount_point=mount_point,
        extra_configs=configs,
    )
    # Report the new mount so both outcomes of the function are visible in the output.
    print(f"Directory {mount_point} has been mounted.")

# Mount the bronze, silver and gold containers, in that order.
for _container in ("ctdatabronze", "ctdatasilver", "ctdatagold"):
    mount_adls(_container)

Directory /mnt/stacdataterraprojprod01/ctdatabronze is already mounted.
Directory /mnt/stacdataterraprojprod01/ctdatasilver is already mounted.
Directory /mnt/stacdataterraprojprod01/ctdatagold is already mounted.


In [0]:
# List all mounts on Databricks.
# You should see your bronze, silver and gold containers, plus the mounts that
# Databricks provides by default.
dbutils.fs.mounts()

[MountInfo(mountPoint='/mnt/stacdataterraprojprod01/ctdatagold', source='abfss://ctdatagold@stacdataterraprojprod01.dfs.core.windows.net/', encryptionType=''),
 MountInfo(mountPoint='/databricks-datasets', source='databricks-datasets', encryptionType=''),
 MountInfo(mountPoint='/mnt/stacdataterraprojprod01/ctdatabronze', source='abfss://ctdatabronze@stacdataterraprojprod01.dfs.core.windows.net/', encryptionType=''),
 MountInfo(mountPoint='/Volumes', source='UnityCatalogVolumes', encryptionType=''),
 MountInfo(mountPoint='/databricks/mlflow-tracking', source='databricks/mlflow-tracking', encryptionType=''),
 MountInfo(mountPoint='/databricks-results', source='databricks-results', encryptionType=''),
 MountInfo(mountPoint='/databricks/mlflow-registry', source='databricks/mlflow-registry', encryptionType=''),
 MountInfo(mountPoint='/Volume', source='DbfsReserved', encryptionType=''),
 MountInfo(mountPoint='/volumes', source='DbfsReserved', encryptionType=''),
 MountInfo(mountPoint='/mnt/s

### Read a file from the mount

In [0]:
# Sanity check: read a text file through the bronze mount point and display its
# first rows to confirm the mount is usable.
df = spark.read.text('/mnt/stacdataterraprojprod01/ctdatabronze/ircc.txt')
df.show()

+----------+
|     value|
+----------+
|UkWgAlVaqF|
+----------+

