<div><img src="https://github.com/Datalex-io/datalex-lab/blob/main/databricks/images/databricks-connect-azure-storage.PNG?raw=true" /></div>

# Mounting your storage

## Azure Data Lake Storage Gen1

In [0]:
# OAuth2 client-credential settings handed to the mount as extra_configs.
# The service credential is fetched from a Databricks secret scope rather than
# being written into the notebook.
service_credential = dbutils.secrets.get(scope = "<scope-name>", key = "<key-name-for-service-credential>")

configs = {
  "<prefix>.oauth2.access.token.provider.type": "ClientCredential",
  "<prefix>.oauth2.client.id": "<application-id>",
  "<prefix>.oauth2.credential": service_credential,
  "<prefix>.oauth2.refresh.url": "https://login.microsoftonline.com/<directory-id>/oauth2/token",
}

# Optionally, you can add <directory-name> to the source URI of your mount point.
adls_gen1_source = "adl://<storage-resource-name>.azuredatalakestore.net/<directory-name>"
dbutils.fs.mount(
  source = adls_gen1_source,
  mount_point = "/mnt/<mount-name>",
  extra_configs = configs)

## Azure Data Lake Storage Gen2

In [0]:
# Service-principal (OAuth) settings for the ABFS driver, built up key by key.
configs = {}
configs["fs.azure.account.auth.type"] = "OAuth"
configs["fs.azure.account.oauth.provider.type"] = "org.apache.hadoop.fs.azurebfs.oauth2.ClientCredsTokenProvider"
configs["fs.azure.account.oauth2.client.id"] = "<application-id>"
# The client secret comes from a Databricks secret scope, not from the notebook.
configs["fs.azure.account.oauth2.client.secret"] = dbutils.secrets.get(scope = "<scope-name>", key = "<key-name-for-service-credential>")
configs["fs.azure.account.oauth2.client.endpoint"] = "https://login.microsoftonline.com/<directory-id>/oauth2/token"

# Optionally, you can add <directory-name> to the source URI of your mount point.
adls_gen2_source = "abfss://<file-system-name>@<storage-account-name>.dfs.core.windows.net/"
dbutils.fs.mount(
  source = adls_gen2_source,
  mount_point = "/mnt/<mount-name>",
  extra_configs = configs)

In [0]:
# Mount using a custom access-token provider: the provider class name is read
# from the cluster's passthrough setting in the Spark configuration.
token_provider_class = spark.conf.get("spark.databricks.passthrough.adls.gen2.tokenProviderClassName")

configs = {
  "fs.azure.account.auth.type": "CustomAccessToken",
  "fs.azure.account.custom.token.provider.class": token_provider_class,
}

# Optionally, you can add <directory-name> to the source URI of your mount point.
dbutils.fs.mount(
  source = "abfss://<container-name>@<storage-account-name>.dfs.core.windows.net/",
  mount_point = "/mnt/<mount-name>",
  extra_configs = configs,
)

In [0]:
# Mount a container over wasbs, authenticating with the storage account key.
account_key_conf = {
  "fs.azure.account.key.<storage-account-name>.blob.core.windows.net": "<storage-account-key>",
}

dbutils.fs.mount(
  source = "wasbs://<container>@<storage-account-name>.blob.core.windows.net/",
  mount_point = "/mnt/<mount-name>",
  extra_configs = account_key_conf,
)

In [0]:
# Mount a container over wasbs, authenticating with a SAS key.
# Fix: the `source` argument was missing its trailing comma, which made the
# whole call a SyntaxError.
dbutils.fs.mount(
  source = "wasbs://<container>@<storage-account-name>.blob.core.windows.net/",
  mount_point = "/mnt/<mount-name>",
  extra_configs = {"fs.azure.sas.default.<storage-account-name>.blob.core.windows.net" : "<sas-key>"}
)

## Azure Storage Account

In [0]:
# Keyword arguments for the mount call, collected in one place:
# a wasbs source, the DBFS mount point, and the account-key setting.
mount_args = {
  "source": "wasbs://<container>@<storage-account-name>.blob.core.windows.net/",
  "mount_point": "/mnt/<mount-name>",
  "extra_configs": {
    "fs.azure.account.key.<storage-account-name>.blob.core.windows.net": "<storage-account-key>",
  },
}

dbutils.fs.mount(**mount_args)

In [0]:
# Mount a container over wasbs, authenticating with a SAS key.
# Fix: added the comma that was missing after the `source` argument
# (the cell previously failed with a SyntaxError).
dbutils.fs.mount(
  source = "wasbs://<container>@<storage-account-name>.blob.core.windows.net/",
  mount_point = "/mnt/<mount-name>",
  extra_configs = {"fs.azure.sas.default.<storage-account-name>.blob.core.windows.net" : "<sas-key>"}
)

# Set your connection

## Azure Data Lake Storage Gen1

In [0]:
# Session-scoped OAuth2 client-credential settings for ADLS Gen1 (fs.adl.*).
spark.conf.set("fs.adl.oauth2.access.token.provider.type", "ClientCredential")
spark.conf.set("fs.adl.oauth2.client.id", "<application-id>")
# Fix: this call was missing its closing parenthesis, so the cell did not parse.
# The credential is read from a Databricks secret scope.
spark.conf.set("fs.adl.oauth2.credential", dbutils.secrets.get(scope = "<scope-name>", key = "<key-name-for-service-credential>"))
spark.conf.set("fs.adl.oauth2.refresh.url", "https://login.microsoftonline.com/<tenant-id>/oauth2/token")

## Azure Data Lake Storage Gen2

In [0]:
# Host suffix appended to every per-account setting below.
ACCOUNT_HOST = "<storage-account-name>.dfs.core.windows.net"


def set_account_conf(setting, value):
  """Set `<setting>.<ACCOUNT_HOST>` to `value` on the Spark session configuration."""
  spark.conf.set(setting + "." + ACCOUNT_HOST, value)


# Service-principal (OAuth) authentication for the storage account.
set_account_conf("fs.azure.account.auth.type", "OAuth")
set_account_conf("fs.azure.account.oauth.provider.type", "org.apache.hadoop.fs.azurebfs.oauth2.ClientCredsTokenProvider")
set_account_conf("fs.azure.account.oauth2.client.id", "<application-id>")
# The client secret is read from a Databricks secret scope.
set_account_conf("fs.azure.account.oauth2.client.secret", dbutils.secrets.get(scope = "<scope-name>", key = "<key-name-for-service-credential>"))
set_account_conf("fs.azure.account.oauth2.client.endpoint", "https://login.microsoftonline.com/<tenant-id>/oauth2/token")

In [0]:
# Account-key authentication: read the key from a Databricks secret scope,
# then register it for the account's dfs endpoint.
storage_account_key = dbutils.secrets.get(scope = "<scope-name>", key = "<key-name-for-service-credential>")
spark.conf.set("fs.azure.account.key.<storage-account-name>.dfs.core.windows.net", storage_account_key)

In [0]:
# SAS authentication with a fixed token provider.
spark.conf.set("fs.azure.account.auth.type.<storage-account>.dfs.core.windows.net", "SAS")
spark.conf.set("fs.azure.sas.token.provider.type.<storage-account>.dfs.core.windows.net", "org.apache.hadoop.fs.azurebfs.sas.FixedSASTokenProvider")

# The SAS token itself is read from a Databricks secret scope.
sas_token = dbutils.secrets.get(scope="<scope>", key="<sas-token-key>")
spark.conf.set("fs.azure.sas.fixed.token.<storage-account>.dfs.core.windows.net", sas_token)

## Azure Storage Account

In [0]:
# Account-key authentication, with the key read from a Databricks secret scope.
# NOTE(review): this cell sits under the "Azure Storage Account" heading but the
# setting targets the dfs endpoint - confirm whether blob.core.windows.net was intended.
account_key_setting = "fs.azure.account.key.<storage-account-name>.dfs.core.windows.net"
spark.conf.set(
  account_key_setting,
  dbutils.secrets.get(scope = "<scope-name>", key = "<key-name-for-service-credential>"),
)

In [0]:
# Host suffix shared by the three SAS settings below.
storage_host = "<storage-account>.dfs.core.windows.net"

# SAS authentication with a fixed token provider; the token is read from a
# Databricks secret scope.
spark.conf.set("fs.azure.account.auth.type." + storage_host, "SAS")
spark.conf.set("fs.azure.sas.token.provider.type." + storage_host, "org.apache.hadoop.fs.azurebfs.sas.FixedSASTokenProvider")
spark.conf.set("fs.azure.sas.fixed.token." + storage_host, dbutils.secrets.get(scope="<scope>", key="<sas-token-key>"))

In [0]:
# READ / WRITE

# Use the API

## Azure Data Lake Storage Gen1

In [0]:
pip install azure-mgmt-resource
pip install azure-mgmt-datalake-store
pip install azure-datalake-store

In [0]:
from azure.datalake.store import core, lib

# Connect to Azure with a service principal.
# Fix: the placeholders were written as bare <...> tokens, which is not valid
# Python; they are now string literals to be replaced with real values.
# The application key is read from a Databricks secret scope, like the other
# cells in this notebook, instead of being pasted in clear text.
adls_credentials = lib.auth(
  tenant_id='<tenant-id>',
  client_secret=dbutils.secrets.get(scope = "<scope-name>", key = "<key-name-for-service-credential>"),
  client_id='<application-id>')
adls_name = '<adls-name>'
adls_client = core.AzureDLFileSystem(adls_credentials, store_name=adls_name)

# Print the result of listdir() called with no arguments.
print(adls_client.listdir())

## Azure Data Lake Storage Gen2

In [0]:
pip install azure-mgmt-resource

from azure.storage.filedatalake import DataLakeServiceClient

In [0]:
from azure.identity import ClientSecretCredential

# NOTE(review): tenant_id, client_id, client_secret, container_name and
# directory_name are not assigned anywhere in the visible cells - define them
# (ideally from a secret scope) before running this cell.

# get credentials
credentials = ClientSecretCredential(tenant_id=tenant_id, client_id=client_id, client_secret=client_secret)
url = "{}://{}.dfs.core.windows.net".format("https", adls_name)
service_client = DataLakeServiceClient(account_url=url, credential=credentials)

# list containers
service_client.list_file_systems()
# get directory properties
file_system = service_client.get_file_system_client(file_system=container_name)
# Fix: the call was left unclosed (SyntaxError) and never fetched the
# properties; it now matches the connection-string example.
directory_properties = file_system.get_directory_client(directory=directory_name).get_directory_properties()

In [0]:
# Fix: a connection string is not a credential object. It already carries the
# endpoint and the key, so the client is built with from_connection_string
# (the same pattern the Blob example in this notebook uses) instead of being
# passed as `credential=` next to an account URL.
service_client = DataLakeServiceClient.from_connection_string(conn_str=connection_string)

# list containers
service_client.list_file_systems()
# get directory properties
file_system = service_client.get_file_system_client(file_system=container_name)
directory_properties = file_system.get_directory_client(directory=directory_name).get_directory_properties()

## Azure Storage Account

In [0]:
pip install azure-storage

from azure.storage.blob import BlobClient

In [0]:
from azure.identity import ClientSecretCredential
# Imported here so the cell does not depend on a name no earlier cell provides.
from azure.storage.blob import BlobServiceClient

# NOTE(review): tenant_id, client_id, client_secret, storage_name and
# container_name are not assigned anywhere in the visible cells - define them
# before running this cell.

# get credentials
credentials = ClientSecretCredential(tenant_id=tenant_id, client_id=client_id, client_secret=client_secret)

url = "https://{}.blob.core.windows.net".format(storage_name)
# Fix: `account_url=.url` was a SyntaxError (stray dot before the variable).
blob_service_client = BlobServiceClient(account_url=url, credential=credentials)

# list containers
blob_service_client.list_containers(include_metadata=True)

# list blobs
container_client = blob_service_client.get_container_client(container_name)
container_client.list_blobs()

In [0]:
# Build the Blob service client from a connection string.
blob_service_client = BlobServiceClient.from_connection_string(
  conn_str=connection_string,
)

# Ask for the account's containers, including their metadata.
# The return value is neither stored nor displayed here.
blob_service_client.list_containers(
  include_metadata=True,
)

# Get a client for one container and list its blobs; as the last expression
# of the cell, the result of list_blobs() is what the notebook displays.
container_client = blob_service_client.get_container_client(
  container_name,
)
container_client.list_blobs()