### 3 ways to connect to Azure Blob Storage
1. Microsoft Entra ID service principal
2. Shared access signatures (SAS)
3. Account Access keys

### 1. Service Principal

In [0]:
# Create the variables used by the service-principal cells below.
# Uncomment each line and replace the placeholder with a real value before running.
# Prefer reading service_credential from a secret scope rather than hardcoding it here.

# container_name = 'container_name'
# storage_account_name = 'storage_account_name'
# application_id = 'application_id'
# service_credential = 'service_credential'
# directory_id = 'directory_id'

In [0]:
# Configure OAuth (client-credentials) access to the storage account's ABFS endpoint
# (dfs.core.windows.net) for this Spark session, using the service principal's
# application ID and secret.
spark.conf.set(f"fs.azure.account.auth.type.{storage_account_name}.dfs.core.windows.net", "OAuth")
spark.conf.set(f"fs.azure.account.oauth.provider.type.{storage_account_name}.dfs.core.windows.net", "org.apache.hadoop.fs.azurebfs.oauth2.ClientCredsTokenProvider")
spark.conf.set(f"fs.azure.account.oauth2.client.id.{storage_account_name}.dfs.core.windows.net", application_id)
spark.conf.set(f"fs.azure.account.oauth2.client.secret.{storage_account_name}.dfs.core.windows.net", service_credential)
# The endpoint value must be an f-string: without the prefix the literal text
# "{directory_id}" is used as the tenant and token requests fail.
spark.conf.set(f"fs.azure.account.oauth2.client.endpoint.{storage_account_name}.dfs.core.windows.net", f"https://login.microsoftonline.com/{directory_id}/oauth2/token")


In [0]:
# The OAuth settings in this section are keyed on the ABFS endpoint (dfs.core.windows.net),
# so the data has to be addressed with the abfss:// scheme. A wasbs:// path on the blob
# endpoint does not use the service-principal credentials.
file_path = f"abfss://{container_name}@{storage_account_name}.dfs.core.windows.net/"

# Not the last expression in the cell, so display() is needed to render the listing.
display(dbutils.fs.ls(file_path))

df = spark.read.csv(file_path, header=True, inferSchema=True)
df.show()

In [0]:
# Mount the container through the service principal so it can be browsed under DBFS.
sp_mount_point = "/mnt/sp/movies"

# OAuth settings handed to the mount. Mount configs use the un-suffixed key names
# (no storage-account host), unlike the session-level spark.conf keys.
configs = {
    "fs.azure.account.auth.type": "OAuth",
    "fs.azure.account.oauth.provider.type": "org.apache.hadoop.fs.azurebfs.oauth2.ClientCredsTokenProvider",
    "fs.azure.account.oauth2.client.id": application_id,
    "fs.azure.account.oauth2.client.secret": service_credential,
    "fs.azure.account.oauth2.client.endpoint": f"https://login.microsoftonline.com/{directory_id}/oauth2/token",
}

# Mounting a path that is already mounted raises, so only mount when it is missing.
# This keeps the cell safe to re-run.
if not any(m.mountPoint == sp_mount_point for m in dbutils.fs.mounts()):
    dbutils.fs.mount(
        source=f"abfss://{container_name}@{storage_account_name}.dfs.core.windows.net/",
        mount_point=sp_mount_point,
        extra_configs=configs
    )

# Verify Mount: list the same path that was mounted above.
display(dbutils.fs.ls(sp_mount_point))

### 2. SAS Token

In [0]:
# Variables used by the SAS cells below.
# Uncomment each line and replace the placeholder with a real value before running.
# Prefer reading sas_token from a secret scope rather than hardcoding it here.
# storage_account_name = 'storage_account_name'
# container_name = 'container_name'
# sas_token = "sas_token"

In [0]:
# Session-level SAS settings, applied in order: three for the ABFS (dfs) endpoint
# and one container-scoped entry for the WASB (blob) endpoint.
sas_settings = {
    f"fs.azure.account.auth.type.{storage_account_name}.dfs.core.windows.net": "SAS",
    f"fs.azure.sas.token.provider.type.{storage_account_name}.dfs.core.windows.net": "org.apache.hadoop.fs.azurebfs.sas.FixedSASTokenProvider",
    f"fs.azure.sas.fixed.token.{storage_account_name}.dfs.core.windows.net": sas_token,
    f"fs.azure.sas.{container_name}.{storage_account_name}.blob.core.windows.net": sas_token,
}

for conf_key, conf_value in sas_settings.items():
    spark.conf.set(conf_key, conf_value)


In [0]:
# File Path
file_path = f"wasbs://{container_name}@{storage_account_name}.blob.core.windows.net/movies.csv"

# Read CSV File
# Authentication comes from the fs.azure.sas.<container>.<account> session setting
# configured earlier in this section. `storage_options` is a pandas/fsspec argument,
# not a Spark reader option, so it is not passed here (it would only copy the token
# into the reader's option map as an unused string).
df = spark.read.format("csv").option("header", "true").load(file_path)

df.show()

+---+------+--------------------+--------------------+------------+----------+------------+----------+
|_c0|    id|               title|            overview|release_date|popularity|vote_average|vote_count|
+---+------+--------------------+--------------------+------------+----------+------------+----------+
|  0| 19404|Dilwale Dulhania ...|Raj is a rich, ca...|  1995-10-20|    18.433|         8.7|      2763|
|  1|724089|Gabriel's Inferno...|Professor Gabriel...|  2020-07-31|     8.439|         8.7|      1223|
|  2|   278|The Shawshank Red...|Framed in the 194...|  1994-09-23|     65.57|         8.7|     18637|
|  3|   238|       The Godfather|Spanning the year...|  1972-03-14|    63.277|         8.7|     14052|
|  4|761053|Gabriel's Inferno...|The final part of...|  2020-11-19|    26.691|         8.7|       773|
|  5|696374|   Gabriel's Inferno|An intriguing and...|  2020-05-29|     10.51|         8.7|      1993|
|  6|791373|Zack Snyder's Jus...|Determined to ens...|  2021-03-18|  7337

In [0]:
# Mount the container under DBFS using the SAS token.
mountpointname = '/mnt/sas/movies'

# Mounting a path that is already mounted raises. Drop any existing mount first so the
# cell can be re-run and always picks up the current SAS token.
if any(m.mountPoint == mountpointname for m in dbutils.fs.mounts()):
    dbutils.fs.unmount(mountpointname)

dbutils.fs.mount(
    source=f"wasbs://{container_name}@{storage_account_name}.blob.core.windows.net",
    mount_point=mountpointname,
    extra_configs={
        f"fs.azure.sas.{container_name}.{storage_account_name}.blob.core.windows.net": sas_token
    }
)

True

### 3. Account Access keys

In [0]:
# Placeholders for the account-key cells; uncomment and fill in before running.
# ContainerName = 'ContainerName'
# azure_blobstorage_name = 'azure_blobstorage_name'
# mountpointname = '/mnt/acc_key/movies'
# secret_key = 'secret_key'

# Register the storage account access key for the blob endpoint in this Spark session.
account_key_conf = f"fs.azure.account.key.{azure_blobstorage_name}.blob.core.windows.net"
spark.conf.set(account_key_conf, secret_key)

Without Mounting

In [0]:
# Read movies.csv straight from the blob endpoint (no mount), then print the first rows.
movies_csv_path = f"wasbs://{ContainerName}@{azure_blobstorage_name}.blob.core.windows.net/movies.csv"
df = spark.read.csv(
    movies_csv_path,
    header=True,
    inferSchema=True,
)
df.show()

+---+------+--------------------+--------------------+------------+----------+------------+----------+
|_c0|    id|               title|            overview|release_date|popularity|vote_average|vote_count|
+---+------+--------------------+--------------------+------------+----------+------------+----------+
|  0| 19404|Dilwale Dulhania ...|Raj is a rich, ca...|  1995-10-20|    18.433|         8.7|      2763|
|  1|724089|Gabriel's Inferno...|Professor Gabriel...|  2020-07-31|     8.439|         8.7|      1223|
|  2|   278|The Shawshank Red...|Framed in the 194...|  1994-09-23|     65.57|         8.7|     18637|
|  3|   238|       The Godfather|Spanning the year...|  1972-03-14|    63.277|         8.7|     14052|
|  4|761053|Gabriel's Inferno...|The final part of...|  2020-11-19|    26.691|         8.7|       773|
|  5|696374|   Gabriel's Inferno|An intriguing and...|  2020-05-29|     10.51|         8.7|      1993|
|  6|791373|Zack Snyder's Jus...|Determined to ens...|  2021-03-18|  7337

Using Mount Point

In [0]:
# Remount the container at `mountpointname` using the storage account access key.
# Unmount the path that is about to be mounted (not a hard-coded one), and only when
# it is currently mounted, because dbutils.fs.unmount raises on a path that is not.
if any(m.mountPoint == mountpointname for m in dbutils.fs.mounts()):
    dbutils.fs.unmount(mountpointname)

dbutils.fs.mount(
    source=f"wasbs://{ContainerName}@{azure_blobstorage_name}.blob.core.windows.net",
    mount_point=mountpointname,
    extra_configs={f"fs.azure.account.key.{azure_blobstorage_name}.blob.core.windows.net": secret_key}
)



In [0]:
# List the path that was mounted in the previous cell. A hard-coded "/mnt/movies"
# only works when mountpointname happens to have that exact value.
display(dbutils.fs.ls(mountpointname))

