Use the following Azure Databricks storage setup block only if you are using Azure Databricks. You can refer to the instructions here to get started:
https://docs.microsoft.com/en-us/azure/databricks/data/data-sources/azure/adls-gen2/azure-datalake-gen2-sp-access

If you are using Synapse Spark and your data resides on the storage account attached to the Synapse Spark workspace, you can skip the storage setup section below.

In [None]:
%scala
// Storage setup: configures OAuth (service principal) access to an ADLS Gen2
// account for this Spark session and exposes `commonPath`, the abfss:// root
// of the container.
val storageAccountName = "<INSERT STORAGE ACCOUNT>"
val fileSystemName = "<INSERT CONTAINER NAME>"

// DFS endpoint host of the storage account; every per-account config key
// below is suffixed with it.
val accountHost = storageAccountName + ".dfs.core.windows.net"

// Root URI of the container (no trailing slash)
val commonPath = "abfss://" + fileSystemName + "@" + accountHost

// AAD Application Details
// NOTE(review): the client secret is a plain-text literal here; consider
// reading it from a secret scope (e.g. dbutils.secrets.get) instead of
// committing it to the notebook.
val appID = "<INSERT APP ID>"
val secret = "<INSERT SECRET>"
val tenantID = "<INSERT TENANT ID>"

// Authenticate against the storage account with the AAD application's
// client credentials.
spark.conf.set("fs.azure.account.auth.type." + accountHost, "OAuth")
spark.conf.set("fs.azure.account.oauth.provider.type." + accountHost, "org.apache.hadoop.fs.azurebfs.oauth2.ClientCredsTokenProvider")
spark.conf.set("fs.azure.account.oauth2.client.id." + accountHost, appID)
spark.conf.set("fs.azure.account.oauth2.client.secret." + accountHost, secret)
spark.conf.set("fs.azure.account.oauth2.client.endpoint." + accountHost, "https://login.microsoftonline.com/" + tenantID + "/oauth2/token")

// The create-during-initialization flag is switched on only around the first
// listing of the container root and switched off again right after.
// Presumably this lets the ABFS driver create the container if it does not
// exist yet - confirm against the ABFS driver documentation.
spark.conf.set("fs.azure.createRemoteFileSystemDuringInitialization", "true")
dbutils.fs.ls(commonPath + "/")
spark.conf.set("fs.azure.createRemoteFileSystemDuringInitialization", "false")

In [None]:
%scala
import org.apache.spark.sql.{DataFrame, Row, SaveMode}
import org.apache.spark.sql.types.{StringType, IntegerType, StructField, StructType}

// Location of the Driver Delta table, directly under the container root
val driverDeltaPath = s"$commonPath/delta"

// Schema of the sample Driver data: four string columns and an integer salary
val driverSchema = StructType(Seq(
  StructField("driverID", StringType),
  StructField("name", StringType),
  StructField("license", StringType),
  StructField("gender", StringType),
  StructField("salary", IntegerType)
))

// Sample rows, in the same column order as driverSchema
val driverData = Seq(
  Row("200", "Alice", "A224455", "Female", 3000),
  Row("202", "Bryan", "B992244", "Male", 4000),
  Row("204", "Catherine", "C887733", "Female", 4000),
  Row("208", "Daryl", "D229988", "Male", 3000),
  Row("212", "Jenny", "J663300", "Female", 5000)
)

// Build a DataFrame from the sample rows and the explicit schema
val driverWriteDF = {
  val driverRows = spark.sparkContext.parallelize(driverData)
  spark.createDataFrame(driverRows, driverSchema)
}

// Persist the sample data in Delta format, replacing anything already at the path
driverWriteDF.write
  .format("delta")
  .mode("overwrite")
  .save(driverDeltaPath)

// Load the Delta location back into a DataFrame
val driverDF: DataFrame =
  spark.read
    .format("delta")
    .load(driverDeltaPath)

// Print the rows that were read back so they can be checked by eye
driverDF.show()

// Register the Delta location as table `Driver` (no-op if it already exists),
// then query it through Spark SQL
spark.sql(s"CREATE TABLE IF NOT EXISTS Driver USING DELTA LOCATION '$driverDeltaPath'")
spark.sql("SELECT * FROM Driver").show()


In [None]:
%scala
// Compact the Delta table's data files with the OPTIMIZE command in Spark SQL,
// addressing the table by its storage path.
spark.sql(s"OPTIMIZE delta.`$commonPath/delta`")