Use the following Azure Databricks storage setup block only if you are using Azure Databricks. You can refer to the instructions here to get started:
https://docs.microsoft.com/en-us/azure/databricks/data/data-sources/azure/adls-gen2/azure-datalake-gen2-sp-access

If you are using Synapse Spark and your data resides on the storage account attached to the Synapse Spark workspace, you can skip the storage setup section below.

In [None]:
%scala
// Storage setup for Azure Databricks: configures OAuth (service principal) access to an
// ADLS Gen2 storage account and defines `commonPath`, the abfss:// root of the container.
val storageAccountName = "<INSERT STORAGE ACCOUNT>"
val fileSystemName = "<INSERT CONTAINER NAME>"

// DFS endpoint host of the storage account; every per-account config key below ends with it.
val storageHost = s"${storageAccountName}.dfs.core.windows.net"

// Root URI of the container (no trailing slash).
val commonPath = s"abfss://${fileSystemName}@${storageHost}"

// AAD Application Details
// NOTE(review): avoid keeping a real client secret in notebook source; consider reading it
// from a Databricks secret scope (dbutils.secrets.get) instead — confirm for your workspace.
val appID = "<INSERT APP ID>"
val secret = "<INSERT SECRET>"
val tenantID = "<INSERT TENANT ID>"

// Per-account OAuth client-credentials configuration for the ABFS driver.
spark.conf.set(s"fs.azure.account.auth.type.${storageHost}", "OAuth")
spark.conf.set(
  s"fs.azure.account.oauth.provider.type.${storageHost}",
  "org.apache.hadoop.fs.azurebfs.oauth2.ClientCredsTokenProvider"
)
spark.conf.set(s"fs.azure.account.oauth2.client.id.${storageHost}", appID)
spark.conf.set(s"fs.azure.account.oauth2.client.secret.${storageHost}", secret)
spark.conf.set(
  s"fs.azure.account.oauth2.client.endpoint.${storageHost}",
  s"https://login.microsoftonline.com/${tenantID}/oauth2/token"
)

// Turn on createRemoteFileSystemDuringInitialization only while the container root is listed,
// and always switch it back off, even when the listing fails (e.g. bad credentials).
spark.conf.set("fs.azure.createRemoteFileSystemDuringInitialization", "true")
try {
  dbutils.fs.ls(commonPath + "/")
} finally {
  spark.conf.set("fs.azure.createRemoteFileSystemDuringInitialization", "false")
}

In [None]:
%scala

// Let us see how to write to a JSON file
import org.apache.spark.sql.{DataFrame, Row, SaveMode}
import org.apache.spark.sql.types.{StringType, IntegerType, StructField, StructType}

// Sample driver records: first name, middle name, last name, id, location, gender, salary.
val driverDetails = Seq(
  Row("Alice", "", "Hood", "100", "New York", "Female", 4100),
  Row("Bryan", "M", "Williams", "101", "New York", "Male", 4000),
  Row("Catherine", "Goodwin", "", "102", "California", "Female", 4300),
  Row("Daryl", "", "Jones", "103", "Florida", "Male", 5500),
  Row("Jenny", "Anne", "Simons", "104", "Arizona", "Female", 3400),
  Row("Daryl", "", "Jones", "103", "Florida", "Male", 5500)
)

// Column layout matching the Row values above; every column is nullable.
val driverSchema = StructType(Seq(
  StructField("firstname", StringType),
  StructField("middlename", StringType),
  StructField("lastname", StringType),
  StructField("id", StringType),
  StructField("location", StringType),
  StructField("gender", StringType),
  StructField("salary", IntegerType)
))

// Distribute the rows as an RDD and attach the schema to obtain the Dataframe.
val df2 = {
  val rowsRdd = spark.sparkContext.parallelize(driverDetails)
  spark.createDataFrame(rowsRdd, driverSchema)
}
df2.printSchema()
df2.show(truncate = false)

// Persist the Dataframe as JSON under <commonPath>/json/, replacing any previous output.
df2.write
  .format("json")
  .mode(SaveMode.Overwrite)
  .save(commonPath + "/json/")


In [None]:
%scala
// Let us see how to read the JSON file back into a new Dataframe

import org.apache.spark.sql.{DataFrame, Row, SaveMode}
import org.apache.spark.sql.types.{StringType, IntegerType, StructField, StructType}

// Explicit schema applied while reading, so Spark does not have to infer column types.
val driverSchema = StructType(Array(
  StructField("firstname", StringType),
  StructField("middlename", StringType),
  StructField("lastname", StringType),
  StructField("id", StringType),
  StructField("location", StringType),
  StructField("gender", StringType),
  StructField("salary", IntegerType)
))

// Load every *.json part file from the json folder using that schema.
val dfJSON = spark.read
  .schema(driverSchema)
  .format("json")
  .load(commonPath + "/json/*.json")

// Inspect the schema and the contents of what was read
dfJSON.printSchema()
dfJSON.show(truncate = false)

In [None]:
%scala
import org.apache.spark.sql.{DataFrame, Row, SaveMode}
import org.apache.spark.sql.types.{StringType, IntegerType, StructField, StructType}

// For simple structures no schema needs to be supplied: Spark infers it from the JSON files.
val dfJSON = spark.read
  .format("json")
  .load(commonPath + "/json/*.json")
dfJSON.printSchema()
dfJSON.show(truncate = false)

// Print the schema that Spark inferred
val schema = dfJSON.schema
println(schema.toString)