Use the following Azure Databricks storage setup block only if you are using Azure Databricks. You can refer to the instructions here to get started:
https://docs.microsoft.com/en-us/azure/databricks/data/data-sources/azure/adls-gen2/azure-datalake-gen2-sp-access

If you are using Synapse Spark and your data resides in the storage account attached to the Synapse Spark workspace, you can skip the storage setup section below.

In [None]:
%scala
// Configures OAuth (service principal) access to an ADLS Gen2 account for this
// Spark session and exposes `commonPath`, the abfss:// root of the container.

// Storage account and container (file system) that hold the data.
val storageAccountName = "<INSERT STORAGE ACCOUNT>"
val fileSystemName = "<INSERT CONTAINER NAME>"

// Host name of the account's DFS endpoint; every per-account config key below ends with it.
val storageHost = s"${storageAccountName}.dfs.core.windows.net"

// Root URI of the container; no trailing slash, so append "/<relative path>" when using it.
val commonPath = s"abfss://${fileSystemName}@${storageHost}"

// AAD Application Details
// NOTE: do not commit a real client secret in a notebook. On Databricks, prefer
// reading it from a secret scope, e.g. dbutils.secrets.get(scope = "<scope>", key = "<key>").
val appID = "<INSERT APP ID>"
val secret = "<INSERT SECRET>"
val tenantID = "<INSERT TENANT ID>"

// Authenticate against this storage account with the OAuth client-credentials flow.
spark.conf.set(s"fs.azure.account.auth.type.${storageHost}", "OAuth")
spark.conf.set(s"fs.azure.account.oauth.provider.type.${storageHost}", "org.apache.hadoop.fs.azurebfs.oauth2.ClientCredsTokenProvider")
spark.conf.set(s"fs.azure.account.oauth2.client.id.${storageHost}", appID)
spark.conf.set(s"fs.azure.account.oauth2.client.secret.${storageHost}", secret)
spark.conf.set(s"fs.azure.account.oauth2.client.endpoint.${storageHost}", s"https://login.microsoftonline.com/${tenantID}/oauth2/token")

// Enable file-system creation only for the first access below (so that listing the
// root also initializes the container if needed), then switch the flag back off.
spark.conf.set("fs.azure.createRemoteFileSystemDuringInitialization", "true")
dbutils.fs.ls(s"${commonPath}/")
spark.conf.set("fs.azure.createRemoteFileSystemDuringInitialization", "false")

In [None]:
%python
import pyspark

# Demonstrates the DataFrame partitioning APIs (repartition, coalesce,
# repartitionByRange) and a partitioned Parquet write on a small sample dataset.

# Variables defined in a %scala cell are not visible from %python, so the
# output location is built here instead of relying on the Scala `commonPath`.
storageAccountName = "<INSERT STORAGE ACCOUNT>"
fileSystemName = "<INSERT CONTAINER NAME>"
commonPath = "abfss://" + fileSystemName + "@" + storageAccountName + ".dfs.core.windows.net"

# Column names, in the same order as the fields of each tuple in driverData.
columnNames = ["name","license","gender","salary"]
driverData = [
  ('Alice', 'A224455', 'Female', 3000),
  ('Bryan','B992244','Male',4000),
  ('Catherine','C887733','Female',2000),
  ('Daryl','D229988','Male',3000),
  ('Jenny','J663300','Female', 6000)
]

# Create the Dataframe
df = spark.createDataFrame(data=driverData, schema=columnNames)
print("Default Partitions: " + str(df.rdd.getNumPartitions()))

# Using repartition: redistributes the rows into exactly 3 partitions
repartitionDF = df.repartition(3)
print("Repartition Partitions: " + str(repartitionDF.rdd.getNumPartitions()))

# Using coalesce: asks for at most 2 partitions; coalesce only reduces the
# partition count, so the printed value can be lower than 2
coalesceDF = df.coalesce(2)
print("Coalesce Partitions: " + str(coalesceDF.rdd.getNumPartitions()))

# Using repartitionByRange: range-partitions the rows on 'salary' into 1 partition
repartitionRangeDF = df.repartitionByRange(1, 'salary')
print("Range Partitions: " + str(repartitionRangeDF.rdd.getNumPartitions()))

# You can also use partitionBy and write to files: the output is laid out in
# gender=<value>/salary=<value>/ sub-directories, and "overwrite" replaces
# whatever already exists at the target path
df.write.partitionBy("gender","salary").mode("overwrite").parquet(commonPath + "/parquet/driver/partition/")
