# <img src ='https://airsblobstorage.blob.core.windows.net/airstream/databricks.png' width="50px"> Mounting ADLS Gen2

[First Create Your Key Vault Secret Scope](https://docs.microsoft.com/en-us/azure/databricks/security/secrets/secret-scopes)

[Accessing Azure Data Lake Storage Gen2 and Blob Storage with Azure Databricks](https://docs.microsoft.com/en-us/azure/databricks/data/data-sources/azure/azure-storage)

In [0]:
# Prerequisite: create your Key Vault secret scope before running the cells below.
# Tutorial: https://docs.microsoft.com/en-us/azure/databricks/security/secrets/secret-scopes

# Open the URL below to create the secret scope
# (replace YOURDATABRICKSINSTANCE with the name of your own Databricks instance):
# https://YOURDATABRICKSINSTANCE.azuredatabricks.net#secrets/createScope

In [0]:
# The values in this cell are specific to the author's environment; replace them with your own.
# Reference: https://docs.microsoft.com/en-us/azure/databricks/data/data-sources/azure/azure-storage

storage_account_name = 'YOURDATALAKE'

# Service-principal credentials are pulled from the "akv-bck-scope" secret scope
# so that they never appear in the notebook source.
client_id = dbutils.secrets.get(scope="akv-bck-scope", key="databricks-app-client-id")
tenant_id = dbutils.secrets.get(scope="akv-bck-scope", key="databricks-app-tenant-id")
client_secret = dbutils.secrets.get(scope="akv-bck-scope", key="databricks-app-client-secret")

# Hadoop ABFS settings for OAuth client-credentials authentication;
# this dict is later passed to dbutils.fs.mount as extra_configs.
configs = {
    "fs.azure.account.auth.type": "OAuth",
    "fs.azure.account.oauth.provider.type": "org.apache.hadoop.fs.azurebfs.oauth2.ClientCredsTokenProvider",
    "fs.azure.account.oauth2.client.id": client_id,
    "fs.azure.account.oauth2.client.secret": client_secret,
    "fs.azure.account.oauth2.client.endpoint": f"https://login.microsoftonline.com/{tenant_id}/oauth2/token",
}


# Storage accounts referenced by the author:
#   YOURDATALAKE                - east - LRS
#   YOURDATALAKE-GEO-REDUNDANT  - east - RA-GRS
#   YOURDATALAKE-WEST           - west - LRS

In [0]:
# Unmount the data lake mount point, but only when it is currently mounted.
# The path is derived from storage_account_name so it always matches the mount
# created further down, and the guard lets the notebook run top to bottom on a
# workspace where nothing has been mounted yet (an unconditional unmount raises).
mount_point = f"/mnt/{storage_account_name}"
if any(mount.mountPoint == mount_point for mount in dbutils.fs.mounts()):
    dbutils.fs.unmount(mount_point)

/mnt/airdatalake has been unmounted.
Out[14]: True

In [0]:
# Mount one container of the storage account through the service principal
# whose OAuth settings are held in `configs`.
container_name = "managed-flight"
source_uri = f"abfss://{container_name}@{storage_account_name}.dfs.core.windows.net/"
mount_point = f"/mnt/{storage_account_name}"

dbutils.fs.mount(source=source_uri, mount_point=mount_point, extra_configs=configs)

Out[16]: True

In [0]:
# List everything under /mnt/ (same listing as the `%fs ls` magic, via dbutils).
display(dbutils.fs.ls("/mnt/"))

path,name,size,modificationTime
dbfs:/mnt/airdatalake/,airdatalake/,0,0
dbfs:/mnt/airdatalakegrs/,airdatalakegrs/,0,0
dbfs:/mnt/airdatalakewest/,airdatalakewest/,0,0
dbfs:/mnt/datalake/,datalake/,0,0
dbfs:/mnt/training/,training/,0,0


In [0]:
# List the files and folders inside the data lake mount.
# The path is built from storage_account_name so that it follows the mount
# point used by dbutils.fs.mount instead of a separately hard-coded copy.
display(
  dbutils.fs.ls(f"/mnt/{storage_account_name}")
)

path,name,size,modificationTime
dbfs:/mnt/airdatalake/flightdeltalake/,flightdeltalake/,0,1656092300000


In [0]:
# List the part files of the airlines sample dataset (same listing as `%fs ls`, via dbutils).
display(dbutils.fs.ls("dbfs:/databricks-datasets/airlines"))

path,name,size,modificationTime
dbfs:/databricks-datasets/airlines/README.md,README.md,1089,1454697889000
dbfs:/databricks-datasets/airlines/_SUCCESS,_SUCCESS,0,1436493184000
dbfs:/databricks-datasets/airlines/part-00000,part-00000,67108879,1436493184000
dbfs:/databricks-datasets/airlines/part-00001,part-00001,67108862,1436493185000
dbfs:/databricks-datasets/airlines/part-00002,part-00002,67108930,1436493185000
dbfs:/databricks-datasets/airlines/part-00003,part-00003,67108804,1436493186000
dbfs:/databricks-datasets/airlines/part-00004,part-00004,67108908,1436493186000
dbfs:/databricks-datasets/airlines/part-00005,part-00005,67108890,1436493187000
dbfs:/databricks-datasets/airlines/part-00006,part-00006,67108825,1436493187000
dbfs:/databricks-datasets/airlines/part-00007,part-00007,67108880,1436493187000


In [0]:
# Step 1: read a single part file, letting Spark treat the first line as a
# header and infer the column types from the data.
df = (
    spark.read
    .format("csv")
    .option("sep", ",")
    .option("inferSchema", "true")
    .option("header", "true")
    .load("/databricks-datasets/airlines/part-00000")
)

# Step 2: keep the inferred schema so the full read does not have to infer it again.
csv_schema = df.schema

# Step 3: read every part file using that schema.
# No header option is set here, so no line is skipped as a header in any file.
df = (
    spark.read
    .format("csv")
    .schema(csv_schema)
    .load("/databricks-datasets/airlines/part-*")
)

In [0]:
# Keep only the rows whose Year column holds a value.
# The multi-file read applies no header option, so header lines presumably
# arrive as rows with a null Year and are removed here - TODO confirm.
# The former trailing `.drop()` was called with no column names, which drops
# nothing, so it has been removed.
df = df.filter(df.Year.isNotNull())

In [0]:
# Preview the DataFrame as a table (this rich output only renders inside Databricks).
display(df)

Year,Month,DayofMonth,DayOfWeek,DepTime,CRSDepTime,ArrTime,CRSArrTime,UniqueCarrier,FlightNum,TailNum,ActualElapsedTime,CRSElapsedTime,AirTime,ArrDelay,DepDelay,Origin,Dest,Distance,TaxiIn,TaxiOut,Cancelled,CancellationCode,Diverted,CarrierDelay,WeatherDelay,NASDelay,SecurityDelay,LateAircraftDelay,IsArrDelayed,IsDepDelayed
2001,8,2,4,,1047,,1222,AA,1056,�NKNO�,,95,,,,MCI,ORD,403,0,0,1,,0,,,,,,YES,YES
2001,8,3,5,1048.0,1047,1210.0,1222,AA,1056,N274A1,82.0,95,66.0,-12.0,1.0,MCI,ORD,403,6,10,0,,0,,,,,,NO,YES
2001,8,4,6,1043.0,1047,1159.0,1222,AA,1056,N513A1,76.0,95,61.0,-23.0,-4.0,MCI,ORD,403,4,11,0,,0,,,,,,NO,NO
2001,8,5,7,1043.0,1047,1203.0,1222,AA,1056,N532A1,80.0,95,65.0,-19.0,-4.0,MCI,ORD,403,6,9,0,,0,,,,,,NO,NO
2001,8,6,1,1045.0,1047,1159.0,1222,AA,1056,N521A1,74.0,95,62.0,-23.0,-2.0,MCI,ORD,403,4,8,0,,0,,,,,,NO,NO
2001,8,7,2,1047.0,1047,1208.0,1222,AA,1056,N417A1,81.0,95,65.0,-14.0,0.0,MCI,ORD,403,6,10,0,,0,,,,,,NO,NO
2001,8,8,3,1047.0,1047,1203.0,1222,AA,1056,N440A1,76.0,95,60.0,-19.0,0.0,MCI,ORD,403,7,9,0,,0,,,,,,NO,NO
2001,8,9,4,1054.0,1047,1224.0,1222,AA,1056,N483A1,90.0,95,66.0,2.0,7.0,MCI,ORD,403,7,17,0,,0,,,,,,YES,YES
2001,8,10,5,1052.0,1047,1205.0,1222,AA,1056,N431A1,73.0,95,53.0,-17.0,5.0,MCI,ORD,403,5,15,0,,0,,,,,,NO,YES
2001,8,11,6,1045.0,1047,1205.0,1222,AA,1056,N424A1,80.0,95,54.0,-17.0,-2.0,MCI,ORD,403,16,10,0,,0,,,,,,NO,NO


Output can only be rendered in Databricks

In [0]:
# Expect this to take a few minutes: counting the rows makes Spark execute the DAG built so far.
df.count()

Out[7]: 1235349690

In [0]:
%sql
-- Reset step: remove the flight_delta database if it exists.
-- CASCADE also drops any tables still registered in it; without CASCADE the
-- default RESTRICT behaviour makes this statement fail on a non-empty database.
DROP DATABASE IF EXISTS flight_delta CASCADE

In [0]:
%sql
-- Reset step: remove the table if it is still registered.
-- IF EXISTS keeps this cell from failing when the table (or its database) is already gone.
DROP TABLE IF EXISTS flight_delta.FlightDeltaLake

In [0]:
%sql
-- Create the flight_delta database (if missing) with its storage location on the mounted data lake.
CREATE DATABASE IF NOT EXISTS flight_delta
LOCATION "/mnt/YOURDATALAKE"  -- location of the Delta Lake. NOTE(review): the original note called this "unmanaged" because a location is given - confirm; a database LOCATION presumably only sets where its tables are stored by default