Set up the silver external location in the Unity Catalog UI

In [0]:
# Create the Silver layer directory structure for edw, mirroring the Bronze layer.
# silver_path starts as the container root and is extended with silver_home below;
# later cells rely on the extended value.
silver_path    = 'abfss://silver@datalakeselectivaproject.dfs.core.windows.net/' # redefined later
silver_home    = 'edw'
bronze_path    = 'abfss://bronze@datalakeselectivaproject.dfs.core.windows.net/'
expected_dirs  = ['channels/', 'costs/', 'customers/', 'products/', 'promotions/', 'sales/', 'supplementary_demographics/', 'times/']

bronze_dirs    = [d.name for d in dbutils.fs.ls(bronze_path)]
ls_silver_dirs = [d.name for d in dbutils.fs.ls(silver_path)]

# Create the home dir whenever it is missing, not only when the container is
# completely empty: otherwise an unrelated directory in the container would
# skip the creation and the listing of silver_path below would fail.
# Directory names returned by dbutils.fs.ls carry a trailing '/'.
if silver_home + '/' not in ls_silver_dirs:
    print("Creating Silver Home directory in silver container")
    dbutils.fs.mkdirs(silver_path + silver_home)

silver_path = silver_path + silver_home + '/'
# Snapshot taken BEFORE the loop below creates anything; it is not refreshed here.
silver_dirs = [d.name for d in dbutils.fs.ls(silver_path)]

# Mirror every expected Bronze directory that does not yet exist in Silver.
for d in bronze_dirs:
    if d in expected_dirs and d not in silver_dirs:
        print('Creating directory:'   + silver_path + d)
        dbutils.fs.mkdirs(silver_path + d)
    else:
        print(f'Directory {d} either Exists or not expected, skipping')

Creating Silver Home directory in silver container
Creating directory:abfss://silver@datalakeselectivaproject.dfs.core.windows.net/edw/channels/
Creating directory:abfss://silver@datalakeselectivaproject.dfs.core.windows.net/edw/costs/
Creating directory:abfss://silver@datalakeselectivaproject.dfs.core.windows.net/edw/customers/
Creating directory:abfss://silver@datalakeselectivaproject.dfs.core.windows.net/edw/products/
Creating directory:abfss://silver@datalakeselectivaproject.dfs.core.windows.net/edw/promotions/
Creating directory:abfss://silver@datalakeselectivaproject.dfs.core.windows.net/edw/sales/
Creating directory:abfss://silver@datalakeselectivaproject.dfs.core.windows.net/edw/supplementary_demographics/
Creating directory:abfss://silver@datalakeselectivaproject.dfs.core.windows.net/edw/times/


In [0]:
# Show the Bronze directory names listed in the setup cell.
print(bronze_dirs)

['channels/', 'costs/', 'customers/', 'products/', 'promotions/', 'sales/', 'supplementary_demographics/', 'times/']


In [0]:
# Show the Silver directory snapshot. It is listed before the create loop runs,
# so it does not include directories created during that same run.
print(silver_dirs)

[]


In [0]:
%sql 
-- Create the edw schema under `selectiva-project` if it does not already exist.
CREATE SCHEMA IF NOT EXISTS `selectiva-project`.edw;


Defining the schema for the Silver layer cloud directories using the schema inferred from the Bronze layer, rather than hardcoding it, to keep the code efficient and reusable.

In [0]:
# Schema name used in the table identifier and as the table-name suffix in the cells below.
target_schema_name = 'edw'

In [0]:
# Initialise an empty Delta table in Silver for every dimension, reusing the
# schema of the matching Bronze Delta table instead of hardcoding it.
dim_list = ['channels/', 'customers/', 'products/', 'promotions/','supplementary_demographics/', 'times/']
bronze_dirs    = [d.name for d in dbutils.fs.ls(bronze_path)]
silver_dirs = [d.name for d in dbutils.fs.ls(silver_path)]

for d in dim_list:
    # Stop at the first dimension whose directory is missing on either side.
    if d not in bronze_dirs or d not in silver_dirs:
        print(f"Something went wrong, please check {d} directory")
        break
    else:
        # The schema comes from the Bronze Delta table itself; the former
        # header/inferSchema reader options are not used here.
        df = spark.read.format('delta').load(bronze_path + d)
        schema = df.schema
        e_df   = spark.createDataFrame([], schema)
        existing_files = len(dbutils.fs.ls(silver_path + d))
        # e_df is built from an empty list, so it is always empty; only the
        # target location needs checking (no Spark count() job required).
        if existing_files == 0:
            target_table_name = d.split('/')[0] + '_' + target_schema_name
            target_dir = silver_path + d
            e_df.write.format('delta').mode('overwrite').save(target_dir)
            spark.sql(f"""
                  CREATE TABLE IF NOT EXISTS `selectiva-project`.{target_schema_name}.{target_table_name}
                  USING DELTA 
                  LOCATION '{target_dir}'
                    """)

        else:
            # Location already has content: leave it untouched.
            print(f"Data found while defining schema for silver location {silver_path + d} ")


In [0]:
# Initialise an empty Delta table in Silver for every fact table, reusing the
# schema of the matching Bronze Delta table.
# Relies on bronze_dirs / silver_dirs listed in the previous cell.
fact_tables = ['costs/','sales/']
for f in fact_tables:
    # Stop at the first fact table whose directory is missing on either side.
    if f not in bronze_dirs or f not in silver_dirs:
        # Report the fact directory being checked (previously printed the stale
        # loop variable `d` left over from the dimension cell).
        print(f"Something went wrong, please check {f} directory")
        break
    else:
        # The schema comes from the Bronze Delta table itself; the former
        # header/inferSchema reader options are not used here.
        df = spark.read.format('delta').load(bronze_path + f)
        schema = df.schema
        e_df   = spark.createDataFrame([], schema)
        existing_files = len(dbutils.fs.ls(silver_path + f))
        # e_df is built from an empty list, so it is always empty; only the
        # target location needs checking (no Spark count() job required).
        if existing_files == 0:
            target_table_name = f.split('/')[0] + '_' + target_schema_name
            target_dir = silver_path + f
            e_df.write.format('delta').mode('overwrite').save(target_dir)
            spark.sql(f"""
                  CREATE TABLE IF NOT EXISTS `selectiva-project`.{target_schema_name}.{target_table_name}
                  USING DELTA 
                  LOCATION '{target_dir}'
                    """)
        else:
            # Location already has content: leave it untouched.
            print(f"Data found while defining schema for silver location {silver_path + f} ")
