In [0]:
# Expose an "env" text widget and read the selected environment from it.
dbutils.widgets.text(name="env", defaultValue="", label="Enter environment")
env = dbutils.widgets.get("env").strip()
if not env:
    # The widget defaults to an empty string; without this guard the notebook
    # would go on to build the catalog name "_catalog" and fail later with a
    # much less obvious error.
    raise ValueError('The "env" widget is empty; enter an environment (for example "dev").')
env

'dev'

In [0]:
%run "./paths"

('abfss://landing@dlsunitycat.dfs.core.windows.net/',
 'abfss://medallion@dlsunitycat.dfs.core.windows.net/bronze',
 'abfss://medallion@dlsunitycat.dfs.core.windows.net/silver',
 'abfss://medallion@dlsunitycat.dfs.core.windows.net/gold')

In [0]:
from itertools import cycle, starmap

def create_schema(catalog, schema):
    """Create ``schema`` in ``catalog`` if it does not exist yet.

    The Spark session is first switched to ``catalog``. The managed location
    is taken from the ``url`` column of ``DESCRIBE EXTERNAL LOCATION <schema>``,
    so the external location is looked up under the same name as the schema.
    Progress is reported with ``print``.

    Args:
        catalog: Name of the catalog to switch to.
        schema: Name of the schema to create; also used as the name of the
            external location to describe.
    """
    print(f"Using {catalog}")
    spark.sql(f'use catalog {catalog}')

    print(f"Creating schema {schema} if not exists in {catalog}")
    # Only the first row of the DESCRIBE result is consulted.
    location_rows = spark.sql(f'DESCRIBE EXTERNAL LOCATION {schema}').select('url').collect()
    location_url = location_rows[0].url
    print(f'Using path {location_url}')

    spark.sql(f"create schema if not exists {schema} managed location '{location_url}'")
    # Same output as printing the message followed by an empty line.
    print('Schema Created.', end='\n\n')

In [0]:
# Create one schema per layer inside the environment-specific catalog.
catalog = f'{env}_catalog'
schemas = ['bronze', 'silver', 'gold']
# A plain loop: create_schema is called for its side effects only, so there is
# no result worth collecting.
for schema in schemas:
    create_schema(catalog, schema)

Using dev_catalog
Creating schema bronze if not exists in dev_catalog
Using path abfss://medallion@dlsunitycat.dfs.core.windows.net/bronze
Schema Created.

Using dev_catalog
Creating schema silver if not exists in dev_catalog
Using path abfss://medallion@dlsunitycat.dfs.core.windows.net/silver
Schema Created.

Using dev_catalog
Creating schema gold if not exists in dev_catalog
Using path abfss://medallion@dlsunitycat.dfs.core.windows.net/gold
Schema Created.



In [0]:
def create_table(catalog, table, path, create_tbl_query):
    """Run ``create_tbl_query`` for the table ``catalog + table`` at ``path``.

    Args:
        catalog: Catalog name; prepended verbatim to ``table``.
        table: Remainder of the table identifier. It is concatenated to
            ``catalog`` with no separator, so it must carry its own leading
            separator if one is needed.
        path: Storage location handed through to ``create_tbl_query``.
        create_tbl_query: Callable invoked as
            ``create_tbl_query(full_table_name, path)``.
    """
    qualified_name = catalog + table
    print(f"Creating table {qualified_name} if not exists")
    create_tbl_query(qualified_name, path)
    # Same output as printing the message followed by an empty line.
    print('Table created.', end='\n\n')

In [0]:
def create_bronze_rates_table(table, path):
    """Create the bronze rates external Delta table if it does not exist.

    Columns: business_year, state_code, plan_id, rating_area_id, age (STRING),
    rate and date_ingested.

    Args:
        table: Table identifier interpolated into the CREATE statement.
        path: Storage location interpolated into the LOCATION clause.

    Returns:
        Whatever ``spark.sql`` returns for the statement.
    """
    # The SQL text below is kept exactly as originally written.
    return spark.sql(
        f"""
    CREATE EXTERNAL TABLE IF NOT EXISTS {table} (
        business_year INT,
        state_code STRING,
        plan_id STRING,
        rating_area_id STRING,
        age STRING,
        rate DOUBLE,
        date_ingested TIMESTAMP
    )
    USING DELTA
    LOCATION '{path}'
"""
    )

def create_bronze_plans_table(table, path):
    """Create the bronze plans external Delta table if it does not exist.

    Columns: business_year, state_code, dental_only_plan, plan_id,
    metal_level, new_plan, plan_type and date_ingested.

    Args:
        table: Table identifier interpolated into the CREATE statement.
        path: Storage location interpolated into the LOCATION clause.

    Returns:
        Whatever ``spark.sql`` returns for the statement.
    """
    # The SQL text below is kept exactly as originally written.
    return spark.sql(
        f"""
    CREATE EXTERNAL TABLE IF NOT EXISTS {table} (
        business_year INT,
        state_code STRING,
        dental_only_plan STRING,
        plan_id STRING,
        metal_level STRING,
        new_plan STRING,
        plan_type STRING,
        date_ingested TIMESTAMP
    )
    USING DELTA
    LOCATION '{path}'
"""
    )

def create_bronze_state_gdp_table(table, path):
    """Create the bronze state GDP external Delta table if it does not exist.

    Columns: GeoFIPS, GeoName, Region, TableName, LineCode,
    IndustryClassification, Description, Unit, business_year, quarter, value
    and date_ingested.

    Args:
        table: Table identifier interpolated into the CREATE statement.
        path: Storage location interpolated into the LOCATION clause.

    Returns:
        Whatever ``spark.sql`` returns for the statement.
    """
    # The SQL text below is kept exactly as originally written.
    return spark.sql(
        f"""
    CREATE EXTERNAL TABLE IF NOT EXISTS {table} (
        GeoFIPS STRING,
        GeoName STRING,
        Region STRING,
        TableName STRING,
        LineCode INT,
        IndustryClassification STRING,
        Description STRING,
        Unit STRING,
        business_year INT,
        quarter STRING,
        value DOUBLE,
        date_ingested TIMESTAMP
    )
    USING DELTA
    LOCATION '{path}'
"""
    )

def create_silver_rates_table(table, path):
    """Create the silver rates external Delta table if it does not exist.

    Columns: business_year, state_code, plan_id, rating_area_id, age (INT),
    rate, age_category and date_ingested.

    Args:
        table: Table identifier interpolated into the CREATE statement.
        path: Storage location interpolated into the LOCATION clause.

    Returns:
        Whatever ``spark.sql`` returns for the statement.
    """
    # The SQL text below is kept exactly as originally written.
    return spark.sql(
        f"""
    CREATE EXTERNAL TABLE IF NOT EXISTS {table} (
        business_year INT,
        state_code STRING,
        plan_id STRING,
        rating_area_id STRING,
        age INT,
        rate DOUBLE,
        age_category STRING,
        date_ingested TIMESTAMP
    )
    USING DELTA
    LOCATION '{path}'
"""
    )

def create_silver_plans_table(table, path):
    """Create the silver plans external Delta table if it does not exist.

    Columns: business_year, state_code, plan_id, metal_level, new_plan,
    plan_type, pandemic_era and date_ingested.

    Args:
        table: Table identifier interpolated into the CREATE statement.
        path: Storage location interpolated into the LOCATION clause.

    Returns:
        Whatever ``spark.sql`` returns for the statement.
    """
    # The SQL text below is kept exactly as originally written.
    return spark.sql(
        f"""
    CREATE EXTERNAL TABLE IF NOT EXISTS {table} (
        business_year INT,
        state_code STRING,
        plan_id STRING,
        metal_level STRING,
        new_plan STRING,
        plan_type STRING,
        pandemic_era STRING,
        date_ingested TIMESTAMP
    )
    USING DELTA
    LOCATION '{path}'
"""
    )

In [0]:
def create_gold_rates_fact_table(table, path):
    """Create the gold rates fact external Delta table if it does not exist.

    Columns: plan_id, state_code, current_year, age_category_and_year (an
    array of structs holding one DECIMAL(10,2) field per age bucket plus
    business_year) and date_ingested.

    Args:
        table: Table identifier interpolated into the CREATE statement.
        path: Storage location interpolated into the LOCATION clause.

    Returns:
        Whatever ``spark.sql`` returns for the statement.
    """
    # The SQL text below is kept exactly as originally written.
    return spark.sql(
        f"""
    CREATE EXTERNAL TABLE IF NOT EXISTS {table} (
        plan_id STRING,
        state_code STRING,
        current_year INT,
        age_category_and_year ARRAY<STRUCT<
            age_0_19 DECIMAL(10,2),
            age_20_29 DECIMAL(10,2),
            age_30_39 DECIMAL(10,2),
            age_40_49 DECIMAL(10,2),
            age_50_59 DECIMAL(10,2),
            age_60_plus DECIMAL(10,2),
            business_year INT
        >>,
        date_ingested TIMESTAMP
    )
    USING DELTA
    LOCATION '{path}'
"""
    )

def create_gold_plans_dim_table(table, path):
    """Create the gold plans dimension external Delta table if it does not exist.

    Columns: plan_id, metal_level, new_plan, plan_type, pandemic_era,
    start_year, end_year, current_year and date_ingested.

    Args:
        table: Table identifier interpolated into the CREATE statement.
        path: Storage location interpolated into the LOCATION clause.

    Returns:
        Whatever ``spark.sql`` returns for the statement.
    """
    # The SQL text below is kept exactly as originally written, including the
    # tab characters that lead some of the column lines.
    return spark.sql(
        f"""
    CREATE EXTERNAL TABLE IF NOT EXISTS {table} (
        plan_id STRING,
	    metal_level STRING,
	    new_plan STRING,
        plan_type STRING,
        pandemic_era STRING,
	    start_year INT,
	    end_year INT,
	    current_year INT,
        date_ingested TIMESTAMP
    )
    USING DELTA
    LOCATION '{path}'
"""
    )

def create_gold_state_gdp_table(table, path):
    """Create the gold state GDP external Delta table if it does not exist.

    Columns: state_code, business_year, average_gdp (DECIMAL(10,2)) and
    date_ingested.

    Args:
        table: Table identifier interpolated into the CREATE statement.
        path: Storage location interpolated into the LOCATION clause.

    Returns:
        Whatever ``spark.sql`` returns for the statement.
    """
    # The SQL text below is kept exactly as originally written.
    return spark.sql(
        f"""
    CREATE EXTERNAL TABLE IF NOT EXISTS {table} (
        state_code STRING,
        business_year INT,
        average_gdp DECIMAL(10,2),
        date_ingested TIMESTAMP
    )
    USING DELTA
    LOCATION '{path}'
"""
    )



In [0]:
# Per-layer lookup from table key to the callable that issues its
# CREATE TABLE statement.
bronze_sql = dict(
    rates=create_bronze_rates_table,
    plans=create_bronze_plans_table,
    state_gdp=create_bronze_state_gdp_table,
)

silver_sql = dict(
    rates=create_silver_rates_table,
    plans=create_silver_plans_table,
)

gold_sql = dict(
    rates_fact=create_gold_rates_fact_table,
    plans_dim_scd=create_gold_plans_dim_table,
    state_gdp=create_gold_state_gdp_table,
)

In [0]:
# Table keys to create in each layer, in creation order.
bronze_table_names = [
    'rates',
    'plans',
    'state_gdp',
]
silver_table_names = [
    'rates',
    'plans',
]
gold_table_names = [
    'rates_fact',
    'plans_dim_scd',
    'state_gdp',
]

In [0]:
# Create every table layer by layer (bronze, then silver, then gold). Each
# entry bundles a layer's table keys with the lookups for its table
# identifier, its storage path and its DDL callable.
layer_specs = (
    (bronze_table_names, bronze_tables, bronze_paths, bronze_sql),
    (silver_table_names, silver_tables, silver_paths, silver_sql),
    (gold_table_names, gold_tables, gold_paths, gold_sql),
)
for layer_names, layer_tables, layer_paths, layer_sql in layer_specs:
    for tbl in layer_names:
        create_table(catalog, layer_tables[tbl], layer_paths[tbl], layer_sql[tbl])

Creating table dev_catalog.bronze.rates_bronze if not exists
Table created.

Creating table dev_catalog.bronze.plans_bronze if not exists
Table created.

Creating table dev_catalog.bronze.state_gdp_bronze if not exists
Table created.

Creating table dev_catalog.silver.rates_silver if not exists
Table created.

Creating table dev_catalog.silver.plans_silver if not exists
Table created.

Creating table dev_catalog.gold.rates_fact if not exists
Table created.

Creating table dev_catalog.gold.plans_dim_scd if not exists
Table created.

Creating table dev_catalog.gold.state_gdp_dim if not exists
Table created.

