In [0]:
# Name of the Azure storage account this notebook works against.
storage_account_name = "capstone2v4c"


# Storage access key, fetched from a Databricks secret scope so the credential
# itself never appears in the notebook source.
# NOTE(review): this value is not referenced in the cells visible here —
# presumably it is applied to the Spark configuration elsewhere; confirm, or
# drop the lookup if it is unused.
storage_account_key = dbutils.secrets.get(scope="capstone_team2_scope", key="adls-access-key")

In [0]:
from pyspark import pipelines as dp
from pyspark.sql.functions import current_timestamp

# Target catalog and schema; together they prefix every bronze table name.
catalog = "maven_uc"
schema = "bronze_dlt"

# Root of the raw-data container. Dataset folders and the schema-tracking
# directories are both resolved relative to this path.
base_path = "abfss://raw-data@capstone2v4c.dfs.core.windows.net"

# Dataset folder names under base_path; one bronze table is defined per entry.
# NOTE(review): "Calenders" looks like a misspelling of "Calendars", but it is
# used verbatim as the source folder and table name — confirm against storage
# before changing it.
datasets = [
    "Transactions",
    "Returns",
    "Calenders",
    "Stores",
    "Regions",
]

def bronze_table_name(file_name):
    """Return the fully qualified bronze table name for a dataset.

    The dataset name is lower-cased and prefixed with the module-level
    ``catalog`` and ``schema``, giving ``<catalog>.<schema>.<name>``.
    """
    table = file_name.lower()
    return "{}.{}.{}".format(catalog, schema, table)

def bronze_source_path(file_name):
    """Return the source folder for a dataset under ``base_path``.

    The folder name is used exactly as given (case preserved) and the result
    always ends with a trailing slash.
    """
    return "{}/{}/".format(base_path, file_name)

def bronze_checkpoint_path(file_name):
    """Return the per-dataset tracking directory under ``base_path``.

    The path is ``<base_path>/_checkpoints/bronze/<file_name>``. In this
    notebook it is passed to Auto Loader as ``cloudFiles.schemaLocation``.
    """
    return "{}/_checkpoints/bronze/{}".format(base_path, file_name)

def bronze_table_decorator(file_name):
    """Define and register the bronze streaming table for one dataset.

    Despite its name this is a factory rather than a decorator: it builds a
    query function for ``file_name`` and registers it through ``@dp.table``.
    Taking ``file_name`` as a parameter gives every generated query function
    its own binding, so tables created in a loop do not all end up reading
    the last dataset.

    Args:
        file_name: Dataset folder name under ``base_path``. Its lower-cased
            form is used as the table name.

    Returns:
        The object produced by applying ``@dp.table`` to the query function.
    """
    @dp.table(
        name=bronze_table_name(file_name),
        comment=f"Bronze table for {file_name}"
    )
    def bronze_table():
        df_raw = (
            spark.readStream
            .format("cloudFiles")
            .option("cloudFiles.format", "csv")
            .option("cloudFiles.schemaLocation", bronze_checkpoint_path(file_name))
            .option("header", "true")
            # Auto Loader reads CSV columns as strings unless
            # cloudFiles.inferColumnTypes is enabled; the plain CSV
            # "inferSchema" option is documented as the non-Auto-Loader
            # setting, so it is replaced here to get typed columns.
            .option("cloudFiles.inferColumnTypes", "true")
            .load(bronze_source_path(file_name))
        )
        # Keep every source column and append lineage columns: the ingestion
        # time and the path of the file each row came from.
        return df_raw.select(
            "*",
            current_timestamp().alias("ingestion_timestamp"),
            "_metadata.file_path"
        )
    return bronze_table

# Define one bronze table per configured dataset. The factory call is what
# registers the table; its return value is not needed here.
for dataset_name in datasets:
    bronze_table_decorator(dataset_name)