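This notebook dynamically discovers all entity folders in the landing location.
It reads streaming CSV data from each folder using Auto Loader.
It enriches the data with metadata columns (entity name, source file name, year, inserting user, and ingestion time) and encrypts the configured PII columns.
It creates a bronze-level DLT table for each entity.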

In [0]:
# from pyspark.sql.functions import (
#     lit, current_timestamp, regexp_extract, col, current_user, from_utc_timestamp, udf
# )
# from pyspark.sql.types import StringType
# from cryptography.fernet import Fernet
# import dlt  
# entities = [f.name.replace("/", "") for f in dbutils.fs.ls("abfss://trainingexternal@bayadapoc.dfs.core.windows.net/learners/ashish/landing/AdventureWorks_Ashish/")]


# #Encryption Setup

# fernet_encryption_key = "ZTZBmnJX2RHnOLJYonUFL9_r6iIUxWMUP8JqrBRZUwE="  # Replace with secure storage (e.g., Key Vault)
# fernet = Fernet(fernet_encryption_key.encode())

# def pii_encryption(field):
#     if field is None:
#         return None
#     try:
#         return fernet.encrypt(field.encode()).decode()
#     except Exception:
#         return field  # fallback if not string

# encryption_function = udf(pii_encryption, StringType())

# #Define PII dictionary

# pii_dict = {
#     "EmailAddress": ["EmailAddress"],
#     "Address": ["AddressLine1", "AddressLine2", "City", "PostalCode"],
#     "Customer": ["AccountNumber"],
#     "Person": ["FirstName", "MiddleName", "LastName"],
#     "SalesOrderHeader": ["AccountNumber"]
# }

# def create_bronze_table(entity_name):
#     df = (
#         spark.readStream.format("cloudFiles")
#         .option("cloudFiles.format", "csv")
#         .option("header", "true")
#         .option("inferSchema", "true")
#         .option("cloudFile.schemaEvolutionMode", "rescue") 
#         .option("recursiveFileLookup", "true")
#         .load(f"abfss://trainingexternal@bayadapoc.dfs.core.windows.net/learners/ashish/landing/AdventureWorks_Ashish/{entity_name}/*")
#         .withColumn("Entity", lit(entity_name))
#         .withColumn("File_Name", regexp_extract(col("_metadata.file_path"), r'landing/AdventureWorks_Ashish/'+entity_name+r'/\d{4}/([^/]+)', 1))
#         .withColumn("Year", regexp_extract(col("_metadata.file_path"), r'landing/AdventureWorks_Ashish/'+entity_name+r'/(\d{4})', 1))
#         .withColumn("Inserted_by", current_user())
#         .withColumn('Ingested_time', from_utc_timestamp(current_timestamp(), "Asia/Kolkata"))
#     )

#     if entity_name == "Customer":
#         df = df.withColumn("AccountNumber", encryption_function(col("AccountNumber")))
#     elif entity_name == "Address":
#         df = df.withColumn("PostalCode", encryption_function(col("PostalCode")))
#     elif entity_name == "EmailAddress":
#         df = df.withColumn("EmailAddress", encryption_function(col("EmailAddress")))
#     elif entity_name == "Person":
#         df = df.withColumn("PersonType", encryption_function(col("PersonType")))

#     return df

# for entity in entities:
   
#     @dlt.table(
#         name = f"training.ashish.{entity}_bronze",
#         comment = f"Bronze table for entities"
#     )
#     def bronze_table(entity_name = entity):
#         return create_bronze_table(entity_name)

In [0]:
from pyspark.sql.functions import (
    lit, current_timestamp, regexp_extract, col, current_user, from_utc_timestamp, udf
)
from pyspark.sql.types import StringType
from cryptography.fernet import Fernet
import dlt  

# Discover entity names dynamically from the sub-folders of the landing folder.
# Only directories are kept: a stray file at the landing root would otherwise
# be treated as an entity and get its own (empty or failing) bronze table.
# Directory names come back with a trailing "/", which is stripped here.
entities = [
    f.name.replace("/", "")
    for f in dbutils.fs.ls(
        "abfss://trainingexternal@bayadapoc.dfs.core.windows.net/learners/ashish/landing/AdventureWorks_Ashish/"
    )
    if f.isDir()
]


# Encryption setup
#
# SECURITY: the Fernet key is hard-coded in this notebook, so anyone who can
# read the source can decrypt the PII columns. Move it to a secret store
# (e.g. Key Vault) before using this pipeline outside of training.
fernet_encryption_key = "ZTZBmnJX2RHnOLJYonUFL9_r6iIUxWMUP8JqrBRZUwE="
# Fernet is built from the key as bytes; this instance is used by the PII UDF.
fernet = Fernet(bytes(fernet_encryption_key, "utf-8"))

def pii_encryption(field):
    """Encrypt a single PII value with the module-level Fernet instance.

    Args:
        field: The raw column value. ``None`` is passed through unchanged so
            SQL NULLs stay NULL. Non-string values are converted with
            ``str()`` before encryption.

    Returns:
        The Fernet token as a ``str``, or ``None`` when ``field`` is ``None``.

    Raises:
        Exception: Any error raised by the encryption itself propagates.
            Previously every failure (including a non-string input) fell
            back to returning the plaintext value, which silently wrote
            unencrypted PII into the bronze table.
    """
    if field is None:
        return None
    # Columns may arrive as non-string types; encrypt their string form
    # instead of leaking the raw value.
    text = field if isinstance(field, str) else str(field)
    return fernet.encrypt(text.encode("utf-8")).decode("ascii")

# Spark UDF wrapper around pii_encryption, declared to return a string column,
# so the encryption can be applied to DataFrame columns.
encryption_function = udf(f=pii_encryption, returnType=StringType())


# Columns to encrypt, keyed by entity name. An entity that is absent from this
# mapping has no columns encrypted.
pii_dict = dict(
    EmailAddress=["EmailAddress"],
    Address=["AddressLine1", "AddressLine2", "City", "PostalCode"],
    Customer=["AccountNumber"],
    Person=["FirstName", "MiddleName", "LastName", "PersonType"],
    SalesOrderHeader=["AccountNumber"],
)


#Bronze Table Creation Function

def create_bronze_table(entity_name):
    """Build the streaming bronze DataFrame for one landing entity.

    Reads the entity's CSV files with Auto Loader (``cloudFiles``), adds
    lineage columns, and encrypts the columns listed for the entity in
    ``pii_dict``.

    Args:
        entity_name: Name of the entity folder under the landing root. It is
            also used to look up the entity's PII columns in ``pii_dict``.

    Returns:
        A streaming DataFrame holding the source columns plus ``Entity``,
        ``File_Name``, ``Year``, ``Inserted_by`` and ``Ingested_time``.
    """
    df = (
        spark.readStream.format("cloudFiles")
        .option("cloudFiles.format", "csv")
        .option("header", "true")
        .option("inferSchema", "true")
        # Fixed: the Auto Loader option prefix is "cloudFiles.", not
        # "cloudFile."; with the misspelled key "rescue" mode was not selected.
        .option("cloudFiles.schemaEvolutionMode", "rescue")
        .option("recursiveFileLookup", "true")
        .load(f"abfss://trainingexternal@bayadapoc.dfs.core.windows.net/learners/ashish/landing/AdventureWorks_Ashish/{entity_name}/*")
        .withColumn("Entity", lit(entity_name))
        # File_Name: the path segment directly after the four-digit year folder.
        .withColumn("File_Name", regexp_extract(col("_metadata.file_path"),
                    r'landing/AdventureWorks_Ashish/'+entity_name+r'/\d{4}/([^/]+)', 1))
        # Year: the four-digit folder directly under the entity folder.
        .withColumn("Year", regexp_extract(col("_metadata.file_path"),
                    r'landing/AdventureWorks_Ashish/'+entity_name+r'/(\d{4})', 1))
        .withColumn("Inserted_by", current_user())
        # Ingestion time shifted from UTC to the Asia/Kolkata time zone.
        .withColumn('Ingested_time', from_utc_timestamp(current_timestamp(), "Asia/Kolkata"))
    )

    # Encrypt the entity's PII columns; a configured column that is not
    # present in the inferred schema is skipped rather than failing.
    for col_name in pii_dict.get(entity_name, []):
        if col_name in df.columns:
            df = df.withColumn(col_name, encryption_function(col(col_name)))

    return df


# Register one bronze DLT table per discovered entity.

for entity in entities:
    # The table name uses an underscore suffix (no dots in the name).
    # NOTE(review): continuous execution is usually configured at pipeline
    # level; confirm that this table property is actually honoured.
    @dlt.table(
        table_properties={"pipelines.trigger.mode": "continuous"},
        comment=f"Bronze table for {entity} entity",
        name=f"{entity}_bronze",
    )
    # The default argument captures the current loop value, so each registered
    # function keeps its own entity instead of the last one iterated.
    def bronze_table(entity_name=entity):
        return create_bronze_table(entity_name)
