In [0]:
from datetime import datetime

# Notebook parameters come from widgets; the storage account key comes from the
# "zillowsecrets" secret scope so it never appears in the notebook source.
container_name = dbutils.widgets.get("container_name")
storage_account_key = dbutils.secrets.get(
    scope="zillowsecrets",
    key="storage_account_key",
)
archive = dbutils.widgets.get("archive")
catalog = dbutils.widgets.get("catalog")
storage_account_name = dbutils.widgets.get("storage_account_name")

# Hand the account key to Spark under the per-account
# `fs.azure.account.key.<account>.dfs.core.windows.net` setting.
spark.conf.set(
    f"fs.azure.account.key.{storage_account_name}.dfs.core.windows.net",
    storage_account_key,
)

# === Per-table configuration ===
# One entry per table to process:
#   table         - table name, also used as the sub-folder name under the archive path
#   schema        - schema that holds the table
#   source_folder - folder (relative to the container root) whose files are archived
table_configs = [
    dict(
        table="taxhistory_silver",
        schema="taxhistory",
        source_folder="raw_data/TaxHistory",
    ),
    dict(
        table="pricehistory_silver",
        schema="pricehistory",
        source_folder="raw_data/PriceHistory",
    ),
    dict(
        table="property_silver",
        schema="propertyextended",
        source_folder="raw_data/propertyExtendedSearch",
    ),
]

# === Main processing loop ===
# For each configured table:
#   1. list the entries currently in its source folder,
#   2. read the distinct `file_name` values recorded in the table,
#   3. move every entry present in both to `<archive>/<table>`.
# A failure skips only the affected table or file. Each failure message is
# collected in `archive_errors` and published as the "error" job task value
# ("" when nothing failed). The message must be captured inside the handler:
# Python unbinds the `except ... as e` target as soon as the handler exits.
archive_errors = []

# Loop-invariant prefix shared by every source and archive path.
storage_root = f"abfss://{container_name}@{storage_account_name}.dfs.core.windows.net"

for config in table_configs:
    table = config["table"]
    schema = config["schema"]
    source_folder = config["source_folder"]

    print(f"\nProcessing table: {table} from schema: {schema}")

    source_path = f"{storage_root}/{source_folder}"
    archive_path = f"{storage_root}/{archive}/{table}"

    # List entries in the source folder
    try:
        loaded_files_set = {item.name for item in dbutils.fs.ls(source_path)}
    except Exception as e:
        message = f"Could not access source folder: {source_path}. Error: {str(e)}"
        print(message)
        archive_errors.append(message)
        continue

    # Nothing to archive: skip the Spark query entirely.
    if not loaded_files_set:
        print(f"No files found in {source_path}; nothing to archive")
        continue

    # Fetch the file names already recorded in the table
    try:
        df = spark.sql(f"SELECT DISTINCT(file_name) FROM {catalog}.{schema}.{table}")
        table_files_set = {row.file_name for row in df.collect()}
    except Exception as e:
        message = f"Could not query table: {catalog}.{schema}.{table}. Error: {str(e)}"
        print(message)
        archive_errors.append(message)
        continue

    # Only entries that are both in the folder and recorded in the table are moved;
    # sorted so the log output is stable between runs.
    files_to_move = sorted(loaded_files_set & table_files_set)

    for file_name in files_to_move:
        src_file = f"{source_path}/{file_name}"
        dst_file = f"{archive_path}/{file_name}"
        try:
            dbutils.fs.mv(src_file, dst_file)
            print(f"Moved {file_name} from {source_path} to {archive_path}")
        except Exception as e:
            message = f"Failed to move {file_name}: {str(e)}"
            print(message)
            archive_errors.append(message)

# Surface the outcome instead of ending silently after swallowed errors.
if archive_errors:
    print(f"\nFinished with {len(archive_errors)} error(s)")
dbutils.jobs.taskValues.set(key="error", value="\n".join(archive_errors))

In [0]:
# Publish an exception as the "error" job task value, but only if a variable
# named `e` actually exists in this cell's scope.
# Python unbinds the target of `except ... as e` when the handler exits, so a
# bare `str(e)` here raised NameError on every run. To report a failure, its
# message has to be captured inside the handler itself.
try:
    error_text = str(e)
except NameError:
    error_text = None  # no exception object is reachable from this cell

if error_text is not None:
    dbutils.jobs.taskValues.set(key="error", value=error_text)