In [0]:
# Build the list of active datasets for the selected environment and publish
# it as the 'datasets_to_process' job task value.

# Single source of truth for the environments this notebook accepts.
ALLOWED_ENVIRONMENTS = ["dev", "test", "prod"]

dbutils.widgets.dropdown("environment", "dev", ALLOWED_ENVIRONMENTS, "Environment")
environment = dbutils.widgets.get("environment")

# The value is interpolated into a catalog identifier and a SQL string literal
# below, so reject anything outside the known set before building any SQL.
# NOTE(review): the dropdown limits interactive choices; a value passed in as a
# notebook/job parameter is presumably not restricted to them - confirm.
if environment not in ALLOWED_ENVIRONMENTS:
    raise ValueError(
        f"Unsupported environment {environment!r}; "
        f"expected one of {ALLOWED_ENVIRONMENTS}"
    )

CATALOG = f"w_{environment}"
SCHEMA = "landing_admin"
TABLE_NAME = f"{CATALOG}.{SCHEMA}.meta_control_table"


# Query the control table for active datasets
print(f"Querying control table: {TABLE_NAME} for active datasets in '{environment}'...")
control_table_df = spark.sql(f"""
    SELECT
        source_dataset_id,
        source_dataset_name,
        last_timestamp,
        target_folder,
        load_increment_days,
        page_size
    FROM {TABLE_NAME}
    WHERE is_active = true AND environment = '{environment}'
""")

# Convert the DataFrame to a list of dictionaries (one dict per control-table row).
# collect() pulls every matching row to the driver.
params_list = [row.asDict() for row in control_table_df.collect()]

# Handle timestamps: convert to ISO 8601 strings to ensure JSON compatibility.
# Assumes non-null last_timestamp values expose isoformat() (date/datetime-like).
for params in params_list:
    if params.get('last_timestamp') is not None:
        params['last_timestamp'] = params['last_timestamp'].isoformat()

print(f"Found {len(params_list)} datasets to process.")

# Set the list as a task value.
# NOTE(review): when no rows match, the task value is left unset; confirm that
# downstream tasks tolerate a missing 'datasets_to_process' key.
if params_list:
    dbutils.jobs.taskValues.set(key="datasets_to_process", value=params_list)
    print("Successfully set 'datasets_to_process' task value.")
else:
    print("No active datasets found. Skipping task value set.")

In [0]:
# Debug output: show the dataset parameter list built in the previous cell.
print(params_list)