In [0]:
%pip install databricks-cli
%pip install databricks-sdk
# The two installs above provide the `databricks_cli` and `databricks.sdk`
# packages imported further down in this notebook. No version specifier is
# given, so whatever release pip resolves at run time is installed.

In [0]:
# Restart the notebook's Python process — presumably so the packages installed
# by the preceding %pip cell are picked up by later imports. Any Python
# variables defined before this point do not survive the restart.
dbutils.library.restartPython()

In [0]:
# Collect the distinct load dates to replay: every SYS_EFFECTIVE_DATE found in
# the source table, shifted forward by one day and rendered as 'yyyy-MM-dd'.
df_eff_dates = spark.sql("""
    select DISTINCT date_format(to_date(date_add(SYS_EFFECTIVE_DATE, 1)), 'yyyy-MM-dd') AS  int_effective_date 
    --gap_catalog.ads_etl_owner.DLK_ADS_LOV_RDS_ANALYTICALEVENTSTATUS 
    from gap_catalog.ads_etl_owner.DLK_ADS_LOV_RDS_DISPOTRANSACTIONTYPE
    ORDER BY int_effective_date;
""")

# A NULL SYS_EFFECTIVE_DATE produces a NULL int_effective_date, which arrives
# here as None. Skip those rows: calling .replace on None raises
# AttributeError, and a None key cannot be sorted together with string keys.
effective_dates = [row.int_effective_date for row in df_eff_dates.collect()]

# Map each date ('yyyy-MM-dd') to its partition label: 'B' + the date without dashes.
increments = {
    eff_date: f"B{eff_date.replace('-', '')}"
    for eff_date in effective_dates
    if eff_date is not None
}

In [0]:
from databricks_cli.jobs.api import JobsApi
from databricks_cli.runs.api import RunsApi
from databricks_cli.sdk.api_client import ApiClient
from databricks.sdk import WorkspaceClient
from databricks.sdk.service.jobs import JobSettings, RunResultState
import time

# Replay driver: for every effective date in `increments` (built in an earlier
# cell), tag the ADS_RDS_PROC job, run it and, if it succeeds, run
# ADS_RDS_Counts with the same parameters. The job's SQL tasks are pointed at
# the SQL warehouse selected through CLUSTER_SIZE before any run is triggered.

# Set to False for dry-run (only print), True to actually execute jobs
#EXECUTE_JOBS = False
EXECUTE_JOBS = True

# Cluster configuration mapping: size label -> SQL warehouse id and the
# cluster number that is embedded in the process key and test-case name.
CLUSTER_CONFIGS = {
    "2XS": {"warehouse_id": "63bb9c3e8676522d", "cluster_num": "1"},
    "XS": {"warehouse_id": "94a4cb87c4f0d125", "cluster_num": "2"},
    "S": {"warehouse_id": "296ce849ee510c76", "cluster_num": "3"}
}

# Select cluster size
CLUSTER_SIZE = "S"  # Change to "2XS", "XS" or "S" as needed
cluster_config = CLUSTER_CONFIGS[CLUSTER_SIZE]

# The client is created with default arguments; no explicit host or token is
# passed, so none is fetched from the notebook context here.
workspace_client = WorkspaceClient()

# Job ids are integers, matching the type the SDK's job_id parameters declare.
job_id = 1092027122452197  # ADS_RDS_PROC
counts_job_id = 449940996801892  # ADS_RDS_Counts

new_warehouse_id = cluster_config["warehouse_id"]
cluster_num = cluster_config["cluster_num"]


def _run_duration_ms(run):
    """Return the duration of a finished run in milliseconds.

    Prefers `run_duration` and falls back to `execution_duration`; the Jobs
    API documents `execution_duration` as 0 for multitask job runs.
    """
    return getattr(run, "run_duration", None) or run.execution_duration


# Re-point every SQL task of the job at the selected warehouse. Only SQL tasks
# are collected; other task types are left out of the update payload.
existing_job = workspace_client.jobs.get(job_id=job_id)
new_tasks = []
for task in existing_job.settings.tasks or []:
    if task.sql_task:
        task.sql_task.warehouse_id = new_warehouse_id
        new_tasks.append(task)

print(f"{'='*60}")
print(f"Mode: {'EXECUTION' if EXECUTE_JOBS else 'DRY-RUN (printing only)'}")
print(f"Job ID: {job_id}")
print(f"Cluster Size: {CLUSTER_SIZE}")
print(f"Warehouse ID: {new_warehouse_id}")
print(f"Cluster Number: {cluster_num}")
print(f"Total dates to process: {len(increments)}")
print(f"{'='*60}\n")

# Push the warehouse change to the job once, before any run is triggered.
# This is deliberately outside the per-date try/except: if the warehouse
# cannot be set, no run should be started on the wrong warehouse.
if new_tasks:
    if EXECUTE_JOBS:
        workspace_client.jobs.update(
            job_id=job_id,
            new_settings=JobSettings(tasks=new_tasks)
        )
        print(f"Warehouse {new_warehouse_id} applied to {len(new_tasks)} SQL task(s)\n")
    else:
        print(f"Would apply warehouse {new_warehouse_id} to {len(new_tasks)} SQL task(s)\n")

# Dates whose runs raised an error or did not finish with SUCCESS.
failed_dates = []

for i, (date, partition) in enumerate(sorted(increments.items()), 1):
    # Format date from YYYY-MM-DD to YYYYMMDD
    date_formatted = date.replace('-', '')

    # Create p_process_key and test_case with cluster number
    p_process_key = f"-{date_formatted}{cluster_num}00"
    test_case = f"ADS_RDS_{date_formatted}{cluster_num}00"

    print(f"[{i}/{len(increments)}] Date: {date} | Partition: {partition}")
    print(f"  Process Key: {p_process_key}")
    print(f"  Test Case: {test_case}")

    if not EXECUTE_JOBS:
        print(f"  -> Would update tags: effective_date={date}, etl=ADS_RDS, cluster_size={CLUSTER_SIZE}")
        print(f"  -> Would trigger ADS_RDS_PROC with parameters: p_load_date={date}, p_process_key={p_process_key}")
        print(f"  -> Would then trigger ADS_RDS_Counts ({counts_job_id}) with same parameters")
        print()
        continue

    # Actually execute when EXECUTE_JOBS is True
    job_parameters = {
        "p_load_date": date,
        "p_process_key": p_process_key
    }
    try:
        # Update tags so this run can be identified by date, size and test case
        workspace_client.jobs.update(
            job_id=job_id,
            new_settings=JobSettings(
                tags={
                    "effective_date": date,
                    "etl": "ADS_RDS",
                    "cluster_size": CLUSTER_SIZE,
                    "optimal": "true",
                    "test_case": test_case,
                    "use_case": "ora_dbx_assessment"
                }
            )
        )
        print(f"  -> Tags updated")

        # Trigger the ADS_RDS_PROC job and block until it finishes.
        # NOTE(review): run_now_and_wait is called with its default timeout —
        # confirm it is longer than the longest expected run.
        run = workspace_client.jobs.run_now_and_wait(
            job_id=job_id,
            job_parameters=job_parameters
        )
        print(f"  -> ADS_RDS_PROC Run finished: {run.state.life_cycle_state} | {run.state.result_state} | Duration: {_run_duration_ms(run)}ms")

        # If the first job succeeded, run ADS_RDS_Counts
        if run.state.result_state == RunResultState.SUCCESS:
            print(f"  -> Triggering ADS_RDS_Counts job...")

            counts_run = workspace_client.jobs.run_now_and_wait(
                job_id=counts_job_id,
                job_parameters=job_parameters
            )
            print(f"  -> ADS_RDS_Counts Run finished: {counts_run.state.life_cycle_state} | {counts_run.state.result_state} | Duration: {_run_duration_ms(counts_run)}ms")
            if counts_run.state.result_state != RunResultState.SUCCESS:
                failed_dates.append(date)
        else:
            print(f"  -> Skipping ADS_RDS_Counts because ADS_RDS_PROC result was: {run.state.result_state}")
            failed_dates.append(date)

    except Exception as e:
        # Best effort: report the problem and continue with the next date.
        print(f"  -> ERROR: {e}")
        failed_dates.append(date)

    print()

print(f"{'='*60}")
print(f"{'DRY-RUN COMPLETE' if not EXECUTE_JOBS else 'EXECUTION COMPLETE'}")
if failed_dates:
    print(f"Dates with errors or non-successful runs ({len(failed_dates)}): {', '.join(failed_dates)}")
print(f"{'='*60}")

In [0]:
%sql
-- Read the 'numOutputRows' operation metric from the highest-version entry in
-- the history of gap_catalog.ads_owner.event_status. The operation filter is
-- currently commented out, so the newest entry is used whatever its operation.
SELECT
  operationMetrics['numOutputRows'] AS affected_rows
FROM (DESCRIBE HISTORY gap_catalog.ads_owner.event_status)
WHERE 1=1 /*operation='MERGE'*/
ORDER BY version DESC
LIMIT 1

In [0]:
%sql
-- List the complete table history of gap_catalog.ads_owner.event_status
-- (all entries, no filtering or ordering applied here).
DESCRIBE HISTORY gap_catalog.ads_owner.event_status