This notebook starts the Gateway pipeline based on the provided parameters, waits 5 minutes, and then starts the Ingestion pipeline. Once both pipelines are running, this notebook triggers the stop notebook, which monitors the status of the ingestion pipeline. While waiting for the ingestion pipeline to complete or return to an IDLE status, this notebook continuously monitors its status. Once the ingestion is complete or the configured timeout (the `p_max_wait_time` parameter, e.g. 20 minutes) is reached, this notebook automatically shuts down to prevent a perpetual loop.

Initialization of Widgets

In [0]:
# Declare the notebook's text widgets. Each entry is (widget name, label);
# every widget is created with an empty default value.
for _widget_name, _widget_label in (
    ("p_product_id", "Product ID"),
    ("p_source_server_name", "Source Server Name"),
    ("p_gateway_id", "Gateway ID"),
    ("p_ingestion_id", "Ingestion ID"),
    ("p_is_historical", "isHistorical"),
    ("p_max_wait_time", "Max Wait Time"),
):
    dbutils.widgets.text(_widget_name, "", _widget_label)

Extracts values from the widgets and checks if the required fields are populated.  
If any required field is missing, an error will be raised.

In [0]:
def get_widget_values():
    """Read every notebook widget and verify the mandatory ones are filled in.

    Returns:
        dict: result key -> widget value with surrounding whitespace stripped.

    Raises:
        ValueError: if a required value (gateway pipeline id, ingestion
            pipeline id or max wait time) is empty after stripping. The first
            missing key, in that order, is reported.
    """
    # Result key -> widget name. Dict order drives the order widgets are read.
    widget_name_by_key = {
        "filter_source_server_name": "p_source_server_name",
        "filter_gateway_pipeline_id": "p_gateway_id",
        "filter_ingestion_pipeline_id": "p_ingestion_id",
        "filter_productId": "p_product_id",
        "filter_is_historical": "p_is_historical",
        "filter_max_wait_time": "p_max_wait_time",
    }
    values = {
        key: dbutils.widgets.get(widget_name).strip()
        for key, widget_name in widget_name_by_key.items()
    }

    required_keys = (
        "filter_gateway_pipeline_id",
        "filter_ingestion_pipeline_id",
        "filter_max_wait_time",
    )
    first_missing = next((key for key in required_keys if not values[key]), None)
    if first_missing is not None:
        raise ValueError(f"The widget '{first_missing}' must be populated before continuing.")

    return values

# Load the widget values once and expose each one as a module-level variable.
# A validation failure is printed and then re-raised unchanged.
try:
    v_widget_values = get_widget_values()
except ValueError as e:
    print(f"Error: {str(e)}")
    raise
else:
    (
        filter_productId,
        filter_source_server_name,
        filter_gateway_pipeline_id,
        filter_ingestion_pipeline_id,
        filter_is_historical,
        filter_max_wait_time,
    ) = (
        v_widget_values[key]
        for key in (
            "filter_productId",
            "filter_source_server_name",
            "filter_gateway_pipeline_id",
            "filter_ingestion_pipeline_id",
            "filter_is_historical",
            "filter_max_wait_time",
        )
    )
    print("All required widgets are populated. Proceeding with the script.")

In [0]:
%run "../common/DataFabricCommonFunctions"

In [0]:
import json
import requests
from databricks.sdk import WorkspaceClient
from databricks.sdk.service import catalog, jobs, pipelines
import time

# Databricks SDK client, created with its default configuration.
workspace_client = WorkspaceClient()

# The notebook context supplies the API token and base URL used below.
notebook_context = (
    dbutils.notebook.entry_point
    .getDbutils()
    .notebook()
    .getContext()
)
v_api_token = notebook_context.apiToken().get()
v_databricks_url = notebook_context.apiUrl().get()

# Base endpoint of the pipelines REST API plus the headers for calling it.
v_pipelines_api_url = "{}/api/2.0/pipelines".format(v_databricks_url)
api_headers_dict = dict(
    [
        ("Authorization", "Bearer {}".format(v_api_token)),
        ("Content-Type", "application/json"),
    ]
)

This is the main process. It verifies the current status of the pipeline and triggers the stop notebook.  
A maximum timeout is implemented to prevent a perpetual run.

In [0]:
def run_pipeline_monitoring():
    """Start the gateway and ingestion pipelines and poll until ingestion ends.

    Workflow:
      1. Validate ``filter_is_historical`` (it selects the full-refresh flag
         passed to ``switch_pipeline``) and ``filter_max_wait_time`` before
         any pipeline is touched.
      2. Read the status of both pipelines and start what is needed: the
         gateway first, then the ingestion pipeline after a fixed wait.
      3. Poll both statuses every ``check_interval`` seconds until the
         ingestion pipeline reports ``IDLE``, an error state is seen, or the
         wait budget is used up.
      4. When ingestion is ``IDLE``, stop the gateway and re-check the
         ingestion status. On timeout, stop both pipelines and raise.

    Reads the module-level ``filter_*`` / ``v_*`` values and calls
    ``get_pipeline_status`` and ``switch_pipeline``, which are not defined in
    this cell (presumably loaded from the common-functions notebook; confirm).

    Raises:
        ValueError: if ``filter_is_historical`` is not one of the two expected
            values, or ``filter_max_wait_time`` is not an integer that leaves
            a positive polling budget.
        Exception: if a pipeline is not found, the pipelines are in an
            unexpected state, the gateway goes ``IDLE`` while ingestion is
            still running, the ingestion fails or is canceled, or the wait
            budget is exhausted.
    """
    if filter_is_historical == "initial_cluster_spec":
        is_full_refresh = True
    elif filter_is_historical == "ongoing_cluster_spec":
        is_full_refresh = False
    else:
        raise ValueError(
            f"Invalid value for 'isHistorical': {filter_is_historical}. Expected 'initial_cluster_spec' or 'ongoing_cluster_spec'."
        )

    # Parse the wait budget up front so that a bad value fails before any
    # pipeline is started, instead of leaving running pipelines behind.
    # NOTE(review): the 180 second offset presumably leaves headroom for
    # shutdown work; confirm with the job configuration.
    wait_time_offset = 180
    try:
        requested_wait_time = int(filter_max_wait_time)
    except ValueError as err:
        raise ValueError(
            f"Invalid value for 'Max Wait Time': {filter_max_wait_time!r}. Expected a whole number of seconds."
        ) from err
    max_wait_time = requested_wait_time - wait_time_offset
    if max_wait_time <= 0:
        # A non-positive budget would start the pipelines and then time out
        # immediately without polling even once.
        raise ValueError(
            f"'Max Wait Time' must be greater than {wait_time_offset} seconds, got {requested_wait_time}."
        )

    gateway_status = get_pipeline_status(v_api_token, v_pipelines_api_url, filter_gateway_pipeline_id, "start")
    ingestion_status = get_pipeline_status(v_api_token, v_pipelines_api_url, filter_ingestion_pipeline_id, "start")
    print(f"Initial Status - Gateway: {gateway_status}, Ingestion: {ingestion_status}")

    if gateway_status is None or gateway_status == "NOT_FOUND":
        raise Exception("Gateway Pipeline not found. Exiting...")

    start_gateway_wait_time = 300
    if gateway_status == "IDLE" and ingestion_status == "IDLE":
        print(f"Starting gateway pipeline {filter_gateway_pipeline_id}...")
        switch_pipeline(v_api_token, v_pipelines_api_url, filter_gateway_pipeline_id, 'start', is_full_refresh)
        print(f"Waiting {start_gateway_wait_time} seconds for gateway to stabilize...")
        time.sleep(start_gateway_wait_time)
        print(f"Starting ingestion pipeline {filter_ingestion_pipeline_id}...")
        switch_pipeline(v_api_token, v_pipelines_api_url, filter_ingestion_pipeline_id, 'start', is_full_refresh)

    elif gateway_status == "RUNNING" and ingestion_status == "IDLE":
        print("Gateway already running, starting ingestion...")
        time.sleep(start_gateway_wait_time)
        switch_pipeline(v_api_token, v_pipelines_api_url, filter_ingestion_pipeline_id, 'start', is_full_refresh)

    elif ingestion_status == "NOT_FOUND":
        raise Exception("Failed: Pipeline Not Found")

    else:
        raise Exception("Pipelines already running or in unexpected state. Exiting...")

    check_interval = 30
    # Counts only the time spent sleeping between polls, not the duration of
    # the status calls themselves.
    elapsed_time = 0

    while elapsed_time < max_wait_time:
        try:
            ingestion_status = get_pipeline_status(v_api_token, v_pipelines_api_url, filter_ingestion_pipeline_id, "start")
            gateway_status = get_pipeline_status(v_api_token, v_pipelines_api_url, filter_gateway_pipeline_id, "start")
            print(f"Current Status - Ingestion: {ingestion_status}, Gateway: {gateway_status}")
        except Exception as e:
            # Best effort: a failed status call is logged and retried on the
            # next interval; the wait still counts against the budget.
            print(f"Error fetching status: {e}")
            time.sleep(check_interval)
            elapsed_time += check_interval
            continue

        if ingestion_status == "IDLE":
            print("Ingestion completed. Stopping both pipelines...")
            break

        if gateway_status == "IDLE":
            # Stop the ingestion pipeline BEFORE raising; a statement placed
            # after the raise would never execute.
            switch_pipeline(v_api_token, v_pipelines_api_url, filter_ingestion_pipeline_id, 'stop', False)
            raise Exception("Gateway unexpectedly turn off. Exiting...")

        if ingestion_status == "FAILED" or ingestion_status == "CANCELED":
            raise Exception("Ingestion Failed. Exiting...")

        if ingestion_status == "NOT_FOUND" or gateway_status == "NOT_FOUND":
            raise Exception("One or both pipelines not found. Exiting...")

        time.sleep(check_interval)
        elapsed_time += check_interval
    else:
        # The loop condition became false without a ``break``: the wait
        # budget is exhausted while ingestion is still not IDLE.
        print("Timeout reached! Forcing shutdown of Gateway and Ingestion pipelines...")
        try:
            switch_pipeline(v_api_token, v_pipelines_api_url, filter_gateway_pipeline_id, 'stop', False)
        finally:
            # Still attempt to stop ingestion if stopping the gateway failed.
            switch_pipeline(v_api_token, v_pipelines_api_url, filter_ingestion_pipeline_id, 'stop', False)
        raise Exception(
            "Timeout reached! Forcing shutdown of Gateway and Ingestion pipelines..."
        )

    # Reached only through the ``break`` above, i.e. ingestion reported IDLE.
    switch_pipeline(v_api_token, v_pipelines_api_url, filter_gateway_pipeline_id, 'stop', False)
    ingestion_status = get_pipeline_status(v_api_token, v_pipelines_api_url, filter_ingestion_pipeline_id, "end")
    if ingestion_status in ("FAILED", "CANCELED"):
        raise Exception("Ingestion Failed. Exiting...")
# Entry point: run the monitoring workflow when this code executes as the main module.
if __name__ == "__main__":
    run_pipeline_monitoring()
