In [0]:
import json
from pyspark.sql import Row
from pyspark.sql import functions as F
from pyspark.sql.functions import col, when, format_string, lit, row_number
from pyspark.sql.window import Window
from pyspark.sql.types import StringType, StructType, StructField
from datetime import datetime
from concurrent.futures import ThreadPoolExecutor, as_completed

In [0]:
%run "../common/DataFabricCommonFunctions"

In [0]:
def init_widgets():
    """Create the notebook's text widgets, each with an empty default value.

    One ``dbutils.widgets.text`` input is registered per entry below; the
    mapping key is the widget (parameter) name and the value is its label.
    """
    v_label_by_parameter = {
        "p_product_id": "Product ID",
        "p_client_id": "Client ID",
        "p_facility_id": "Facility ID",
        "p_instance_name": "Instance Name",
        "p_environment_name": "Environment Name",
    }
    # dicts keep insertion order, so widgets are created in the order listed.
    for v_widget_name, v_widget_label in v_label_by_parameter.items():
        dbutils.widgets.text(v_widget_name, "", v_widget_label)

In [0]:
def get_widget_values():
    """Read every notebook widget, trim surrounding whitespace, and return the values.

    Returns:
        dict: the keys ``v_filter_product_id``, ``v_filter_client_id``,
        ``v_filter_facility_id``, ``v_filter_instance_name`` and
        ``v_environment_name`` mapped to the stripped widget text.

    Raises:
        ValueError: if the Product ID widget is empty after stripping.
    """
    # (result key, widget name) pairs, read in this order.
    v_key_to_widget_pairs = (
        ("v_filter_product_id", "p_product_id"),
        ("v_filter_client_id", "p_client_id"),
        ("v_filter_facility_id", "p_facility_id"),
        ("v_filter_instance_name", "p_instance_name"),
        ("v_environment_name", "p_environment_name"),
    )

    v_values = {}
    for v_result_key, v_widget_name in v_key_to_widget_pairs:
        v_values[v_result_key] = dbutils.widgets.get(v_widget_name).strip()

    # Product ID is the only widget validated here.
    v_product_id = v_values["v_filter_product_id"]
    if not v_product_id:
        raise ValueError(
            "The widget 'Product ID' must be populated before continuing."
        )

    print("All required widgets are populated.")
    return v_values

In [0]:
def read_configuration_tables(v_environment_name):
    """Query the two Data Fabric configuration views for one environment.

    Args:
        v_environment_name: passed through unchanged as the second argument
            of ``execute_dbconfig_sql_query``.

    Returns:
        dict: ``"server_view"`` and ``"table_view"`` mapped to whatever
        ``execute_dbconfig_sql_query`` returns for the respective query
        (the rest of this notebook uses the results as DataFrames).
    """
    v_query_by_key = {
        "server_view": "SELECT * FROM cfg.vw_DataFabricServerList",
        "table_view": "SELECT * FROM cfg.vw_DataFabricTableList",
    }

    v_loaded_views = {}
    # Server view is queried first, then the table view.
    for v_view_key, v_sql_text in v_query_by_key.items():
        v_loaded_views[v_view_key] = execute_dbconfig_sql_query(v_sql_text, v_environment_name)
    return v_loaded_views

In [0]:
def execute_product_instances(
    v_dataframes_dict,
    v_filter_product_id,
    v_instance_name_prefix,
    v_child_notebook_path,
    v_environment_name=None
):
    """Run the child notebook in parallel, once per matching server-view row.

    Rows are taken from ``v_dataframes_dict["server_view"]`` where
    ``InternalProductId`` equals ``v_filter_product_id``, ``SourceServerName1``
    starts with ``v_instance_name_prefix`` and ``DataSourceId`` is 46. For each
    row, the maximum ``IsHistorical`` of the table-view rows with the same
    server and database is forwarded as ``p_historical_flag``.

    Args:
        v_dataframes_dict: dict holding the ``"server_view"`` and
            ``"table_view"`` DataFrames.
        v_filter_product_id: product ID to match against ``InternalProductId``.
        v_instance_name_prefix: prefix matched against ``SourceServerName1``;
            also forwarded to the child notebook as ``p_instance_name``.
        v_child_notebook_path: notebook path handed to ``dbutils.notebook.run``.
        v_environment_name: value forwarded as ``p_environment_name``. When
            omitted, the notebook-level global ``v_environment`` is used, which
            is how this function behaved before the parameter existed.

    Returns:
        list: the values returned by ``dbutils.notebook.run``, in completion
        order (not submission order).

    Raises:
        Exception: the first child-notebook failure encountered is printed and
            re-raised.
    """
    if v_environment_name is None:
        # Backward-compatible fallback to the notebook-level variable.
        v_environment_name = v_environment

    # Column comparisons instead of f-string SQL: the product ID comes from a
    # widget, and interpolating it into a SQL fragment would let input such as
    # "1 OR 1=1" defeat the filter.
    # NOTE(review): the meaning of DataSourceId 46 is not visible here — confirm.
    v_instance_df = (
        v_dataframes_dict["server_view"]
        .filter(F.col("InternalProductId") == v_filter_product_id)
        .filter(F.col("SourceServerName1").startswith(v_instance_name_prefix))
        .filter(F.col("DataSourceId") == 46)
    )

    v_table_list_df = (
        v_dataframes_dict["table_view"]
        .filter(F.col("InternalProductId") == v_filter_product_id)
        .filter(F.col("DataSourceId") == 46)
    )

    v_parameters_list = []
    for record in v_instance_df.collect():
        v_server = record["SourceServerName1"]
        v_database = record["SourceDatabaseName1"]

        try:
            v_historical_flag = (
                v_table_list_df.filter(
                    (F.col("SourceServerName1") == v_server) &
                    (F.col("SourceDatabaseName1") == v_database)
                )
                .agg(F.max(F.col("IsHistorical")).alias("historical_aggregate"))
                .collect()[0]["historical_aggregate"]
            )
        except Exception as e:
            # Best-effort lookup: keep the None fallback, but report the error
            # instead of discarding it silently.
            print(f"Historical flag lookup failed for {v_server}/{v_database}: {e}")
            v_historical_flag = None

        v_parameters_list.append({
            "p_product_id": record["InternalProductId"],
            "p_historical_flag": v_historical_flag,
            "p_instance_name": v_instance_name_prefix,
            "p_instance_database_name": v_database,
            "p_environment_name": v_environment_name
        })

    v_results_list = []
    with ThreadPoolExecutor() as executor:
        # 3600 is the timeout argument of dbutils.notebook.run.
        v_futures_list = [
            executor.submit(dbutils.notebook.run, v_child_notebook_path, 3600, p)
            for p in v_parameters_list
        ]
        for future in as_completed(v_futures_list):
            try:
                v_results_list.append(future.result())
            except Exception as e:
                print(f"Notebook failed: {e}")
                raise
    return v_results_list

def execute_combinations(
    v_dataframes_dict,
    widget_values,
    v_child_notebook_path
):
    """Dispatch child-notebook runs for an instance name or a client/facility.

    Two modes, chosen from ``widget_values["v_filter_instance_name"]``:

    * Instance mode: a non-empty string (or a list of names) delegates to
      ``execute_product_instances`` once per name and returns the collected
      results.
    * Client mode: otherwise a Client ID is required. The server view
      (``DataSourceId`` 47) is filtered by client and, when given, facility;
      rows whose ``ProcessLevel`` is ``"Product"`` are prepended via ``union``.
      The child notebook is run in parallel once per resulting row.

    All inputs are read from ``widget_values``, not from notebook-level
    globals, so the function no longer depends on a later cell having run.

    Args:
        v_dataframes_dict: dict holding the ``"server_view"`` and
            ``"table_view"`` DataFrames.
        widget_values: dict with the keys ``v_filter_product_id``,
            ``v_filter_client_id``, ``v_filter_facility_id``,
            ``v_filter_instance_name`` and ``v_environment_name``.
        v_child_notebook_path: notebook path handed to ``dbutils.notebook.run``.

    Returns:
        list: child-notebook results. In client mode they are in completion
        order, not submission order.

    Raises:
        ValueError: in client mode when no Client ID is supplied.
        Exception: the first child-notebook failure encountered is printed and
            re-raised.
    """
    v_filter_product_id = widget_values["v_filter_product_id"]
    filter_client_id = widget_values["v_filter_client_id"]
    filter_facility_id = widget_values["v_filter_facility_id"]
    v_instance_name_prefix = widget_values["v_filter_instance_name"]
    v_environment_name = widget_values["v_environment_name"]

    # --- Instance mode -----------------------------------------------------
    if isinstance(v_instance_name_prefix, list):
        v_results_list = []
        for instance_name in v_instance_name_prefix:
            v_results_list.extend(
                execute_product_instances(
                    v_dataframes_dict,
                    v_filter_product_id,
                    instance_name,
                    v_child_notebook_path
                )
            )
        return v_results_list
    if isinstance(v_instance_name_prefix, str) and v_instance_name_prefix != "":
        return execute_product_instances(
            v_dataframes_dict,
            v_filter_product_id,
            v_instance_name_prefix,
            v_child_notebook_path
        )

    # --- Client mode -------------------------------------------------------
    if not filter_client_id:
        raise ValueError("A valid Client ID must be provided.")

    # Column comparisons instead of f-string SQL: these values come from
    # widgets, and interpolating them into a SQL fragment would let input such
    # as "1 OR 1=1" defeat the filter.
    # NOTE(review): the meaning of DataSourceId 47 is not visible here — confirm.
    v_server_view_df = (
        v_dataframes_dict["server_view"]
        .filter(F.col("InternalProductId") == v_filter_product_id)
        .filter(F.col("DataSourceId") == 47)
    )
    v_table_view_df = (
        v_dataframes_dict["table_view"]
        .filter(F.col("InternalProductId") == v_filter_product_id)
        .filter(F.col("DataSourceId") == 47)
    )

    v_selected_columns = (
        "InternalProductId", "InternalClientId", "InternalFacilityId", "SourceDatabaseName1"
    )

    v_execution_df = v_server_view_df.filter(F.col("InternalClientId") == filter_client_id)
    if filter_facility_id:
        v_execution_df = v_execution_df.filter(F.col("InternalFacilityId") == filter_facility_id)
    v_execution_df = v_execution_df.select(*v_selected_columns)

    # v_server_view_df is already restricted to the product and DataSourceId 47,
    # so only the ProcessLevel predicate is needed here.
    v_base_rows_df = (
        v_server_view_df
        .where(F.col("ProcessLevel") == "Product")
        .select(*v_selected_columns)
    )

    # Both sides select the same columns in the same order.
    v_execution_df = v_base_rows_df.union(v_execution_df)

    v_parameters_list = []
    for v_combination_row in v_execution_df.collect():
        v_current_client_id = v_combination_row["InternalClientId"]
        v_current_facility_id = v_combination_row["InternalFacilityId"]
        v_current_database = v_combination_row["SourceDatabaseName1"]
        try:
            # v_table_view_df is already filtered by product ID.
            v_historical_flag = (
                v_table_view_df.filter(
                    (F.col("InternalClientId") == v_current_client_id) &
                    (F.col("InternalFacilityId") == v_current_facility_id) &
                    (F.col("SourceDatabaseName1") == v_current_database)
                )
                .agg(F.max(F.col("IsHistorical")).alias("historical_aggregate"))
                .collect()[0]["historical_aggregate"]
            )
        except Exception as e:
            # Best-effort lookup: keep the None fallback, but report the error
            # instead of discarding it silently.
            print(
                f"Historical flag lookup failed for client {v_current_client_id}, "
                f"facility {v_current_facility_id}, database {v_current_database}: {e}"
            )
            v_historical_flag = None

        v_parameters_list.append({
            "p_product_id": v_filter_product_id,
            "p_client_id": v_current_client_id,
            "p_facility_id": v_current_facility_id,
            "p_historical_flag": v_historical_flag,
            "p_environment_name": v_environment_name,
            "p_instance_database_name": v_current_database
        })

    v_results_list = []
    with ThreadPoolExecutor() as executor:
        # 3600 is the timeout argument of dbutils.notebook.run.
        v_futures_list = [
            executor.submit(dbutils.notebook.run, v_child_notebook_path, 3600, p)
            for p in v_parameters_list
        ]
        for future in as_completed(v_futures_list):
            try:
                v_results_list.append(future.result())
            except Exception as e:
                print(f"Notebook failed: {e}")
                raise
    return v_results_list

In [0]:
# Driver cell: read the widgets, load the configuration views, dispatch the runs.

# Create the widgets before reading them so the notebook also works on a fresh
# context (Restart & Run All), where no widget has been defined yet and
# dbutils.widgets.get can fail for an undefined widget.
init_widgets()

widget_values = get_widget_values()
v_environment = widget_values["v_environment_name"]
v_dataframes_dict = read_configuration_tables(v_environment)

# v_environment and v_process_instance are kept at notebook level because
# functions defined in earlier cells may read them as globals.
# An empty Instance Name widget means "no instance filter".
v_process_instance = widget_values["v_filter_instance_name"]
if v_process_instance == '':
    v_process_instance = None

v_child_notebook_path = "./ImplementationScriptHelper"
execute_combinations(
    v_dataframes_dict,
    widget_values,
    v_child_notebook_path
)