In [0]:
# Declare the notebook's input widgets. Each one is a free-text widget whose
# default value is the empty string; declaration order matches the list below.
for widget_name in (
    "p_environment",
    "p_internal_product_id",
    "p_source_server_name",
    "p_source_database_name",
    "p_internal_client_id",
    "p_internal_facility_id",
):
    dbutils.widgets.text(widget_name, "")

In [0]:
%run ../../classhandlers/UtilityHandler

In [0]:
%run ../../classhandlers/ProcessConfigHandler

In [0]:
# Parameters handed to NotebookUtils.validate_parameters as the required set.
required_parameters = [
    "p_environment",
    "p_internal_product_id",
    "p_source_server_name",
]

# Turn the notebook's current bindings into a plain dict, then validate it.
current_bindings = dbutils.notebook.entry_point.getCurrentBindings()
run_parameters_dict = NotebookUtils.get_parameters_dict(current_bindings)

NotebookUtils.validate_parameters(run_parameters_dict, required_parameters)

# The product id is always converted to int.
run_parameters_dict["p_internal_product_id"] = int(run_parameters_dict["p_internal_product_id"])

# Client and facility ids are optional: a falsy value (e.g. the widget's empty
# default) becomes None, anything else is converted to int.
for optional_id_key in ("p_internal_client_id", "p_internal_facility_id"):
    raw_value = run_parameters_dict[optional_id_key]
    run_parameters_dict[optional_id_key] = int(raw_value) if raw_value else None

print(f"Run Parameters:\n{run_parameters_dict}")

In [0]:
import json
from pyspark.sql.functions import col, trim

In [0]:
# Fetch the table-list aggregate for this run. The client and facility ids may
# be None (they are optional run parameters).
process_config = ProcessConfigData(run_parameters_dict["p_environment"])

df_config = process_config.get_table_list_aggregate(
    run_parameters_dict["p_internal_product_id"],
    run_parameters_dict["p_source_server_name"],
    run_parameters_dict["p_source_database_name"],
    run_parameters_dict["p_internal_client_id"],
    run_parameters_dict["p_internal_facility_id"],
)

# A row still needs a pipeline when IngestionPipelineId is NULL or blank after
# trimming. The predicate itself is never NULL (a NULL id makes the isNull()
# side True), so its negation selects exactly the remaining rows, i.e. the
# rows whose id is non-NULL and non-blank.
pipeline_id_missing = col("IngestionPipelineId").isNull() | (trim(col("IngestionPipelineId")) == "")

# Lazy views of the two groups (defining them runs no Spark job). The filter is
# applied BEFORE the projection so the predicate column is still in scope,
# instead of relying on Spark to resolve a column that was already dropped.
df_new = df_config.filter(pipeline_id_missing).select("IngestionPipelineName")
df_existing = df_config.filter(~pipeline_id_missing).select("IngestionPipelineName")

# Materialise the config once, carrying the predicate as a boolean flag, so
# both lists come from the same snapshot and the config query is executed a
# single time rather than once per list.
config_rows = df_config.select(
    "IngestionPipelineName",
    pipeline_id_missing.alias("_pipeline_id_missing"),
).collect()

# Create Lists of Pipeline Names
new_pipeline_config_list = [
    row["IngestionPipelineName"] for row in config_rows if row["_pipeline_id_missing"]
]
existing_pipeline_config_list = [
    row["IngestionPipelineName"] for row in config_rows if not row["_pipeline_id_missing"]
]

print(f"New Pipeline Config List:\n{new_pipeline_config_list}","\n\n")
print(f"Existing Pipeline Config List:\n{existing_pipeline_config_list}")

# Register Lists and Counts for downstream Tasks.
# NOTE(review): Databricks documents a size limit on task values - confirm the
# name lists stay small enough for the largest expected configuration.
dbutils.jobs.taskValues.set(key = "new_pipeline_config_count", value = len(new_pipeline_config_list))
dbutils.jobs.taskValues.set(key = "existing_pipeline_config_count", value = len(existing_pipeline_config_list))
dbutils.jobs.taskValues.set(key = "new_pipeline_config_list", value = new_pipeline_config_list)
dbutils.jobs.taskValues.set(key = "existing_pipeline_config_list", value = existing_pipeline_config_list)