In [0]:
%sql
-- to_col_json_str: serialise one column's metadata as a JSON object string.
--
-- Parameters (all STRING):
--   col_name    -> emitted under key "name"
--   data_type   -> emitted under key "dataType"
--   sensitive   -> emitted under key "sensitive"
--   description -> emitted under key "description"
-- A "values" key is always emitted with a JSON null.
--
-- Returns: STRING containing the JSON object.
--
-- TO_JSON omits null-valued fields by default, which silently dropped the
-- explicit "values" key (and the key of any NULL argument). Passing
-- ignoreNullFields=false keeps every key in the output, so the object always
-- has the same five keys.
--
-- NOTE(review): sensitive is declared STRING, so it is serialised as a quoted
-- JSON string (e.g. "true") rather than a JSON boolean - confirm that this is
-- what consumers expect.
CREATE OR REPLACE TEMPORARY FUNCTION to_col_json_str(
  col_name STRING, data_type STRING, sensitive STRING, description STRING
)
RETURNS STRING
RETURN TO_JSON(
  named_struct(
    'name', col_name,
    'values', null,
    'dataType', data_type,
    'sensitive', sensitive,
    'description', description),
  map('ignoreNullFields', 'false'))

In [0]:
from pyspark.sql import functions as F

# Catalog and schema that the final column listing is restricted to.
CATALOG = "4_prod"
SCHEMA = "pacs"

# Column-level tags from the system information schema.
col_tags = spark.table("system.information_schema.column_tags")

# Split out the two tags of interest; tag names are matched case-insensitively.
tag_name_lower = F.lower(F.col("tag_name"))
ig_risk = col_tags.where(tag_name_lower == "ig_risk")
ig_severity = col_tags.where(tag_name_lower == "ig_severity")

# Pair each column's ig_risk tag with its ig_severity tag and derive `sensitive`.
# The join is inner, so a column carrying only one of the two tags is dropped here.
tag_keys = ("catalog_name", "schema_name", "table_name", "column_name")
same_column = [F.col(f"r.{key}") == F.col(f"s.{key}") for key in tag_keys]

risk_value = F.col("r.tag_value")
severity_value = F.col("s.tag_value")

# Branches are evaluated top-down and the first match wins:
#   1. column named ADC_UPDT (any letter case)      -> False
#   2. either tag value is NULL                     -> NULL
#   3. ig_risk (cast to int) is 3 or more           -> True
#   4. ig_severity (cast to int) is 2 or more       -> True
#   5. anything else                                -> False
sensitive_flag = (
    F.when(F.upper(F.col("r.column_name")) == "ADC_UPDT", F.lit(False))
    .when(risk_value.isNull() | severity_value.isNull(), F.lit(None))
    .when(risk_value.cast("int") >= 3, F.lit(True))
    .when(severity_value.cast("int") >= 2, F.lit(True))
    .otherwise(F.lit(False))
)

ig_sensitive = (
    ig_risk.alias("r")
    .join(ig_severity.alias("s"), on=same_column, how="inner")
    .select(
        *(F.col(f"r.{key}") for key in tag_keys),
        risk_value.alias("ig_risk"),
        severity_value.alias("ig_severity"),
        sensitive_flag.alias("sensitive"),
    )
)

# Column metadata from the system information schema.
columns = spark.table("system.information_schema.columns")

# (columns field, ig_sensitive field) pairs that identify the same column.
join_pairs = [
    ("table_catalog", "catalog_name"),
    ("table_schema", "schema_name"),
    ("table_name", "table_name"),
    ("column_name", "column_name"),
]
matches_tag_row = [
    F.col(f"c.{left}") == F.col(f"s.{right}") for left, right in join_pairs
]

# Keep only the columns that live in the configured catalog and schema.
in_target_schema = (F.col("c.table_catalog") == CATALOG) & (
    F.col("c.table_schema") == SCHEMA
)
# To restrict the listing to table names matching 'omop_%', uncomment the next
# line (note that `_` is a single-character wildcard in LIKE patterns):
# in_target_schema = in_target_schema & F.col("c.table_name").like("omop_%")

# Left join: every column in scope is kept; the tag-derived fields are NULL
# for columns without a matching ig_sensitive row.
result = (
    columns.alias("c")
    .join(ig_sensitive.alias("s"), on=matches_tag_row, how="left")
    .where(in_target_schema)
    .select(
        F.col("c.table_catalog"),
        F.col("c.table_schema"),
        F.col("c.table_name"),
        F.col("c.column_name").alias("name"),
        F.col("c.ordinal_position"),
        F.col("c.data_type").alias("dataType"),
        F.col("s.ig_risk"),
        F.col("s.ig_severity"),
        F.col("s.sensitive"),
        F.col("c.comment").alias("description"),
        # Placeholder columns: always NULL in this query.
        F.lit(None).alias("json_str"),
        F.lit(None).alias("json_struct"),
    )
)

# Preview the result in the notebook; nothing is written to a table here.
display(result)