In [0]:
%sql
-- Switch the session to catalog_cp so the two-part name below resolves
-- to catalog_cp.silver.monitoring.
USE CATALOG catalog_cp;

-- Delta table that stores the results of the data-quality checks.
-- The statement is a no-op when the table already exists.
-- Columns, as populated by the profiling code in this notebook:
--   table_name : name of the silver table that was checked
--   metric     : "row_count" or "null_count_<column>"
--   value      : the measured count for that metric
--   check_date : timestamp captured once per monitoring run
--   status     : "OK" or "WARN"
CREATE TABLE IF NOT EXISTS silver.monitoring (
    table_name STRING,
    metric STRING,
    value BIGINT,
    check_date TIMESTAMP,
    status STRING
) USING DELTA;


In [0]:
from datetime import datetime

from pyspark.sql import Row
from pyspark.sql.functions import col, count, lit, when

# Silver-layer tables (read from catalog_cp.silver) to profile on each run.
tables = [
    "region", "nation", "part", "supplier", "partsupp", "customer", "orders", "lineitem"
]

# Accumulates one Row per recorded metric across all tables.
monitoring_rows = []

# Captured once so that every record produced by this run shares one timestamp.
check_date = datetime.now()

for table in tables:
    df = spark.table(f"catalog_cp.silver.{table}")

    # Compute the row count and every per-column NULL count in a single
    # aggregation job instead of one full scan per column.
    # count(when(cond, 1)) only counts rows where `cond` holds and yields 0
    # (never NULL) for an empty table. Positional aliases keep the result
    # column names independent of the real column names.
    null_aliases = {c: f"__null_count_{i}" for i, c in enumerate(df.columns)}
    stats = df.agg(
        count(lit(1)).alias("__row_count"),
        *[count(when(col(c).isNull(), 1)).alias(a) for c, a in null_aliases.items()]
    ).first()

    row_count = stats["__row_count"]
    null_counts = {c: stats[a] for c, a in null_aliases.items()}
    columns_with_nulls = [k for k, v in null_counts.items() if v > 0]

    # The table is flagged as soon as any of its columns contains a NULL.
    status = "OK" if not columns_with_nulls else "WARN"

    # Always record the table-level row count, tagged with the overall status.
    monitoring_rows.append(Row(
        table_name=table,
        metric="row_count",
        value=row_count,
        check_date=check_date,
        status=status
    ))

    # One additional record per column that actually contains NULLs.
    for col_name in columns_with_nulls:
        monitoring_rows.append(Row(
            table_name=table,
            metric=f"null_count_{col_name}",
            value=null_counts[col_name],
            check_date=check_date,
            status="WARN"
        ))

In [0]:
from pyspark.sql.types import StructType, StructField, StringType, LongType, TimestampType
# Explicit schema for the collected monitoring records; `value` is the only
# nullable field.
schema = (
    StructType()
    .add("table_name", StringType(), nullable=False)
    .add("metric", StringType(), nullable=False)
    .add("value", LongType(), nullable=True)
    .add("check_date", TimestampType(), nullable=False)
    .add("status", StringType(), nullable=False)
)

# Materialise the accumulated Row objects as a DataFrame with that schema.
monitoring_df = spark.createDataFrame(monitoring_rows, schema=schema)

# Append this run's records to the Delta monitoring table.
(
    monitoring_df.write
    .format("delta")
    .mode("append")
    .saveAsTable("catalog_cp.silver.monitoring")
)