# In [0]:
import dlt
from pyspark.sql.functions import (
    col, to_date, current_timestamp,
    lpad, substring, when, year, month, date_format, quarter, lit
)
from pyspark.sql.types import IntegerType, DoubleType, LongType

# ---------- HARD RULES (drop if fail) ----------
# Maps expectation name -> SQL predicate. crime_silver passes this dict to
# @dlt.expect_all_or_drop, so a row must satisfy every predicate to be kept.
crime_rules_drop = {
    "valid_dr_no": "DR_NO IS NOT NULL",        # DR_NO must be present
    "valid_date_occ": "DATE_OCC IS NOT NULL",  # DATE_OCC must be present
    "valid_crime_code": "CRM_CD IS NOT NULL",  # CRM_CD must be present
    "valid_status": "STATUS IS NOT NULL",      # STATUS must be present
}

# ---------- SOFT RULES (keep row, just flag) ----------
# Maps expectation name -> SQL predicate. crime_silver passes this dict to
# @dlt.expect_all, so violations are recorded but the row is kept.
crime_rules_warn = {
    # The cleaned victim age must be NULL or fall within 0-100.
    "reasonable_age": (
        "VICT_AGE_CLEAN IS NULL OR "
        "(VICT_AGE_CLEAN >= 0 AND VICT_AGE_CLEAN <= 100)"
    ),
}

def _with_clean_victim_age(df):
    """Return *df* with a VICT_AGE_CLEAN integer column added.

    VICT_AGE is cast to an integer and kept only when it lies in 0-100;
    anything else (negative, >100, or NULL) is treated as unknown -> NULL.
    """
    age = col("VICT_AGE").cast(IntegerType())
    return df.withColumn(
        "VICT_AGE_CLEAN",
        when((age >= 0) & (age <= 100), age)
        # Typed NULL, matching how invalid LAT/LON are nulled out.
        .otherwise(lit(None).cast(IntegerType())),
    )


def _with_clean_coordinates(df):
    """Return *df* with LAT/LON cast to double and an is_location_valid flag.

    A location is valid when both coordinates are non-NULL and the pair is
    not (0, 0). Invalid coordinates are replaced with NULL so they don't show
    up at (0, 0) on maps.
    """
    lat = col("LAT").cast(DoubleType())
    lon = col("LON").cast(DoubleType())
    null_double = lit(None).cast(DoubleType())

    return (
        df
        # Materialize the flag first; the LAT/LON rewrites below read it, so
        # overwriting LAT cannot influence how LON is evaluated.
        .withColumn(
            "is_location_valid",
            lat.isNotNull() & lon.isNotNull() & ~((lat == 0) & (lon == 0)),
        )
        .withColumn(
            "LAT",
            when(col("is_location_valid"), lat).otherwise(null_double),
        )
        .withColumn(
            "LON",
            when(col("is_location_valid"), lon).otherwise(null_double),
        )
    )


@dlt.table(
    comment="Silver table: cleaned & standardized Los Angeles crime data",
    table_properties={"quality": "silver"}
)
@dlt.expect_all_or_drop(crime_rules_drop)
@dlt.expect_all(crime_rules_warn)
def crime_silver():
    """Build the silver crime table from the streaming ``crime_bronze`` table.

    Steps:
      * cast DATE_OCC / DATE_RPTD to DATE,
      * derive VICT_AGE_CLEAN (integer age in 0-100, otherwise NULL),
      * cast LAT / LON to double, flag valid locations, NULL invalid ones,
      * stamp each row with silver_load_dt,
      * project the final column set (DR_NO cast to long).

    Rows failing any rule in ``crime_rules_drop`` are dropped; rules in
    ``crime_rules_warn`` are only recorded.

    Returns:
        The transformed streaming DataFrame.
    """
    # NOTE(review): the "reasonable_age" warn rule checks VICT_AGE_CLEAN,
    # which is already restricted to NULL or 0-100 below, so it can never
    # fire. Confirm whether it was meant to flag the raw age instead.
    df = dlt.read_stream("crime_bronze")

    # --- dates to proper DATE type (if coming as string) ---
    # NOTE(review): a plain cast only understands strings Spark can parse as
    # a date by default; anything that ends up NULL here is then dropped by
    # the valid_date_occ rule. Confirm the bronze date format.
    df = (
        df
        .withColumn("DATE_OCC", col("DATE_OCC").cast("date"))
        .withColumn("DATE_RPTD", col("DATE_RPTD").cast("date"))
    )

    # The zero-padded TIME_OCC_STR helper column that used to be derived here
    # was never part of the final column set, so it is no longer computed.
    # TIME_OCC itself is passed through unchanged.

    # --- clean victim age ---
    df = _with_clean_victim_age(df)

    # --- cast lat/lon and null out invalid coordinates ---
    df = _with_clean_coordinates(df)

    # --- adding silver load date ---
    df = df.withColumn("silver_load_dt", current_timestamp())

    # --- final column set ---
    return df.select(
        col("DR_NO").cast(LongType()).alias("DR_NO"),
        "DATE_OCC",
        "DATE_RPTD",
        "TIME_OCC",
        "AREA",
        "AREA_NAME",
        "CRM_CD",
        "CRM_CD_DESC",
        "CRM_CD_1",
        "CRM_CD_2",
        "CRM_CD_3",
        "CRM_CD_4",
        "VICT_AGE_CLEAN",
        "VICT_SEX",
        "VICT_DESCENT",
        "PREMIS_CD",
        "PREMIS_DESC",
        "WEAPON_USED_CD",
        "WEAPON_DESC",
        "STATUS",
        "STATUS_DESC",
        "LOCATION",
        "CROSS_STREET",
        "LAT",
        "LON",
        "is_location_valid",
        "load_dt",
        "source_file_path",
        "source_file_name",
        "silver_load_dt",
    )
