In [0]:
from pyspark.sql import SparkSession
from pyspark.sql.functions import col
from pyspark.sql.types import DoubleType, DateType

spark = SparkSession.builder.getOrCreate()

# Step 1: Read the Bronze vendor_compliance Delta table from its mount path.
df_raw = spark.read.load("/mnt/delta/bronze/vendor_compliance", format="delta")

# Step 2: Basic cleaning and type fixes.
# The two cast expressions are built up front and applied one column at a
# time; each withColumn replaces the existing column of the same name.
score_as_double = col("compliance_score").cast(DoubleType())
audit_as_date = col("last_audit_date").cast(DateType())

df_typed = df_raw.withColumn("compliance_score", score_as_double)
df_typed = df_typed.withColumn("last_audit_date", audit_as_date)

# Rows with a null in any of the required columns are discarded. This runs
# after the casts, so it evaluates the cast compliance_score, not the raw one.
# NOTE(review): under Spark's non-ANSI cast semantics an unparseable score
# becomes null and is therefore dropped here as well; confirm that is intended.
df_clean = df_typed.na.drop(
    subset=["vendor_id", "compliance_score", "status"]
)

# Optional: Add validation flag if needed
# df_clean = df_clean.withColumn("valid", when(...))

# Step 3: Persist the cleaned DataFrame to the Silver layer as a Delta table.
# Each run overwrites the data already stored at the target path, with Delta's
# "mergeSchema" write option switched on.
(
    df_clean.write
    .option("mergeSchema", "true")
    .mode("overwrite")
    .format("delta")
    .save("/mnt/delta/silver/vendor_compliance_clean")
)

# Register for SQL use (optional)
# The registration is idempotent: the table is created only when the name is
# not yet in the catalog. The entry is an external table that points at the
# Silver path, and a Delta table keeps its schema and data in the transaction
# log at that path, so an existing entry does not need to be dropped and
# recreated after each overwrite. Dropping it first would leave a window in
# which the table name does not resolve for concurrent readers, and would
# discard metadata attached to the catalog entry on every run.
# NOTE(review): if vendor_compliance_clean is already registered at a
# different location, this statement leaves that entry untouched; drop it
# manually once in that case.
spark.sql("""
CREATE TABLE IF NOT EXISTS vendor_compliance_clean
USING DELTA
LOCATION '/mnt/delta/silver/vendor_compliance_clean'
""")
