In [0]:
# SILVER LAYER: clean, standardize, validate
from pyspark.sql import functions as F

# Delta locations: Bronze is the input of this cell, Silver is its output
bronze_path = "/Volumes/workspace/damg7370/mm_raw/bronze/material_master"
silver_path = "/Volumes/workspace/damg7370/mm_raw/silver/material_master"

# Load the Bronze Delta table that the cleaning steps below start from
bronze_reader = spark.read.format("delta")
bronze_df = bronze_reader.load(bronze_path)

# --- Cleaning transformations ---
def _clean_column(name, dtype):
    """Return the select-expression for one Bronze column.

    String columns have every double-quote character removed and are then
    trimmed of surrounding whitespace; the result keeps the original column
    name. Columns of any other type are passed through untouched.
    """
    if dtype == 'string':
        return F.trim(F.regexp_replace(name, '"', '')).alias(name)
    return F.col(name)


# Stage 1: apply the per-column cleaning to every Bronze column
trimmed_df = bronze_df.select(
    *[_clean_column(name, dtype) for name, dtype in bronze_df.dtypes]
)

# Stage 2: parse last_updated into a DATE using the yyyy-MM-dd pattern
dated_df = trimmed_df.withColumn(
    "last_updated", F.to_date("last_updated", "yyyy-MM-dd")
)

# Stage 3: flag rows whose unit_cost is non-negative, keep only the rows
# where that flag is true, then drop the helper flag column again
flagged_df = dated_df.withColumn("valid_row", F.col("unit_cost") >= 0)
valid_df = flagged_df.filter(F.col("valid_row")).drop("valid_row")

# Stage 4: stamp each surviving row with the Silver load timestamp
silver_df = valid_df.withColumn("silver_load_ts", F.current_timestamp())

# --- Write to Silver Delta ---
# Replace whatever is currently stored at silver_path with the cleaned frame
silver_writer = silver_df.write.format("delta").mode("overwrite")
silver_writer.save(silver_path)

# --- Verify ---
# Read the table back from storage (not from silver_df) and show 10 rows
silver_preview = spark.read.format("delta").load(silver_path).limit(10)
display(silver_preview)


material_id,material_name,category,sub_category,uom,unit_cost,supplier_name,country,plant,status,last_updated,lead_time_days,safety_stock,reorder_level,remarks,bronze_ingest_ts,source_file,silver_load_ts
M00001,Metal Chair,Raw Material,Metal,L,70.63,Rodriguez,,,,,,,,,2025-11-10T23:45:26.043Z,dbfs:/Volumes/workspace/damg7370/mm_raw/material_master/from_table/part-00000-tid-2760754592554938358-fa59a57a-ab9a-4edd-8b1d-17914a4c5037-127-1-c000.csv,2025-11-10T23:47:33.033Z
M00002,Plastic All,Raw Material,Plastic,NOS,14.24,Garza Inc,Germany,PlantD,Active,2025-08-21,16.0,653.0,167.0,Discussion own night.,2025-11-10T23:45:26.043Z,dbfs:/Volumes/workspace/damg7370/mm_raw/material_master/from_table/part-00000-tid-2760754592554938358-fa59a57a-ab9a-4edd-8b1d-17914a4c5037-127-1-c000.csv,2025-11-10T23:47:33.033Z
M00003,Plastic Perform,Raw Material,Plastic,MTR,78.58,Santos,,,,,,,,,2025-11-10T23:45:26.043Z,dbfs:/Volumes/workspace/damg7370/mm_raw/material_master/from_table/part-00000-tid-2760754592554938358-fa59a57a-ab9a-4edd-8b1d-17914a4c5037-127-1-c000.csv,2025-11-10T23:47:33.033Z
M00004,Paint Source,Consumable,Paint,KG,268.58,Peterson-Moore,Japan,PlantA,Obsolete,2025-09-13,11.0,899.0,346.0,Behavior benefit.,2025-11-10T23:45:26.043Z,dbfs:/Volumes/workspace/damg7370/mm_raw/material_master/from_table/part-00000-tid-2760754592554938358-fa59a57a-ab9a-4edd-8b1d-17914a4c5037-127-1-c000.csv,2025-11-10T23:47:33.033Z
M00005,Coolant Show,Consumable,Coolant,KG,114.72,Munoz-Roman,India,PlantA,Active,2025-09-25,5.0,439.0,167.0,By two bad fall pick.,2025-11-10T23:45:26.043Z,dbfs:/Volumes/workspace/damg7370/mm_raw/material_master/from_table/part-00000-tid-2760754592554938358-fa59a57a-ab9a-4edd-8b1d-17914a4c5037-127-1-c000.csv,2025-11-10T23:47:33.033Z
M00006,Tape Answer,Packaging,Tape,NOS,105.54,Ray-Bush,India,PlantA,Obsolete,2025-10-15,7.0,596.0,398.0,Significant world talk.,2025-11-10T23:45:26.043Z,dbfs:/Volumes/workspace/damg7370/mm_raw/material_master/from_table/part-00000-tid-2760754592554938358-fa59a57a-ab9a-4edd-8b1d-17914a4c5037-127-1-c000.csv,2025-11-10T23:47:33.033Z
M00007,Brake Prove,Component,Brake,MTR,494.77,James Group,Brazil,PlantB,Obsolete,2025-10-31,12.0,913.0,418.0,Class great prove reduce.,2025-11-10T23:45:26.043Z,dbfs:/Volumes/workspace/damg7370/mm_raw/material_master/from_table/part-00000-tid-2760754592554938358-fa59a57a-ab9a-4edd-8b1d-17914a4c5037-127-1-c000.csv,2025-11-10T23:47:33.033Z
M00008,Plastic Shoulder,Raw Material,Plastic,KG,201.18,Wong,,,,,,,,,2025-11-10T23:45:26.043Z,dbfs:/Volumes/workspace/damg7370/mm_raw/material_master/from_table/part-00000-tid-2760754592554938358-fa59a57a-ab9a-4edd-8b1d-17914a4c5037-127-1-c000.csv,2025-11-10T23:47:33.033Z
M00009,Wrap Stand,Packaging,Wrap,MTR,70.68,Hawkins,,,,,,,,,2025-11-10T23:45:26.043Z,dbfs:/Volumes/workspace/damg7370/mm_raw/material_master/from_table/part-00000-tid-2760754592554938358-fa59a57a-ab9a-4edd-8b1d-17914a4c5037-127-1-c000.csv,2025-11-10T23:47:33.033Z
M00010,Tape Under,Packaging,Tape,NOS,70.03,Clark PLC,Japan,PlantA,Active,2025-10-20,5.0,206.0,346.0,Defense material those poor.,2025-11-10T23:45:26.043Z,dbfs:/Volumes/workspace/damg7370/mm_raw/material_master/from_table/part-00000-tid-2760754592554938358-fa59a57a-ab9a-4edd-8b1d-17914a4c5037-127-1-c000.csv,2025-11-10T23:47:33.033Z


In [0]:
%sql
-- Summary of the Silver table: row count, unit_cost range, and the number
-- of distinct category values.
WITH silver AS (
  SELECT unit_cost, category
  FROM delta.`/Volumes/workspace/damg7370/mm_raw/silver/material_master`
)
SELECT
  COUNT(*)                 AS silver_rows,
  MIN(unit_cost)           AS min_cost,
  MAX(unit_cost)           AS max_cost,
  COUNT(DISTINCT category) AS distinct_categories
FROM silver;


silver_rows,min_cost,max_cost,distinct_categories
976,1.84,499.66,4


In [0]:
# Show the Databricks user running this notebook and the current timestamp
whoami_query = "SELECT current_user() AS databricks_user, current_timestamp() AS now"
whoami_df = spark.sql(whoami_query)
display(whoami_df)


databricks_user,now
cyberteen0@gmail.com,2025-11-10T23:49:22.664Z


In [0]:
%sql
-- Verification record: a fixed full name alongside the session's
-- Databricks user and the time the query ran.
SELECT
  'Arjun Loya'        AS full_name,
  current_user()      AS databricks_user,
  current_timestamp() AS verification_time;


full_name,databricks_user,verification_time
Arjun Loya,cyberteen0@gmail.com,2025-11-10T23:50:47.187Z
