In [0]:
import dlt
import pyspark.sql.functions as f

In [0]:
@dlt.table(name="Employee_Bronze")
def bronze():
    """Load the raw employee CSV into the ``Employee_Bronze`` table.

    The first row of the file is treated as the header and column types are
    inferred from the data rather than declared explicitly.

    Returns:
        A DataFrame holding the contents of the CSV file as-is.
    """
    source_path = "/Volumes/testing/default/employee/emp.csv"
    reader = spark.read.options(header=True, inferSchema=True)
    return reader.csv(source_path)

In [0]:
@dlt.table(name="Employee_Silver")
def silver():
    """Clean ``Employee_Bronze`` into the ``Employee_Silver`` table.

    Steps, in order:
      1. Drop rows whose ``First_Name`` is null.
      2. Keep only rows with fewer than two nulls across ``Gender``,
         ``Senior_Management`` and ``Team``.
      3. Replace the remaining nulls in those three columns with ``'Unknown'``.

    Returns:
        The cleaned DataFrame.
    """
    sparse_columns = ['Gender', 'Senior_Management', 'Team']

    # Per-row count of nulls among the sparse columns (one 0/1 flag per column).
    missing_flags = [
        f.when(f.col(name).isNull(), 1).otherwise(0) for name in sparse_columns
    ]
    missing_total = sum(missing_flags)

    employees = dlt.read("Employee_Bronze")
    with_first_name = employees.na.drop(subset=["First_Name"])
    mostly_complete = with_first_name.where(missing_total < 2)

    # NOTE(review): if schema inference types Senior_Management as boolean, a
    # string replacement may not be applied to it -- confirm the column type.
    return mostly_complete.fillna(
        {'Team': 'Unknown', 'Gender': 'Unknown', 'Senior_Management': 'Unknown'}
    )

In [0]:
@dlt.table(name="Employee_Gold")
def gold():
    """Aggregate ``Employee_Silver`` per team into the ``Employee_Gold`` table.

    For every ``Team`` the result carries the rounded average, maximum,
    minimum and total of ``Salary`` plus the number of rows in the team.
    Rows are sorted by ``Total_Salary`` in descending order.

    Returns:
        A DataFrame with one row per team.
    """
    salary_metrics = [
        f.round(f.avg("Salary")).alias("Avg_Salary"),
        f.max("Salary").alias("Max_Salary"),
        f.min("Salary").alias("Min_Salary"),
        f.count("*").alias("Total_Employees"),
        f.sum("Salary").alias("Total_Salary"),
    ]
    per_team = dlt.read("Employee_Silver").groupBy("Team").agg(*salary_metrics)

    # NOTE(review): row order may not be preserved once the table is stored;
    # confirm whether this sort is actually needed here.
    return per_team.sort(f.desc("Total_Salary"))