# Notebook 02 – Silver Layer (Transformação)

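Este notebook transforma os dados crus do Bronze em dados tratados no Silver,
corrigindo os tipos das colunas numéricas, convertendo valores vazios de `TotalCharges`
em nulos e padronizando o nome da coluna de identificação (`customerID` → `customer_id`).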


In [0]:
%sql
-- Make sure the `silver` schema exists before any table is written into it.
-- IF NOT EXISTS keeps this cell safe to re-run: it is a no-op when the schema is already there.
CREATE SCHEMA IF NOT EXISTS silver;

In [0]:
from pyspark.sql.functions import col, when, trim, lit

# Build the Silver DataFrame from the raw Bronze churn table:
#   * rename customerID -> customer_id
#   * cast SeniorCitizen / tenure to int and MonthlyCharges to double
#   * turn empty TotalCharges strings into NULL, otherwise cast to double
bronze_raw = spark.table("bronze.churn_raw")

# (column, target type) pairs that only need a straight cast.
simple_casts = [
    ("SeniorCitizen", "int"),
    ("tenure", "int"),
    ("MonthlyCharges", "double"),
]

df_silver = bronze_raw.withColumnRenamed("customerID", "customer_id")
for column_name, target_type in simple_casts:
    # Replacing a column under its own name keeps its position in the schema.
    df_silver = df_silver.withColumn(column_name, col(column_name).cast(target_type))

# TotalCharges: a value that is empty after trimming becomes NULL;
# every other value is cast to double.
total_charges = col("TotalCharges")
is_blank = trim(total_charges) == ""
df_silver = df_silver.withColumn(
    "TotalCharges",
    when(is_blank, lit(None)).otherwise(total_charges.cast("double")),
)

# Persist as a Delta table, replacing any previous contents of silver.churn_clean.
silver_writer = df_silver.write.mode("overwrite").format("delta")
silver_writer.saveAsTable("silver.churn_clean")

# Read the table back and show the first rows as a quick sanity check.
silver_preview = spark.table("silver.churn_clean").limit(5)
display(silver_preview)

customer_id,gender,SeniorCitizen,Partner,Dependents,tenure,PhoneService,MultipleLines,InternetService,OnlineSecurity,OnlineBackup,DeviceProtection,TechSupport,StreamingTV,StreamingMovies,Contract,PaperlessBilling,PaymentMethod,MonthlyCharges,TotalCharges,Churn
7590-VHVEG,Female,0,Yes,No,1,No,No phone service,DSL,No,Yes,No,No,No,No,Month-to-month,Yes,Electronic check,29.85,29.85,No
5575-GNVDE,Male,0,No,No,34,Yes,No,DSL,Yes,No,Yes,No,No,No,One year,No,Mailed check,56.95,1889.5,No
3668-QPYBK,Male,0,No,No,2,Yes,No,DSL,Yes,Yes,No,No,No,No,Month-to-month,Yes,Mailed check,53.85,108.15,Yes
7795-CFOCW,Male,0,No,No,45,No,No phone service,DSL,Yes,No,Yes,Yes,No,No,One year,No,Bank transfer (automatic),42.3,1840.75,No
9237-HQITU,Female,0,No,No,2,Yes,No,Fiber optic,No,No,No,No,No,No,Month-to-month,Yes,Electronic check,70.7,151.65,Yes
