# Startup: imports

In [0]:
import pyspark.sql.functions as F
from pyspark.sql.types import StringType
from pyspark.sql.functions import trim, col

## Read the table

In [0]:
# Load the bronze source table into a Spark DataFrame.
BRONZE_TABLE = "dev_project.bronze.erp_px_cat_g1v2"
df = spark.table(BRONZE_TABLE)

## Transformation: trim whitespace from string columns

In [0]:
# Trim leading/trailing whitespace from every top-level string column.
# This is done in a single select() rather than a withColumn() loop: each
# withColumn() call adds its own projection, and the PySpark docs warn that
# calling it repeatedly in a loop can generate very large query plans.
# Column order and names are preserved; non-string columns pass through as-is.
df = df.select(
  [
    (
      trim(col(field.name)).alias(field.name)
      if isinstance(field.dataType, StringType)
      else col(field.name)
    )
    for field in df.schema.fields
  ]
)

## Normalize Maintenance Flag to Boolean

In [0]:
# Convert the textual MAINTENANCE flag into a boolean column:
#   "YES" -> True, "NO" -> False (compared after upper-casing, so case-insensitive);
#   any other value, including null, becomes null.
# NOTE(review): the mapping only recognises YES/NO text, so this cell is presumably
# meant to run once per fresh read of the table - confirm before re-running it.
maintenance_upper = F.upper(col("MAINTENANCE"))
maintenance_as_bool = (
    F.when(maintenance_upper == "YES", F.lit(True))
    .when(maintenance_upper == "NO", F.lit(False))
    .otherwise(None)
)
df = df.withColumn("MAINTENANCE", maintenance_as_bool)


In [0]:
# Preview the DataFrame after the MAINTENANCE normalization.
df.display()

## Rename columns and add ingest timestamp

In [0]:
# Source column name -> target column name.
RENAME_MAP = {
    "ID": "category_id",
    "CAT": "category",
    "SUBCAT": "subcategory",
    "MAINTENANCE": "maintenance_flag",
}

# Apply the renames one at a time, in the order they are listed in the mapping.
renamed_df = df
for source_name in RENAME_MAP:
    renamed_df = renamed_df.withColumnRenamed(source_name, RENAME_MAP[source_name])

# Add a column holding the current timestamp as the ingest time.
df = renamed_df.withColumn("_ingest_time", F.current_timestamp())

In [0]:
# Preview the DataFrame after renaming columns and adding _ingest_time.
df.display()

## Write into silver schema

In [0]:
# Save the transformed DataFrame as the silver table, using overwrite mode.
df.write.saveAsTable("dev_project.silver.erp_px_cat_g1v2", mode="overwrite")

In [0]:
%sql
-- Spot-check: show up to 10 rows from the silver table.
SELECT * FROM dev_project.silver.erp_px_cat_g1v2 LIMIT 10;