# Silver Cleansing

## Imports

In [0]:
from pyspark.sql import functions as F

In [0]:
# Load the source table `mtg.bronze.t2_cards_ingestion` as the DataFrame
# that the cleansing steps in this notebook start from.
df_silver = spark.read.table("mtg.bronze.t2_cards_ingestion")

## Null values

In [0]:
# Replace nulls in the power / toughness / loyalty columns with the literal
# string "not defined"; non-null values are kept as they are.
df_silver_not_null = df_silver
for _stat_column in ("power", "toughness", "loyalty"):
    df_silver_not_null = df_silver_not_null.withColumn(
        _stat_column,
        F.coalesce(F.col(_stat_column), F.lit("not defined")),
    )


## Colorless treatment

In [0]:
def _colorless_if_blank(column_name):
    """Return a Column equal to `column_name`, or "colorless" when it is null or ""."""
    value = F.col(column_name)
    is_blank = value.isNull() | (value == "")
    return F.when(is_blank, F.lit("colorless")).otherwise(value)


# Apply the same null/empty -> "colorless" rule to both color columns.
df_silver_colorless = df_silver_not_null.withColumn(
    "colors", _colorless_if_blank("colors")
).withColumn(
    "color_identity", _colorless_if_blank("color_identity")
)



## Mana cost treatment

In [0]:
# Remove every "{" and "}" character from mana_cost, keeping the symbols
# between them (e.g. "{2}{G}" becomes "2G").
_mana_cost_without_braces = F.regexp_replace(F.col("mana_cost"), r"[{}]", "")
df_silver_clean = df_silver_colorless.withColumn("mana_cost", _mana_cost_without_braces)


## Types treatment for better visualization in BI

In [0]:
from pyspark.sql import functions as F

# Build `type_cleaned` from `type_line` in three steps:
#   1. keep only the text before the first dash and normalise whitespace;
#   2. drop leading supertypes (legendary, basic, snow, world, ongoing);
#   3. fall back to "Unknown" when nothing is left.

# Matches one OR MORE leading supertypes, case-insensitively. The repetition
# matters: with a single-group pattern "Legendary Snow Creature" would only
# lose "Legendary" and end up as "Snow Creature" instead of "Creature".
_SUPERTYPE_PREFIX = r"(?i)^(?:(?:legendary|basic|snow|world|ongoing)\s+)+"

# Text before the first hyphen, en dash or em dash, with runs of whitespace
# collapsed to a single space and the ends trimmed.
_main_type = F.trim(
    F.regexp_replace(
        F.split(F.col("type_line"), r"[–—-]").getItem(0),
        r"\s+", " ",
    )
)

df_silver_clean = (
    df_silver_clean
    .withColumn("type_cleaned", _main_type)
    .withColumn(
        "type_cleaned",
        F.regexp_replace(F.col("type_cleaned"), _SUPERTYPE_PREFIX, ""),
    )
    .withColumn(
        "type_cleaned",
        # A null type_line propagates as null through the steps above, so
        # both null and blank results are mapped to the "Unknown" label.
        F.when(
            (F.col("type_cleaned").isNull()) | (F.trim(F.col("type_cleaned")) == ""),
            F.lit("Unknown"),
        ).otherwise(F.col("type_cleaned")),
    )
)

# Sanity check: count the rows whose type_cleaned is still null or blank,
# report that count, then render the DataFrame in the notebook.
_type_cleaned_is_blank = F.col("type_cleaned").isNull() | (
    F.trim(F.col("type_cleaned")) == ""
)
missing = df_silver_clean.where(_type_cleaned_is_blank).count()

print(f"Linhas sem type_cleaned depois do tratamento: {missing}")
df_silver_clean.display()


## Saving df in Unity Catalog

In [0]:
# Save the cleansed DataFrame as the Delta table `mtg.silver.t2_cards_clean`,
# using overwrite mode.
_silver_writer = df_silver_clean.write.format("delta").mode("overwrite")
_silver_writer.saveAsTable("mtg.silver.t2_cards_clean")

print(f"Table successfully written")
