# Initialization

In [0]:
import pyspark.sql.functions as F
from pyspark.sql.types import StringType, DateType
from pyspark.sql.functions import trim, col, lit

# Read from Bronze Table

In [0]:
# Source table for this notebook: the raw Spotify albums in the bronze layer.
BRONZE_ALBUMS_TABLE = "workspace.bronze.spotify_albums_raw"

# Load the bronze table into `df` and preview it before any transformation.
df = spark.read.table(BRONZE_ALBUMS_TABLE)
display(df)

# Silver Transformations

### Trim Whitespace from String Columns

In [0]:
# Collect the names of every string-typed column up front, then strip
# leading/trailing whitespace from each one in place (same column name).
string_column_names = [
    schema_field.name
    for schema_field in df.schema.fields
    if isinstance(schema_field.dataType, StringType)
]

for column_name in string_column_names:
    df = df.withColumn(column_name, trim(col(column_name)))

### Remove Unnecessary Columns

In [0]:
# Columns that are not carried forward into the silver table.
unused_columns = ("href", "external_urls", "external_ids", "uri")
df = df.drop(*unused_columns)

### Keep Only Album Rows and Drop the Type Column

In [0]:
# Keep only rows whose `type` is exactly "album" (rows with a NULL `type` do
# not satisfy the comparison and are dropped too). After filtering, the column
# holds a single constant value, so it is removed.
is_album = col("type") == "album"
df = df.where(is_album).drop("type")

### Convert Date Columns to the right type

In [0]:
# `release_date` is parsed with a pattern chosen by the length of the string:
#   4 chars  -> "yyyy"
#   7 chars  -> "yyyy-MM"
#   10 chars -> "yyyy-MM-dd"
# Any other length (or a NULL value) yields NULL.
release_date_raw = F.col("release_date")
release_date_length = F.length(release_date_raw)

release_date_parsed = (
    F.when(release_date_length == 4, F.to_timestamp(release_date_raw, "yyyy"))
    .when(release_date_length == 7, F.to_timestamp(release_date_raw, "yyyy-MM"))
    .when(release_date_length == 10, F.to_timestamp(release_date_raw, "yyyy-MM-dd"))
    .otherwise(F.lit(None))
)

# `processed_at` is parsed with a fixed pattern; the trailing 'Z' is matched as
# a literal character by this pattern, not interpreted as a zone offset.
processed_at_parsed = F.to_timestamp(
    F.col("processed_at"), "yyyy-MM-dd'T'HH:mm:ss'Z'"
)

df = df.withColumns({
    "release_date": release_date_parsed,
    "processed_at": processed_at_parsed,
})

### Handle NULL Values

In [0]:
# Replace NULLs with explicit defaults so downstream consumers do not need
# their own NULL handling: "n/a" for descriptive text columns and 0 for the
# numeric counters.
#
# Every key must match the existing column name exactly. A key that differs
# only in letter case either renames the column (default case-insensitive
# resolution) or adds a second column while leaving the original's NULLs in
# place (spark.sql.caseSensitive=true).
#
# `available_markets` is not given a default here; NULLs in that column, if
# present, are left as-is.
df = df.withColumns({
    "name": F.coalesce(col("name"), lit("n/a")),
    "album_type": F.coalesce(col("album_type"), lit("n/a")),
    "release_date_precision": F.coalesce(col("release_date_precision"), lit("n/a")),
    "total_tracks": F.coalesce(col("total_tracks"), lit(0)),
    "label": F.coalesce(col("label"), lit("n/a")),
    "popularity": F.coalesce(col("popularity"), lit(0)),
})

## Check Dataframe

In [0]:
# Preview the fully transformed DataFrame before it is written out.
display(df)

## Save Silver Table

In [0]:
# Destination table in the silver layer.
SILVER_ALBUMS_TABLE = "workspace.silver.spotify_albums"

# Append the transformed rows to the silver Delta table.
# NOTE(review): append mode adds rows on every run, so re-running this cell
# writes the same rows again -- confirm this is intended for this pipeline.
df.write.format("delta").mode("append").saveAsTable(SILVER_ALBUMS_TABLE)