In [0]:
from pyspark.sql.functions import *

In [0]:
# Load the Bronze-layer football matches table as the starting DataFrame
df_silver = spark.table("football_matches_catalog.bronze.football_matches")

In [0]:
# Render the DataFrame as loaded from the Bronze table, before any transformation
display(df_silver)

In [0]:
# Project the columns kept for the Silver table:
#   - Match_ID and Season are cast to int
#   - Date is parsed with the yyyy-MM-dd pattern
#   - FTHG / FTAG are cast to int and renamed HomeTeamGoals / AwayTeamGoals
#   - FTR is renamed FinalResult
#   - Div, HomeTeam and AwayTeam pass through unchanged
df_silver = df_silver.select(
    [
        col("Match_ID").cast("int").alias("Match_ID"),
        "Div",
        col("Season").cast("int").alias("Season"),
        to_date("Date", "yyyy-MM-dd").alias("Date"),
        "HomeTeam",
        "AwayTeam",
        col("FTHG").cast("int").alias("HomeTeamGoals"),
        col("FTAG").cast("int").alias("AwayTeamGoals"),
        col("FTR").alias("FinalResult"),
    ]
)

In [0]:
# Keep only German Bundesliga fixtures, i.e. rows whose Div equals "D1"
df_silver = df_silver.where(df_silver["Div"] == "D1")

In [0]:
# Render the transformed, D1-only DataFrame for a visual check before it is written
display(df_silver)

In [0]:
# Persist the result as a Delta table in the Silver layer, partitioned by
# Season. The write uses overwrite mode with the overwriteSchema option
# enabled.
(
    df_silver.write
    .format("delta")
    .mode("overwrite")
    .option("overwriteSchema", "true")
    .partitionBy("Season")
    .saveAsTable("football_matches_catalog.silver.football_matches")
)