In [1]:
from pyspark.sql import SparkSession
from pyspark.sql import functions as F

# 1️⃣ Obtain the Spark session. getOrCreate() returns the already-running
# session when one exists (the original note says Fabric usually provides it)
# and only builds a new one otherwise.
spark = (
    SparkSession
    .builder
    .getOrCreate()
)

StatementMeta(, 5735d86a-6de2-4f83-af29-904f102b1329, 3, Finished, Available, Finished)

In [2]:
# 2️⃣ Load the bronze table and normalise the identifier columns.
# Both IDs are cast to string in the same chain that reads the table, so
# re-running this cell always starts from a fresh read.
bronze_table = "nhl_game_shifts"  # name of the bronze source table
df_bronze = (
    spark.read.table(bronze_table)
    .withColumn("game_id", F.col("game_id").cast("string"))
    .withColumn("player_id", F.col("player_id").cast("string"))
)

StatementMeta(, 5735d86a-6de2-4f83-af29-904f102b1329, 4, Finished, Available, Finished)

In [3]:
# 3️⃣ Derive shift_duration_sec as shift_end minus shift_start.
# Both operands are cast to long before subtracting; the unit is assumed to be
# seconds based on the column name — confirm against the bronze schema.
df_with_duration = df_bronze.withColumn(
    "shift_duration_sec",
    F.col("shift_end").cast("long")
    - F.col("shift_start").cast("long"),
)

StatementMeta(, 5735d86a-6de2-4f83-af29-904f102b1329, 5, Finished, Available, Finished)

In [4]:
# 4️⃣ Remove zero, negative, or extra-long shifts.
# A row is kept only when 0 < shift_duration_sec < MAX_SHIFT_DURATION_SEC.
# Rows with a null duration are dropped as well, because a comparison against
# null does not evaluate to true.
MAX_SHIFT_DURATION_SEC = 180  # exclusive upper bound; adjust here if needed
df_positive_shifts = df_with_duration.filter(
    (F.col("shift_duration_sec") > 0)
    & (F.col("shift_duration_sec") < MAX_SHIFT_DURATION_SEC)
)

StatementMeta(, 5735d86a-6de2-4f83-af29-904f102b1329, 6, Finished, Available, Finished)

In [5]:
# 5️⃣ Remove exact duplicates: two rows count as duplicates only when every
# column matches.
df_no_duplicates = df_positive_shifts.distinct()



StatementMeta(, 5735d86a-6de2-4f83-af29-904f102b1329, 7, Finished, Available, Finished)

In [6]:
# Keep only the columns needed for the silver table, then de-duplicate again.
# The earlier de-duplication compares every bronze column; if the bronze table
# carries columns that are not selected here (schema not visible in this
# notebook — confirm), rows differing only in those columns would become exact
# duplicates after the projection. Dropping duplicates on the projected frame
# keeps each silver row unique.
df_silver = (
    df_no_duplicates
    .select(
        "game_id",
        "player_id",
        "shift_start",
        "shift_end",
        "shift_duration_sec",
    )
    .dropDuplicates()
)

StatementMeta(, 5735d86a-6de2-4f83-af29-904f102b1329, 8, Finished, Available, Finished)

In [8]:
# 6️⃣ Save as silver table. The table is fully overwritten on every run, and
# overwriteSchema lets the stored schema be replaced if the columns changed.
silver_table = "NHL_Lakehouse_Silver.silver_shift"
(
    df_silver.write
    .mode("overwrite")
    .option("overwriteSchema", "true")
    .saveAsTable(silver_table)
)

# 7️⃣ Verify by reading the saved table back, so the preview reflects what was
# actually persisted instead of re-evaluating the in-memory DataFrame.
spark.read.table(silver_table).show(10, truncate=False)

StatementMeta(, 5735d86a-6de2-4f83-af29-904f102b1329, 10, Finished, Available, Finished)

+----------+---------+-----------+---------+------------------+
|game_id   |player_id|shift_start|shift_end|shift_duration_sec|
+----------+---------+-----------+---------+------------------+
|2012030151|8475722  |373        |436      |63                |
|2012030151|8470600  |1683       |1758     |75                |
|2012030151|8476463  |3035       |3110     |75                |
|2012030152|8473604  |1102       |1111     |9                 |
|2012030152|8475745  |2682       |2706     |24                |
|2012030152|8475745  |742        |802      |60                |
|2012030153|8469665  |1145       |1189     |44                |
|2012030153|8469459  |2255       |2301     |46                |
|2012030153|8469542  |2726       |2783     |57                |
|2012030153|8474141  |2964       |3021     |57                |
+----------+---------+-----------+---------+------------------+
only showing top 10 rows

