In [1]:
# 1. Load the Silver table (all laps from all races)
df_laps = spark.read.table("f1_race_laps")

# 2. Register the DataFrame as a temp view so it can be queried with Spark SQL
df_laps.createOrReplaceTempView("laps_view")

# 3. Build the Gold aggregate: one row per (RaceName, Season, Driver) holding
#    the minimum LapTime, the number of lap rows, and the mean TyreLife.
#    NOTE(review): MIN(LapTime) is only a true "fastest lap" if LapTime is a
#    numeric/interval type; a string column would compare lexically - confirm schema.
#    NOTE(review): COUNT(*) counts every lap row in the group, including rows
#    whose LapTime is NULL - confirm that matches "laps completed".
#    Season is part of the sort key (it is part of the grouping key) so that a
#    RaceName occurring in several seasons is not interleaved by lap time.
gold_stats = spark.sql("""
    SELECT
        RaceName,
        Season,
        Driver,
        MIN(LapTime) AS Fastest_Lap,
        COUNT(*) AS Laps_Completed,
        AVG(TyreLife) AS Avg_Tyre_Life
    FROM laps_view
    GROUP BY RaceName, Season, Driver
    ORDER BY RaceName, Season, Fastest_Lap ASC
""")

# 4. Persist the result as a managed table. "overwrite" replaces whatever the
#    table held before, so re-running this cell does not append duplicates.
#    Queries that need a specific row order should still use their own ORDER BY.
gold_stats.write.mode("overwrite").saveAsTable("gold_driver_stats")

print("✅ Gold Table 'gold_driver_stats' created successfully!")

StatementMeta(, ab24883a-9078-4155-9ef9-3f5990e089c4, 5, Finished, Available, Finished)

✅ Gold Table 'gold_driver_stats' created successfully!


In [2]:
%%sql
-- Quick preview: any five rows of the Gold table (no ordering is requested)
SELECT
    *
FROM gold_driver_stats
LIMIT 5

StatementMeta(, ab24883a-9078-4155-9ef9-3f5990e089c4, 6, Finished, Available, Finished)

<Spark SQL result set with 5 rows and 6 fields>

In [3]:
%%sql
-- Row count per (RaceName, Season) in the Silver laps table
SELECT
    RaceName,
    Season,
    COUNT(*) AS Total_Laps
FROM f1_race_laps
GROUP BY
    RaceName,
    Season

StatementMeta(, ab24883a-9078-4155-9ef9-3f5990e089c4, 7, Finished, Available, Finished)

<Spark SQL result set with 4 rows and 3 fields>