In [0]:
# DESCRIPTION: Counts race wins per driver nationality and maps nationalities to country names for geospatial mapping
# ---------------------------------------------------------

from pyspark.sql.functions import col, when, count

# 1. Load the race results from the Silver container (Delta format)
silver_results_path = "abfss://silver@YOUR_ACCOUNT.dfs.core.windows.net/f1_results"
df = spark.read.format("delta").load(silver_results_path)

# 2. Count wins per nationality and attach a country name
# Ordered (nationality, country) lookup. A nationality that is not listed
# here is passed through unchanged as the "country" value.
nationality_to_country = [
    ("British", "United Kingdom"),
    ("German", "Germany"),
    ("French", "France"),
    ("Italian", "Italy"),
    ("Brazilian", "Brazil"),
    ("Finnish", "Finland"),
    ("Dutch", "Netherlands"),
]

nationality = col("driver_nationality")

# Fold the lookup into a single when(...).when(...).otherwise(...) expression,
# keeping the entries in table order.
(first_demonym, first_country), *remaining_pairs = nationality_to_country
country_expr = when(nationality == first_demonym, first_country)
for demonym, country_name in remaining_pairs:
    country_expr = country_expr.when(nationality == demonym, country_name)
country_expr = country_expr.otherwise(nationality)

# Rows with position = 1 are the ones counted as wins.
winners_df = df.filter("position = 1").withColumn("country", country_expr)
wins_by_nationality = winners_df.groupBy("driver_nationality", "country").agg(
    count("position").alias("total_wins")
)
nationality_wins_df = wins_by_nationality.orderBy(col("total_wins").desc())

# 3. Write the result to the Gold container as Parquet, replacing prior output
gold_output_path = "abfss://gold@YOUR_ACCOUNT.dfs.core.windows.net/nationality_wins"
(
    nationality_wins_df.write
    .format("parquet")
    .mode("overwrite")
    .save(gold_output_path)
)