In [0]:
# KPI-5 input: read the table named "kpi5_airline_base" into a DataFrame
from pyspark.sql.functions import col

df_base = spark.read.table("kpi5_airline_base")

# Preview the first rows to confirm the expected columns are present
df_base.show(n=5)


+----------+---------+---------+--------+-------+-----+-----------+--------------------+----------------+----------------+---------------+--------------+
|op_carrier|arr_delay|dep_delay|taxi_out|taxi_in|month|day_of_week|        airline_name|arr_delay_capped|dep_delay_capped|taxi_out_capped|taxi_in_capped|
+----------+---------+---------+--------+-------+-----+-----------+--------------------+----------------+----------------+---------------+--------------+
|        AA|      3.0|      4.0|    29.0|   15.0|    1|          3|   American Airlines|             3.0|             4.0|           29.0|          15.0|
|        AS|     -7.0|    -10.0|    12.0|   11.0|    1|          3|     Alaska Airlines|             0.0|             0.0|           12.0|          11.0|
|        DL|     14.0|      0.0|    56.0|    5.0|    1|          3|     Delta Air Lines|            14.0|             0.0|           56.0|           5.0|
|        EV|    -16.0|     -6.0|    11.0|    6.0|    1|          3| ExpressJ

In [0]:
# Total ground time per row = capped taxi-out + capped taxi-in
ground_time_expr = col("taxi_out_capped") + col("taxi_in_capped")
df_ground = df_base.withColumn("total_ground_time", ground_time_expr)

# Preview the two inputs side by side with the derived column
preview_columns = [
    "airline_name",
    "taxi_out_capped",
    "taxi_in_capped",
    "total_ground_time",
]
df_ground.select(*preview_columns).show(5)


+--------------------+---------------+--------------+-----------------+
|        airline_name|taxi_out_capped|taxi_in_capped|total_ground_time|
+--------------------+---------------+--------------+-----------------+
|   American Airlines|           29.0|          15.0|             44.0|
|     Alaska Airlines|           12.0|          11.0|             23.0|
|     Delta Air Lines|           56.0|           5.0|             61.0|
| ExpressJet Airlines|           11.0|           6.0|             17.0|
|Envoy Air (Americ...|           18.0|           8.0|             26.0|
+--------------------+---------------+--------------+-----------------+
only showing top 5 rows


In [0]:
# NOTE(review): this cell repeats the preceding ground-time cell. It rebuilds
# df_ground from df_base with the same expression and shows the same preview,
# so it looks removable -- confirm nothing relies on it before deleting.
# Total ground time = capped taxi-out + capped taxi-in.
df_ground = df_base.withColumn(
    "total_ground_time",
    df_base["taxi_out_capped"] + df_base["taxi_in_capped"],
)

df_ground.select(
    ["airline_name", "taxi_out_capped", "taxi_in_capped", "total_ground_time"]
).show(n=5)


+--------------------+---------------+--------------+-----------------+
|        airline_name|taxi_out_capped|taxi_in_capped|total_ground_time|
+--------------------+---------------+--------------+-----------------+
|   American Airlines|           29.0|          15.0|             44.0|
|     Alaska Airlines|           12.0|          11.0|             23.0|
|     Delta Air Lines|           56.0|           5.0|             61.0|
| ExpressJet Airlines|           11.0|           6.0|             17.0|
|Envoy Air (Americ...|           18.0|           8.0|             26.0|
+--------------------+---------------+--------------+-----------------+
only showing top 5 rows


In [0]:
# Per-airline ground handling metrics: flight count, mean taxi-out, mean taxi-in,
# and the mean and standard deviation of total ground time
from pyspark.sql.functions import avg, stddev, count

# Output columns appear in the order the expressions are listed here
ground_metrics = [
    count("*").alias("total_flights"),
    avg("taxi_out_capped").alias("avg_taxi_out"),
    avg("taxi_in_capped").alias("avg_taxi_in"),
    avg("total_ground_time").alias("avg_total_ground_time"),
    stddev("total_ground_time").alias("stddev_ground_time"),
]

# One output row per (op_carrier, airline_name) pair
df_kpi5c = df_ground.groupBy("op_carrier", "airline_name").agg(*ground_metrics)

df_kpi5c.show(5)


+----------+--------------------+-------------+------------------+-----------------+---------------------+------------------+
|op_carrier|        airline_name|total_flights|      avg_taxi_out|      avg_taxi_in|avg_total_ground_time|stddev_ground_time|
+----------+--------------------+-------------+------------------+-----------------+---------------------+------------------+
|        AA|   American Airlines|       534592| 16.75564355620735|8.147289895845804|   24.902933452053155| 9.448595800184183|
|        AS|     Alaska Airlines|       132217|17.005566606412184| 6.94810803451901|   23.953674640931197| 9.969439316471368|
|        DL|     Delta Air Lines|       551516|15.026156992725506|6.790963453462819|   21.817120446188323| 8.932526436070075|
|        EV| ExpressJet Airlines|        47981|19.396448594235217|7.431108146974844|   26.827556741210064|11.237519782605988|
|        MQ|Envoy Air (Americ...|       197193|  17.9132423564731|8.413229678538285|   26.326472035011385|10.917164560

In [0]:
# Ground efficiency score = 1 - (airline's average total ground time / largest
# airline average). Lower ground time therefore yields a higher score, and the
# airline holding the maximum average scores 0.
from pyspark.sql.functions import max as spark_max

# Pull the largest per-airline average back to the driver as a plain Python value
max_rows = df_kpi5c.agg(spark_max("avg_total_ground_time")).collect()
max_ground_time = max_rows[0][0]

# Each airline's average expressed as a fraction of that maximum
share_of_max = col("avg_total_ground_time") / max_ground_time

# df_kpi5c is rebound so later cells see the frame with the score column attached
df_kpi5c = df_kpi5c.withColumn("ground_efficiency_score", 1 - share_of_max)

df_kpi5c.show(5)


+----------+--------------------+-------------+------------------+-----------------+---------------------+------------------+-----------------------+
|op_carrier|        airline_name|total_flights|      avg_taxi_out|      avg_taxi_in|avg_total_ground_time|stddev_ground_time|ground_efficiency_score|
+----------+--------------------+-------------+------------------+-----------------+---------------------+------------------+-----------------------+
|        AA|   American Airlines|       534592| 16.75564355620735|8.147289895845804|   24.902933452053155| 9.448595800184183|     0.0810763663398032|
|        AS|     Alaska Airlines|       132217|17.005566606412184| 6.94810803451901|   23.953674640931197| 9.969439316471368|    0.11610422190066005|
|        DL|     Delta Air Lines|       551516|15.026156992725506|6.790963453462819|   21.817120446188323| 8.932526436070075|     0.1949435340614203|
|        EV| ExpressJet Airlines|        47981|19.396448594235217|7.431108146974844|   26.8275567412

In [0]:
# Persist the KPI-5C airline ground efficiency result as a table;
# "overwrite" mode replaces existing contents under the same table name
(
    df_kpi5c.write
    .mode("overwrite")
    .saveAsTable("kpi5c_airline_ground_efficiency")
)

# Read the saved table back and preview it to confirm the write
spark.table("kpi5c_airline_ground_efficiency").show(5)


+----------+--------------------+-------------+------------------+-----------------+---------------------+------------------+-----------------------+
|op_carrier|        airline_name|total_flights|      avg_taxi_out|      avg_taxi_in|avg_total_ground_time|stddev_ground_time|ground_efficiency_score|
+----------+--------------------+-------------+------------------+-----------------+---------------------+------------------+-----------------------+
|        B6|     JetBlue Airways|       135869|15.404890004342418|6.678602182985081|     22.0834921873275|  8.82700643006812|     0.1851143591674953|
|        AS|     Alaska Airlines|       132217|17.005566606412184| 6.94810803451901|   23.953674640931197| 9.969439316471368|    0.11610422190066005|
|        MQ|Envoy Air (Americ...|       197193|  17.9132423564731|8.413229678538285|   26.326472035011385|10.917164560661119|   0.028547484558624325|
|        9E|Endeavor Air (Del...|       204215|15.387934284944789|6.290243126117082|    21.678177411