#### Fact: Airline Tickets – Carrier Quarterly Aggregate
Build a performance-optimized aggregate fact table for carrier-level analysis derived from the base Gold fact.


###### Load Base Fact
Read the Gold transactional fact as the authoritative source for aggregation.

In [0]:
# Storage location of the Gold transactional fact that this notebook aggregates.
# Adjacent string literals are joined by Python into a single path.
GOLD_FACT_AIRLINE_TICKETS_PATH = (
    "wasbs://gold@flightdatastorage.blob.core.windows.net/"
    "fact_airline_tickets/"
)

In [0]:
from pyspark.sql.functions import col, sum, avg, count, countDistinct, concat, lit

# Load the Gold transactional fact from its Delta location; every aggregate
# and reconciliation check in this notebook reads from this DataFrame.
delta_reader = spark.read.format("delta")
df_fact = delta_reader.load(GOLD_FACT_AIRLINE_TICKETS_PATH)

###### Build Carrier Quarterly Aggregate
Aggregate by carrier, year, quarter, and distance group for executive-level carrier performance analysis.

In [0]:
# Grain: one row per reporting carrier x year x quarter x distance group.
# The additive totals (passengers, revenue, miles, row count) can be re-summed
# to a coarser grain; the averages, distinct counts and ratios cannot.
df_fact_carrier_quarterly_agg = (
    df_fact
    .groupBy(
        "reporting_carrier_key",
        "source_year",
        "distance_group_key",
        "source_quarter"
    )
    .agg(
        # Additive totals.
        sum("passenger_cnt").alias("total_passengers"),
        sum("market_fare_usd").alias("total_revenue"),
        # Unweighted mean of the fare column across the group's rows.
        avg("market_fare_usd").alias("avg_fare"),
        sum("market_miles_flown").alias("total_miles"),
        # Number of base-fact rows that fall into the group.
        count("*").alias("total_flights"),
        # Network breadth within the group.
        countDistinct("origin_airport_key").alias("origins_served"),
        countDistinct("dest_airport_key").alias("destinations_served"),
        # Distinct (origin, destination) pairs. Counting the column pair directly
        # avoids building a delimited string for every row and cannot merge two
        # different pairs whose joined text would coincide. Rows with a NULL in
        # either key are not counted.
        countDistinct("origin_airport_key", "dest_airport_key").alias("unique_routes"),
        avg("market_distance_miles").alias("avg_flight_distance"),
        # Ratio-of-sums metrics, computed from the group totals rather than
        # averaged per row.
        # NOTE(review): a group whose passenger or mile total is 0 yields NULL
        # (or a divide-by-zero error if spark.sql.ansi.enabled is true) --
        # confirm that is acceptable for downstream consumers.
        (sum("market_fare_usd") / sum("passenger_cnt")).alias("revenue_per_passenger"),
        (sum("market_fare_usd") / sum("market_miles_flown")).alias("yield_per_mile")
    )
)

###### Validate Aggregate Metrics
Ensure aggregate totals match the base fact for key metrics.

In [0]:
# Reconcile passengers: the aggregate must carry the same total as the base fact.
base_passengers = df_fact.selectExpr("sum(passenger_cnt)")
agg_passengers = df_fact_carrier_quarterly_agg.selectExpr("sum(total_passengers)")

# Display both totals for the reader.
base_passengers.show()
agg_passengers.show()

# Fail the run on a mismatch instead of relying on a visual comparison.
# first()[0] extracts the single value from each one-row result.
base_passenger_total = base_passengers.first()[0]
agg_passenger_total = agg_passengers.first()[0]
assert base_passenger_total == agg_passenger_total, (
    f"Passenger total mismatch: base={base_passenger_total}, "
    f"aggregate={agg_passenger_total}"
)

+------------------+
|sum(passenger_cnt)|
+------------------+
|          75893539|
+------------------+

+---------------------+
|sum(total_passengers)|
+---------------------+
|             75893539|
+---------------------+



In [0]:
# Reconcile miles flown: the aggregate must carry the same total as the base fact.
base_miles = df_fact.selectExpr("sum(market_miles_flown)")
agg_miles = df_fact_carrier_quarterly_agg.selectExpr("sum(total_miles)")

# Display both totals for the reader.
base_miles.show()
agg_miles.show()

# Fail the run on a mismatch instead of relying on a visual comparison.
# first()[0] extracts the single value from each one-row result.
base_miles_total = base_miles.first()[0]
agg_miles_total = agg_miles.first()[0]
assert base_miles_total == agg_miles_total, (
    f"Miles total mismatch: base={base_miles_total}, "
    f"aggregate={agg_miles_total}"
)

+-----------------------+
|sum(market_miles_flown)|
+-----------------------+
|            51974345493|
+-----------------------+

+----------------+
|sum(total_miles)|
+----------------+
|     51974345493|
+----------------+



In [0]:
# Reconcile row counts: the per-group counts must add back up to the number of
# rows in the base fact.
base_rows = df_fact.selectExpr("count(*)")
agg_rows = df_fact_carrier_quarterly_agg.selectExpr("sum(total_flights)")

# Display both totals for the reader.
base_rows.show()
agg_rows.show()

# Fail the run on a mismatch instead of relying on a visual comparison.
# first()[0] extracts the single value from each one-row result. A sum over an
# empty aggregate is NULL (None) while count(*) is 0, so None is treated as 0.
base_row_total = base_rows.first()[0]
agg_row_total = agg_rows.first()[0] or 0
assert base_row_total == agg_row_total, (
    f"Row count mismatch: base={base_row_total}, "
    f"aggregate={agg_row_total}"
)

+--------+
|count(1)|
+--------+
|39876323|
+--------+

+------------------+
|sum(total_flights)|
+------------------+
|          39876323|
+------------------+



In [0]:
# Reconcile revenue: the aggregate must carry the same total as the base fact.
base_revenue = df_fact.selectExpr("sum(market_fare_usd)")
agg_revenue = df_fact_carrier_quarterly_agg.selectExpr("sum(total_revenue)")

# Display both totals for the reader.
base_revenue.show()
agg_revenue.show()

# Fail the run on a mismatch instead of relying on a visual comparison.
# first()[0] extracts the single value from each one-row result.
# NOTE(review): exact equality assumes market_fare_usd is a decimal type --
# confirm; switch to a tolerance check if it is floating point, because
# floating-point sums can differ with summation order.
base_revenue_total = base_revenue.first()[0]
agg_revenue_total = agg_revenue.first()[0]
assert base_revenue_total == agg_revenue_total, (
    f"Revenue total mismatch: base={base_revenue_total}, "
    f"aggregate={agg_revenue_total}"
)

+--------------------+
|sum(market_fare_usd)|
+--------------------+
|      10801583752.42|
+--------------------+

+------------------+
|sum(total_revenue)|
+------------------+
|    10801583752.42|
+------------------+



###### Persist Carrier Aggregate Fact
Write the aggregate fact to the Gold layer as a full Delta overwrite.


In [0]:
# Storage location the carrier aggregate fact is written to.
GOLD_FACT_CARRIER_AGG_PATH = "wasbs://gold@flightdatastorage.blob.core.windows.net/fact_carrier_quarterly_agg"

In [0]:
# Persist the aggregate as a Delta table; "overwrite" replaces whatever is
# currently stored at the target path.
carrier_agg_writer = df_fact_carrier_quarterly_agg.write.format("delta")
carrier_agg_writer.mode("overwrite").save(GOLD_FACT_CARRIER_AGG_PATH)

###### Post-write Validation
Confirm row-count reduction relative to the base fact as a sanity check.
