In [0]:
# Azure Blob Storage account and container that hold the flight data.
storage_account_name = "flightdatastorage"
container_name = "raw"

# The account key must never be pasted into the notebook: it would be stored
# in the notebook source and in version control. Fetch it from a Databricks
# secret scope at run time instead.
# NOTE(review): the scope and key names below are placeholders -- replace them
# with the secret scope/key actually provisioned in this workspace.
storage_account_key = dbutils.secrets.get(
    scope="flight-data",
    key="storage-account-key",
)

# Register the key with the Spark session so wasbs:// paths on this storage
# account can be read.
spark.conf.set(
    f"fs.azure.account.key.{storage_account_name}.blob.core.windows.net",
    storage_account_key,
)

In [0]:
# Location of the Bronze data set, spelled out as container / storage account /
# folder prefix so each part of the wasbs:// URI is easy to identify.
BRONZE_PATH = "wasbs://{container}@{account}.blob.core.windows.net/{prefix}".format(
    container="bronze",
    account="flightdatastorage",
    prefix="raw_data/data_set/",
)


#### 1. Load Bronze Delta Table

**Purpose**  
Load the Bronze layer as a Delta table to serve as the single source of truth for post-ingestion validation.

This operation is strictly **read-only** and ensures all downstream validation is performed against persisted Bronze data rather than in-memory DataFrames.


In [0]:
# Read the persisted Bronze data in Delta format from BRONZE_PATH; nothing is
# written back by this cell.
delta_reader = spark.read.format("delta")
df_bronze = delta_reader.load(BRONZE_PATH)


#### 2. Validate Partition Completeness

**Purpose**  
Verify that all expected `source_year` and `source_quarter` partitions have been successfully ingested.

This confirms:
- End-to-end ingestion completeness
- Correct partitioning strategy
- Partition columns are populated and queryable (a prerequisite for read-time partition pruning)


In [0]:
# Row count for every (source_year, source_quarter) combination, ordered by
# year and then quarter so gaps in the sequence are easy to spot.
# NOTE(review): the recorded run shows 2024 Q4 at roughly 3x the rows of any
# other quarter -- confirm this is not a repeated ingest.
partition_keys = ["source_year", "source_quarter"]
partition_counts = (
    df_bronze
    .groupBy(*partition_keys)
    .count()
    .orderBy(*partition_keys)
)
display(partition_counts)


source_year,source_quarter,count
2024,2,8533978
2024,3,8297869
2024,4,24351696
2025,1,7297028
2025,2,8450420


#### 3. Record Count Baseline

**Purpose**  
Establish a total row count baseline for the Bronze layer.

This metric is used for:
- High-level ingestion validation
- Future reconciliation with Silver and Gold layers
- Operational monitoring and audit documentation


In [0]:
# Count every row in the Bronze table and report it as the baseline figure.
total_rows = df_bronze.count()
baseline_message = f"Total Bronze rows: {total_rows}"
print(baseline_message)


Total Bronze rows: 56930991


#### 4. Schema Inspection and Freeze

**Purpose**  
Inspect and document the Bronze schema after ingestion is complete.

The Bronze schema represents the **contract** for downstream Silver transformations and should not be modified without explicit design changes.


In [0]:
# Print the Bronze schema tree (column name, type and nullability per column)
# so it can be recorded as the reference for downstream Silver transformations.
df_bronze.printSchema()


root
 |-- ITIN_ID: string (nullable = true)
 |-- MKT_ID: string (nullable = true)
 |-- MARKET_COUPONS: string (nullable = true)
 |-- YEAR: string (nullable = true)
 |-- QUARTER: string (nullable = true)
 |-- ORIGIN_AIRPORT_ID: string (nullable = true)
 |-- ORIGIN_AIRPORT_SEQ_ID: string (nullable = true)
 |-- ORIGIN_CITY_MARKET_ID: string (nullable = true)
 |-- ORIGIN: string (nullable = true)
 |-- ORIGIN_COUNTRY: string (nullable = true)
 |-- ORIGIN_STATE_FIPS: string (nullable = true)
 |-- ORIGIN_STATE_ABR: string (nullable = true)
 |-- ORIGIN_STATE_NM: string (nullable = true)
 |-- ORIGIN_WAC: string (nullable = true)
 |-- DEST_AIRPORT_ID: string (nullable = true)
 |-- DEST_AIRPORT_SEQ_ID: string (nullable = true)
 |-- DEST_CITY_MARKET_ID: string (nullable = true)
 |-- DEST: string (nullable = true)
 |-- DEST_COUNTRY: string (nullable = true)
 |-- DEST_STATE_FIPS: string (nullable = true)
 |-- DEST_STATE_ABR: string (nullable = true)
 |-- DEST_STATE_NM: string (nullable = true)
 |-- 

#### 5. Critical Column Null Checks

**Purpose**  
Perform non-destructive data quality checks on critical ingestion metadata columns.

This step validates:
- Parameter propagation (`source_year`, `source_quarter`)
- Ingestion consistency
- Absence of structural ingestion defects

No remediation is performed at this stage.


In [0]:
from pyspark.sql.functions import col, count, when

# Ingestion metadata columns that must be populated on every Bronze row.
CRITICAL_COLUMNS = ["source_year", "source_quarter"]

# Count the nulls of every critical column in a single pass over the table.
# count() ignores nulls, and when() without otherwise() yields null for rows
# that do not match, so each aggregate counts only the rows where the column
# is null.
null_counts = (
    df_bronze
    .select([
        count(when(col(column_name).isNull(), 1)).alias(column_name)
        for column_name in CRITICAL_COLUMNS
    ])
    .first()
    .asDict()
)

# Print every result explicitly: a notebook cell only renders its last bare
# expression, so earlier checks would otherwise run without showing anything.
for column_name, null_count in null_counts.items():
    print(f"{column_name} nulls: {null_count}")


0

#### 6. Data Spot Inspection

**Purpose**  
Visually inspect a small sample of Bronze records to validate:
- Data readability
- Column alignment
- Absence of obvious corruption or parsing issues

This step provides a final human sanity check before promoting data to the Silver layer.


In [0]:
# Render at most 50 Bronze rows for a manual look at readability and column
# alignment.
bronze_sample = df_bronze.limit(50)
display(bronze_sample)


ITIN_ID,MKT_ID,MARKET_COUPONS,YEAR,QUARTER,ORIGIN_AIRPORT_ID,ORIGIN_AIRPORT_SEQ_ID,ORIGIN_CITY_MARKET_ID,ORIGIN,ORIGIN_COUNTRY,ORIGIN_STATE_FIPS,ORIGIN_STATE_ABR,ORIGIN_STATE_NM,ORIGIN_WAC,DEST_AIRPORT_ID,DEST_AIRPORT_SEQ_ID,DEST_CITY_MARKET_ID,DEST,DEST_COUNTRY,DEST_STATE_FIPS,DEST_STATE_ABR,DEST_STATE_NM,DEST_WAC,AIRPORT_GROUP,WAC_GROUP,TK_CARRIER_CHANGE,TK_CARRIER_GROUP,OP_CARRIER_CHANGE,OP_CARRIER_GROUP,REPORTING_CARRIER,TICKET_CARRIER,OPERATING_CARRIER,BULK_FARE,PASSENGERS,MARKET_FARE,MARKET_DISTANCE,DISTANCE_GROUP,MARKET_MILES_FLOWN,NONSTOP_MILES,ITIN_GEO_TYPE,MKT_GEO_TYPE,ingestion_ts,source_year,source_quarter
20242516,2024251601,1,2024,2,11624,1162403,31624,EYW,US,12,FL,Florida,33,11697,1169706,32467,FLL,US,12,FL,Florida,33,EYW:FLL,33:33,0.0,3M,0.0,3M,3M,3M,3M,0.0,1.0,51.0,145.0,1,145.0,145.0,2,2,2026-01-15T22:00:39.473959Z,2024,2
202424759470,20242475947002,1,2024,2,11292,1129202,30325,DEN,US,8,CO,Colorado,82,11648,1164802,31648,FCA,US,30,MT,Montana,84,DEN:FCA,82:84,0.0,UA,0.0,UA,UA,UA,UA,0.0,1.0,209.0,752.0,2,752.0,752.0,2,2,2026-01-15T22:00:39.473959Z,2024,2
20242517,2024251701,1,2024,2,11624,1162403,31624,EYW,US,12,FL,Florida,33,11697,1169706,32467,FLL,US,12,FL,Florida,33,EYW:FLL,33:33,0.0,3M,0.0,3M,3M,3M,3M,0.0,3.0,51.0,145.0,1,145.0,145.0,2,2,2026-01-15T22:00:39.473959Z,2024,2
202424759471,20242475947101,1,2024,2,11648,1164802,31648,FCA,US,30,MT,Montana,84,11292,1129202,30325,DEN,US,8,CO,Colorado,82,FCA:DEN,84:82,0.0,UA,0.0,UA,UA,UA,UA,0.0,1.0,212.5,752.0,2,752.0,752.0,2,2,2026-01-15T22:00:39.473959Z,2024,2
20242518,2024251801,1,2024,2,11624,1162403,31624,EYW,US,12,FL,Florida,33,11697,1169706,32467,FLL,US,12,FL,Florida,33,EYW:FLL,33:33,0.0,3M,0.0,3M,3M,3M,3M,0.0,3.0,67.0,145.0,1,145.0,145.0,2,2,2026-01-15T22:00:39.473959Z,2024,2
202424759471,20242475947102,1,2024,2,11292,1129202,30325,DEN,US,8,CO,Colorado,82,11648,1164802,31648,FCA,US,30,MT,Montana,84,DEN:FCA,82:84,0.0,UA,0.0,UA,UA,UA,UA,0.0,1.0,212.5,752.0,2,752.0,752.0,2,2,2026-01-15T22:00:39.473959Z,2024,2
20242519,2024251901,1,2024,2,11624,1162403,31624,EYW,US,12,FL,Florida,33,11697,1169706,32467,FLL,US,12,FL,Florida,33,EYW:FLL,33:33,0.0,UA,0.0,3M,3M,UA,3M,0.0,2.0,67.0,145.0,1,145.0,145.0,2,2,2026-01-15T22:00:39.473959Z,2024,2
202424759472,20242475947201,1,2024,2,11648,1164802,31648,FCA,US,30,MT,Montana,84,11292,1129202,30325,DEN,US,8,CO,Colorado,82,FCA:DEN,84:82,0.0,UA,0.0,UA,UA,UA,UA,0.0,1.0,214.0,752.0,2,752.0,752.0,2,2,2026-01-15T22:00:39.473959Z,2024,2
20242520,2024252001,1,2024,2,11624,1162403,31624,EYW,US,12,FL,Florida,33,11697,1169706,32467,FLL,US,12,FL,Florida,33,EYW:FLL,33:33,0.0,UA,0.0,3M,3M,UA,3M,0.0,3.0,72.0,145.0,1,145.0,145.0,2,2,2026-01-15T22:00:39.473959Z,2024,2
202424759472,20242475947202,1,2024,2,11292,1129202,30325,DEN,US,8,CO,Colorado,82,11648,1164802,31648,FCA,US,30,MT,Montana,84,DEN:FCA,82:84,0.0,UA,0.0,OO,UA,UA,OO,0.0,1.0,214.0,752.0,2,752.0,752.0,2,2,2026-01-15T22:00:39.473959Z,2024,2
