# ✈️ AIRCRAFT DATA INGESTION PIPELINE
### 1️⃣ Objective of Aircraft Extraction

Purpose:
Extract unique aircraft information from live flight movements and create an aircraft dimension table that can be linked to flight records for analytics such as aircraft utilization and delay analysis.

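The list of aircraft is derived from the registrations seen in flight-level data rather than from a bulk aircraft listing; details for each registration are then looked up one at a time via the aircraft-by-registration endpoint. This ensures Tier-2 compliance and keeps the aircraft dimension consistent with the operational data.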



In [18]:
import requests
import pandas as pd
import time
import os

# --- API configuration ---
# Host of the AeroDataBox API as exposed through RapidAPI.
API_HOST = "aerodatabox.p.rapidapi.com"

# Credentials must never be committed inside a notebook: the key is read from
# the RAPIDAPI_KEY environment variable (set it before starting Jupyter).
API_KEY = os.environ.get("RAPIDAPI_KEY", "")
if not API_KEY:
    print("WARNING: RAPIDAPI_KEY is not set; requests will be sent without an API key.")

# Header dictionary carrying the key and host.
HEADERS = {
    "x-rapidapi-key": API_KEY,
    "x-rapidapi-host": API_HOST
}


In [19]:
# Load flights snapshot (read once; it is only needed here to collect registrations)
flights_df = pd.read_csv("../data/flights.csv")

# Clean + dedupe registrations:
#   - drop missing values,
#   - normalise to stripped strings,
#   - discard values that are empty after stripping (they would otherwise be
#     looked up as an empty registration),
#   - keep first-seen order of the unique values.
registrations = (
    flights_df["aircraft_registration"]
    .dropna()
    .astype(str)
    .str.strip()
    .loc[lambda s: s.ne("")]
    .unique()
    .tolist()
)

total_regs = len(registrations)
print(f"Unique aircraft registrations: {total_regs}")


Unique aircraft registrations: 1531


In [20]:

def fetch_aircraft_limited(reg, timeout=15):
    """Look up one aircraft by registration and return a flat record.

    Parameters
    ----------
    reg : str
        Aircraft registration to look up (e.g. "VT-TNM"). It is URL-escaped
        before being placed in the request path.
    timeout : float, optional
        Seconds to wait for the HTTP request before giving up, so a stalled
        connection cannot hang the calling loop.

    Returns
    -------
    dict or None
        A dict with keys ``registration``, ``model``, ``manufacturer``,
        ``icao_type_code`` and ``owner`` built from the first element of the
        response list, or ``None`` when the request fails, the status code is
        not 200, the body is not valid JSON, or the payload is not a
        non-empty list whose first element is a dict.
    """
    from urllib.parse import quote

    # safe="" also escapes "/" so an odd registration cannot alter the path.
    encoded_reg = quote(str(reg), safe="")
    url = f"https://{API_HOST}/aircrafts/reg/{encoded_reg}/all"

    try:
        r = requests.get(url, headers=HEADERS, timeout=timeout)
    except requests.RequestException:
        # Network-level failure (timeout, DNS, connection reset, ...):
        # treated like any other unsuccessful lookup.
        return None

    if r.status_code != 200:
        return None

    try:
        data = r.json()
    except ValueError:
        # Body was not valid JSON.
        return None

    if not isinstance(data, list) or len(data) == 0:
        return None

    rec = data[0]
    if not isinstance(rec, dict):
        return None

    return {
        "registration": rec.get("reg"),
        "model": rec.get("model"),
        "manufacturer": None,                 # not provided by API
        "icao_type_code": rec.get("icaoCode"),
        "owner": rec.get("airlineName")
    }


In [21]:
# Where the finished snapshot and the in-progress checkpoint are stored.
OUTPUT_PATH = "../data/aircraft.csv"
PARTIAL_PATH = "../data/aircraft_partial.csv"

if not os.path.exists(PARTIAL_PATH):
    # Fresh start: empty frame with the expected columns, nothing fetched yet.
    aircraft_df = pd.DataFrame(
        columns=["registration", "model", "manufacturer", "icao_type_code", "owner"]
    )
    done_regs = set()
else:
    # A checkpoint file exists: reload it and remember which registrations it holds.
    aircraft_df = pd.read_csv(PARTIAL_PATH)
    done_regs = {
        fetched_reg
        for fetched_reg in aircraft_df["registration"].dropna().tolist()
    }
    print(f"Resuming. Already fetched: {len(done_regs)}")

# Records gathered so far, as a list of dicts that later cells append to.
rows = aircraft_df.to_dict(orient="records")


In [22]:
CHECKPOINT_EVERY = 100   # write the partial file after this many API calls
REQUEST_DELAY_S = 0.7    # pause between calls (rate-limit safety)


def _save_checkpoint():
    """Write the rows collected so far to PARTIAL_PATH.

    Nothing is written while `rows` is empty, so a checkpoint file always
    contains the header row.
    """
    if rows:
        pd.DataFrame(rows).to_csv(PARTIAL_PATH, index=False)
        print("Checkpoint saved.")


calls_since_checkpoint = 0

try:
    for i, reg in enumerate(registrations, start=1):
        if reg in done_regs:
            continue

        print(f"{i}/{total_regs} Fetching {reg}")

        row = fetch_aircraft_limited(reg)
        if row:
            rows.append(row)
            # Mark as done so re-running this cell in the same kernel does not
            # fetch the registration again and append a duplicate row.
            done_regs.add(reg)

        # Count actual API calls rather than the enumerate index: after a
        # resume, index multiples of 100 may fall on skipped registrations.
        calls_since_checkpoint += 1
        if calls_since_checkpoint >= CHECKPOINT_EVERY:
            _save_checkpoint()
            calls_since_checkpoint = 0

        time.sleep(REQUEST_DELAY_S)
finally:
    # Also runs on KeyboardInterrupt / errors, so work done since the last
    # periodic checkpoint is not lost when the cell is stopped.
    if calls_since_checkpoint:
        _save_checkpoint()


1/1531 Fetching VT-TNM
2/1531 Fetching VT-EXL
3/1531 Fetching A5-JKW
4/1531 Fetching VT-TSH
5/1531 Fetching VT-BKK
6/1531 Fetching VT-PPT
7/1531 Fetching VT-EXQ
8/1531 Fetching VT-TQR
9/1531 Fetching VT-EDE
10/1531 Fetching VT-EXM
11/1531 Fetching VT-IWS
12/1531 Fetching 9M-MVC
13/1531 Fetching VT-TQB
14/1531 Fetching VT-CIN
15/1531 Fetching VT-ANC
16/1531 Fetching VT-EXV
17/1531 Fetching VT-PPQ
18/1531 Fetching VT-BWL
19/1531 Fetching VT-JRA
20/1531 Fetching VT-SLC
21/1531 Fetching VT-IXK
22/1531 Fetching VT-ISQ
23/1531 Fetching A4O-BAA
24/1531 Fetching VT-NCO
25/1531 Fetching VT-IFK
26/1531 Fetching VT-JRH
27/1531 Fetching VT-NAC
28/1531 Fetching VT-TNB
29/1531 Fetching VT-YAQ
30/1531 Fetching S2-AHV
31/1531 Fetching VT-TNC
32/1531 Fetching VT-SGV
33/1531 Fetching VT-SQD
34/1531 Fetching VT-PPX
35/1531 Fetching A6-EGU
36/1531 Fetching VT-NCC
37/1531 Fetching VT-NAA
38/1531 Fetching VT-TVH
39/1531 Fetching VT-NCH
40/1531 Fetching VT-TNK
41/1531 Fetching VT-CIH
42/1531 Fetching VT-NCK


In [25]:
# Build the aircraft dimension with a fixed column order. Passing `columns`
# keeps the schema intact even when `rows` is empty.
aircraft_df = (
    pd.DataFrame(
        rows,
        columns=["registration", "model", "manufacturer", "icao_type_code", "owner"],
    )
    # A row without a registration cannot be linked to a flight record.
    .dropna(subset=["registration"])
    # One row per registration: keep the first record seen for each aircraft.
    .drop_duplicates(subset="registration", keep="first")
    .reset_index(drop=True)
)

# Add surrogate key (1-based, assigned after de-duplication so it is gap-free)
aircraft_df.insert(0, "aircraft_id", range(1, len(aircraft_df) + 1))

# Save final snapshot
aircraft_df.to_csv(OUTPUT_PATH, index=False)

# Optional: remove partial
if os.path.exists(PARTIAL_PATH):
    os.remove(PARTIAL_PATH)

aircraft_df



Unnamed: 0,aircraft_id,registration,model,manufacturer,icao_type_code,owner
0,1,VT-TNM,A20N,,A20N,Air India
1,2,VT-EXL,A20N,,A320,Air India
2,3,A5-JKW,,,A319,Drukair
3,4,VT-TSH,B789,,B789,Air India
4,5,VT-BKK,A320,,A320,AIX Connect
...,...,...,...,...,...,...
272,273,VT-RTI,A20N,,A20N,Air India Express
273,274,VT-IJQ,A20N,,A20N,IndiGo
274,275,S2-AKJ,AT75,,AT75,US-Bangla Airlines
275,276,OK-TVS,B738,,B738,Eurowings
