# AIRPORTS DATA INGESTION PIPELINE


###1️⃣ Import Required Libraries

Purpose:
Load all Python libraries needed for API calls, data handling, and rate-limit safety.

In [34]:
import requests
import pandas as pd
import time

### 2️⃣ Define API Configuration & Airport List

Purpose:
Centralize API credentials and define the list of airports used throughout the project.

In [35]:
API_HOST = "aerodatabox.p.rapidapi.com"
HEADERS = {
    "x-rapidapi-key": "784d49518dmsh0343e5da9ef1c6ap18e96bjsn12a30dc8b692",
    "x-rapidapi-host": API_HOST
}
AIRPORTS = [
    "DEL","BOM","BLR","HYD","MAA","CCU","COK",
    "JFK","LHR","DXB","SIN","CDG","HND","SYD"
]

### 3️⃣ Fetch Basic Airport Identity (IATA Endpoint)

Endpoint Used:
/airports/iata/{iata}

Purpose:
Retrieve authoritative airport identifiers, coordinates, and timezone.

In [36]:
import pandas as pd
import requests

rows = []

for code in AIRPORTS:
    url = f"https://aerodatabox.p.rapidapi.com/airports/iata/{code}"
    r = requests.get(url, headers=HEADERS)

    if r.status_code != 200:
        print(f"❌ {code} failed | Status: {r.status_code}")
        print(r.text)
        continue

    d = r.json()

    location = d.get("location", {})
    country = location.get("country", {})

    rows.append({
        "icao_code": d.get("icao"),
        "iata_code": d.get("iata"),
        "name": d.get("shortname"),
        "city": location.get("city"),
        "country": country.get("name"),
        "continent": country.get("continent"),
        "latitude": location.get("lat"),
        "longitude": location.get("lon"),
        "timezone": d.get("timeZone")
    })

### 4️⃣ Create Initial Airport DataFrame

Purpose:
Convert raw API responses into a structured Pandas DataFrame.

In [37]:
airport_df = pd.DataFrame(rows)
airport_df

Unnamed: 0,icao_code,iata_code,name,city,country,continent,latitude,longitude,timezone
0,VIDP,DEL,,,,,28.5665,77.1031,Asia/Kolkata
1,VABB,BOM,,,,,19.0887,72.8679,Asia/Kolkata
2,VOBL,BLR,,,,,13.197899,77.7063,Asia/Kolkata
3,VOHS,HYD,,,,,17.231318,78.429855,Asia/Kolkata
4,VOMM,MAA,,,,,12.990005,80.1693,Asia/Kolkata
5,VECC,CCU,,,,,22.6547,88.4467,Asia/Kolkata
6,VOCI,COK,,,,,10.152,76.4019,Asia/Kolkata
7,KJFK,JFK,,,,,40.6398,-73.7789,America/New_York
8,EGLL,LHR,,,,,51.4706,-0.461941,Europe/London
9,OMDB,DXB,,,,,25.252798,55.3644,Asia/Dubai


### 5️⃣ Define Country-to-Continent Mapping

Purpose:
Map ISO country codes to continents for analytical classification.

In [38]:
COUNTRY_TO_CONTINENT = {
    "IN": "Asia",
    "US": "North America",
    "GB": "Europe",
    "AE": "Asia",
    "SG": "Asia",
    "FR": "Europe",
    "JP": "Asia",
    "AU": "Australia"
}


### 6️⃣ Enrich Airport Metadata Using Search Endpoint

Endpoint Used:
/airports/search/location

Purpose:
Enrich airport records with:

- Airport name

- City (municipality)

- Country code

- Continent (derived)

In [39]:
import requests
import time

def enrich_airport_by_location(lat, lon):
    url = "https://aerodatabox.p.rapidapi.com/airports/search/location"

    params = {
        "lat": lat,
        "lon": lon,
        "radiusKm": 10,
        "limit": 1
    }

    r = requests.get(url, headers=HEADERS, params=params)

    if r.status_code != 200:
        return None, None, None,None

    data = r.json()
    items = data.get("items", [])

    if not items:
        return None, None, None,None

    item = items[0]
    name = item.get("name") or item.get("shortName")
    city = item.get("municipalityName")
    country_code = item.get("countryCode")
    continent = COUNTRY_TO_CONTINENT.get(country_code)

    return name,city, country_code, continent


### 7️⃣ Apply Enrichment to All Airports

Purpose:
Iteratively enrich each airport using its latitude and longitude.

In [40]:
names = []
cities = []
countries = []
continents = []

for _, row in airport_df.iterrows():
    name, city, country_code, continent = enrich_airport_by_location(
        row["latitude"],
        row["longitude"]
    )
    names.append(name)
    cities.append(city)
    countries.append(country_code)
    continents.append(continent)

    time.sleep(1)  # IMPORTANT: avoid rate limiting


### 8️⃣ Merge Enriched Fields into Airport DataFrame

Purpose:
Create a final, analytics-ready airport master table.

In [41]:
airport_df["name"] = names
airport_df["city"] = cities
airport_df["country"] = countries
airport_df["continent"] = continents

airport_df


Unnamed: 0,icao_code,iata_code,name,city,country,continent,latitude,longitude,timezone
0,VIDP,DEL,New Delhi Indira Gandhi,New Delhi,IN,Asia,28.5665,77.1031,Asia/Kolkata
1,VABB,BOM,Mumbai Chhatrapati Shivaji,Mumbai,IN,Asia,19.0887,72.8679,Asia/Kolkata
2,VOBL,BLR,Bangalore Bengaluru,Bangalore,IN,Asia,13.197899,77.7063,Asia/Kolkata
3,VOHS,HYD,Hyderabad Rajiv Gandhi,Hyderabad,IN,Asia,17.231318,78.429855,Asia/Kolkata
4,VOMM,MAA,Chennai,Chennai,IN,Asia,12.990005,80.1693,Asia/Kolkata
5,VECC,CCU,Kolkata Netaji Subhash Chandra Bose,Kolkata,IN,Asia,22.6547,88.4467,Asia/Kolkata
6,VOCI,COK,Kochi Cochin,Kochi,IN,Asia,10.152,76.4019,Asia/Kolkata
7,KJFK,JFK,New York John F Kennedy,New York,US,North America,40.6398,-73.7789,America/New_York
8,EGLL,LHR,London Heathrow,London,GB,Europe,51.4706,-0.461941,Europe/London
9,OMDB,DXB,Dubai,Dubai,AE,Asia,25.252798,55.3644,Asia/Dubai


In [42]:
airport_df.to_csv("../data/airports.csv", index=False)