# Imports

In [1]:
import io

import pandas as pd

# Import Flight Data

In [2]:
# Load the January 2019 on-time data file.
flight_data_df = pd.read_csv("flight_data_files/jan_2019_ontime.csv")

In [3]:
# Quick load check: report how many rows the 2019 file contains.
print(f"Number of rows in the 2019 data file is: {len(flight_data_df)}")

Number of rows in the 2019 data file is: 583985


In [4]:
# Keep a reference to the 2019 frame: the name flight_data_df is rebound to
# the 2020 data in the next load cell.
flight_data_df_list = [flight_data_df]

In [5]:
# Load the January 2020 on-time data file.
# NOTE: this rebinds flight_data_df, which held the 2019 data until now.
flight_data_df = pd.read_csv("flight_data_files/jan_2020_ontime.csv")

In [6]:
# Quick load check: report how many rows the 2020 file contains.
print(f"Number of rows in the 2020 data file is: {len(flight_data_df)}")

Number of rows in the 2020 data file is: 607346


In [7]:
# Add the 2020 frame next to the 2019 frame already in the list.
flight_data_df_list.append(flight_data_df)

In [8]:
# Stack the collected frames row-wise; ignore_index=True discards the
# per-file row labels and numbers the combined rows from 0.
# NOTE: flight_data_df is rebound again and now refers to the combined data.
flight_data_df = pd.concat(flight_data_df_list, axis=0, ignore_index=True)

In [9]:
# Report the row count of the combined frame.
print(f"Number of rows in the combined data file is: {len(flight_data_df)}")

Number of rows in the combined data file is: 1191331


In [10]:
# Number of distinct values in each column (missing values are not counted
# by default).
flight_data_df.nunique()

DAY_OF_MONTH               31
DAY_OF_WEEK                 7
OP_UNIQUE_CARRIER          17
OP_CARRIER_AIRLINE_ID      17
OP_CARRIER                 17
TAIL_NUM                 5857
OP_CARRIER_FL_NUM        6997
ORIGIN_AIRPORT_ID         353
ORIGIN_AIRPORT_SEQ_ID     373
ORIGIN                    353
DEST_AIRPORT_ID           353
DEST_AIRPORT_SEQ_ID       373
DEST                      353
DEP_TIME                 1440
DEP_DEL15                   2
DEP_TIME_BLK               19
ARR_TIME                 1440
ARR_DEL15                   2
CANCELLED                   2
DIVERTED                    2
DISTANCE                 1512
Unnamed: 21                 0
dtype: int64

# Get FAA Airport Three Letter Codes From Flight Data

In [11]:
# Get the distinct origin airport codes as a plain Python list
airport_ORIGIN_code_list = flight_data_df["ORIGIN"].unique().tolist()

In [12]:
# Report how many distinct origin codes were collected.
print(f"Number of ORIGIN airport codes in the data file is: {len(airport_ORIGIN_code_list)}")

Number of ORIGIN airport codes in the data file is: 353


In [13]:
# Get the distinct destination airport codes as a plain Python list
airport_DEST_code_list = flight_data_df["DEST"].unique().tolist()

In [14]:
# Report how many distinct destination codes were collected.
print(f"Number of DEST airport codes in the data file is: {len(airport_DEST_code_list)}")

Number of DEST airport codes in the data file is: 353


In [15]:
# Combine origin and destination airport codes into a single list of unique
# airport codes (origin codes first, then any destination-only codes, each in
# order of first appearance).
#
# The origin list is copied: a plain assignment would make both names refer to
# the same list, so appending destination codes would also change
# airport_ORIGIN_code_list.
airport_code_list = list(airport_ORIGIN_code_list)

# Track codes already present in a set so each membership check is O(1)
# instead of rescanning the growing list.
seen_airport_codes = set(airport_code_list)

for airport_code in airport_DEST_code_list:
    if airport_code not in seen_airport_codes:
        seen_airport_codes.add(airport_code)
        airport_code_list.append(airport_code)

In [16]:
# Report the size of the combined origin + destination code list.
print(f"Number of unique airport codes in the data file is: {len(airport_code_list)}")

Number of unique airport codes in the data file is: 353


# Get The Airport Name for Each Airport Code

In [17]:
# Parse the raw airport listing into
#   airport_code_city_dict: {airport code: [city/state string, ...]}
# keeping only codes that occur in the flight data (airport_code_list).
# A code may be listed under several city names, hence the list values.
# airport_codes_found_cnt counts matching listing lines, not distinct codes.
airport_code_city_dict = {}
airport_codes_found_cnt = 0

# Set membership is O(1); the list would be rescanned for every file line.
known_airport_codes = set(airport_code_list)

with io.open("flight_data_files/airport_codes_city_raw.txt", 'rt', newline='\r\n') as f:
    # Iterating the file stops at EOF, so a missing 'Canadian Provinces'
    # sentinel can no longer cause an endless loop.
    for raw_line in f:
        # newline='\r\n' leaves the terminator attached to each line. Remove
        # it only when present so an unterminated last line is not truncated.
        line = raw_line[:-2] if raw_line.endswith('\r\n') else raw_line

        # Everything from this heading onwards is outside the wanted listing.
        if line == 'Canadian Provinces':
            break

        # Skip blank lines, one-character lines and navigation links.
        if len(line.strip()) <= 1 or line == 'return to top':
            continue

        # Expected shape: "<city, state> (<3-letter code>...".
        line_split = line.split('(')
        if len(line_split) < 2:
            # No airport code on this line; nothing to record.
            continue

        city_state = line_split[0].strip()
        airport_code = line_split[1][0:3].strip()

        if airport_code in known_airport_codes:
            # Append to the existing list of names, or start a new one.
            airport_code_city_dict.setdefault(airport_code, []).append(city_state)
            airport_codes_found_cnt += 1

In [18]:
# NOTE: the counter is incremented once per matching line of the raw listing,
# so a code that appears under several city names is counted several times.
print(f"The number of airport codes found is: {airport_codes_found_cnt}")

The number of airport codes found is: 363


In [19]:
# List the airport codes whose names need a manual look: codes recorded with
# more than one city name, or whose first city name contains a '-'.
for airport_code, city_names in airport_code_city_dict.items():
    if len(city_names) > 1 or "-" in city_names[0]:
        print(f"Airport code:{airport_code} city names:{city_names}")

Airport code:CAK city names:['Akron/Canton, OH', 'Canton/Akron, OH']
Airport code:ABE city names:['Allentown, PA', 'Bethlehem, PA', 'Easton, PA']
Airport code:ACV city names:['Arcata, CA', 'Eureka/Arcata, CA']
Airport code:AVL city names:['Asheville, NC', 'Hendersonville, NC']
Airport code:HTS city names:['Ashland, KY/Huntington, WV', 'Huntington, WV/Ashland, KY']
Airport code:MBS city names:['Bay City, MI', 'Midland, MI', 'Saginaw, MI']
Airport code:BPT city names:['Beaumont/Port Arthur, TX', 'Port Arthur/Beaumont, TX']
Airport code:GPT city names:['Biloxi/Gulfport, MS', 'Gulfport, MS']
Airport code:BGM city names:['Binghamton, NY', 'Endicott, NY', 'Johnson City, NY']
Airport code:TRI city names:['Bristol, VA', 'Johnson City, TN', 'Kingsport, TN']
Airport code:MRY city names:['Carmel, CA', 'Monterey, CA']
Airport code:CMI city names:['Champaign/Urbana, IL', 'Urbana/Champaign, IL']
Airport code:MDW city names:['Chicago, IL - Midway']
Airport code:ORD city names:["Chicago, IL - O'Hare"]

In [20]:
# Clean airport names: manual overrides that replace the parsed city names
# with a single "<City>, <State>" string per airport code.
#
# NOTE: the later city/state split reads only the first element of each list,
# so for the two-element entries (HTS, TRI) the second string is not used.
#
# Fixed here: "Kaui" -> "Kauai", missing space in "Kahului- Island", and a
# trailing space after "Vail, CO".
airport_code_city_dict.update({
    "CAK": ["Akron/Canton, OH"],
    "ABE": ["Allentown/Bethlehem/Easton, PA"],
    "ACV": ["Eureka/Arcata, CA"],
    "AVL": ["Asheville/Hendersonville, NC"],
    "HTS": ["Ashland, KY", "Huntington, WV"],
    "MBS": ["Bay City/Midland/Saginaw, MI"],
    "BPT": ["Beaumont/Port Arthur, TX"],
    "GPT": ["Biloxi/Gulfport, MS"],
    "BGM": ["Binghamton/Endicott/Johnson City, NY"],
    "TRI": ["Bristol, VA", "Bristol/Johnson City/Kingsport, TN"],
    "MRY": ["Carmel/Monterey, CA"],
    "CMI": ["Champaign/Urbana, IL"],
    "MDW": ["Chicago - Midway, IL"],
    "ORD": ["Chicago - O'Hare, IL"],
    "HIB": ["Chisholm/Hibbing, MN"],
    "PIE": ["Clearwater/St Petersburg, FL"],
    "COD": ["Cody/Yellowstone, WY"],
    "DFW": ["Dallas/Fort Worth, TX"],
    "DEN": ["Denver, CO"],
    "DTW": ["Detroit, MI"],
    "RDU": ["Raleigh/Durham, NC"],
    "XNA": ["Fayetteville, AR"],
    "VPS": ["Fort Walton Beach/Valparaiso, FL"],
    "GGG": ["Gladewater/Kilgore/Longview, TX"],
    "GSO": ["Greensboro/High Point, NC"],
    "GSP": ["Greenville/Spartanburg, SC"],
    "SUN": ["Hailey/Sun Valley, ID"],
    "PHF": ["Hampton/Williamsburg, VA"],
    "BDL": ["Hartford/Windsor Locks, CT"],
    "ITO": ["Hilo - Island of Hawaii, HI"],
    "HNL": ["Honolulu - Island of Oahu, HI"],
    "HOU": ["Houston - Hobby, TX"],
    "IAH": ["Houston - George Bush Intercontinental, TX"],
    "OGG": ["Kahului - Island of Maui, HI"],
    "LIH": ["Lihue - Island of Kauai, HI"],
    "KOA": ["Kona - Island of Hawaii, HI"],
    "MFE": ["Mcallen/Mission, TX"],
    "MIA": ["Miami, FL"],
    "MAF": ["Midland/Odessa, TX"],
    "JFK": ["New York - Kennedy, NY"],
    "LGA": ["New York - La Guardia, NY"],
    "SWF": ["Newburgh/Stewart Field, NY"],
    "SNA": ["Orange County/Santa Ana, CA"],
    "MCO": ["Orlando, FL"],
    "PHL": ["Philadelphia, PA"],
    "TTN": ["Trenton/Mercer, NJ"],
    "AVP": ["Scranton/Wilkes Barre, PA"],
    "SEA": ["Seattle/Tacoma, WA"],
    "CWA": ["Stevens Point/Wausau, WI"],
    "EGE": ["Vail, CO"],
    "IAD": ["Washington - Dulles, DC"],
    "DCA": ["Washington - Reagan National, DC"],
})

In [21]:
# Separate city and state from airport location string.
#
# Each value of airport_code_city_dict is replaced in place: a list of
# "<City>, <State>" strings becomes {"City": ..., "State": ...}, built from
# the FIRST string in the list. Values that are already dicts are left as they
# are, so running this cell a second time no longer fails.
for airport_code, entry in airport_code_city_dict.items():
    if not isinstance(entry, dict):
        city_state = entry[0]

        city_state_split = city_state.split(",")
        if len(city_state_split) < 2:
            # Fail with the offending code instead of a bare IndexError.
            raise ValueError(
                f"Airport code {airport_code}: expected '<City>, <State>' but got {city_state!r}"
            )
        city = city_state_split[0].strip()
        state = city_state_split[1].strip()

        entry = {"City": city, "State": state}
        # Replacing the value of an existing key does not resize the dict, so
        # it is safe to do while iterating over it.
        airport_code_city_dict[airport_code] = entry

    print(f"Airport code:[{airport_code}] City:[{entry['City']}] State:[{entry['State']}]")
    

Airport code:[ABR] City:[Aberdeen] State:[SD]
Airport code:[ABI] City:[Abilene] State:[TX]
Airport code:[ADK] City:[Adak Island] State:[AK]
Airport code:[CAK] City:[Akron/Canton] State:[OH]
Airport code:[ALB] City:[Albany] State:[NY]
Airport code:[ABQ] City:[Albuquerque] State:[NM]
Airport code:[AEX] City:[Alexandria] State:[LA]
Airport code:[ABE] City:[Allentown/Bethlehem/Easton] State:[PA]
Airport code:[APN] City:[Alpena] State:[MI]
Airport code:[AMA] City:[Amarillo] State:[TX]
Airport code:[ANC] City:[Anchorage] State:[AK]
Airport code:[ATW] City:[Appleton] State:[WI]
Airport code:[ACV] City:[Eureka/Arcata] State:[CA]
Airport code:[AVL] City:[Asheville/Hendersonville] State:[NC]
Airport code:[HTS] City:[Ashland] State:[KY]
Airport code:[ASE] City:[Aspen] State:[CO]
Airport code:[ATL] City:[Atlanta] State:[GA]
Airport code:[AGS] City:[Augusta] State:[GA]
Airport code:[AUS] City:[Austin] State:[TX]
Airport code:[BFL] City:[Bakersfield] State:[CA]
Airport code:[BWI] City:[Baltimore] St

# Save Airport Code City Name Dataframe to CSV File

In [22]:
# Create airport code city dataframe: one row per airport code (the dict keys
# become the index) with "City" and "State" columns.
airport_city_code_df = pd.DataFrame.from_dict(airport_code_city_dict, orient="index", columns=["City", "State"])

In [23]:
# Display the frame (up to its first 400 rows) to eyeball the parsed values.
airport_city_code_df.head(400)

Unnamed: 0,City,State
ABR,Aberdeen,SD
ABI,Abilene,TX
ADK,Adak Island,AK
CAK,Akron/Canton,OH
ALB,Albany,NY
...,...,...
ILM,Wilmington,NC
ORH,Worcester,MA
WRG,Wrangell,AK
YAK,Yakutat,AK


In [24]:
# Reset index to integer from airport code: the airport code index becomes an
# ordinary first column named "Airport Code".
# The guard makes the cell safe to re-run; repeating an in-place reset_index
# would insert the integer index as a second "Airport Code" column.
if "Airport Code" not in airport_city_code_df.columns:
    airport_city_code_df = (
        airport_city_code_df
        .rename_axis("Airport Code")
        .reset_index()
    )

In [25]:
# Confirm that the airport code is now a regular column.
airport_city_code_df.head()

Unnamed: 0,Airport Code,City,State
0,ABR,Aberdeen,SD
1,ABI,Abilene,TX
2,ADK,Adak Island,AK
3,CAK,Akron/Canton,OH
4,ALB,Albany,NY


In [26]:
# Save the lookup table; index=False leaves the integer row index out of the file.
airport_city_code_df.to_csv("flight_data_files/airport_codes_city.csv", index=False)