In [38]:
# Basic Imports
import warnings
import pandas as pd

# Removing warnings
warnings.filterwarnings('ignore')

# Removing max column value in Pandas
pd.set_option('display.max_columns', None)

# Data Imports
faa_data = pd.read_excel("./raw-data/faa-strike-data.xlsx")

In [88]:
strike_data = faa_data[[
    "INDEX_NR", # Record ID
    "AIRCRAFT", # Type Of Aircraft
    "AIRPORT", # Airport Name
    "NUM_STRUCK", # Number of birds struck
    "EFFECT", # Effect on flight
    "INCIDENT_DATE", # Flight Date
    "DAMAGE_LEVEL", # Damage done to the plane
    "NUM_ENGS", # Number of engines on plane
    "OPID", # ID of the airline operator (for further processing)
    "STATE", # Origin State
    "PHASE_OF_FLIGHT", # Phase of flight that the plane was in
    "PRECIPITATION", # Precipitation
    "REMAINS_COLLECTED", # Bool val for whether or not the remains were collected
    "REMAINS_SENT", # Bool val for whether or not the remains were sent to the Smithsonian for identification
    "REMARKS", # Pilot remarks
    "SIZE", # Size of birds struck (reported by pilot)
    "SKY", # Sky conditions during the strike
    "SPECIES", # Species of the bird struck
    "WARNED", # Whether or not the pilot was warned about the birds before strike
    "COST_REPAIRS", # Cost of repairs (not adjusted for inflation)
    "COST_OTHER", # Other costs associated with the strike (not adjusted for inflation)
    "COST_REPAIRS_INFL_ADJ", # Cost of repairs (adjusted for inflation)
    "COST_OTHER_INFL_ADJ", # Other costs associated with the strike (adjusted for inflation)
    "HEIGHT", # Altitude that strick happened
    "DISTANCE" # Distance from the airport
]]

strike_data = strike_data.dropna(subset=["AIRPORT"]).reset_index(drop=True)
strike_data = strike_data[strike_data["AIRPORT"] != "UNKNOWN"]

In [89]:
def process_string_lat_long(latlong):
    if (type(latlong) == str):
        if ('°' in latlong):
            placeholder = latlong.split('°')
            degrees = float(placeholder[0])

            placeholder = placeholder[1].split("'")
            minutes = float(placeholder[0]) / 60

            placeholder = placeholder[1][0:2]
            seconds = float(placeholder[0]) / 3600

            return round(degrees + minutes + seconds, 7)
        else:        
            print(latlong)
            output = float(latlong.split(",")[0])
            print(output)
            return output
    else:
        return latlong

airport_data = faa_data[[
    "AIRPORT",
    "AIRPORT_LATITUDE",
    "AIRPORT_LONGITUDE"
]]

airport_data = airport_data.drop_duplicates(subset=["AIRPORT"]).reset_index(drop=True)

airport_data.loc[airport_data["AIRPORT"] == "ISTANBUL AIRPORT", ["AIRPORT_LATITUDE", "AIRPORT_LONGITUDE"]] = [41.2768, 28.7301]

airport_data["AIRPORT_LATITUDE"] = airport_data["AIRPORT_LATITUDE"].apply(process_string_lat_long)
airport_data["AIRPORT_LONGITUDE"] = airport_data["AIRPORT_LONGITUDE"].apply(process_string_lat_long)

airport_data.dropna(inplace=True)

airport_data.head(10)

70.3442778,-15
70.3442778
37.0747673,-77
37.0747673
67.0086750,-14
67.008675
42.6907171,-88
42.6907171
42.6907171,-88
42.6907171


Unnamed: 0,AIRPORT,AIRPORT_LATITUDE,AIRPORT_LONGITUDE
0,SACRAMENTO INTL,38.69542,-121.59077
1,DENVER INTL AIRPORT,39.85841,-104.667
2,EPPLEY AIRFIELD,41.30252,-95.89417
3,WASHINGTON DULLES INTL ARPT,38.94453,-77.45581
4,LA GUARDIA ARPT,40.77724,-73.87261
5,SAN ANTONIO INTL,29.53369,-98.46978
6,KANSAS CITY INTL,39.29761,-94.71391
7,DALLAS/FORT WORTH INTL ARPT,32.89595,-97.0372
8,NORMAN Y. MINETA SAN JOSE INTL ARPT,37.36186,-121.92901
9,LAMBERT-ST LOUIS INTL,38.74769,-90.35999


In [90]:
def processing_effect_data(effect_val):
    if (effect_val == "nan"):
        return "No Effect Or Unknown"
    else:
        return effect_val

strike_data["EFFECT"] = strike_data["EFFECT"].astype(str)
strike_data["EFFECT"] = strike_data["EFFECT"].apply(processing_effect_data)

In [91]:
def processing_damage_data(damage_val):
    if (damage_val == "N"):
        return "No Damage"
    elif (damage_val == "M"):
        return "Minor Damage"
    elif (damage_val == "S"):
        return "Substantial Damage"
    elif (damage_val == "D"):
        return "Aircraft Destroyed"
    else:
        return "Unknown Damage"

strike_data["DAMAGE_LEVEL"] = strike_data["DAMAGE_LEVEL"].apply(processing_damage_data)

In [92]:
def processing_precip_data(precip_val):
    if (precip_val == "nan"):
        return "Unknown Precipitation"
    else:
        return precip_val

strike_data["PRECIPITATION"] = strike_data["PRECIPITATION"].astype(str)
strike_data["PRECIPITATION"] = strike_data["PRECIPITATION"].apply(processing_precip_data)

In [93]:
def processing_sky_data(sky_val):
    if (sky_val == "nan"):
        return "Unknown Conditions"
    else:
        return sky_val

strike_data["SKY"] = strike_data["SKY"].astype(str)
strike_data["SKY"] = strike_data["SKY"].apply(processing_sky_data)


In [94]:
def processing_repair_data(repair_val):
    if (repair_val == "nan"):
        return "Unknown Repair Costs"
    else:
        return repair_val

strike_data["SKY"] = strike_data["SKY"].astype(str)
strike_data["SKY"] = strike_data["SKY"].apply(processing_sky_data)
strike_data.head(20)

Unnamed: 0,INDEX_NR,AIRCRAFT,AIRPORT,NUM_STRUCK,EFFECT,INCIDENT_DATE,DAMAGE_LEVEL,NUM_ENGS,OPID,STATE,PHASE_OF_FLIGHT,PRECIPITATION,REMAINS_COLLECTED,REMAINS_SENT,REMARKS,SIZE,SKY,SPECIES,WARNED,COST_REPAIRS,COST_OTHER,COST_REPAIRS_INFL_ADJ,COST_OTHER_INFL_ADJ,HEIGHT,DISTANCE
0,608242,B-737-300,SACRAMENTO INTL,1,No Effect Or Unknown,1996-06-22,Unknown Damage,2.0,UAL,CA,Take-off Run,Unknown Precipitation,0,0,BLOOD ON L FWD NOSE AREA SEEN BY CREW AFTER LDG.,Medium,Unknown Conditions,Unknown bird - medium,Unknown,,,,,0.0,0.0
1,608243,B-737-300,DENVER INTL AIRPORT,1,No Effect Or Unknown,1996-06-26,Unknown Damage,2.0,UAL,CO,Take-off Run,Unknown Precipitation,0,0,CREW SUSPECTED BIRDSTRIKE ON T/O. LOOKED LIKE ...,Medium,Unknown Conditions,Unknown bird - medium,Unknown,,,,,0.0,0.0
2,608244,B-757-200,EPPLEY AIRFIELD,1,No Effect Or Unknown,1996-07-01,No Damage,2.0,UAL,NE,Take-off Run,Unknown Precipitation,0,0,BIRDSTRIKE AT ROTATION. INSPN. NO DMG.,Medium,Unknown Conditions,Unknown bird - medium,Unknown,,,,,0.0,0.0
3,608245,A-320,WASHINGTON DULLES INTL ARPT,1,No Effect Or Unknown,1996-07-01,No Damage,2.0,UAL,DC,Approach,Unknown Precipitation,0,0,"ON FINAL APCH, STRIKE UNDER THE NOSE OF THE CO...",Medium,Unknown Conditions,Unknown bird - medium,Unknown,,,,,1000.0,
4,608246,A-320,LA GUARDIA ARPT,1,No Effect Or Unknown,1996-07-01,Minor Damage,2.0,UAL,NY,Climb,Unknown Precipitation,0,0,LOUD NOISE WAS HEARD DURING CLIMBOUT THAT SOUN...,Medium,Unknown Conditions,Unknown bird - medium,Unknown,,,,,5000.0,
5,608247,B-727-100,SAN ANTONIO INTL,1,No Effect Or Unknown,1991-05-06,No Damage,3.0,AAL,TX,Take-off Run,Unknown Precipitation,0,0,NO DAMAGE/INJURY. TIME = 1125 GMT,Medium,No Cloud,Unknown bird - medium,No,,,,,0.0,0.0
6,608248,BE-1900,KANSAS CITY INTL,11-100,No Effect Or Unknown,1993-11-05,Unknown Damage,2.0,AMW,MO,Approach,Unknown Precipitation,0,0,ATIS WARNING. STRUCK NUMEROUS OTHER PARTS OF A...,Medium,Overcast,Unknown bird - medium,Yes,,,,,1500.0,
7,608249,B-737-500,KANSAS CITY INTL,1,No Effect Or Unknown,1995-07-02,No Damage,2.0,UAL,MO,Landing Roll,Unknown Precipitation,0,0,STRIKE ON NOSE IMMEDIATELY AFTER LANDING. STRU...,Medium,Unknown Conditions,Unknown bird - medium,Unknown,,,,,0.0,0.0
8,608250,MD-80,DALLAS/FORT WORTH INTL ARPT,2-10,No Effect Or Unknown,1990-09-19,Unknown Damage,2.0,AAL,TX,Approach,Unknown Precipitation,0,0,HIT 2. SPARROW OR OTHER SMALL SIZE BIRD. NO DA...,Small,Some Cloud,Unknown bird - small,No,,,,,100.0,
9,608251,MERLIN IV,NORMAN Y. MINETA SAN JOSE INTL ARPT,1,No Effect Or Unknown,1992-05-03,No Damage,2.0,WWM,CA,Landing Roll,Unknown Precipitation,0,0,NO DAMAGE,Small,Some Cloud,American robin,Yes,,,,,0.0,0.0


In [95]:
strike_data.to_csv("./processed-data/strike-data.csv", index=False)
airport_data.to_csv("./processed-data/airport-locations.csv", index=False)