In [None]:
# Import libraries
import datetime
import json
import re
import shutil
import urllib
import urllib.parse
import urllib.request
import xml.dom.minidom

import geopandas as gpd
import numpy as np
import pandas as pd
import spacy
nlp = spacy.load('en_core_web_sm')

# Define all methods necessary for later use

## Method for extracting text from xml file

In [None]:
def getText(nodelist):
    """Join the character data of every text node found directly in ``nodelist``.

    Nodes of any other type are skipped and their children are not visited.
    Returns an empty string when ``nodelist`` holds no text nodes.
    """
    return ''.join(
        item.data for item in nodelist if item.nodeType == item.TEXT_NODE
    )

## Method for extracting declination at a specific location on a specific date

In [None]:
def getDecination(latitude, longitude, date = ''):
    """Query NOAA's declination calculator (WMM model) for one location and date.

    Parameters
    ----------
    latitude, longitude : float
        Signed decimal degrees. Positive values are sent as N / E, zero and
        negative values as S / W; the magnitude is sent separately.
    date : str, optional
        ``YYYY-MM-DD``, optionally followed by ``T`` and a time part that is
        ignored. An empty string or ``None`` means "today".

    Returns
    -------
    str
        The first number found inside the ``<declination>`` element of the
        XML response, including its sign.

    Raises
    ------
    ValueError
        If ``date`` does not split into exactly year, month and day.
    IndexError
        If the response has no ``<declination>`` element or it holds no number.
    """
    # Set base URL for query
    url = "https://www.ngdc.noaa.gov/geomag-web/calculators/calculateDeclination?browserRequest=true&magneticComponent=d&"
    # Set hemisphere for lat/long
    latLabel = 'N' if latitude > 0 else 'S'
    lonLabel = 'E' if longitude > 0 else 'W'
    # Parse date into day, month, year. The default is '' (not None), so test
    # for any falsy value; take a single timestamp so the three parts agree.
    if not date:
        today = datetime.datetime.now()
        year, month, day = today.year, today.month, today.day
    else:
        year, month, day = date.split('T', 1)[0].split('-', 2)
    # Encode URL parameters
    data = urllib.parse.urlencode({
        'lat1': abs(latitude),
        'lat1Hemisphere': latLabel,
        'lon1': abs(longitude),
        'lon1Hemisphere': lonLabel,
        'model': 'WMM',
        'startYear': year,
        'startMonth': month,
        'startDay': day,
        'resultFormat': 'xml',
    })
    url_req = url + data
    # Pass request to site; the context manager closes the connection
    with urllib.request.urlopen(url_req) as req:
        payload = req.read().decode("UTF-8")
    # Process XML file into object tree and get only declination info
    dom = xml.dom.minidom.parseString(payload)
    myString = getText(dom.getElementsByTagName("declination")[0].childNodes)
    # Keep only the numeric value. The sign is grouped with both alternatives
    # so that an integer-valued result such as "-7" keeps its minus sign.
    declination = re.findall(r"[-+]?(?:\d*\.\d+|\d+)", myString)[0]
    return declination

## Method to convert lat/long from Degree/Minute/Second to Decimal

In [None]:
def dms_to_dd (loc):
    """Convert a ``DD-MM-SS.ssssH`` coordinate string to signed decimal degrees.

    Parameters
    ----------
    loc : str, int or float
        Degrees, minutes and seconds separated by ``-`` and followed by a
        one-letter hemisphere (``N``, ``S``, ``E`` or ``W``). Surrounding
        whitespace is ignored.

    Returns
    -------
    float
        Decimal degrees, negative for the ``S`` and ``W`` hemispheres.
        Numbers (including float subclasses such as ``numpy.float64`` and
        NaN) and the empty string are returned unchanged.

    Raises
    ------
    ValueError
        If the text does not split into exactly three numeric parts.
    """
    # isinstance (rather than an exact type comparison) also accepts float
    # subclasses and ints, which have no .strip() and would otherwise crash.
    if isinstance(loc, (int, float)) or loc == '':
        return loc

    loc = loc.strip()
    hemisphere = loc[-1]
    degrees, minutes, seconds = map(float, loc[:-1].split('-'))

    dd = degrees + minutes / 60 + seconds / (60 * 60)
    if hemisphere in ('S', 'W'):
        dd = -dd
    return dd

## Method to calculate centroid of a convex polygon from a list of vertices

In [None]:
def centroid(vertices):
    """Return the mean position ``(x, y)`` of a polygon's distinct vertices.

    Parameters
    ----------
    vertices : sequence of (x, y, ...) sequences
        Polygon corners. A closed ring, whose last vertex repeats the first
        (as GeoJSON linear rings do), is accepted; the repeated closing
        vertex is ignored so it is not counted twice.

    Returns
    -------
    tuple of float
        ``(mean_x, mean_y)``. This is the vertex average, which coincides
        with the geometric centre for shapes such as rectangles but not for
        arbitrary polygons.

    Raises
    ------
    ZeroDivisionError
        If ``vertices`` is empty.
    """
    points = list(vertices)
    # Drop the duplicated closing vertex of a closed ring; averaging it in
    # would pull the result toward the first corner.
    if len(points) > 1 and tuple(points[0]) == tuple(points[-1]):
        points = points[:-1]
    count = len(points)
    mean_x = sum(point[0] for point in points) / count
    mean_y = sum(point[1] for point in points) / count
    return (mean_x, mean_y)

# Extract and clean Incidents and all supplementary datasets for reference

## Extract Incidents dataset

In [None]:
# Load the incident export and turn every missing value into an empty string
incidents = pd.read_csv("Incident_Export_2021-08-26T08_57Z.csv", header = 0).fillna('')
# Create the blank columns that later cells fill in: one per CEDAR remark
# field, followed by the UAS latitude / longitude columns
for blank_column in (
    'EVENTTYPE.CEDAR',
    'STATUS.CEDAR',
    'MORID.CEDAR',
    'FACILITY.CEDAR',
    'EVENTDATE.CEDAR',
    'UTCTIME.CEDAR',
    'UTCTIME24.CEDAR',
    'CALENDARDATE.CEDAR',
    'NEARESTAIRPORT.CEDAR',
    'METAR.CEDAR',
    'POTENTIALLYSIGNIFICANT.CEDAR',
    'CALLSIGN.CEDAR',
    'ACTYPE.CEDAR',
    'IFRIVR.CEDAR',
    'AUTHCERT.CEDAR',
    'AIRSPACECLASS.CEDAR',
    'ACLOCATION.CEDAR',
    'ACALTITUDE.CEDAR',
    'ACHEADING.CEDAR',
    'RELATIVECLOCKPOSITION.CEDAR',
    'UASREGISTRATIONNUM.CEDAR',
    'UASLONG.CEDAR',
    'UASLAT.CEDAR',
    'UASTYPE.CEDAR',
    'UASFORMATION.CEDAR',
    'CLOSESTPROXIMITY.CEDAR',
    'UASWEIGHTGT55.CEDAR',
    'UASDIM.CEDAR',
    'UASFWROTOR.CEDAR',
    'UASACTIVITYRISK.CEDAR',
    'UASCOLOR.CEDAR',
    'PILOTREPORTEDNMAC.CEDAR',
    'TCASRA.CEDAR',
    'LEOCONTACT.CEDAR',
    'SUMMARY.CEDAR',
    'QAFINDINGS.CEDAR',
    'UAS_LATITUDE',
    'UAS_LONGITUDE',
):
    incidents[blank_column] = ''

In [None]:
# Split the free-text CEDAR.REMARKS field into one column per remark.
# Each remark is written as "<label><value>;". The table below maps every
# output column to the label that introduces its value.
CEDAR_FIELDS = [
    ('EVENTTYPE.CEDAR', 'CEDAR – Event Type: '),
    ('STATUS.CEDAR', 'Status: '),
    ('MORID.CEDAR', 'MOR ID: '),
    ('FACILITY.CEDAR', 'Facility: '),
    ('EVENTDATE.CEDAR', 'Event Date: '),
    ('UTCTIME.CEDAR', 'UTC Time: '),
    ('UTCTIME24.CEDAR', 'UTC Time 24 HR Format: '),
    ('CALENDARDATE.CEDAR', 'Calendar Date: '),
    ('NEARESTAIRPORT.CEDAR', 'Nearest Airport: '),
    ('METAR.CEDAR', 'METAR: '),
    ('POTENTIALLYSIGNIFICANT.CEDAR', 'Potentially Significant: '),
    ('CALLSIGN.CEDAR', 'Callsign: '),
    ('ACTYPE.CEDAR', 'A/C Type: '),
    ('IFRIVR.CEDAR', 'IFR / IVR: '),
    ('AUTHCERT.CEDAR', 'Certificate of Authorization: '),
    ('AIRSPACECLASS.CEDAR', 'Airspace Class: '),
    ('ACLOCATION.CEDAR', 'A/C Location F/R/D: '),
    ('ACALTITUDE.CEDAR', 'A/C Altitude: '),
    ('ACHEADING.CEDAR', 'A/C Heading: '),
    ('RELATIVECLOCKPOSITION.CEDAR', 'Relative Clock Position: '),
    ('UASREGISTRATIONNUM.CEDAR', 'UAS Registration #: '),
    ('UASLONG.CEDAR', 'UAS Longitude: '),
    ('UASLAT.CEDAR', 'UAS Latitude: '),
    ('UASTYPE.CEDAR', 'UAS Type: '),
    ('UASFORMATION.CEDAR', 'UAS Formation: '),
    ('CLOSESTPROXIMITY.CEDAR', 'Closest Proximity (feet): '),
    ('UASWEIGHTGT55.CEDAR', 'UAS Weight Exceeds 55lbs: '),
    ('UASDIM.CEDAR', 'UAS Dimensions (feet): '),
    ('UASFWROTOR.CEDAR', 'UAS Fixed Wing/Rotorcraft: '),
    ('UASACTIVITYRISK.CEDAR', 'UAS Activity Risk: '),
    ('UASCOLOR.CEDAR', 'UAS Color: '),
    ('PILOTREPORTEDNMAC.CEDAR', 'Pilot Reported as NMAC: '),
    ('TCASRA.CEDAR', 'TCAS RA: '),
    ('LEOCONTACT.CEDAR', 'Law Enforcement Contact Info: '),
    ('SUMMARY.CEDAR', 'Summary: '),
    ('QAFINDINGS.CEDAR', 'QA Findings: '),
]


def _extract_cedar_field(remarks, pattern):
    """Return the value captured by ``pattern`` in ``remarks``, or '' if absent.

    ``pattern`` is a compiled regex whose first group is the value. Anything
    that is not a string (for example a number in a mostly empty column)
    yields ''.
    """
    if not isinstance(remarks, str):
        return ''
    match = pattern.search(remarks)
    if match is None:
        return ''
    # Take the captured group directly. str.lstrip(label) would be wrong here:
    # it removes any leading characters that occur in the label, not the
    # label as a prefix, and so eats the start of the value.
    return match.group(1).strip()


cedar_remarks = incidents['CEDAR.REMARKS']
for column, label in CEDAR_FIELDS:
    # re.escape makes the label literal, so characters such as the
    # parentheses in "(feet)" are matched as text instead of regex syntax.
    pattern = re.compile(re.escape(label) + r'(.*?);')
    incidents[column] = cedar_remarks.map(
        lambda remarks, pattern=pattern: _extract_cedar_field(remarks, pattern)
    )

### Needs review: regular expressions converted from R to Python

In [None]:
# Ordered (pattern, replacement) pairs applied to both free-text columns.
# The order matters: 'UAS' is lowercased before the [A-Z]{1,3} patterns run so
# it is not treated as a direction or identifier code, and runway numbers are
# separated from 'RWY' before single digits are zero-padded.
# NOTE(review): the miles and compass rules match inside longer words
# (e.g. 'east' in 'least'); kept as ported from R - confirm this is acceptable.
TEXT_STANDARDIZATIONS = [
    # Standardize references to runways. 'RY' is only replaced when it is not
    # part of a longer word, so words such as 'VERY' are left alone.
    (r'(Runway|runway|RUNWAY|(?<![A-Za-z])RY(?![A-Za-z]))', 'RWY'),
    # Standardize 'of the' to 'of'
    (r'of the', 'of'),
    # Standardize mile/miles to NM
    (r'(M|m)iles?', 'NM'),
    # Standardize UAS to lowercase to ease parsing alphabetical codes later
    (r'UAS', 'uas'),
    # Remove phone numbers (optional country code, area code, 3 + 4 digits)
    (r'(\+\d{1,2}\s?)?1?\-?\.?\s?\(?\d{3}\)?[\s.-]?\d{3}[\s.-]?\d{4}', ''),
    # Separate NM from a following uppercase code: 'NMSW' -> 'NM SW'
    (r'(NM)([A-Z]{1,3})', r'\1 \2'),
    # Separate a distance from NM: '5NM' -> '5 NM'
    # (the R pattern's '\NM' is an invalid escape in Python; plain 'NM' is meant)
    (r'([0-9]*\-*\/*[0-9]*\.*[0-9]\.*[0-9]*)(NM)', r'\1 \2'),
    # Separate 'of' from a following identifier: 'ofLAX' -> 'of LAX'
    (r'(of)([A-Z]{3,4})', r'\1 \2'),
    # Separate a direction code from 'of': 'SWof' -> 'SW of'
    (r'([A-Z]{1,3})(of)', r'\1 \2'),
    # Insert the missing 'of' before a trailing identifier: 'SW LAX' -> 'SW of LAX'
    (r'([A-Z]{1,3})(\s[A-Z]{3,4}$)', r'\1 of\2'),
    # Separate RWY from its designator: 'RWY27L' -> 'RWY 27L'
    (r'(RWY)([0-9]{1,2}[LRC]?)', r'\1 \2'),
    # Zero-pad single-digit runway numbers: 'RWY 9' -> 'RWY 09'.
    # \g<1> is required because '\10' would be read as group 10.
    (r'(RWY\s)(\d(?!\d))', r'\g<1>0\2'),
    # Collapse runs of spaces to a single space
    (r' {2,}', ' '),
    # Standardize compass words to single letters (S, E, N, W)
    (r'south|South', 'S'),
    (r'east|East', 'E'),
    (r'north|North', 'N'),
    (r'west|West', 'W'),
]

for text_column in ('REMARKS', 'SUMMARY.CEDAR'):
    for pattern, replacement in TEXT_STANDARDIZATIONS:
        incidents[text_column] = incidents[text_column].str.replace(pattern, replacement, regex=True)

## Extract Airports datasets

In [None]:
# Location of the airports CSV download (requested with spatialRefId=4326)
airport_url = "https://opendata.arcgis.com/api/v3/datasets/e747ab91a11045e8b3f8a3efd093d3b5_0/downloads/data?format=csv&spatialRefId=4326"
# Load the full CSV into a dataframe
airport = pd.read_csv(airport_url)
# Keep only the identifier, name and coordinate columns
airport_df = airport.loc[:, ['GLOBAL_ID', 'IDENT', 'ICAO_ID', 'NAME', 'LONGITUDE', 'LATITUDE']]

## Extract NAVAIDs dataset

In [None]:
# Location of the NAVAIDs CSV download (requested with spatialRefId=4326)
navaid_url = "https://opendata.arcgis.com/api/v3/datasets/990e238991b44dd08af27d7b43e70b92_0/downloads/data?format=csv&spatialRefId=4326"
# Load the full CSV into a dataframe
navaid = pd.read_csv(navaid_url)
# Keep only the identifier, name and coordinate columns, then give X / Y
# the same longitude / latitude names used by the other reference tables
navaid_df = (
    navaid
    .loc[:, ['GLOBAL_ID', 'IDENT', 'NAME_TXT', 'X', 'Y']]
    .rename(columns={"X": "LONGITUDE", "Y": "LATITUDE"})
)

## Extract Runways dataset

In [None]:
# Pass request to site for GeoJSON; the context manager closes the connection
runway_url = "https://opendata.arcgis.com/datasets/4d8fa46181aa470d809776c57a8ab1f6_0.geojson"
with urllib.request.urlopen(runway_url) as req:
    data = req.read()
# Read into JSON
runway_json = json.loads(data.decode('utf-8'))
# Read JSON into dataframe, one row per GeoJSON feature
runway_df = pd.json_normalize(runway_json, record_path =['features'])
# Keep only the first coordinate list of each geometry
# NOTE(review): assumes every feature is a Polygon whose first entry is the
# outer ring - confirm no MultiPolygon features occur in this dataset.
runway_df['geometry.coordinates'] = runway_df['geometry.coordinates'].map(lambda rings: rings[0])
# Calculate the center of each runway; centroid() returns (x, y), which for
# GeoJSON positions is (longitude, latitude). Building whole columns at once
# keeps them numeric instead of object columns seeded with ''.
runway_centers = runway_df['geometry.coordinates'].map(centroid)
runway_df['RUNWAY.LATITUDE'] = runway_centers.map(lambda center: center[1]).astype(float)
runway_df['RUNWAY.LONGITUDE'] = runway_centers.map(lambda center: center[0]).astype(float)
# Remove unnecessary columns and rename for readability
runway_df = runway_df[['properties.GLOBAL_ID','properties.AIRPORT_ID','properties.DESIGNATOR','RUNWAY.LATITUDE','RUNWAY.LONGITUDE']]
runway_df = runway_df.rename({'properties.GLOBAL_ID': 'GLOBAL_ID','properties.AIRPORT_ID': 'AIRPORT_ID','properties.DESIGNATOR': 'DESIGNATOR'}, axis=1)
# Inner join of Runways and Airports on Runways.AIRPORT_ID and Airports.GLOBAL_ID
runway_df = pd.merge(runway_df, airport_df, how='inner', left_on = 'AIRPORT_ID', right_on = 'GLOBAL_ID')
# Remove unnecessary columns for readability
runway_df = runway_df[['DESIGNATOR','RUNWAY.LATITUDE','RUNWAY.LONGITUDE','IDENT','ICAO_ID']]

## Extract Designated Points dataset

In [None]:
# Set URL for API call
point_url = "https://opendata.arcgis.com/datasets/861043a88ff4486c97c3789e7dcdccc6_0.geojson"
# Pass request to site for GeoJSON; the context manager closes the connection
with urllib.request.urlopen(point_url) as point_req:
    point_data = point_req.read()
# Read into JSON
point_json = json.loads(point_data.decode('utf-8'))
# Read JSON into dataframe, one row per GeoJSON feature
point_df = pd.json_normalize(point_json, record_path =['features'])


def _point_lat_lon(lat_dms, lon_dms, coordinates):
    """Return ``(latitude, longitude)`` in decimal degrees for one point.

    The degree-minute-second properties are used when both are present;
    otherwise the GeoJSON position ``[longitude, latitude]`` is used.
    """
    # Both values must be usable: None, NaN and '' all count as missing.
    has_dms = all(pd.notna(value) and value != '' for value in (lat_dms, lon_dms))
    if has_dms:
        return dms_to_dd(lat_dms), dms_to_dd(lon_dms)
    return coordinates[1], coordinates[0]


# Calculate latitude/longitude for each point; assigning whole columns keeps
# them numeric instead of object columns seeded with ''.
point_coords = [
    _point_lat_lon(lat_dms, lon_dms, coordinates)
    for lat_dms, lon_dms, coordinates in zip(
        point_df['properties.LATITUDE'],
        point_df['properties.LONGITUDE'],
        point_df['geometry.coordinates'],
    )
]
point_df['LATITUDE'] = [lat for lat, _ in point_coords]
point_df['LONGITUDE'] = [lon for _, lon in point_coords]
# Remove unnecessary columns and rename for readability
point_df = point_df[['properties.GLOBAL_ID', 'properties.IDENT','LATITUDE','LONGITUDE']]
point_df = point_df.rename({'properties.GLOBAL_ID': 'GLOBAL_ID', 'properties.IDENT': 'IDENT'}, axis=1)

# Calculate UAS locations from Incidents dataset

### Run R scripts from Lex here

# Export to .shp file

In [None]:
# Read the UAS coordinates from the incidents table. Anything non-numeric,
# including the '' placeholders the columns are created with, becomes NaN.
uas_longitude = pd.to_numeric(incidents['UAS_LONGITUDE'], errors='coerce')
uas_latitude = pd.to_numeric(incidents['UAS_LATITUDE'], errors='coerce')
has_location = uas_longitude.notna() & uas_latitude.notna()
# Fail loudly rather than export a file with no real incident locations
if not has_location.any():
    raise ValueError(
        "No incident has numeric UAS_LATITUDE / UAS_LONGITUDE values; "
        "calculate the UAS locations before exporting."
    )
# Set geometry type: one point per incident that has a location
geometry = gpd.points_from_xy(uas_longitude[has_location], uas_latitude[has_location])
# Convert incidents to GeoDataFrame, keeping every attribute column
# NOTE(review): assumes the coordinates are WGS84 (EPSG:4326), matching the
# spatial reference requested for the airport and NAVAID downloads - confirm.
incidents = gpd.GeoDataFrame(incidents.loc[has_location], geometry = geometry, crs = 'EPSG:4326')
# Export shapefile into zip folder
incidents.to_file('UAS_Incidents_Final.zip', driver = 'ESRI Shapefile')