### Import needed libraries

In [1]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import plotly.express as px
from plotly.subplots import make_subplots
import plotly.graph_objects as go
import chart_studio.plotly as py
import re

### Read in the csv file

In [2]:
nonstandard = pd.read_csv('C:/Users/Olga/Desktop/GMU/DAEN690/LEGO/Incidents_Cleaned_NonStandard.csv')
nonstandard

Unnamed: 0,dateonly,timeonly,DATE,CALLSIGN,POD,PRIMARYCODE,SECONDARYCODES,REPORTINGFACILITY,REMARKS,MORID.CEDAR,...,IFRIVR.CEDAR,AIRSPACECLASS.CEDAR,ACLOCATION.CEDAR,ACALTITUDE.CEDAR,ACHEADING.CEDAR,RELATIVECLOCKPOSITION.CEDAR,UASLONG.CEDAR,UASLAT.CEDAR,SUMMARY.CEDAR,QAFINDINGS.CEDAR
0,1/4/2018,5:00,2018-01-04T05:00Z,ASA918,DEN,UAS,,WOC,W-ROC reports aircraft reported a white UAS at...,,...,,,,,,,,,,
1,1/4/2018,8:31,2018-01-04T08:31Z,REH9,DEN,UAS,,SAN,Reported a red and white UAS passed helicopter...,,...,,,,,,,,,,
2,1/4/2018,10:50,2018-01-04T10:50Z,,DEN,UAS,,SHV,ATCT personnel reported observing a large UAS ...,,...,,,,,,,,,,
3,1/5/2018,11:35,2018-01-05T11:35Z,SKW3019,DEN,UAS,,TUS,"Aircrew reported observing a light from a UAS,...",,...,,,,,,,,,,
4,1/6/2018,1:56,2018-01-06T01:56Z,N1671C,DEN,UAS,,IWA,Aircraft reported UAS activity while entering ...,,...,,,,,,,,,,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
3438,8/23/2021,18:34,2021-08-23T18:34Z,,JATOC,UAS,,SAT,"1441 EDT / 1841 UTC 8/23/2021\r\nHAVOC39, C-1...",,...,,,,,,,,,,
3439,8/23/2021,21:53,2021-08-23T21:53Z,,JATOC,UAS,,ORD,1756 EDT / 2156 UTC 8/23/2021\r\nChicago OHare...,,...,,,,,,,,,,
3440,8/25/2021,15:35,2021-08-25T15:35Z,SIL9006,DEN,UAS,,TPA,Aircraft observed two UAS off the left side wh...,,...,,,,,,,,,,
3441,8/26/2021,0:47,2021-08-26T00:47Z,,DEN,UAS,,BOS,MA State PD observed a UAS over the airport at...,,...,,,,,,,,,,


### Extracting Geospatial Location Information from Non-Standard Remarks in the format: 'XX NM XXX' or '5 NM SSW'

In [3]:
# Creating List that contains each the remarks with the headings contained in it
remark_head_dir = []
remarks = nonstandard['REMARKS']

# Using regular expression for the heading/direction
headir_regex = '[0-9][0-9]*[0-9]*\s?NM* [N|S|E|W|NW|NE|SW|SE|SSE|SSW|SNE|SNW|NNE|NSE|NNE|NNW|WSW|WNW|WSE|WNE|ENE|ESE|ESW|ENW]*'

# For loop through remarks to find headings
for i in range(len(remarks)):
    head_dir = re.findall(headir_regex, str(remarks[i]))
    remark_head_dir.append(remarks[i])
    remark_head_dir.append(head_dir)

In [4]:
# Split Remarks and Heading/Directions into two seperate lists and create pandas dataframe
remark = []
head_dir = []

for i in range(0, len(remark_head_dir), 2):
    remark.append(remark_head_dir[i])
    head_dir.append(remark_head_dir[i+1])

remark_head_dir_df = pd.DataFrame()
remark_head_dir_df['REMARKS'] = remark
remark_head_dir_df['Heading_Direction'] = head_dir

In [5]:
remark_head_dir_df

Unnamed: 0,REMARKS,Heading_Direction
0,W-ROC reports aircraft reported a white UAS at...,[]
1,Reported a red and white UAS passed helicopter...,[]
2,ATCT personnel reported observing a large UAS ...,[]
3,"Aircrew reported observing a light from a UAS,...",[]
4,Aircraft reported UAS activity while entering ...,[3NM SW]
...,...,...
3438,"1441 EDT / 1841 UTC 8/23/2021\r\nHAVOC39, C-1...",[]
3439,1756 EDT / 2156 UTC 8/23/2021\r\nChicago OHare...,[]
3440,Aircraft observed two UAS off the left side wh...,[1 NM N]
3441,MA State PD observed a UAS over the airport at...,[]


### Export dataframe into csv file

In [31]:
remark_head_dir_df.to_csv('Non-standarized_remarks_heading_direction.csv', index = False)

In [6]:
# Create List that contains each Remark, and the Heading/Direction information contained in each remark
remark_uas_loc = []
remarks = nonstandard['REMARKS']

# Using regular expression for any heading/direction
headir_regex = '[0-9][0-9]*[0-9]*\s?NM* [N|S|E|W|NW|NE|SW|SE|SSE|SSW|SNE|SNW|NNE|NSE|NNE|NNW|WSW|WNW|WSE|WNE|ENE|ESE|ESW|ENW]*\s?of?\s?[A-Z][A-Z][A-Z][A-Z]?'

# Loop through all remarks and search for the heading/direction regex above
for i in range(len(remarks)):
    head_dir = re.findall(headir_regex, str(remarks[i]))
    remark_uas_loc.append(remarks[i])
    remark_uas_loc.append(head_dir)

# Split Remarks and Heading/Directions into two seperate lists and create pandas dataframe
remark = []
uas_loc = []

for i in range(0, len(remark_uas_loc), 2):
    remark.append(remark_uas_loc[i])
    uas_loc.append(remark_uas_loc[i+1])

remark_uas_loc_df = pd.DataFrame()
remark_uas_loc_df['REMARKS'] = remark
remark_uas_loc_df['UAS Location'] = uas_loc

In [9]:
remark_uas_loc_df

Unnamed: 0,REMARKS,UAS Location
0,W-ROC reports aircraft reported a white UAS at...,[]
1,Reported a red and white UAS passed helicopter...,[]
2,ATCT personnel reported observing a large UAS ...,[]
3,"Aircrew reported observing a light from a UAS,...",[]
4,Aircraft reported UAS activity while entering ...,[3NM SW ofIWA]
...,...,...
3438,"1441 EDT / 1841 UTC 8/23/2021\r\nHAVOC39, C-1...",[]
3439,1756 EDT / 2156 UTC 8/23/2021\r\nChicago OHare...,[]
3440,Aircraft observed two UAS off the left side wh...,[1 NM N of TPA]
3441,MA State PD observed a UAS over the airport at...,[]


### Export to csv file

In [44]:
remark_uas_loc_df.to_csv('Non-standarized_remarks_uas_location.csv', index = False)

In [10]:
uas_loc = remark_uas_loc_df['UAS Location'] 

# If the regular expressions did not hit on any location information, pass it UNKN for now
for i in range(len(remark_uas_loc_df)):
    if len(uas_loc[i]) == 0:
        uas_loc[i] = 'UNKN'

In [11]:
len(uas_loc[uas_loc != 'UNKN']) # 794 records were successfully found by regular expressions

794

In [12]:
uas_loc_nonNull = uas_loc[uas_loc != 'UNKN'].to_list()
uas_airport = []

for i in range(len(uas_loc_nonNull)):
    airport = uas_loc_nonNull[i][0].split(' ')[-1]
    
    if len(airport) <= 4:
        uas_airport.append(airport)
    else:
        trim_air = airport[-3:]
        uas_airport.append(trim_air)

In [13]:
uas_airport_df = pd.DataFrame()
uas_airport_df['IDENT'] = uas_airport
uas_airport_df

Unnamed: 0,IDENT
0,IWA
1,OPF
2,SAN
3,OAK
4,HND
...,...
789,EVV
790,SLI
791,ORD
792,ORD


In [25]:
# Create Dataframe of Non-Standard Format Remarks with NON NULL UAS Locations
remark_uas_loc_nn = remark_uas_loc_df[remark_uas_loc_df['UAS Location'] != 'UNKN'].reset_index()
remark_uas_loc_nn

Unnamed: 0,index,REMARKS,UAS Location
0,4,Aircraft reported UAS activity while entering ...,[3NM SW ofIWA]
1,13,Reported a UAS off the left side at 500 feet w...,[5 NM W of OPF]
2,14,Reported a black UAS off the left side of the ...,[5 NM E of SAN]
3,21,Aircraft reported UAS pass below aircraft whil...,[8 NM ESE of OAK]
4,36,"Aircraft reported 3 large UAS at 3,000 feet 3....",[5 NM SW of HND]
...,...,...,...
789,3429,Facility manager observed a UAS from the groun...,[3 NM N of EVV]
790,3431,NWS Seal Beach Base personnel observed a UAS o...,[72 NM SW of SLI]
791,3434,"PIC reports aircraft struck a multi-colored, 2...",[1 NM E of ORD]
792,3435,2120 EDT / 0120 UTC 08/23/2021\r\nENY3961 advi...,[1 NM E of ORD]


### Read in CSV csv file

In [26]:
airportsC = pd.read_csv('C:/Users/Olga/Desktop/GMU/DAEN690/LEGO/airports_cleaned.csv')

### Join datasets together to have the UAS location information together

In [27]:
uas_air_loc= pd.merge(uas_airport_df, airportsC, on='IDENT', how='left')
full_loc = pd.DataFrame()
full_loc['REMARKS'] = remark_uas_loc_nn['REMARKS']
full_loc['UAS_LOC'] = remark_uas_loc_nn['UAS Location']

final_df1 = pd.concat([full_loc, uas_air_loc], axis = 1)

In [28]:
final_df1

Unnamed: 0,REMARKS,UAS_LOC,IDENT,NAME,LATITUDE,LONGITUDE,ICAO_ID
0,Aircraft reported UAS activity while entering ...,[3NM SW ofIWA],IWA,Phoenix-Mesa Gateway,33.307824,-111.655459,KIWA
1,Reported a UAS off the left side at 500 feet w...,[5 NM W of OPF],OPF,Miami-Opa Locka Exec,25.907417,-80.278222,KOPF
2,Reported a black UAS off the left side of the ...,[5 NM E of SAN],SAN,San Diego Intl,32.733556,-117.189667,KSAN
3,Aircraft reported UAS pass below aircraft whil...,[8 NM ESE of OAK],OAK,Metro Oakland Intl,37.721250,-122.221139,KOAK
4,"Aircraft reported 3 large UAS at 3,000 feet 3....",[5 NM SW of HND],HND,Henderson Exec,35.972861,-115.134444,KHND
...,...,...,...,...,...,...,...
789,Facility manager observed a UAS from the groun...,[3 NM N of EVV],EVV,Evansville Rgnl,38.040806,-87.528500,KEVV
790,NWS Seal Beach Base personnel observed a UAS o...,[72 NM SW of SLI],SLI,Los Alamitos AAF,33.790030,-118.051420,KSLI
791,"PIC reports aircraft struck a multi-colored, 2...",[1 NM E of ORD],ORD,Chicago O'hare Intl,41.976629,-87.907253,KORD
792,2120 EDT / 0120 UTC 08/23/2021\r\nENY3961 advi...,[1 NM E of ORD],ORD,Chicago O'hare Intl,41.976629,-87.907253,KORD


In [29]:
final_df1 = pd.merge(final_df1, nonstandard, on='REMARKS', how='left')
final_df1

Unnamed: 0,REMARKS,UAS_LOC,IDENT,NAME,LATITUDE,LONGITUDE,ICAO_ID,dateonly,timeonly,DATE,...,IFRIVR.CEDAR,AIRSPACECLASS.CEDAR,ACLOCATION.CEDAR,ACALTITUDE.CEDAR,ACHEADING.CEDAR,RELATIVECLOCKPOSITION.CEDAR,UASLONG.CEDAR,UASLAT.CEDAR,SUMMARY.CEDAR,QAFINDINGS.CEDAR
0,Aircraft reported UAS activity while entering ...,[3NM SW ofIWA],IWA,Phoenix-Mesa Gateway,33.307824,-111.655459,KIWA,1/6/2018,1:56,2018-01-06T01:56Z,...,,,,,,,,,,
1,Reported a UAS off the left side at 500 feet w...,[5 NM W of OPF],OPF,Miami-Opa Locka Exec,25.907417,-80.278222,KOPF,1/12/2018,3:54,2018-01-12T03:54Z,...,,,,,,,,,,
2,Reported a black UAS off the left side of the ...,[5 NM E of SAN],SAN,San Diego Intl,32.733556,-117.189667,KSAN,1/12/2018,5:58,2018-01-12T05:58Z,...,,,,,,,,,,
3,Aircraft reported UAS pass below aircraft whil...,[8 NM ESE of OAK],OAK,Metro Oakland Intl,37.721250,-122.221139,KOAK,1/15/2018,5:30,2018-01-15T05:30Z,...,,,,,,,,,,
4,"Aircraft reported 3 large UAS at 3,000 feet 3....",[5 NM SW of HND],HND,Henderson Exec,35.972861,-115.134444,KHND,1/21/2018,8:01,2018-01-21T08:01Z,...,,,,,,,,,,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
797,Facility manager observed a UAS from the groun...,[3 NM N of EVV],EVV,Evansville Rgnl,38.040806,-87.528500,KEVV,8/16/2021,22:02,2021-08-16T22:02Z,...,,,,,,,,,,
798,NWS Seal Beach Base personnel observed a UAS o...,[72 NM SW of SLI],SLI,Los Alamitos AAF,33.790030,-118.051420,KSLI,8/17/2021,21:14,2021-08-17T21:14Z,...,,,,,,,,,,
799,"PIC reports aircraft struck a multi-colored, 2...",[1 NM E of ORD],ORD,Chicago O'hare Intl,41.976629,-87.907253,KORD,8/22/2021,23:39,2021-08-22T23:39Z,...,IFR,B,ORD R090 @1DME,250.0,90.0,12.0,,,ENY3961 DEPARTED RWY09C FOR DTW. ONE MILE OFF ...,QA reviewed. ENY3961 departed the ORD Airport ...
800,2120 EDT / 0120 UTC 08/23/2021\r\nENY3961 advi...,[1 NM E of ORD],ORD,Chicago O'hare Intl,41.976629,-87.907253,KORD,8/23/2021,0:25,2021-08-23T00:25Z,...,IFR,B,1NM ENE ORD RY9C,20.0,360.0,,,,ENY3961 ADVISED C90 SHORTLY AFTER DEPARTING OR...,QA reviewed. All data in linked MOR.


### Export the final dataframe to csv file that contains all Non-standard remarks with their extracted locations

In [30]:
final_df1.to_csv('non-std_format_uas_loc.csv', index = False)