## Import Statements and Main Data Read in (.csv)

In [None]:
# Import Statements
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import plotly.express as px
from plotly.subplots import make_subplots
import plotly.graph_objects as go
import re

import spacy
nlp = spacy.load('en_core_web_sm')

In [3]:
# Read in Incidents_Cleaned_Standard.csv
faa_standard = pd.read_csv('C:/Users/grace/OneDrive/Desktop/GMU/DAEN690/Incidents_Cleaned_Standard.csv')
#faa_standard

## Distribution of Standard Format Remarks over Years 2018-2019

In [25]:
# General Histogram of standard format remarks over time
fig = px.histogram(faa_standard, x="dateonly", title = "Count of Remarks with Standard Format, by Date")
fig.update_layout(
    xaxis_title_text='Date', # xaxis label
    yaxis_title_text='Count of Remarks with Standard Format') # yaxis label
fig.show()

#### Standard Remarks in SkyWatch per Year

In [30]:
# Split dateonly column into the four different years present in the data
year_2021 = []
year_2020 = []
year_2019 = []
year_2018 = []

for i in range(len(faa_standard)):
    if '2021' in faa_standard['dateonly'][i]:
        year_2021.append(faa_standard['dateonly'][i])
        
    elif '2020' in faa_standard['dateonly'][i]:
        year_2020.append(faa_standard['dateonly'][i])
    
    elif '2019' in faa_standard['dateonly'][i]:
        year_2019.append(faa_standard['dateonly'][i])
    
    else:
        year_2018.append(faa_standard['dateonly'][i])

date18_df = pd.DataFrame(year_2018)
date18_df.columns = ['Year 2018']

date19_df = pd.DataFrame(year_2019)
date19_df.columns = ['Year 2019']

date20_df = pd.DataFrame(year_2020)
date20_df.columns = ['Year 2020']

date21_df = pd.DataFrame(year_2021)
date21_df.columns = ['Year 2021']

In [49]:
print('Count of Remarks with Standard Format Input through SkyWatch, per Year\n\nYear 2018: ',
      len(date18_df), 'standard remarks entered\nYear 2019: ', len(date19_df), 'standard remarks entered\nYear 2020: ',
     len(date20_df), 'standard remarks entered\nYear 2021: ', len(date21_df), 'standard remarks entered')

Count of Remarks with Standard Format Input through SkyWatch, per Year

Year 2018:  814 standard remarks entered
Year 2019:  1198 standard remarks entered
Year 2020:  1444 standard remarks entered
Year 2021:  1735 standard remarks entered


#### Total Remarks in SkyWatch per Year

In [83]:
# Read in Incidents_Cleaned.csv
faa_cleaned = pd.read_csv('C:/Users/grace/OneDrive/Desktop/GMU/DAEN690/Incidents_Cleaned.csv',encoding='cp1252')

In [85]:
# Split dateonly column into the four different years present in the data
year_2021 = []
year_2020 = []
year_2019 = []
year_2018 = []

for i in range(len(faa_cleaned)):
    if '2021' in faa_cleaned['dateonly'][i]:
        year_2021.append(faa_cleaned['dateonly'][i])
        
    elif '2020' in faa_cleaned['dateonly'][i]:
        year_2020.append(faa_cleaned['dateonly'][i])
    
    elif '2019' in faa_cleaned['dateonly'][i]:
        year_2019.append(faa_cleaned['dateonly'][i])
    
    else:
        year_2018.append(faa_cleaned['dateonly'][i])

date18_df = pd.DataFrame(year_2018)
date18_df.columns = ['Year 2018']

date19_df = pd.DataFrame(year_2019)
date19_df.columns = ['Year 2019']

date20_df = pd.DataFrame(year_2020)
date20_df.columns = ['Year 2020']

date21_df = pd.DataFrame(year_2021)
date21_df.columns = ['Year 2021']

In [87]:
print('Total Count of Remarks Input through SkyWatch, per Year\n\nYear 2018: ',
      len(date18_df), 'remarks entered\nYear 2019: ', len(date19_df), 'remarks entered\nYear 2020: ',
     len(date20_df), 'remarks entered\nYear 2021: ', len(date21_df), 'remarks entered')

Total Count of Remarks Input through SkyWatch, per Year

Year 2018:  2325 remarks entered
Year 2019:  2237 remarks entered
Year 2020:  1976 remarks entered
Year 2021:  2095 remarks entered


#### Visualizations

In [52]:
# Single year histogram for count of remarks with a standard format in SkyWatch
fig = px.histogram(date18_df, x="Year 2018", title = "Count of Remarks with Standard Format, 2018")
fig.update_layout(
    xaxis_title_text='Date', # xaxis label
    yaxis_title_text='Count of Remarks with Standard Format') # yaxis label
fig.show()

In [57]:
# Histograms with Subplots for each year
fig = make_subplots(
    rows=2, cols=2,
    subplot_titles=("2018", "2019", "2020", "2021"))

fig.add_trace(go.Histogram(x=date18_df['Year 2018']),
              row=1, col=1)

fig.add_trace(go.Histogram(x=date19_df['Year 2019']),
              row=1, col=2)

fig.add_trace(go.Histogram(x=date20_df['Year 2020']),
              row=2, col=1)

fig.add_trace(go.Histogram(x=date21_df['Year 2021']),
              row=2, col=2)

fig.update_layout(height=700, width=900,
                  title_text="Multiple Subplots with Titles")

fig.show()

## Extracting Geospatial Location Information from Structured Remarks

### Extracting Heading and Direction Information

In [143]:
# Create List that contains each Standard Remark, and the Heading/Direction
# information contained in each remark

remark_head_dir = []
remarks = faa_standard['REMARKS']

# regular expression for any heading/direction
#headir_regex = '[0-9][0-9]*[0-9]*\s?NM? [N|S|E|W|NW|NE|SW|SE|SSE|SSW|SNE|SNW|NNE|NSE|NNE|NNW|WSW|WNW|WSE|WNE|ENE|ESE|ESW|ENW]*'
headir_regex = '[0-9][0-9]*[0-9]*\s?NM* [N|S|E|W|NW|NE|SW|SE|SSE|SSW|SNE|SNW|NNE|NSE|NNE|NNW|WSW|WNW|WSE|WNE|ENE|ESE|ESW|ENW]*'

# Loop through all remarks and search for the heading/direction regex above
for i in range(len(remarks)):
    head_dir = re.findall(headir_regex, remarks[i])
    remark_head_dir.append(remarks[i])
    remark_head_dir.append(head_dir)

In [144]:
# Split Remarks and Heading/Directions into two seperate lists and create
# pandas dataframe
remark = []
head_dir = []

for i in range(0, len(remark_head_dir), 2):
    remark.append(remark_head_dir[i])
    head_dir.append(remark_head_dir[i+1])

remark_head_dir_df = pd.DataFrame()
remark_head_dir_df['REMARKS'] = remark
remark_head_dir_df['Heading_Direction'] = head_dir

In [145]:
remark_head_dir_df

Unnamed: 0,REMARKS,Heading_Direction
0,Aircraft observed alarge orange UAS with flash...,"[1 NM , 3 NM NNW]"
1,"Aircraft observed a UAS while at 5,000 feet 11...",[11 NM SSE]
2,"Aircraft observed a UAS while at 5,000 feet 11...",[11 NM SSE]
3,Aircraft observed a UAS 100 feet above the Air...,[2 NM ]
4,Aircraft observed a UAS operating 400 feet bel...,[]
...,...,...
5186,Aircraft reported a 50' diameter white balloon...,[35 NM NE]
5187,Aircraft observed a white UAS while S bound at...,[1 NM N]
5188,Aircraft reported a UAS sensor hit while N bou...,[21 NM NW]
5189,From MOR: Aircraft reported a NMAC with a flat...,[5 NM ENE]


In [146]:
# Export to .csv file
remark_head_dir_df.to_csv('remarks_heading_direction.csv', index = False)

### Extracting Airport Identifier Information

https://towardsdatascience.com/named-entity-recognition-ner-using-spacy-nlp-part-4-28da2ece57c6

In [None]:
# Import Airports dataset
airports = pd.read_csv('C:/Users/grace/OneDrive/Desktop/GMU/DAEN690/airports_cleaned.csv')

In [79]:
remarks_with_IDENT = []
remarks_sans_IDENT = []

faa_ident = airports['IDENT']
remarks = faa_standard['REMARKS']

for i in range(len(remarks)):
    for k in range(len(faa_ident)):
        if faa_ident[k] in remarks[i]:
               remarks_with_IDENT.append(remarks[i])
               remarks_with_IDENT.append(faa_ident[k])

KeyboardInterrupt: 

In [62]:
remark1 = nlp(faa_standard['REMARKS'][0])

for ent in remark1.ents:
    print(ent.text, ent.label_)

UAS ORG
1 CARDINAL
and400 feet QUANTITY
1,000 feet QUANTITY
3 CARDINAL
CLE ORG
Cleveland GPE
216 CARDINAL


In [63]:
faa_standard['REMARKS'][0]

'Aircraft observed alarge orange UAS with flashing lights 1 NM off the right side and400 feet below while at 1,000 feet 3 NM NNW of CLE. No evasive action was taken. Cleveland PD notified at 216-265-6055. '

In [66]:
remark2 = nlp(faa_standard['REMARKS'][1])

for ent in remark2.ents:
    print(ent.text, ent.label_)

UAS ORG
5,000 feet QUANTITY
11 CARDINAL
NM SSE ORG
SLC ORG


In [65]:
faa_standard['REMARKS'][1]

'Aircraft observed a UAS while at 5,000 feet 11 NM SSE of SLC. No evasive action was taken.'