## Import needed libraries

In [40]:
import pandas as pd
import numpy as np
import geopy
from geopy.distance import geodesic
import re

## Read in the csv file that has all remaining non-standard remarks

In [41]:
non_std = pd.read_csv('C:/Users/Olga/Desktop/GMU/DAEN690/LEGO/Non-Standard-RemarksNoHeadings.csv')
non_std

Unnamed: 0,REMARKS,UAS Location
0,W-ROC reports aircraft reported a white UAS at...,[]
1,Reported a red and white UAS passed helicopter...,[]
2,ATCT personnel reported observing a large UAS ...,[]
3,"Aircrew reported observing a light from a UAS,...",[]
4,Reported observing a UAS operating over downto...,[]
...,...,...
2644,"1335 EDT / 1735 UTC 8/23/2021\r\nHAVOC39, C130...",[]
2645,"1441 EDT / 1841 UTC 8/23/2021\r\nHAVOC39, C-1...",[]
2646,1756 EDT / 2156 UTC 8/23/2021\r\nChicago OHare...,[]
2647,MA State PD observed a UAS over the airport at...,[]


## Extract records that have runway headings

In [42]:
non_std_rwy = non_std[non_std['REMARKS'].str.contains('RWY', regex=False, case=False, na=False) | (non_std['REMARKS'].str.contains('RUNWAY', regex=False, case=False, na=False))] 
non_std_rwy  # WE have 492 records 

Unnamed: 0,REMARKS,UAS Location
0,W-ROC reports aircraft reported a white UAS at...,[]
3,"Aircrew reported observing a light from a UAS,...",[]
7,Reported a red quad-copter UAS 2 feet in diame...,[]
9,A pedestrian reported observing a UAS hovering...,[]
10,Observed a large UAS 50 feet off the left side...,[]
...,...,...
2641,"1647 EDT / 2047 UTC 8/21/2021\r\nRA13U, P28A, ...",[]
2643,2155 EDT / 0155 UTC 8/23/2021\r\nChicago OHare...,[]
2646,1756 EDT / 2156 UTC 8/23/2021\r\nChicago OHare...,[]
2647,MA State PD observed a UAS over the airport at...,[]


## Export these records to seperate csv file

In [4]:
non_std_rwy.to_csv('Non-Standard_Runways.csv', index = False)

### Extract the non-matching records to keep them in separate csv file for further investigation

In [43]:
non_match = pd.merge(non_std, non_std_rwy, how='outer',indicator=True, on ='REMARKS')
non_match

Unnamed: 0,REMARKS,UAS Location_x,UAS Location_y,_merge
0,W-ROC reports aircraft reported a white UAS at...,[],[],both
1,Reported a red and white UAS passed helicopter...,[],,left_only
2,ATCT personnel reported observing a large UAS ...,[],,left_only
3,"Aircrew reported observing a light from a UAS,...",[],[],both
4,Reported observing a UAS operating over downto...,[],,left_only
...,...,...,...,...
2650,"1335 EDT / 1735 UTC 8/23/2021\r\nHAVOC39, C130...",[],,left_only
2651,"1441 EDT / 1841 UTC 8/23/2021\r\nHAVOC39, C-1...",[],,left_only
2652,1756 EDT / 2156 UTC 8/23/2021\r\nChicago OHare...,[],[],both
2653,MA State PD observed a UAS over the airport at...,[],[],both


In [44]:
# remove the mathcing records
non_match = non_match[(non_match._merge != 'both')]
non_match

Unnamed: 0,REMARKS,UAS Location_x,UAS Location_y,_merge
1,Reported a red and white UAS passed helicopter...,[],,left_only
2,ATCT personnel reported observing a large UAS ...,[],,left_only
4,Reported observing a UAS operating over downto...,[],,left_only
5,UAS crashed in the vicinity of Harvard Coliseum.,[],,left_only
6,"Reported a 4-rotor UAS 10 NW of FLL at 2,500 f...",[],,left_only
...,...,...,...,...
2645,1555 Raleigh-Durham Airport PD observed a UAS ...,[],,left_only
2646,Ground personnel observed a UAS with a red top...,[],,left_only
2648,The Navy Yard advised that NSF Indianhead cont...,[],,left_only
2650,"1335 EDT / 1735 UTC 8/23/2021\r\nHAVOC39, C130...",[],,left_only


In [45]:
# Drop unnecessary columns
non_match = non_match.drop(['UAS Location_x', 'UAS Location_y', '_merge'], axis=1)
non_match

Unnamed: 0,REMARKS
1,Reported a red and white UAS passed helicopter...
2,ATCT personnel reported observing a large UAS ...
4,Reported observing a UAS operating over downto...
5,UAS crashed in the vicinity of Harvard Coliseum.
6,"Reported a 4-rotor UAS 10 NW of FLL at 2,500 f..."
...,...
2645,1555 Raleigh-Durham Airport PD observed a UAS ...
2646,Ground personnel observed a UAS with a red top...
2648,The Navy Yard advised that NSF Indianhead cont...
2650,"1335 EDT / 1735 UTC 8/23/2021\r\nHAVOC39, C130..."


In [19]:
# Export remaining 2157 records to csv file
non_match.to_csv('Non-Standard_NoRunways.csv', index = False)