In [1]:
#Install libraries
!pip install geopandas
!pip install geopy
!pip install shapely 
!pip install pandasql

Looking in indexes: https://pypi.org/simple, https://pip.repos.neuron.amazonaws.com
Collecting geopandas
  Downloading geopandas-0.11.1-py3-none-any.whl (1.0 MB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m1.0/1.0 MB[0m [31m18.7 MB/s[0m eta [36m0:00:00[0m00:01[0m
Collecting fiona>=1.8
  Downloading Fiona-1.8.21-cp38-cp38-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (16.6 MB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m16.6/16.6 MB[0m [31m50.5 MB/s[0m eta [36m0:00:00[0m00:01[0m00:01[0m
[?25hCollecting shapely<2,>=1.7
  Downloading Shapely-1.8.2-cp38-cp38-manylinux_2_12_x86_64.manylinux2010_x86_64.whl (2.1 MB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m2.1/2.1 MB[0m [31m27.5 MB/s[0m eta [36m0:00:00[0m:00:01[0m
Collecting pyproj>=2.6.1.post1
  Downloading pyproj-3.3.1-cp38-cp38-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (7.7 MB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m7.7/7.7 MB

In [2]:
#Import Libraries
import pandas as pd
import geopandas as ps
import geopy as gy
import shapely as sy
import dask.dataframe as dd
import pandasql as pq
from shapely.geometry import Point, Polygon
import numpy as np
from math import radians, cos, sin, asin, sqrt

In [53]:
#Setup the path for the file -- might have to change this if yours is named differently
bucket = 'daen-690-pacific-deviations/raw-data' #Bucket name
data_key = 'TOMRDate=2021-12-24.csv' #Path to the CSV file 
data_location = 's3://{}/{}'.format(bucket, data_key)

#Import all of the raw data 
rawData_df = dd.read_csv(data_location, assume_missing=True)

In [54]:
#Function to filter out the needed attribues, rename, change flight level scale, and filter for only those flights at or above flight level 240
def filterAttributes():
    #New dataframe with selected attributes from the raw data
    airspaceData_df = rawData_df[["FRN73TMRPDateTimeOfMessageRec","FRN131HRPWCFloatingPointLat","FRN131HRPWCFloatingPointLong",
                     "FRN145FLFlightLevel", "FRN170TITargetId","RESHSelectedHeading","FRN80TATargetAddress",
                     "FRN161TNTrackNumber"]]

    #Rename columns to make it easier to read
    airspaceData_df = airspaceData_df.rename(columns={'FRN73TMRPDateTimeOfMessageRec': 'DateTime', 
                                                      'FRN131HRPWCFloatingPointLat': "Latitude", 
                                                      'FRN131HRPWCFloatingPointLong': "Longitude", 
                                                      'FRN145FLFlightLevel': "FlightLevel", 
                                                      'FRN170TITargetId': "TargetID", 
                                                      'RESHSelectedHeading': "SelectedHeading", 
                                                      'FRN80TATargetAddress': "TargetAddress",
                                                      'FRN161TNTrackNumber': "TrackNumber"})
    
    
    #Change flight level scale to feet (FL1 = 100 ft)
    airspaceData_df['FlightLevel'] = airspaceData_df['FlightLevel'].apply(lambda x: x * 100, meta=('FlightLevel', 'float64'))
    
    airspaceData = airspaceData_df.compute()
    
    return airspaceData

In [55]:
#Function to format date and time  

def timeFormatting():
    
    #Set the dataframe that will be altered through this block of code
    global allAircraftData
    
    char = ['T','Z']
    for x in char:
        allAircraftData["DateTime"] = allAircraftData["DateTime"].str.replace( x ," ")

    # Formatted Datetime
    allAircraftData["DateTime"] = pd.to_datetime(allAircraftData["DateTime"], format="%Y-%m-%d %H:%M:%S")
    
    # Create 4 new columns for Hour, Minute, Second and Microsecond
    allAircraftData["Hour"] = allAircraftData["DateTime"].dt.hour
    allAircraftData["Minute"] = allAircraftData["DateTime"].dt.minute
    allAircraftData["Second"] = allAircraftData["DateTime"].dt.second
    allAircraftData["Day"] = allAircraftData["DateTime"].dt.strftime('%Y-%m-%d')
    #allAircraftData["microSecond"] = allAircraftData["DateTime"].dt.microsecond
    
    # Reorder columns
    allAircraftData = allAircraftData[["DateTime","Day","Hour","Minute","Second","Latitude","Longitude","FlightLevel",
                                   "TargetID","SelectedHeading","TargetAddress",
                                   "TrackNumber"]]

In [56]:
def dataFiltering():

    global allAircraftData

    #Remove anything below FL240
    airspaceData = allAircraftData[(allAircraftData['FlightLevel'] >= 24000)]

    #Keep only records for the first 5 seconds to speed up processing time 
    airspaceData = airspaceData[(airspaceData['Second'] < 5)]

    return airspaceData

In [57]:
#Function to filter out anything in the Hawaii airspace

def removeHISpace():
    
    #Set the dataframe that will be altered through this block of code
    global airspaceData
    
    #Coordinates for Hawaii airspace
    v0 = (26.14472222, -158.62194444) 
    v1 = (26.105, -160.63166667)
    v2 = (25.67611111, -161.69111111)
    v3 = (25.05666667, -162.64972222)
    v4 = (24.16889, -163.26638889)
    v5 = (23.25833, -163.855)
    v6 = (22.20555556, -163.91444444)

    #Select the correct v7 depending on what you are testing
    v7 = (33.10266389, 130.47177778) #Incorrect point to use during development
    #v7 = (21.1511111, -163.9144444) #Correct point to use when going live
    
    v8 = (20.11666667, -163.3)
    v9 = (19.65805556,-162.69944444)
    v10 = (19.415, -162.38361111)
    v11 = (18.40777778, -160.81416667)
    v12 = (18.0525, -160.26972222)
    v13 = (17.75583333, -159.53888889)
    v14 = (17.17055556, -157.75666667) 
    v15 = (17.805,-156.06805556)
    v16 = (18.10888889, -155.71166667)
    v17 = (19.14222222, -154.48333333)
    v18 = (19.22293333, -151.87963333)
    v19 = (20.69694444, -151.01916667) 
    v20 = (21.54777778, -151.46638889)
    v21 = (22.34416667,-151.88527778)
    v22 = (23.02416667, -152.57777778)
    v23 = (23.78055556, -153.36611111)
    v24 = (24.29583333, -154.25)
    v25 = (24.72138889, -155.26305556)
    v26 = (25.19583333, -156.42111111)

    # Polygon
    coords = [v1, v2, v3, v4, v5, v6, v7, v8, v9, v10, v11, v12, v13, v14, v15, v16, v17, v18, v19, v20, v21, v22, v23, v24, v25, v26]
    poly = Polygon(coords)
    
    #Sort flights into what is in the airspace and what is not
    hawaiiAir = []

    for loc in range(0,len(airspaceData)):
        p1 = Point(airspaceData.iloc[loc][5], airspaceData.iloc[loc][6])
        hawaiiAir.append(p1.within(poly))

    airspaceData['nearHawaii'] = hawaiiAir
    
    #Filter out only the ones in the airspace
    airspaceData = airspaceData[(airspaceData['nearHawaii'] == False)]
    airspaceData = airspaceData.drop(columns=['nearHawaii'])

In [58]:
#Function to set the direction of aircraft

def aircraftDirection():
    #Set the dataframe that will be altered through this block of code
    global airspaceData
    
    # Replace missing value with -1
    airspaceData['SelectedHeading'] = airspaceData['SelectedHeading'].fillna(-1)
    
    # Assign Direction "E" for 0-180 degree, "W" for 180-360 degree, "NA" is record with null values 
    conditionlist = [
        (airspaceData['SelectedHeading'] < 0) ,
        (airspaceData['SelectedHeading'] >= 0) & (airspaceData['SelectedHeading'] <180),
        (airspaceData['SelectedHeading'] > 180)]
    choicelist = ['NA', 'E', 'W']
    airspaceData['Direction'] = np.select(conditionlist, choicelist)

In [59]:
def minuteFilter(HourCounter,MinuteCounter):

    global airspaceData

    #create SQL query for flights between the start and end time
    sql1 = "SELECT *, min(Second) FROM airspaceData WHERE Hour = '{0}' and Minute = '{1}' GROUP BY TargetID ORDER BY TargetID, Second".format(HourCounter, MinuteCounter)

    #Run query and store results
    recordsInMinute = pq.sqldf(sql1, globals())
    del recordsInMinute['min(Second)']

    return (recordsInMinute.sort_values('Longitude').reset_index(drop=True))

In [60]:
# Implement the formula below
def distance_d(point0,pointX):
    # The function "radians" is found in the math module
    LoA = radians(point0[1])  
    LoB = radians(pointX[1])
    LaA=  radians(point0[0])  
    LaB = radians(pointX[0]) 
    # The "Haversine formula" is used.
    D_Lo = LoB - LoA 
    D_La = LaB - LaA 
    P = sin(D_La / 2)**2 + cos(LaA) * cos(LaB) * sin(D_Lo / 2)**2  
   
    Q = 2 * asin(sqrt(P))   
    # The earth's radius in kilometers.
    R_km = 6371  
 
    # Change the kilometer to  nautical miles
    R_nm = R_km*0.539956803

    # Then we'll compute the outcome.
    return(Q * R_nm)

In [61]:
# Create function to set up boundary within 25 nm by latitude, longitude 
def limit_lon(point0):
    '''
    use with LongitudeOrderDF
    note: distance from point to longitude boundary of each row is around 24.9715
    '''
    LaA = radians(point0[0])
    onedeg_long = cos(LaA)*(69.172*0.868976242)
    add = 25/onedeg_long 
    pointlimit = (point0[0],point0[1]+add)
    return pointlimit[1]

In [62]:
# Create function to select, merge and add the values from analyzing Longitude and Latitude
def newDF(OrderDF,x,y,d):
    """DF is Long/LatitudeOrderDF
       x = long/latpoint_a
       y = long/latpoint_b
       d = long/latdistance_ab"""
    # select rows that index is in list 'point_a', 'point_b'
    A = OrderDF.loc[x,['DateTime','Day','Hour','Minute','Second','Latitude','Longitude','FlightLevel',
                             'TargetID', 'SelectedHeading', 'TargetAddress','Direction']]
    B = OrderDF.loc[y,['DateTime','Day','Hour','Minute','Second','Latitude','Longitude','FlightLevel',
                             'TargetID', 'SelectedHeading', 'TargetAddress','Direction']]
    # Join 2 tables by the "TargetID" of point a (for the uniquness)
    OrderResult = pd.merge(A.reset_index(drop=True),B.reset_index(drop=True),left_index=True, right_index=True)
    # add distance column
    OrderResult['Distance'] = d
    return OrderResult

In [134]:
#Calculate the distance of the points closest to each other by longitidue and latitude
def proximityCalc(LongitudeOrderDF):
    longpoint_a = []
    longpoint_b = []
    longdistance_ab = []

    for a in LongitudeOrderDF.index:
        for n in range(1,len(LongitudeOrderDF)):
            b = a+n
            if b < len(LongitudeOrderDF):
                point0 = LongitudeOrderDF.loc[a,'Latitude'], LongitudeOrderDF.loc[a,'Longitude']
                pointX = LongitudeOrderDF.loc[b,'Latitude'], LongitudeOrderDF.loc[b,'Longitude']
                if pointX[1] <= limit_lon(point0): # Check if longitude of pointX is within the boundary
                    distance = distance_d(point0,pointX)
                    if distance <= 25: # Check distance within 25 nm
                        longpoint_a.append(a)
                        longpoint_b.append(b)
                        longdistance_ab.append(distance)
                    else:
                        break
        
    # Apply function to select and merge data frame
    Resultsdf = newDF(LongitudeOrderDF,longpoint_a, longpoint_b,longdistance_ab)

    return (Resultsdf)

In [64]:
#Calculating height differences
def distanceCalc(resultsDF):
    heightDifference = []
    potentialLoss1000 = []
    potentialLoss400 = []

    for counter in range(0,len(resultsDF)):
        difference = abs((resultsDF['FlightLevel_x'][counter]) - (resultsDF['FlightLevel_y'][counter]))
        heightDifference.append(difference)

        if difference < 1000:
            potentialLoss1000.append('True')
            if difference <= 400:
                potentialLoss400.append('True')
            else:
                potentialLoss400.append('False')
        else:
            potentialLoss1000.append('False')
            potentialLoss400.append('False')

    resultsDF['HeightDifference_ft'] = heightDifference
    resultsDF['potentialLoss400'] = potentialLoss400
    resultsDF['potentialLoss1000'] = potentialLoss1000

    return (resultsDF)

In [135]:
def removeProximityDups(proximityReport):
    
    proximityReport['TargetID_x'].replace('', np.nan, inplace=True)
    proximityReport['TargetID_y'].replace('', np.nan, inplace=True)

    proximityReport.dropna(subset=['TargetID_x'], inplace=True)
    proximityReport.dropna(subset=['TargetID_y'], inplace=True)
    
    proximityReport['list_target'] = proximityReport.apply(lambda row: tuple(sorted([row['TargetID_x']]+[row['TargetID_y']])), axis = 1)
    proximityReport = proximityReport.drop_duplicates(subset = 'list_target',keep = 'first').reset_index(drop = True)
    proximityReport.drop('list_target', axis=1, inplace=True)

    return proximityReport

In [65]:
def getProximityReport():

    proximityReport = pd.DataFrame()

    for HourCounter in range(0,24):
        print(HourCounter)
        #Create table for the minute
        for MinuteCounter in range(0,60):
            print("---",MinuteCounter)
            #Create table for the minute
            recordsByMinuteDF = minuteFilter(HourCounter,MinuteCounter)

            #calculate proximity
            resultsDF = proximityCalc(recordsByMinuteDF)

            if resultsDF.empty == True:
                print(MinuteCounter)
                # if the results dataframe is empty, then break out of for-loop
                break
            else:
                #Calculate distance
                resultsDF = distanceCalc(resultsDF)
                #Add the results for this minute to the overall results 
                proximityReport = pd.concat([proximityReport, resultsDF], ignore_index=True)

    proximityReport = removeProximityDups(proximityReport)
    
    return proximityReport

In [110]:
def get400candidate():
    LossCandidates400 = proximityReport.loc[(proximityReport['potentialLoss400'] == 'True')]
    LossCandidates400 = LossCandidates400.reset_index()
    LossCandidates400 = LossCandidates400.drop(columns=['index'])
    
    if len(LossCandidates400) > 0:
        #remove duplicate pairs
        LossCandidates400['list_target'] = LossCandidates400.apply(lambda row: tuple(sorted([row['TargetID_x']]+[row['TargetID_y']])), axis = 1)
        LossCandidates400 = LossCandidates400.drop_duplicates(subset = ['list_target'],keep = 'last').reset_index(drop = True)
        LossCandidates400.drop('list_target', axis=1, inplace=True)

    return LossCandidates400

In [68]:
def get1000candidate():
    LossCandidates1000 = proximityReport.loc[(proximityReport['potentialLoss1000'] == 'True')]
    LossCandidates1000 = LossCandidates1000.reset_index()
    LossCandidates1000 = LossCandidates1000.drop(columns=['index'])
    
    if len(LossCandidates1000) > 0:
        #remove duplicate pairs
        LossCandidates1000['list_target'] = LossCandidates1000.apply(lambda row: tuple(sorted([row['TargetID_x']]+[row['TargetID_y']])), axis = 1)
        LossCandidates1000 = LossCandidates1000.drop_duplicates(subset = ['list_target'],keep = 'last').reset_index(drop = True)
        LossCandidates1000.drop('list_target', axis=1, inplace=True)


    return LossCandidates1000

In [232]:
def recordsTable(instancesAtLevel, x):

    flight_x = instancesAtLevel['TargetID_x'][x]
    flight_y = instancesAtLevel['TargetID_y'][x]
    hour = instancesAtLevel['Hour_x'][x]
    minute = instancesAtLevel['Minute_x'][x]

    flightInformation = allAircraftData.loc[((allAircraftData['TargetID'] == flight_x) | (allAircraftData['TargetID'] == flight_y)) & 
                                          ((allAircraftData['Minute'] >= (minute - 5)) & (allAircraftData['Minute'] <= (minute + 5))) & 
                                          ((allAircraftData['Hour'] == hour))]
  
    flightInformation = flightInformation.assign(SeparationEntry=x)

    return flightInformation.sort_values(by=['TargetID','Minute', 'Second'])  

In [71]:
# function for fill missing second with linear interpolation
def fillSecond(data_x,data_y):
    '''This function transform data of target y to be 
      on the same minute, second as target x'''
    # filled with NA in data_y if second_x are not in second_y 
    Y = data_y.groupby('Minute')['Second'].apply(list).reset_index(name='list')
    for i in data_x.index:
        min_x = data_x.loc[i,'Minute']
        sec_x = data_x.loc[i,'Second']
        for n in range(0,len(Y)):
            min_y = Y.loc[n,'Minute']
            if min_x == min_y:
                listsec = Y.loc[n,'list']
                if (sec_x not in listsec):
                    ydict = {'Minute': min_x, 'Second': sec_x, 
                   'TargetID': data_y.loc[0,'TargetID']}
                    data_y = data_y.append(ydict, ignore_index = True)

    # fill NA with linear interpolation method
    y_interp = data_y.sort_values(by=['Minute','Second']).interpolate(method='linear', limit_direction ='forward')
    y_transformed = y_interp.interpolate(method='linear', limit_direction ='backward')
    return y_transformed

In [72]:
def transformTable(flightData):
    for i, id in enumerate(flightData['TargetID'].unique()):
        if i == 0:
            data_x = flightData[(flightData['TargetID']== id)].reset_index(drop = True)
        else:
            data_y = flightData[(flightData['TargetID']== id)].reset_index(drop = True)

    data_x = data_x[['SeparationEntry','DateTime','Day','Minute','Second','Latitude','Longitude','FlightLevel','TargetID','SelectedHeading']]
    data_y = data_y[['Minute','Second','Latitude','Longitude','FlightLevel','TargetID','SelectedHeading']]

    y_transformed = fillSecond(data_x,data_y)

    analyzedTable = pd.merge(data_x,y_transformed,on=['Minute','Second'], how='left')

    return analyzedTable


In [73]:
# Append the lateral Distance to table
def haversineAnalysis(lat1, lon1, lat2, lon2, to_radians=True, earth_radius=6371):

    if to_radians:
        lat1, lon1, lat2, lon2 = np.radians([lat1, lon1, lat2, lon2])

    a = np.sin((lat2-lat1)/2.0)**2 + \
        np.cos(lat1) * np.cos(lat2) * np.sin((lon2-lon1)/2.0)**2

    return earth_radius * 2 * np.arcsin(np.sqrt(a))  * 0.539956803 

In [74]:
def getLateralDist(analyzedTable):

    analyzedTable['LateralDistance'] = \
    haversineAnalysis(analyzedTable.Latitude_x, analyzedTable.Longitude_x,
                 analyzedTable.Latitude_y, analyzedTable.Longitude_y)
  
    return analyzedTable

In [75]:
#Append the Flight Level differnece column 
def flightlevelCalc(analyzedTable):

    flightlevelDifference = []

    for counter in range(0,len(analyzedTable)):
        Diff = abs((analyzedTable['FlightLevel_x'][counter]) - (analyzedTable['FlightLevel_y'][counter]))
        flightlevelDifference.append(Diff)

    
    analyzedTable['FlightLevelDifference'] = flightlevelDifference

    return analyzedTable

In [76]:
def getDirection(analyzedTable):

    # Direction
    conditionsX = [(analyzedTable.iloc[-1]['Longitude_x'] - analyzedTable.iloc[0]['Longitude_x'] < 0),(analyzedTable.iloc[-1]['Longitude_x'] - analyzedTable.iloc[0]['Longitude_x'] > 0)]

    # create a list of the values we want to assign for each condition
    values = ['W', 'E']

    # create a new column and use np.select to assign values to it using our lists as arguments
    analyzedTable['X_direction'] = np.select(conditionsX, values)

    conditionsY = [
      (analyzedTable.iloc[-1]['Longitude_y'] - analyzedTable.iloc[0]['Longitude_y'] < 0),
      (analyzedTable.iloc[-1]['Longitude_y'] - analyzedTable.iloc[0]['Longitude_y'] > 0)
      ]

    # create a list of the values we want to assign for each condition
    values = ['W', 'E']

    # create a new column and use np.select to assign values to it using our lists as arguments
    analyzedTable['Y_direction'] = np.select(conditionsY, values)

    analyzedTable = analyzedTable[analyzedTable.columns[[0,1,2,3,4,5,6,7,8,9,17,10,11,12,13,14,18,15,16]]]

    return analyzedTable

In [77]:
def getSeaparationReports(instancesAtLevel):
    
    separationReport = pd.DataFrame()

    for x in range(0,len(instancesAtLevel.index)):
        #Get the data for the flight at +/- 3 minutes from when the loss of separation was flagged to be under 400 ft
        flightData = recordsTable(instancesAtLevel, x)

        #Format the table for output
        analyzedTable = transformTable(flightData)

        #Compute/assign lateral separation, height separation, and direction
        analyzedTable = getLateralDist(analyzedTable)
        analyzedTable = flightlevelCalc(analyzedTable)
        analyzedTable = getDirection(analyzedTable)

        #Add table to the results 
        separationReport = pd.concat([separationReport, analyzedTable], ignore_index=True)

    return separationReport

In [198]:
def flightXInfo(separationData):

    flightX = []

    for x in range(0, len(separationData.index)):
        values_x = [separationData['SeparationEntry'].loc[x], 
                separationData['DateTime'].loc[x], 
                separationData['FlightLevel_x'].loc[x], 
                separationData['TargetID_x'].loc[x], 
                separationData['X_direction'].loc[x], 
                separationData['LateralDistance'].loc[x], 
                separationData['FlightLevelDifference'].loc[x], 
                separationData['Longitude_x'].loc[x], 
                separationData['Latitude_x'].loc[x]]
        flightX.append(values_x)

    return flightX

In [197]:
def flightYInfo(separationData):

    flightY = []

    for x in range(0, len(separationData.index)):
        values_y = [separationData['SeparationEntry'].loc[x],
                separationData['DateTime'].loc[x],
                separationData['FlightLevel_y'].loc[x], 
                separationData['TargetID_y'].loc[x], 
                separationData['Y_direction'].loc[x], 
                separationData['LateralDistance'].loc[x], 
                separationData['FlightLevelDifference'].loc[x],
                separationData['Longitude_y'].loc[x],
                separationData['Latitude_y'].loc[x]]
        flightY.append(values_y)

    return flightY

In [80]:
def getVisTable(Resulttable):
    
    xvalues = pd.DataFrame(flightXInfo(Resulttable))
    yvalues = pd.DataFrame(flightYInfo(Resulttable))
    tableToVisualize = pd.concat([xvalues, yvalues], ignore_index=True)
    tableToVisualize = tableToVisualize.rename(columns={0: 'SeparationEntry',
                                                      1: 'DateTime', 
                                                      2: "FlightLevel", 
                                                      3: "TargetID", 
                                                      4: "Direction", 
                                                      5: "LateralDistance",
                                                      6: "FLDifference",
                                                      7: "Longitude", 
                                                      8: "Latitude"})

    return tableToVisualize

In [225]:
#Get the report at the 1000 level
def getSeparation1000Report():

    flSeparation1000Report = get1000candidate()

    if len(flSeparation1000Report) > 0:
        flSeparation1000Report = getSeaparationReports(flSeparation1000Report)
    
    flSeparation1000Report['TargetID_x'].replace('', np.nan, inplace=True)
    flSeparation1000Report['TargetID_y'].replace('', np.nan, inplace=True)

    flSeparation1000Report.dropna(subset=['TargetID_x'], inplace=True)
    flSeparation1000Report.dropna(subset=['TargetID_y'], inplace=True)
    
    return flSeparation1000Report.reset_index(drop=True)

In [248]:
#Get the visualization data at the 1000 level
def visualization1000(report1000):
    
    if len(report1000) > 0:
        viz1000Data = getVisTable(report1000)
        conditions = [
            (viz1000Data['FLDifference'] <= 1000) & (viz1000Data['LateralDistance'] <= 25),
            (viz1000Data['FLDifference'] <= 1000) & (viz1000Data['LateralDistance'] > 25),
            (viz1000Data['FLDifference'] > 1000) & (viz1000Data['LateralDistance'] <= 25),
            (viz1000Data['FLDifference'] > 1000) & (viz1000Data['LateralDistance'] > 25),
        ]
        values = ['True','False','False','False']
        viz1000Data['potentialLoss'] = np.select(conditions, values)
    else:
        column_names = ['SeparationEntry', 'DateTime','FlightLevel','TargetID','Direction','LateralDistance',
                       'FLDifference', 'Longitude', 'Latitude']
        viz1000Data = pd.DataFrame(columns = column_names)
    
    return viz1000Data

In [216]:
#Get the report at the 400 level
def getSeparation400Report():
    
    flSeparation400Report = get400candidate()

    if len(flSeparation400Report) > 0:
        flSeparation400Report = getSeaparationReports(flSeparation400Report)
        
    flSeparation400Report['TargetID_x'].replace('', np.nan, inplace=True)
    flSeparation400Report['TargetID_y'].replace('', np.nan, inplace=True)

    flSeparation400Report.dropna(subset=['TargetID_x'], inplace=True)
    flSeparation400Report.dropna(subset=['TargetID_y'], inplace=True)
    
    return flSeparation400Report.reset_index(drop=True)

In [244]:
#Get the visualization data at the 400 level
def visualization400(report400):
    
    if len(report400) > 0:
        viz400Data = getVisTable(report400)
        conditions = [
            (viz400Data['FLDifference'] <= 400) & (viz400Data['LateralDistance'] <= 25),
            (viz400Data['FLDifference'] <= 400) & (viz400Data['LateralDistance'] > 25),
            (viz400Data['FLDifference'] > 400) & (viz400Data['LateralDistance'] <= 25),
            (viz400Data['FLDifference'] > 400) & (viz400Data['LateralDistance'] > 25),
        ]
        values = ['True','False', 'False','False']
        viz400Data['potentialLoss'] = np.select(conditions, values)
    else:
        column_names = ['SeparationEntry', 'DateTime','FlightLevel','TargetID','Direction','LateralDistance',
                       'FLDifference', 'Longitude', 'Latitude']
        viz400Data = pd.DataFrame(columns = column_names)
    
    return viz400Data

In [241]:
#Functions to call for the data cleanup

#Save raw data with the time formatting
allAircraftData = filterAttributes()
timeFormatting()
airspaceData = dataFiltering()
removeHISpace()
aircraftDirection()

In [249]:
#Functions to call for the reports
#proximityReport = getProximityReport()
separation400Report = getSeparation400Report()
visualization400Report = visualization400(separation400Report)
separation1000Report = getSeparation1000Report()
visualization1000Report = visualization1000(separation1000Report)

In [222]:
proximityReport

Unnamed: 0,DateTime_x,Day_x,Hour_x,Minute_x,Second_x,Latitude_x,Longitude_x,FlightLevel_x,TargetID_x,SelectedHeading_x,...,Longitude_y,FlightLevel_y,TargetID_y,SelectedHeading_y,TargetAddress_y,Direction_y,Distance,HeightDifference_ft,potentialLoss400,potentialLoss1000
0,2021-12-24 00:00:02.984000,2021-12-24,0,0,2,20.830261,-156.335646,32000.0,ASA817,257.343750,...,-156.083303,29575.0,UAL1224,255.937500,AA3AE4,W,14.204776,2425.0,False,False
1,2021-12-24 00:00:03.688000,2021-12-24,0,0,3,25.288479,-154.688899,36000.0,ASA103,220.781250,...,-154.562401,33975.0,HAL61,-1.000000,A1A546,,10.309552,2025.0,False,False
2,2021-12-24 00:00:03.898000,2021-12-24,0,0,3,27.520614,-148.661447,38000.0,AAL229,227.812500,...,-148.558839,30000.0,AAL684,0.000000,A4F1E5,E,6.390620,8000.0,False,False
3,2021-12-24 00:00:01.078000,2021-12-24,0,0,1,51.292745,-145.250855,35000.0,EVA016,101.250000,...,-144.992808,37000.0,CAL004,92.109375,899021,E,18.123251,2000.0,False,False
4,2021-12-24 00:00:01.055000,2021-12-24,0,0,1,40.963316,-139.369687,35000.0,ANA9653,265.078125,...,-138.991516,32000.0,KAL8208,272.812500,71C043,W,19.032024,3000.0,False,False
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
1050,2021-12-24 23:56:03.547000,2021-12-24,23,56,3,27.921204,-151.669110,36000.0,ASA879,215.156250,...,-151.419283,31025.0,ASA896,35.156250,ACCD10,E,17.444224,4975.0,False,False
1051,2021-12-24 23:56:03.547000,2021-12-24,23,56,3,27.921204,-151.669110,36000.0,ASA879,215.156250,...,-151.397306,33000.0,ASA832,-1.000000,A78C43,,19.989689,3000.0,False,False
1052,2021-12-24 23:57:04.156000,2021-12-24,23,57,4,31.063424,-132.270502,44975.0,LXJ587,239.765625,...,-132.206671,35975.0,ANZ1007,222.187500,C820CE,W,24.395061,9000.0,False,False
1053,2021-12-24 23:57:03.188000,2021-12-24,23,57,3,43.035542,-130.106572,33000.0,ANA9643,281.953125,...,-129.723944,34000.0,DAL565,213.750000,A6CC41,W,21.978028,1000.0,False,False


In [250]:
separation400Report

Unnamed: 0,SeparationEntry,DateTime,Day,Minute,Second,Latitude_x,Longitude_x,FlightLevel_x,TargetID_x,SelectedHeading_x,X_direction,Latitude_y,Longitude_y,FlightLevel_y,TargetID_y,SelectedHeading_y,Y_direction,LateralDistance,FlightLevelDifference
0,0,2021-12-24 01:35:04.313,2021-12-24,35,4,20.959396,-155.805899,16975.0,SWA1436,71.015625,E,21.295461,-154.660034,37112.5,TWY131,,W,67.270761,20137.5
1,0,2021-12-24 01:35:09.461,2021-12-24,35,9,20.960797,-155.796270,17150.0,SWA1436,71.015625,E,21.289338,-154.666731,36937.5,TWY131,,W,66.264273,19787.5
2,0,2021-12-24 01:35:13.961,2021-12-24,35,13,20.962054,-155.787831,17325.0,SWA1436,71.015625,E,21.282921,-154.673767,36762.5,TWY131,,W,65.300974,19437.5
3,0,2021-12-24 01:35:19.258,2021-12-24,35,19,20.963543,-155.777893,17500.0,SWA1436,71.015625,E,21.273010,-154.684554,36500.0,TWY131,,W,63.992445,19000.0
4,0,2021-12-24 01:35:24.484,2021-12-24,35,24,20.964981,-155.768084,17650.0,SWA1436,71.015625,E,21.263222,-154.695248,36225.0,TWY131,,W,62.700249,18575.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
1262,10,2021-12-24 23:33:41.977,2021-12-24,33,41,22.807297,-154.273066,41000.0,EDG141,,E,22.015316,-154.084694,41000.0,SWA2191,68.203125,E,48.686839,0.0
1263,10,2021-12-24 23:33:46.922,2021-12-24,33,46,22.815764,-154.265238,41000.0,EDG141,,E,22.016895,-154.074656,41000.0,SWA2191,68.203125,E,49.117011,0.0
1264,10,2021-12-24 23:33:51.469,2021-12-24,33,51,22.823445,-154.258169,41000.0,EDG141,,E,22.018410,-154.064900,41000.0,SWA2191,68.203125,E,49.510631,0.0
1265,10,2021-12-24 23:33:55.766,2021-12-24,33,55,22.830754,-154.251404,41000.0,EDG141,,E,22.019874,-154.055437,41000.0,SWA2191,68.203125,E,49.885585,0.0


In [251]:
visualization400Report

Unnamed: 0,SeparationEntry,DateTime,FlightLevel,TargetID,Direction,LateralDistance,FLDifference,Longitude,Latitude,potentialLoss
0,0,2021-12-24 01:35:04.313,16975.0,SWA1436,E,67.270761,20137.5,-155.805899,20.959396,False
1,0,2021-12-24 01:35:09.461,17150.0,SWA1436,E,66.264273,19787.5,-155.796270,20.960797,False
2,0,2021-12-24 01:35:13.961,17325.0,SWA1436,E,65.300974,19437.5,-155.787831,20.962054,False
3,0,2021-12-24 01:35:19.258,17500.0,SWA1436,E,63.992445,19000.0,-155.777893,20.963543,False
4,0,2021-12-24 01:35:24.484,17650.0,SWA1436,E,62.700249,18575.0,-155.768084,20.964981,False
...,...,...,...,...,...,...,...,...,...,...
2529,10,2021-12-24 23:33:41.977,41000.0,SWA2191,E,48.686839,0.0,-154.084694,22.015316,False
2530,10,2021-12-24 23:33:46.922,41000.0,SWA2191,E,49.117011,0.0,-154.074656,22.016895,False
2531,10,2021-12-24 23:33:51.469,41000.0,SWA2191,E,49.510631,0.0,-154.064900,22.018410,False
2532,10,2021-12-24 23:33:55.766,41000.0,SWA2191,E,49.885585,0.0,-154.055437,22.019874,False


In [252]:
separation1000Report

Unnamed: 0,SeparationEntry,DateTime,Day,Minute,Second,Latitude_x,Longitude_x,FlightLevel_x,TargetID_x,SelectedHeading_x,X_direction,Latitude_y,Longitude_y,FlightLevel_y,TargetID_y,SelectedHeading_y,Y_direction,LateralDistance,FlightLevelDifference
0,0,2021-12-24 00:14:00.141,2021-12-24,14,0,21.592529,-156.957076,19400.0,DAL495,45.703125,E,21.335682,-157.356262,18250.0,UAL410,45.00000,E,27.117001,1150.0
1,0,2021-12-24 00:14:05.969,2021-12-24,14,5,21.599029,-156.946689,19525.0,DAL495,45.703125,E,21.338428,-157.351990,18362.5,UAL410,45.00000,E,27.525616,1162.5
2,0,2021-12-24 00:14:10.195,2021-12-24,14,10,21.603823,-156.939087,19625.0,DAL495,45.703125,E,21.346296,-157.339579,18650.0,UAL410,45.00000,E,27.198849,975.0
3,0,2021-12-24 00:14:14.281,2021-12-24,14,14,21.608431,-156.931712,19700.0,DAL495,45.703125,E,21.348530,-157.335994,18725.0,UAL410,45.00000,E,27.453615,975.0
4,0,2021-12-24 00:14:18.297,2021-12-24,14,18,21.612947,-156.924540,19775.0,DAL495,45.703125,E,21.353132,-157.328663,18862.5,UAL410,45.00000,E,27.442787,912.5
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
5396,39,2021-12-24 23:45:39.555,2021-12-24,45,39,21.500104,-155.279999,40000.0,HAL54,,E,21.538330,-154.853144,37000.0,SWA2183,71.71875,E,23.952310,3000.0
5397,39,2021-12-24 23:45:43.758,2021-12-24,45,43,21.501205,-155.269775,40000.0,HAL54,,E,21.539474,-154.842757,37000.0,SWA2183,71.71875,E,23.961482,3000.0
5398,39,2021-12-24 23:45:49.125,2021-12-24,45,49,21.502533,-155.257241,40000.0,HAL54,,E,21.541032,-154.828312,37000.0,SWA2183,71.71875,E,24.068797,3000.0
5399,39,2021-12-24 23:45:53.188,2021-12-24,45,53,21.503549,-155.247752,40000.0,HAL54,,E,21.542118,-154.818293,37000.0,SWA2183,71.71875,E,24.098458,3000.0


In [253]:
visualization1000Report

Unnamed: 0,SeparationEntry,DateTime,FlightLevel,TargetID,Direction,LateralDistance,FLDifference,Longitude,Latitude,potentialLoss
0,0,2021-12-24 00:14:00.141,19400.0,DAL495,E,27.117001,1150.0,-156.957076,21.592529,False
1,0,2021-12-24 00:14:05.969,19525.0,DAL495,E,27.525616,1162.5,-156.946689,21.599029,False
2,0,2021-12-24 00:14:10.195,19625.0,DAL495,E,27.198849,975.0,-156.939087,21.603823,False
3,0,2021-12-24 00:14:14.281,19700.0,DAL495,E,27.453615,975.0,-156.931712,21.608431,False
4,0,2021-12-24 00:14:18.297,19775.0,DAL495,E,27.442787,912.5,-156.924540,21.612947,False
...,...,...,...,...,...,...,...,...,...,...
10797,39,2021-12-24 23:45:39.555,37000.0,SWA2183,E,23.952310,3000.0,-154.853144,21.538330,False
10798,39,2021-12-24 23:45:43.758,37000.0,SWA2183,E,23.961482,3000.0,-154.842757,21.539474,False
10799,39,2021-12-24 23:45:49.125,37000.0,SWA2183,E,24.068797,3000.0,-154.828312,21.541032,False
10800,39,2021-12-24 23:45:53.188,37000.0,SWA2183,E,24.098458,3000.0,-154.818293,21.542118,False


In [254]:
#Export file
proximityReport.to_csv('proximityReport.csv', index=False, header=True)
separation400Report.to_csv('separation400Report.csv', index=False, header=True)
visualization400Report.to_csv('visualization400Report.csv', index=False, header=True)
separation1000Report.to_csv('separation1000Report.csv', index=False, header=True)
visualization1000Report.to_csv('visualization1000Report.csv', index=False, header=True)


In [255]:
import sagemaker, boto3, os
bucket = sagemaker.Session().default_bucket()
prefix = "demo-sagemaker-2021-12-24"

boto3.Session().resource('s3').Bucket(bucket).Object(
    os.path.join(prefix, 'data/proximityReport.csv')).upload_file('proximityReport.csv')
boto3.Session().resource('s3').Bucket(bucket).Object(
    os.path.join(prefix, 'data/separation400Report.csv')).upload_file('separation400Report.csv')
boto3.Session().resource('s3').Bucket(bucket).Object(
    os.path.join(prefix, 'data/visualization400Report.csv')).upload_file('visualization400Report.csv')
boto3.Session().resource('s3').Bucket(bucket).Object(
    os.path.join(prefix, 'data/separation1000Report.csv')).upload_file('separation1000Report.csv')
boto3.Session().resource('s3').Bucket(bucket).Object(
    os.path.join(prefix, 'data/visualization1000Report.csv')).upload_file('visualization1000Report.csv')