In [48]:
from mpl_toolkits.basemap import Basemap
from multiprocessing import Process,Array,Value
from collections import defaultdict
from scipy.spatial import distance as dst
import matplotlib.pyplot as plt
import shapely.geometry as sp
import numpy as np
import pandas as pd
import pickle as pk
import datetime as dt
import threading
import itertools
import math
import time
import json
import copy

<div>
    <h1>Dataset Cleaning</h1>
</div>

<div>
<font size="4",style="font-family:Ubuntu"> <p> The following cell reads the Flight_Tracks.csv which is the actual paths taken by all flights in the month of August 2013 </p></font><br>
<font size="3">
<b>Input</b> &rarr; Flight_Tracks.csv- (~81 Million Rows) <br>
<b>Output</b> &rarr; Tracks Dataframe </font>
</div>
 

In [1]:
Tracks=pd.read_csv("Flight_Tracks.csv")
Tracks

Unnamed: 0,id,received,callsign,altitude,ground_speed,latitude_degrees,longitude_degrees,flighthistory_id
0,108923402,2013-09-09 16:27:54+00,AWE404,37000.0,454,39.330002,-91.730003,308536896
1,108923403,2013-09-09 16:27:54+00,EJA955,43000.0,496,39.130001,-95.970001,308759273
2,108923404,2013-09-09 16:27:54+00,JBU223,36000.0,467,37.169998,-98.980003,308510756
3,108923405,2013-09-09 16:27:54+00,UAL436,37000.0,474,38.919998,-97.529999,308546747
4,108923406,2013-09-09 16:28:00+00,SKW6407,33000.0,451,43.169998,-115.269997,308543571
...,...,...,...,...,...,...,...,...
81406058,15,2013-08-21 16:19:15+00,SKW4595,38000.0,444,40.529999,-117.000000,306674139
81406059,20,2013-08-21 16:19:15+00,SWA445,38000.0,421,40.400002,-107.230003,306650840
81406060,22,2013-08-21 16:19:19+00,AWI3772,3000.0,257,40.830002,-73.750000,306663759
81406061,24,2013-08-21 16:19:17+00,SKW4522,8700.0,272,40.799999,-112.120003,306673932


<div>
<font size="4",style="font-family:Ubuntu"> <p> The following cell drops the unnecessary columns and retains the columns needed, cleans the time column to represent a datetime object, sorts by the time column and outputs the cleaned dataset cleanTracks.csv </p></font><br>
<font size="3">
<b>Input</b> &rarr; Tracks Dataframe <br>
<b>Output</b> &rarr;cleanTracks.csv </font>
</div>

In [None]:
Tracks.sort_values(by=['flighthistory_id'],inplace=True)
Tracks.drop(['id','callsign','altitude'],axis=1,inplace=True)
col='received'
series=list(Tracks[col])
cleanedSeries=[]
for date in series:
    cleanDate=date.split('+')[0]
    cleanedSeries.append(dt.datetime.fromisoformat(cleanDate))
Tracks[col]=cleanedSeries
Tracks.sort_values(by=['received'],inplace=True)
Tracks=Tracks.reset_index()
Tracks.drop(['index'],axis=1,inplace=True)
Tracks.to_csv("Outputs/realTracks.csv",index=False)

In [3]:
cleanTracks=pd.read_csv("Outputs/cleanTracks.csv")
cleanTracks

Unnamed: 0,received,ground_speed,latitude_degrees,longitude_degrees,flighthistory_id
0,2013-08-14 08:30:56,172,36.207200,-79.425000,306095730
1,2013-08-14 08:31:56,187,36.259400,-79.430000,306095730
2,2013-08-14 08:32:53,214,36.326700,-79.418100,306095730
3,2013-08-14 08:33:53,191,36.363100,-79.383600,306095730
4,2013-08-14 08:34:59,186,36.402200,-79.336400,306095730
...,...,...,...,...,...
81406058,2013-09-11 13:57:30,316,42.680000,-106.449997,308962199
81406059,2013-09-11 13:58:31,279,42.700001,-106.529999,308962199
81406060,2013-09-11 13:59:32,250,42.720001,-106.620003,308962199
81406061,2013-09-11 14:24:46,130,37.630001,-122.099998,308960729


<div>
<font size="4",style="font-family:Ubuntu"> <p> The following cells reads the Flight_History.csv which has the corresponding information about all the flights that took place in the month of August 2013 and we clean the csv file in order to remove unnecessary columns, drop NULL rows etc. and output the cleaned csv</p></font><br>
<font size="3">
<b>Input</b> &rarr; Flight_History.csv <br>
<b>Output</b> &rarr; cleanFlightHistory.csv</font>
</div>

In [61]:
Flights=pd.read_csv("Outputs/Flight_History.csv",low_memory=False)
Flights

Unnamed: 0,id,airline_code,airline_icao_code,flight_number,departure_airport_code,departure_airport_icao_code,arrival_airport_code,arrival_airport_icao_code,published_departure,published_arrival,...,scheduled_runway_arrival,actual_runway_arrival,creator_code,scheduled_air_time,scheduled_block_time,departure_airport_timezone_offset,arrival_airport_timezone_offset,scheduled_aircraft_type,actual_aircraft_type,icao_aircraft_type_actual
0,307025132,G4,AAY,565,EUG,KEUG,LAS,KLAS,2013-08-26 02:25:00+00,2013-08-26 04:20:00+00,...,,,I,,115.0,-7,-7,M80,,
1,307089377,9K,KAP,3312,BOS,KBOS,MVY,KMVY,,,...,,,D,,37.0,-4,-4,,,
2,307391933,AA,,23,DEN,KDEN,DFW,KDFW,,,...,2013-08-26 17:31:00+00,,D,111.0,115.0,-6,-5,,,
3,306201343,ZW,AWI,19,DCA,KDCA,ORF,KORF,,,...,2013-08-15 18:14:00+00,,A,29.0,,-4,-4,,,
4,306690299,5A,AIP,5103,GTF,KGTF,BIL,KBIL,,,...,2013-08-19 09:17:00+00,,A,42.0,,-6,-6,,,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
711571,308738047,UA,UAL,1197,SFO,KSFO,IAH,KIAH,2013-09-11 07:23:00+00,2013-09-11 10:59:00+00,...,2013-09-11 10:58:00+00,2013-09-11 10:54:00+00,I,203.0,216.0,-7,-5,738,,B738
711572,308710521,UA,UAL,1540,LAS,KLAS,IAH,KIAH,2013-09-11 07:33:00+00,2013-09-11 10:22:00+00,...,2013-09-11 10:11:00+00,2013-09-11 09:59:00+00,I,148.0,169.0,-7,-5,738,,B738
711573,308711065,UA,UAL,1614,LAX,KLAX,IAH,KIAH,2013-09-11 07:20:00+00,2013-09-11 10:28:00+00,...,2013-09-11 10:25:00+00,2013-09-11 10:44:00+00,I,168.0,188.0,-7,-5,739,,B739
711574,308710908,US,AWE,436,LAX,KLAX,CLT,KCLT,2013-09-11 07:35:00+00,2013-09-11 12:16:00+00,...,2013-09-11 12:01:00+00,2013-09-11 11:53:00+00,I,258.0,281.0,-7,-4,321,,A321


In [62]:
Flights=Flights[['id','departure_airport_icao_code','arrival_airport_icao_code','scheduled_runway_departure','actual_runway_departure','scheduled_runway_arrival','actual_runway_arrival']]
Flights=Flights.dropna()
Flights=Flights.reset_index()
Flights.drop(['index'], axis = 1, inplace = True)
ColsToClean=['scheduled_runway_departure','actual_runway_departure','scheduled_runway_arrival','actual_runway_arrival',]
for col in ColsToClean:
    series=list(Flights[col])
    cleanedSeries=[]
    for date in series:
        cleanDate=str(date).split('+')[0]
        cleanedSeries.append(dt.datetime.fromisoformat(cleanDate))
    Flights[col]=cleanedSeries
badRows=Flights[(Flights["departure_airport_icao_code"] =='KALX') | (Flights["departure_airport_icao_code"] =='KPLB')| (Flights["arrival_airport_icao_code"] =='KALX') | (Flights["arrival_airport_icao_code"] =='KPLB')].index
Flights.drop(badRows,inplace=True)
Flights.sort_values(by=['scheduled_runway_departure'],inplace=True)
Flights=Flights.reset_index()
Flights.drop(['index'],axis=1,inplace=True)
Flights.to_csv("Outputs/cleanFlightHistory.csv",index=False)

In [63]:
cleanFlights=pd.read_csv("Outputs/cleanFlightHistory.csv")
cleanFlights

Unnamed: 0,id,departure_airport_icao_code,arrival_airport_icao_code,scheduled_runway_departure,actual_runway_departure,scheduled_runway_arrival,actual_runway_arrival
0,305966493,KSDF,KDEC,2013-08-14 08:40:00,2013-08-14 09:14:00,2013-08-14 09:44:00,2013-08-14 10:12:00
1,306196254,KSDF,KBHM,2013-08-14 09:00:00,2013-08-14 09:04:00,2013-08-14 09:47:00,2013-08-14 09:51:00
2,306196711,KMEM,KCOS,2013-08-14 09:00:00,2013-08-14 09:14:00,2013-08-14 11:01:00,2013-08-14 11:18:00
3,306196678,KMEM,KMIA,2013-08-14 09:00:00,2013-08-14 09:09:00,2013-08-14 10:52:00,2013-08-14 10:58:00
4,306196521,KSDF,KMKE,2013-08-14 09:00:00,2013-08-14 09:15:00,2013-08-14 09:54:00,2013-08-14 10:04:00
...,...,...,...,...,...,...,...
646704,308962754,KMEM,KSAN,2013-09-11 08:58:00,2013-09-11 09:04:00,2013-09-11 12:07:00,2013-09-11 12:09:00
646705,308961528,KSDF,KSGF,2013-09-11 08:59:00,2013-09-11 09:08:00,2013-09-11 10:03:00,2013-09-11 10:03:00
646706,308962550,KMEM,KRIC,2013-09-11 08:59:00,2013-09-11 09:11:00,2013-09-11 10:37:00,2013-09-11 10:46:00
646707,308962750,KSDF,KHSV,2013-09-11 08:59:00,2013-09-11 09:16:00,2013-09-11 09:58:00,2013-09-11 10:26:00


<h1> Misseleaneous Pickle Files Generation </h1>

<div>
<font size="4",style="font-family:Ubuntu"> <p> The following cell reads the cleanTracks.csv which has a mapping between every flight and its corresponding ground speed in knots captured at 1 minute intervals(timestamps) and outputs the dictionary as a pickle file that maps each flightID to its median ground speed/cruise speed. </p></font><br>
<font size="3">
<b>Input</b> &rarr; cleanTracks.csv <br>
<b>Output</b> &rarr; SpeedDict.pkl </font>
</div>

In [3]:
cleanTracks=pd.read_csv("Outputs/cleanTracks.csv")
cleanTracks.sort_values(by=['received'],inplace=True)
speeds = cleanTracks.groupby(["flighthistory_id"])["ground_speed"].median()
df_dict = speeds.to_dict()
avg_speed = np.array(list(df_dict.values())).mean()
med_speed = np.median(list(df_dict.values()))
print("The Average Cruise Speed of a flight is ",avg_speed)
print("The Median Cruise Speed of a flight is ",med_speed)
print("The Number of flights whose cruise speed is known",len(df_dict))
speedDictFile=open("Outputs/SpeedDict.pkl","wb")
pk.dump(df_dict,speedDictFile)
speedDictFile.close()

The Average Cruise Speed of a flight is  386.2909277154822
The Median Cruise Speed of a flight is  407.5
The Number of flights whose cruise speed is known 631131


<div>
<font size="4",style="font-family:Ubuntu"> <p> The following cell gives a mapping between the sectors themselves and the centre to which they belong to, by outputting a dict of dict wherein the first index marks the max X coordinate and the second index marks the max Y coordinate.<br> For example, SectorChunkDict[1000000][2000000]=[0,1,2,3,4,...,49] means that sectors 0 to 49 are bounded by  X = [0,1000000] and Y = [0,2000000]    </p></font><br>
<font size="3">
<b>Input</b> &rarr; - <br>
<b>Output</b> &rarr; SectorChunkDict.pkl </font>
</div>

In [60]:
ZoneDict=dict()
for zone in range(1000000,8000000,1000000):
    ZoneDict[zone]=[]
Centres=[[2000000,4000000],[1500000,2000000,2500000,4000000],[1350000,1900000,2400000,4000000],[1000000,1500000,2000000,2500000,4000000],[800000,1250000,1700000,2150000,2700000,4000000],[1750000,2300000,4000000],[4000000]]
CurrentZone=0
for zone in range(len(ZoneDict)):
    CurrentZone+=1000000
    for centre in Centres[zone]:
        tempDict=dict()
        tempDict[centre]=[]
        ZoneDict[CurrentZone].append(tempDict)
CopyDict=copy.deepcopy(ZoneDict)
newDict=defaultdict(dict)
for zone in CopyDict:
    for chunk in CopyDict[zone]:
        inner_key=[key for key in chunk.keys()][0]
        newDict[zone][inner_key]=[]
maxSector=0
for Zone in newDict:
    for Chunk in newDict[Zone]:
        newDict[Zone][Chunk]+=[sectorNumber for sectorNumber in range(maxSector,maxSector+50)]
        maxSector+=50
SectorChunkDict = open("Outputs/SectorChunkDict.pkl", "wb")
pk.dump(newDict,SectorChunkDict)
SectorChunkDict.close()

<h1>Test Days</h1><br>
<font size="4",style="font-family:Ubuntu"> <p> The days tested for are given below - we obtain the 3 metrics for each of the test days and compare the metric with the predicted value from the GA <br><br> Range = [2013-08-14,2013-09-11]</p></font><br>

In [6]:
TEST_DAYS=["2013-08-16","2013-08-17","2013-08-18"]

<h1>Metric 1 - Traffic Factor</h1>

<div>
<font size="4",style="font-family:Ubuntu"> <p> The following cell reads the cleanTracks.csv which has a mapping between every flight and the coordinates of its actual path measured at constant time intervals. We use this data to obtain the TrafficFactor for each test day.<b> The Traffic Factor is a measure of the maximum number of aircraft present in every sector (measure the sector count of each sector) at any given time across the entire day.</b> </p></font><br>
<font size="3">
<b>Input</b> &rarr; cleanTracks.csv <br>
<b>Output</b> &rarr; N pickle files where N is the number of days to be tested. </font>
</div>

In [8]:
cleanTracks=pd.read_csv("Outputs/cleanTracks.csv")
flightDates=set(df["received"].str[:10])
flightTimes=set(df["received"].str[:19])
flightDates=list(flightDates)
flightTimes=list(flightTimes)
flightDates=sorted(flightDates)
flightTimes=sorted(list(map(lambda x:datetime.fromisoformat(x),flightTimes)))
flightTimeDict=dict()
for index,val in enumerate(flightTimes):
    flightTimeDict[val]=index
flightTimeDictFile=open("Outputs/flightTimeDict.pkl","wb")
pk.dump(flightTimeDict,flightTimeDictFile)
flightTimeDictFile.close()

In [18]:
for date in flightDates:
    dftemp=df[df["received"].str[:10]==date]
    dftemp.to_csv(f"{date}.csv",index=False)

In [2]:
m = pk.load(open("M_ConversionMetric.pkl", "rb"))
SectorChunkDict = pk.load(open("SectorChunkDict.pkl", "rb"))
output = pk.load(open("ConvexDict.pkl", "rb"))
flightTimeDict = pk.load(open("flightTimeDict.pkl", "rb"))

In [4]:
def getChunk(Point,SectorChunkDict):
    innerDict=SectorChunkDict[math.ceil(Point[0]/1000000)*1000000]
    Y=0
    for key in innerDict.keys():
        if key >= Point[1]:
            Y=key
            break
    return innerDict[Y] if Y!=0 else []

In [5]:
def findSectorPath(path,sectorPath,z,SectorChunkDict,output):
    sector=-1
    for pointIdx in range(len(path)):
        sectorList=getChunk(path[pointIdx],SectorChunkDict)
        for i in sectorList:
            if len(output[i]) < 3:
                continue
            if sp.Polygon(output[i]).contains(sp.Point(path[pointIdx])):
                sector=i
                break
        if sector!=-1:
            sectorPath.append(sector)
        else:
            z.append(pointIdx)

In [6]:
def getSectorTimings(dfHistory, m, timingDict, flightTimeDict,SectorChunkDict,output):
    dfHistory.sort_values(by=['flighthistory_id','received'],inplace=True)
    dfHistory=dfHistory.reset_index()
    dfHistory.drop(['index'],axis=1,inplace=True)
    east,north=m(dfHistory["longitude_degrees"],dfHistory["latitude_degrees"])
    path=list(zip(east,north))
    sectorPath=[]
    z=[]
    findSectorPath(path,sectorPath,z,SectorChunkDict,output)
    dfHistory.drop(z, axis=0, inplace=True)
    dfHistory=dfHistory.reset_index()
    dfHistory.drop(['index'],axis=1,inplace=True)
    timings=list(map(lambda x:datetime.fromisoformat(x),dfHistory["received"].str[:19]))
    i=0
    while i<len(sectorPath):
        element=sectorPath[i]
        start=i
        while i<len(sectorPath) and sectorPath[i]==element:
            i+=1
        end=i-1
        if end==len(sectorPath):
            end-=1
        timingDict[element].append(sorted([flightTimeDict[timings[start]],flightTimeDict[timings[end]]]))

In [7]:
def getNaiveK(timingDict):
    naiveKDict=dict()
    for sector in timingDict:
        p=timingDict[sector]
        if(len(p)==0):
            naiveKDict[sector]=-1
            continue
        arrival=[x[0] for x in p]
        departure=[x[1] for x in p]
        t = max(departure)
        count = [0] * (t + 2)
        for i in range(len(arrival)):
            count[arrival[i]] += 1
            count[departure[i] + 1] -= 1

        max_event_tm = count[0]

        for i in range(1, t + 1):
            count[i] += count[i - 1]
            if count[max_event_tm] < count[i]:
                max_event_tm = i
        naiveKDict[sector] = count[max_event_tm]
    return naiveKDict

In [5]:
Dates=["2013-08-14","2013-08-15","2013-08-16","2013-08-17","2013-08-18","2013-08-19","2013-08-20","2013-08-21","2013-08-22","2013-08-23"]

In [11]:
for date in Dates:
    DayDf=pd.read_csv(f"Naive K/{date}.csv")
    timingDictMulti={}
    for sector in range(1250):
        timingDictMulti[sector]=[]
    getSectorTimings(DayDf,m,timingDictMulti,flightTimeDict,SectorChunkDict,output)
    timingDictFile=open(f"Naive K/Output/timingDict{date}.pkl","wb")
    pk.dump(timingDictMulti,timingDictFile)
    timingDictFile.close()
    KDict=getNaiveK(timingDictMulti)
    KDictFile=open(f"Naive K/Output/KDict{date}.pkl","wb")
    pk.dump(KDict,KDictFile)
    KDictFile.close()

In [6]:
KDict=[]
for dates in Dates:
    tempDict=pk.load(open(f"Naive K/Output/KDict{dates}.pkl", "rb"))
    tempDict=dict(sorted(tempDict.items(),key=lambda x : x[1], reverse =True))
    KDict.append(tempDict)

In [7]:
import math
KDictFinal=dict()
for sector in range(1250):
    DictSum=0
    for dateDict in KDict:
        DictSum+=dateDict[sector]
    DictAvg=math.ceil(DictSum/10)
    KDictFinal[sector]=DictAvg

In [8]:
KDictFinalFile=open(f"KDict.pkl","wb")
pk.dump(KDictFinal,KDictFinalFile)

In [3]:
KDict = pk.load(open("Outputs/KDict.pkl", "rb"))
KDict

{0: 15,
 1: 10,
 2: 9,
 3: 8,
 4: 9,
 5: 0,
 6: 7,
 7: 39,
 8: 7,
 9: 8,
 10: 7,
 11: 6,
 12: 6,
 13: 6,
 14: 6,
 15: 4,
 16: 12,
 17: 9,
 18: 8,
 19: 5,
 20: 7,
 21: 5,
 22: 20,
 23: 38,
 24: 9,
 25: 13,
 26: 6,
 27: 5,
 28: 7,
 29: 6,
 30: 6,
 31: 4,
 32: 5,
 33: 4,
 34: 11,
 35: 7,
 36: 6,
 37: 4,
 38: 6,
 39: 3,
 40: 6,
 41: 8,
 42: 14,
 43: 6,
 44: 8,
 45: 10,
 46: 9,
 47: 4,
 48: 9,
 49: 13,
 50: 9,
 51: 7,
 52: 8,
 53: 7,
 54: 12,
 55: 11,
 56: 5,
 57: 4,
 58: 4,
 59: 9,
 60: 7,
 61: 7,
 62: 5,
 63: 6,
 64: 5,
 65: 4,
 66: 6,
 67: 5,
 68: 4,
 69: 5,
 70: 5,
 71: 7,
 72: 4,
 73: 8,
 74: 5,
 75: 4,
 76: 6,
 77: 6,
 78: 6,
 79: 5,
 80: 4,
 81: 4,
 82: 7,
 83: 5,
 84: 2,
 85: 3,
 86: 8,
 87: 4,
 88: 5,
 89: 6,
 90: 6,
 91: 20,
 92: 6,
 93: 7,
 94: 12,
 95: 4,
 96: 4,
 97: 7,
 98: 5,
 99: 7,
 100: 7,
 101: 8,
 102: 6,
 103: 9,
 104: 6,
 105: 6,
 106: 5,
 107: 7,
 108: 5,
 109: 6,
 110: 11,
 111: 7,
 112: 5,
 113: 5,
 114: 7,
 115: 7,
 116: 5,
 117: 5,
 118: 6,
 119: 7,
 120: 3,
 121:

In [8]:
flights

Unnamed: 0,id,departure_airport_icao_code,arrival_airport_icao_code,scheduled_runway_departure,actual_runway_departure,scheduled_runway_arrival,actual_runway_arrival
0,306354086,KMYR,KBWI,2013-08-18 21:33:00,2013-08-18 21:33:00,2013-08-18 22:37:00,2013-08-18 22:34:00
1,306268157,KRKD,KBOS,2013-08-17 10:40:00,2013-08-17 10:38:00,2013-08-17 11:39:00,2013-08-17 11:37:00
2,308325895,KLEB,KBOS,2013-09-07 21:15:00,2013-09-07 21:05:00,2013-09-07 21:56:00,2013-09-07 21:40:00
3,306126691,KEUG,KPDX,2013-08-16 12:22:00,2013-08-16 12:18:00,2013-08-16 12:45:00,2013-08-16 12:47:00
4,306228870,KEYW,KRSW,2013-08-17 14:10:00,2013-08-17 14:02:00,2013-08-17 15:00:00,2013-08-17 14:46:00
...,...,...,...,...,...,...,...
646707,308738047,KSFO,KIAH,2013-09-11 07:35:00,2013-09-11 07:41:00,2013-09-11 10:58:00,2013-09-11 10:54:00
646708,308710521,KLAS,KIAH,2013-09-11 07:43:00,2013-09-11 07:35:00,2013-09-11 10:11:00,2013-09-11 09:59:00
646709,308711065,KLAX,KIAH,2013-09-11 07:37:00,2013-09-11 07:58:00,2013-09-11 10:25:00,2013-09-11 10:44:00
646710,308710908,KLAX,KCLT,2013-09-11 07:43:00,2013-09-11 07:41:00,2013-09-11 12:01:00,2013-09-11 11:53:00


<div>
<font size="4",style="font-family:Ubuntu"> <p> The following cell reads the Flight_History.csv which has the corresponding information about all the flights that took place in the month of August 2013 and the M and ConvexDict pickle files. It outputs a pickle file that has a mapping between all airports in the Flight_History csv file and their corresponding sector within the USA. </p></font><br>
<font size="3">
<b>Input</b> &rarr; cleanFlightHistory.csv,M_ConversionMetric.pkl,ConvexDict.pkl,airports.json <br>
<b>Output</b> &rarr; airportSectorDict.pkl </font>
</div>

In [9]:
cleanFlights=pd.read_csv("Outputs/cleanFlightHistory.csv")
m=pk.load(open("Outputs/M_ConversionMetric.pkl",'rb'))
ConvexDict=pk.load(open("Outputs/ConvexDict.pkl",'rb'))
airportsDB=json.load(open('Inputs/airports.json', encoding = "utf-8"))

airports=list(set(cleanFlights["departure_airport_icao_code"])) + list(set(cleanFlights["arrival_airport_icao_code"]))
airports=list(set(airports))
airportObjects=dict()
for x in airports:
    if x in airportsDB and airportsDB[x]["country"]=='US':
        airportObjects[x]=m(airportsDB[x]["lon"],airportsDB[x]["lat"])
airportToSector=dict()
for airport in airportObjects:
    found=False
    AssignedSector=0
    point=sp.Point(airportObjects[airport])
    distance=sp.Polygon(ConvexDict[0]).distance(point)
    for i in range(0,1250):
        if len(ConvexDict[i]) < 3:
            continue
        doesContain=sp.Polygon(ConvexDict[i]).contains(point)
        if doesContain:
            airportToSector[airport]=i
            found=True
            break
        if(len(ConvexDict[i])>2):
            t=sp.Polygon(ConvexDict[i]).distance(point)
            if(t<distance):
                distance=t
                AssignedSector=i
    if(not found):
        airportToSector[airport]=AssignedSector
filehandler = open("Outputs/airportSectorDict.pkl","wb")
pk.dump(airportToSector,filehandler)
filehandler.close()

In [None]:
#TEST START

In [11]:
flights=pd.read_csv("Outputs/cleanFlightHistory.csv",low_memory=False)
Tracks=pd.read_csv("Outputs/realTracks.csv")
m = pk.load(open("Outputs/M_ConversionMetric.pkl", "rb"))
speeds = pk.load(open("Outputs/SpeedDict.pkl", "rb"))
SectorGraph = pk.load(open("Outputs/SectorGraph.pkl", "rb"))
Centroids = pk.load(open("Outputs/CentroidDict.pkl", "rb"))
ConvexHulls = pk.load(open("Outputs/ConvexDict.pkl", "rb"))
airportSector=pk.load(open("Outputs/airportSectorDict.pkl", "rb"))

In [250]:
flightID=

In [251]:
speedinKnots=speeds[flightID]
flight=flights[flights['id']==flightID]
start=airportSector[flight['departure_airport_icao_code'].values[0]]
end=airportSector[flight['arrival_airport_icao_code'].values[0]]
print(f"Input to GA:\n{start},{end},0,{speedinKnots}")

realDepart=dt.datetime.fromisoformat(str(flight['actual_runway_departure'].values[0]))
realArrival=dt.datetime.fromisoformat(str(flight['actual_runway_arrival'].values[0]))
flights[flights['id']==flightID]

Input to GA:
989,990,0,354.0


Unnamed: 0,id,departure_airport_icao_code,arrival_airport_icao_code,scheduled_runway_departure,actual_runway_departure,scheduled_runway_arrival,actual_runway_arrival
131965,306020280,KDTW,KMKE,2013-08-16 00:00:00,2013-08-16 00:12:00,2013-08-16 00:48:00,2013-08-16 00:49:00


In [256]:
%matplotlib qt
fig = plt.figure(pk.load(open("Outputs/Simulator.pkl","rb")))

In [257]:
pathFromGA=[989,980,983,998,990]

In [258]:
singleFlightTrack=Tracks[Tracks['flighthistory_id']==flightID]
singleFlightTrack=singleFlightTrack.sort_values('received')
lat=singleFlightTrack['latitude_degrees'].values
lon=singleFlightTrack['longitude_degrees'].values
realX,realY=m(lon,lat)
PathLenGA=0
PathLenReal=0
for pt in range(1,len(realX)):
     PathLenReal+=dst.euclidean((realX[pt-1],realY[pt-1]),(realX[pt],realY[pt]))
mpm_speed=(speedinKnots/1.944)*60
GAx=[Centroids[pathFromGA[0]][0]]
GAy=[Centroids[pathFromGA[0]][1]]
for Hull in range(len(pathFromGA)-1):
    X=[]
    Y=[]
    Plot=ConvexHulls[pathFromGA[Hull]]
    Plot.append(ConvexHulls[pathFromGA[Hull]][0])
    for pt in Plot:
        X.append(pt[0])
        Y.append(pt[1])
    line1 = sp.LineString(list(Plot))
    for neigh in SectorGraph.neighbors(pathFromGA[Hull]):
        if(neigh==pathFromGA[Hull+1]):
            line2 = sp.LineString([Centroids[pathFromGA[Hull]], Centroids[neigh]])
            ABC=line1.intersection(line2)
            PathLenGA+=dst.euclidean((GAx[-1],GAy[-1]),(ABC.x,ABC.y))
            GAx.append(ABC.x)
            GAy.append(ABC.y)
PathLenGA+=dst.euclidean((GAx[-1],GAy[-1]),Centroids[pathFromGA[-1]])
GAx.append(Centroids[pathFromGA[-1]][0])
GAy.append(Centroids[pathFromGA[-1]][1])

In [259]:
plt.plot(realX,realY,linewidth=20,c='b',label="Actual Path")
plt.plot(GAx,GAy,linewidth=20,c='g',label="GA Path")
plt.legend(loc="upper left",prop={'size': 100})
fig.canvas.draw()

In [263]:
print(PathLenGA/mpm_speed)
print(PathLenReal/mpm_speed)
T=((realArrival-realDepart).seconds//3600)*60+(((realArrival-realDepart).seconds//60)%60)
print(T)

47.615939210392554
50.97006467576444
37


In [272]:
((PathLenReal/(T*60))*1.944)

487.6595377086652

In [273]:
(PathLenReal/(T*60))

250.853671660836

In [228]:
import time
dfFlights=pd.read_csv("Outputs/cleanFlightHistory.csv")
ConvexDict = pk.load(open("Outputs/ConvexDict.pkl", "rb"))
dayDf=dfFlights[dfFlights['scheduled_runway_departure'].str.contains("2013-08-16")].copy()
dayDf.sort_values(by=["scheduled_runway_departure"], inplace = True)
dayDf=dayDf[:20]
stlol=time.time()
ProperPath=0
BadPath=0
NoPath=[]
HalfPath=[]
FullPath=[]
for index,flight in dayDf.iterrows():
    start=airportSector[flight['departure_airport_icao_code']]
    end=airportSector[flight['arrival_airport_icao_code']]
    if(start==end):
        continue
    singleFlightTrack=Tracks[Tracks['flighthistory_id']==flight['id']]
    singleFlightTrack=singleFlightTrack.sort_values('received')
    lat=singleFlightTrack['latitude_degrees'].values
    lon=singleFlightTrack['longitude_degrees'].values
    realX,realY=m(lon,lat)
    if(len(realX)==0):
        NoPath.append(flight['id'])
    else:
        StartPoint=(realX[0],realY[0])
        EndPoint=(realX[-1],realY[-1])
        if(not ((sp.Polygon(ConvexDict[start]).contains(sp.Point(StartPoint))) and (sp.Polygon(ConvexDict[end]).contains(sp.Point(EndPoint))))):
            HalfPath.append(flight['id'])
        else:
            FullPath.append(flight['id'])
enlol=time.time()
print(enlol-stlol)

1.796879768371582


In [179]:
import distinctipy as ds
colors=ds.get_colors(5)
print(len(NoPath))
print(len(HalfPath))

119
212


In [229]:
FullPath

[306072605,
 306017890,
 306022466,
 306003307,
 306049714,
 306386666,
 306020239,
 306011989,
 306011591,
 306027806,
 306020280,
 305998957,
 306011214,
 306011477]

In [209]:
sorted(HalfPath)

[305996946,
 305997428,
 305998002,
 305998329,
 305998413,
 305998431,
 305998977,
 305999004,
 306000891,
 306004037,
 306004132,
 306005300,
 306005311,
 306005591,
 306005842,
 306005859,
 306005929,
 306005950,
 306006077,
 306006587,
 306007616,
 306009806,
 306010059,
 306010064,
 306011222,
 306011386,
 306011390,
 306011419,
 306011443,
 306011472,
 306011474,
 306011483,
 306011524,
 306011570,
 306011733,
 306011739,
 306011753,
 306011777,
 306011782,
 306011978,
 306012981,
 306013134,
 306014468,
 306014595,
 306015112,
 306015162,
 306015286,
 306015449,
 306015556,
 306015744,
 306017054,
 306017193,
 306017200,
 306017204,
 306017227,
 306017473,
 306017638,
 306017735,
 306017820,
 306019796,
 306019872,
 306020074,
 306020078,
 306020134,
 306020234,
 306020408,
 306020448,
 306021748,
 306022005,
 306022048,
 306022089,
 306022478,
 306022609,
 306023193,
 306023420,
 306023491,
 306025481,
 306025495,
 306025864,
 306027259,
 306027281,
 306029874,
 306032596,
 306

In [None]:
#TEST END

<div>
<font size="4",style="font-family:Ubuntu"> <p> The following cells reads the Flight_History.csv which has the corresponding information about all the flights that took place in the month of August 2013 and the airportToSector dict pickle file and the speedDict pickle files which has the cruise speed of the flights and generate text files that will serve as an input to the GA which we provide via the upload functionality of the website. </p></font><br>
<font size="3">
<b>Input</b> &rarr; cleanFlightHistory.csv, airportSectorDict, ConvexDict.pkl, SpeedDict.pkl <br>
<b>Output</b> &rarr; Text Files which is an input to the GA via the website</font>
</div>

In [111]:
dfFlights=pd.read_csv("Outputs/cleanFlightHistory.csv")
airportToSector=pk.load(open("Outputs/airportSectorDict.pkl","rb"))
speedDict=pk.load(open("Outputs/SpeedDict.pkl","rb"))
AirportDB=json.load(open('Inputs/airports.json', encoding = "utf-8"))
for day in TestDays:
    dayDf=dfFlights[dfFlights['scheduled_runway_departure'].str.contains(day)].copy()
    dayDf.sort_values(by=["scheduled_runway_departure"], inplace = True)
    ids=list(dayDf['id'])
    source=list(dayDf["departure_airport_icao_code"])
    destination=list(dayDf["arrival_airport_icao_code"])
    startTime=[dt.datetime.fromisoformat(str(date)) for date in dayDf["scheduled_runway_departure"]]
    triplets=list(zip(ids,source,destination))
    start=[]
    for s in startTime:
        hour=s.hour
        minute=s.minute
        if minute < 10:
            minute="0" + str(minute)
        start.append(str(hour) + ":" + str(minute))
    toPrint=""
    for i in range(len(triplets)):
        flightID,sourceAirport,destinationAirport=triplets[i]
        sourceAirportName=AirportDB[sourceAirport]["name"]
        destinationAirportName=AirportDB[destinationAirport]["name"]
        if airportToSector[sourceAirport] == airportToSector[destinationAirport]:
            continue
        if flightID not in speedDict:
            speedDict[flightID]=407.5
        toPrint+=f"{sourceAirportName},{destinationAirportName},1,{start[i]},{speedDict[flightID]}"
        if i!=len(triplets)-1:
            toPrint+="\n"
    f = open(f"{day}-pairs.txt", "w")
    f.write(toPrint)
    f.close()