In [24]:
import pandas as pd
import numpy as np
import googlemaps

In [25]:
routes_csv = pd.read_csv('../data/routesDist.csv')

In [26]:
usSrcRoutes = routes_csv[routes_csv['SRCCTRY'] == 'United States']
print(len(usSrcRoutes))

usDestinRoutes = routes_csv[routes_csv['DESTINCTRY'] == 'United States']
print(len(usDestinRoutes))

13021
13016


In [27]:
mergeCols = ['AIRLINE_ID', 'SRC_AIRPT_ID', 'DESTIN_AIRPT_ID']

usRoutes = usSrcRoutes.merge(usDestinRoutes, how='outer', on=mergeCols)
len(usRoutes)

15519

In [28]:
usRoutesTest = pd.concat([usSrcRoutes,usDestinRoutes])
usRoutesTest = usRoutesTest.drop_duplicates()
len(usRoutesTest)

15519

In [29]:
#re-using variable above that was used to show concat and outer join provide similar data
usRoutes = usRoutesTest[['SRC_AIRPT_ID', 'SRCNAME', 'DESTIN_AIRPT_ID', 'DESTINNAME']]

In [30]:
def distCalc(x, y):
    file = open("../dataEngineering/gMapsAPIKey.txt")
    gMapsKey = file.read()
    file.close()

    gmaps = googlemaps.Client(key=gMapsKey)

    distance = gmaps.distance_matrix(x, y)['rows'][0]['elements'][0]

    try:
        meters = distance['distance']['value']
    except:
        meters = r'\N'

    try:
        seconds = distance['duration']['value']
    except:
        seconds = r'\N'

    return(meters,seconds)

In [31]:
usRoutes = usRoutes.reset_index()
usRoutes = usRoutes.rename(columns={'index': 'og_index'})

In [32]:
permM = []
permS = []

In [None]:
m = []
s = []

for i in range(len(usRoutes)):
    mVal, sVal = distCalc(usRoutes['SRCNAME'][i], usRoutes['DESTINNAME'][i])
    m.append(mVal)
    s.append(sVal)
    

In [None]:
permM.append(m)
permS.append(s)

In [33]:
permSeriesM = pd.DataFrame(permM)
permSeriesS = pd.DataFrame(permS)

permSeriesM.to_csv('../data/meters.csv')
permSeriesS.to_csv('../data/seconds.csv')

In [35]:
permSeriesM = pd.read_csv('../data/meters.csv')
permSeriesS = pd.read_csv('../data/seconds.csv')

secondsSeries = pd.Series(permSeriesS.to_numpy().flatten())
metersSeries = pd.Series(permSeriesM.to_numpy().flatten())

usRoutes["seconds"] = secondsSeries
usRoutes["meters"] = metersSeries

usRoutes.to_csv('../data/usRoutes.csv')

In [36]:
usRoutes

Unnamed: 0,og_index,SRC_AIRPT_ID,SRCNAME,DESTIN_AIRPT_ID,DESTINNAME,seconds,meters
0,166,3531,Kodiak Airport,7162,Larsen Bay Airport,0,0
1,167,7162,Larsen Bay Airport,7161,Karluk Airport,\N,\N
2,248,5726,Southeast Iowa Regional Airport,3830,Chicago O'Hare International Airport,\N,\N
3,249,5726,Southeast Iowa Regional Airport,3678,St Louis Lambert International Airport,13361,386142
4,250,4042,Decatur Airport,3830,Chicago O'Hare International Airport,11518,323800
...,...,...,...,...,...,...,...
15514,64969,1821,General Francisco J. Mujica International Airport,3484,Los Angeles International Airport,31207,914688
15515,64970,1821,General Francisco J. Mujica International Airport,3747,Chicago Midway International Airport,22054,613634
15516,65045,1835,Licenciado y General Ignacio Lopez Rayon Airport,3484,Los Angeles International Airport,21284,597326
15517,65051,1855,General Leobardo C. Ruiz International Airport,3484,Los Angeles International Airport,86827,2649815


In [None]:
#graphs to make, plot on the map routes that can be made
#A graph that shows how many routes are in a certain distancce


In [59]:
import plotly.graph_objects as go

airports = pd.read_csv('../data/openFlightsRaw/airports.csv')
usRoutes = pd.read_csv('../data/usRoutes.csv')

mapDat = usRoutes.merge(airports, how="left", left_on='SRCNAME', right_on='NAME')
mapDat = mapDat.rename(columns={'LAT': 'SRCLAT', 'LONG': 'SRCLONG'})

mapDat = mapDat.merge(airports, how="left", left_on='DESTINNAME', right_on='NAME')
mapDat = mapDat.rename(columns={'LAT': 'DESTINLAT', 'LONG': 'DESTINLONG'})

srcFig = go.Figure(data=go.Scattergeo(
    name = "Has Outgoing Traffic",
    lat = mapDat['SRCLAT'].tolist(),
    lon = mapDat['SRCLONG'].tolist(), 
    mode = 'markers',
    opacity=0.70,
    marker = dict(
        size = 5,
        color = 'blue',
        symbol = 'triangle-up',
        standoff = 3
    )
))

destinFig = go.Figure(data=go.Scattergeo(
    name = "Has Incoming Traffic",
    lat = mapDat['DESTINLAT'].tolist(),
    lon = mapDat['DESTINLONG'].tolist(), 
    mode = 'markers',
    opacity=0.70,
    marker = dict(
        size = 5,
        color = 'red',
        symbol = 'triangle-down'
    )
))

fig = go.Figure(data=go.Scattergeo())
fig.add_traces(srcFig._data)
fig.add_traces(destinFig._data)

fig.update_layout(
    title_text='All US Airports have Incoming and Outgoing Traffic',
    showlegend=True,
    geo=dict(
        scope = 'world',
        showland = True,
        landcolor = 'lightgray',
    )
)

fig.show()

In [49]:
import plotly.express as px

usRoutes = pd.read_csv('../data/usRoutes.csv')

nullVal = r"\N"
usRoutes = usRoutes[usRoutes['meters'] != nullVal]

usRoutes['meters'] = usRoutes['meters'].astype(int)
usRoutes['seconds'] = usRoutes['seconds'].astype(int)

usRoutes['hours'] = round(usRoutes['seconds']/3600, 0)

usRoutes['countVal'] = 1
usRoutesHours = usRoutes.groupby('hours')['countVal'].sum()
usRoutesHours = pd.DataFrame(usRoutesHours)
usRoutesHours = usRoutesHours.reset_index()

fig = px.bar(usRoutesHours, x='hours', y='countVal',
              labels = {
                  'countVal': "Count",
                  'hours': 'Time (hour)'
              },
              title = "Majority of Routes are within 20 hours by car")
fig.show()

usRoutes['km'] = round(usRoutes['meters']/1000, -2)
usRoutesKM = usRoutes.groupby('km')['countVal'].sum()
usRoutesKM = pd.DataFrame(usRoutesKM)
usRoutesKM = usRoutesKM.reset_index()

fig2 = px.bar(usRoutesKM, x = 'km', y = 'countVal',
              labels = {
                  'countVal': "Count",
                  'km': 'Distance (km)'
              },
              title = "Majority of Routes are within 2.5 Mm")
fig2.show()

In [51]:
import plotly.graph_objects as go

airports = pd.read_csv('../data/openFlightsRaw/airports.csv')
usRoutes = pd.read_csv('../data/usRoutes.csv')

mapDat = usRoutes.merge(airports, how="left", left_on='SRCNAME', right_on='NAME')
mapDat = mapDat.rename(columns={'LAT': 'SRCLAT', 'LONG': 'SRCLONG'})

mapDat = mapDat.merge(airports, how="left", left_on='DESTINNAME', right_on='NAME')
mapDat = mapDat.rename(columns={'LAT': 'DESTINLAT', 'LONG': 'DESTINLONG'})

nullVal = r"\N"
mapDat = mapDat[mapDat['meters'] == nullVal]

fig = go.Figure()

for index, row in mapDat.iterrows():

    fig.add_trace(go.Scattergeo(
        mode = "markers+lines",
        lat = [row['SRCLAT'], row['DESTINLAT']],
        lon = [row['SRCLONG'], row['DESTINLONG']],
        opacity=0.01,
        marker = dict(
            size = 10,
            color = 'black'
    )
        ))

fig.update_layout(
    title_text='All US Airports have Incoming and Outgoing Traffic',
    showlegend=False,
    geo=dict(
        scope = 'usa',
        showland = True,
        landcolor = 'lightgray',
    )
)

fig.show()

In [57]:
usRoutes = pd.read_csv('../data/usRoutes.csv')

nullVal = r"\N"
usRoutes = usRoutes[usRoutes['meters'] == nullVal]

usRoutes['countVal'] = 1
usRoutesAirlines = usRoutes.groupby('SRCNAME')['countVal'].sum()
usRoutesAirlines = pd.DataFrame(usRoutesAirlines)
usRoutesAirlines = usRoutesAirlines.reset_index()

usRoutesAirlines = usRoutesAirlines[usRoutesAirlines['countVal'] >= 50]
fig = px.bar(usRoutesAirlines, x='SRCNAME', y='countVal',
              labels = {
                  'countVal': "Count",
                  'SRCNAME': 'Airports'
              },
              title = "Airports that Source 50 or more Routes")
fig.show()

In [68]:
import plotly.graph_objects as go

airports = pd.read_csv('../data/openFlightsRaw/airports.csv')
usRoutes = pd.read_csv('../data/usRoutes.csv')

mapDat = usRoutes.merge(airports, how="left", left_on='SRCNAME', right_on='NAME')
mapDat = mapDat.rename(columns={'LAT': 'SRCLAT', 'LONG': 'SRCLONG'})

mapDat = mapDat.merge(airports, how="left", left_on='DESTINNAME', right_on='NAME')
mapDat = mapDat.rename(columns={'LAT': 'DESTINLAT', 'LONG': 'DESTINLONG'})

nullVal = r"\N"
mapDat = mapDat[mapDat['meters'] != nullVal]
mapDat = mapDat[(mapDat['meters']).astype(int) < 1000000]
mapDat = mapDat[mapDat['SRCNAME'] == 'Chicago O\'Hare International Airport']

fig = go.Figure()

for index, row in mapDat.iterrows():

    fig.add_trace(go.Scattergeo(
        mode = "markers+lines",
        lat = [row['SRCLAT'], row['DESTINLAT']],
        lon = [row['SRCLONG'], row['DESTINLONG']],
        opacity=0.5,
        marker = dict(
            size = 10,
            color = 'blue'
    )
        ))

fig.update_layout(
    title_text='All US Airports have Incoming and Outgoing Traffic',
    showlegend=False,
    geo=dict(
        scope = 'usa',
        showland = True,
        landcolor = 'lightgray',
    )
)

fig.show()