In [24]:
import pandas as pd
import numpy as np
from sklearn.cluster import KMeans
import plotly.express as px
import plotly.graph_objects as go
from sklearn.cluster import DBSCAN


In [19]:
airports_csv = pd.read_csv('../data/openFlightsRaw/airports.csv')
usRoutes_csv = pd.read_csv('../data/usRoutesClean.csv')


airportsData = airports_csv.merge(usRoutes_csv, how="inner", left_on='AIRPT_ID', right_on='SRC_AIRPT_ID')

airportsCoords = airportsData[['LAT', 'LONG']]
airportsCoords = airportsCoords.to_numpy()

In [20]:
inertiaList = []
kList = range(1, 50)

for k in kList:
    kMeans = KMeans(n_clusters = k, random_state = 0, n_init = 10).fit(airportsCoords)
    inertiaList.append(kMeans.inertia_)

In [21]:
inertiaDF = pd.DataFrame(inertiaList, columns=['Inertias'])
fig = px.scatter(inertiaDF, y="Inertias")
fig.show()

In [22]:
airportsData['Kcluster'] = KMeans(n_clusters = 9, random_state = 0, n_init = 10).fit_predict(airportsCoords)
airportsData['KclusterColor'] = airportsData['Kcluster'].map(pd.Series(px.colors.qualitative.Dark24))

In [57]:
fig = go.Figure()

fig.add_trace(go.Scattergeo(
    name = 'string',
    lat = airportsData['LAT'].tolist(),
    lon = airportsData['LONG'].tolist(), 
    mode = 'markers',
    marker = dict(
        size = 3,
        color = airportsData['KclusterColor'].tolist(),
    ),
))

fig.update_layout(
    title_text='All US Airports have Incoming and Outgoing Traffic',
    showlegend=True,
    geo=dict(
        scope = 'usa',
        showland = True,
        landcolor = 'lightgray',
    )
)

fig.show()

In [42]:
#Cluster 1 is international
airportCluster1 = airportsData[airportsData['Kcluster'] == 2]

airportsCoordsCluster1 = airportCluster1[['LAT', 'LONG']]
airportsCoordsCluster1 = airportsCoordsCluster1.to_numpy()

In [43]:
itemList = []
epsilonList = np.arange(0.005, 1, 0.005)

for e in epsilonList:
    dbscan = DBSCAN(eps = e, min_samples = 10, metric = 'haversine').fit((np.radians(airportsCoordsCluster1)))
    itemList.append(len(np.unique(dbscan.labels_)))

In [44]:
itemDF = pd.DataFrame({'Epislon': epsilonList, 'Clusters': itemList})
fig = px.scatter(itemDF, y = 'Clusters', x = 'Epislon')
fig.show()

In [55]:
airportCluster1['densityCluster'] = DBSCAN(eps = 0.03, min_samples = 10, metric = 'haversine').fit_predict((np.radians(airportsCoordsCluster1)))

#Keep in mind -1 represents noise, and doesn't end up getting clustered
airportCluster1['densityCluster'] = airportCluster1['densityCluster'] + 1
airportCluster1['densityClusterColor'] = airportCluster1['densityCluster'].map(pd.Series(px.colors.qualitative.Dark24))



A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy



A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy



A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy



In [49]:
airportCluster1

Unnamed: 0,AIRPT_ID,NAME,CTY,CTRY,IATA,ICAO,LAT,LONG,ALT,TZ,...,DESTIN_AIRPT_ID,DESTINNAME,seconds,meters,SRCIATA,DESTINIATA,Kcluster,KclusterColor,densityCluster,densityClusterColor
1,49,Edmonton International Airport,Edmonton,Canada,YEG,CYEG,53.309700,-113.580002,2373,-7,...,3484,Los Angeles International Airport,182870,4570445,YEG,LAX,2,#1CA71C,0,#2E91E5
2,49,Edmonton International Airport,Edmonton,Canada,YEG,CYEG,53.309700,-113.580002,2373,-7,...,3839,Palm Springs International Airport,234919,4846788,YEG,PSP,2,#1CA71C,0,#2E91E5
6,87,Kelowna International Airport,Kelowna,Canada,YLW,CYLW,49.956100,-119.377998,1421,-8,...,3484,Los Angeles International Airport,174868,3602736,YLW,LAX,2,#1CA71C,0,#2E91E5
37,156,Vancouver International Airport,Vancouver,Canada,YVR,CYVR,49.193901,-123.183998,14,-8,...,3877,McCarran International Airport,112946,3141831,YVR,LAS,2,#1CA71C,1,#E15F99
38,156,Vancouver International Airport,Vancouver,Canada,YVR,CYVR,49.193901,-123.183998,14,-8,...,3484,Los Angeles International Airport,69360,1834750,YVR,LAX,2,#1CA71C,1,#E15F99
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
11558,7122,Merced Regional Macready Field,Merced,United States,MCE,KMCE,37.284698,-120.514000,155,-8,...,3484,Los Angeles International Airport,92363,2746401,MCE,LAX,2,#1CA71C,1,#E15F99
11575,7579,Provo Municipal Airport,Provo,United States,PVU,KPVU,40.219200,-111.723000,4497,-7,...,6505,Phoenix-Mesa-Gateway Airport,39737,1103694,PVU,AZA,2,#1CA71C,6,#B68100
11576,7579,Provo Municipal Airport,Provo,United States,PVU,KPVU,40.219200,-111.723000,4497,-7,...,3484,Los Angeles International Airport,41330,1241103,PVU,LAX,2,#1CA71C,6,#B68100
11577,7579,Provo Municipal Airport,Provo,United States,PVU,KPVU,40.219200,-111.723000,4497,-7,...,3453,Metropolitan Oakland International Airport,50130,1455472,PVU,OAK,2,#1CA71C,6,#B68100


In [56]:
fig = go.Figure()

fig.add_trace(go.Scattergeo(
    name = 'string',
    lat = airportCluster1['LAT'].tolist(),
    lon = airportCluster1['LONG'].tolist(), 
    mode = 'markers',
    opacity=0.70,
    marker = dict(
        size = 3,
        color = airportCluster1['densityClusterColor'].tolist()
    )
))

fig.update_layout(
    title_text='All US Airports have Incoming and Outgoing Traffic',
    showlegend=True,
    geo=dict(
        scope = 'usa',
        showland = True,
        landcolor = 'lightgray',
    )
)

fig.show()