In [3]:
import pandas as pd
import numpy as np
from sklearn.cluster import KMeans
import plotly.express as px
import plotly.graph_objects as go
from sklearn.cluster import DBSCAN

In [160]:
# Load the raw airport table and the cleaned US routes table.
airports_csv = pd.read_csv('../data/openFlightsRaw/airports.csv')
usRoutes_csv = pd.read_csv('../data/usRoutesClean.csv')

# Inner join on the route's source airport: airports that never appear as a
# route source are dropped, and an airport appears once per matching route row.
airportsData = airports_csv.merge(usRoutes_csv, how="inner", left_on='AIRPT_ID', right_on='SRC_AIRPT_ID')

# Keep only rows whose airport country is the United States.
# .copy() makes the result an independent frame, so the cluster columns that
# later cells add to it do not raise SettingWithCopyWarning.
airportsData = airportsData[airportsData['CTRY'] == 'United States'].copy()

In [161]:
# Coordinate matrix for the clustering cells: one row per airportsData row,
# column 0 = LAT, column 1 = LONG (column order matters for the haversine metric).
airportsCoords = airportsData.loc[:, ['LAT', 'LONG']].to_numpy()

## K-Means clustering

In [144]:
# Sweep the number of clusters and record each fit's inertia
# (sum of squared distances of samples to their closest cluster centre).
kList = range(1, 50)

inertiaList = [
    KMeans(n_clusters=nClusters, random_state=0, n_init=10).fit(airportsCoords).inertia_
    for nClusters in kList
]

In [145]:
# Inertia-vs-k plot used to choose the number of K-means clusters.
# Store k next to each inertia and plot against it: without an explicit x the
# scatter falls back to the 0-based row index, so every point would be read as
# one less than the k that actually produced it.
inertiaDF = pd.DataFrame({'k': list(kList), 'Inertias': inertiaList})
fig = px.scatter(
    inertiaDF,
    x="k",
    y="Inertias",
    labels={'k': 'Number of clusters (k)'},
)
fig.show()

In [146]:
# Label every row with one of 12 K-means clusters (seeded, so labels are repeatable).
airportsData['Kcluster'] = (
    KMeans(n_clusters=12, random_state=0, n_init=10)
    .fit(airportsCoords)
    .labels_
)

# Translate each cluster label into the palette colour at the same position.
airportsData['KclusterColor'] = airportsData['Kcluster'].map(
    dict(enumerate(px.colors.qualitative.Dark24))
)

In [147]:
# Map of the US airport rows, each marker coloured by its K-means cluster.
fig = go.Figure()

fig.add_trace(go.Scattergeo(
    # Legend entry for the trace; the previous value was the literal
    # placeholder text 'string', which is what the legend displayed.
    name = 'US airports (K-means clusters)',
    lat = airportsData['LAT'].tolist(),
    lon = airportsData['LONG'].tolist(),
    mode = 'markers',
    marker = dict(
        size = 3,
        color = airportsData['KclusterColor'].tolist(),
    ),
))

# NOTE(review): this title is identical to the one on the DBSCAN map further
# down and does not mention clustering - confirm it is the intended caption.
fig.update_layout(
    title_text='All US Airports have Incoming and Outgoing Traffic',
    showlegend=True,
    geo=dict(
        scope = 'usa',
        showland = True,
        landcolor = 'lightgray',
    )
)

fig.show()

In [149]:
# Rows in K-means cluster 0 whose destination IATA code is ATL.
clust0ATL = airportsData.loc[
    airportsData['Kcluster'].eq(0) & airportsData['DESTINIATA'].eq('ATL')
]

# Source-airport names of those rows (taken before sorting).
clust0SRC = clust0ATL['SRCNAME']

# Rows leaving LAX whose destination is one of those source airports,
# ordered by the 'meters' column.
LAXtoClust0 = airportsData.loc[
    airportsData['SRCIATA'].eq('LAX') & airportsData['DESTINNAME'].isin(clust0SRC)
].sort_values(by='meters')

clust0ATL = clust0ATL.sort_values(by='meters')

In [150]:
# Keep only the cluster-0 -> ATL rows whose source airport is also a
# destination of one of the LAX rows.
clust0ATLFiltered = clust0ATL.loc[
    clust0ATL['SRCNAME'].isin(LAXtoClust0['DESTINNAME'])
]

In [151]:
# Pair every LAX leg (left, suffix _x) with the leg that continues from its
# destination airport to ATL (right, suffix _y), then keep just the
# itinerary columns under friendlier names.
kClustTotal = (
    LAXtoClust0
    .merge(clust0ATLFiltered, how="left", left_on='DESTINNAME', right_on='SRCNAME')
    .rename(columns={
        'SRCNAME_x': 'SRC',
        'DESTINNAME_x': 'LAYOVER',
        'DESTINNAME_y': 'DESTIN',
        'meters_x': "TRIP1m",
        'seconds_x': 'TRIP1s',
        'meters_y': "TRIP2m",
        'seconds_y': 'TRIP2s',
    })
    .reindex(columns=['SRC', 'LAYOVER', 'DESTIN', 'TRIP1m', 'TRIP1s', 'TRIP2m', 'TRIP2s'])
)

# Totals over both legs; the time total also adds a fixed 90 * 60 on top of the
# two 'seconds' values (presumably a 90-minute layover expressed in seconds).
kClustTotal = kClustTotal.assign(
    TripTotm=lambda legs: legs['TRIP1m'] + legs['TRIP2m'],
    TripTots=lambda legs: legs['TRIP1s'] + legs['TRIP2s'] + (90*60),
)

In [152]:
# Render the kClustTotal frame as this cell's output.
kClustTotal

Unnamed: 0,SRC,LAYOVER,DESTIN,TRIP1m,TRIP1s,TRIP2m,TRIP2s,TripTotm,TripTots
0,Los Angeles International Airport,Memphis International Airport,Hartsfield Jackson Atlanta International Airport,2932427,94705,611464,20347,3543891,120452
1,Los Angeles International Airport,Nashville International Airport,Hartsfield Jackson Atlanta International Airport,3267755,105524,410693,13843,3678448,124767
2,Los Angeles International Airport,Charlotte Douglas International Airport,Hartsfield Jackson Atlanta International Airport,3913245,127225,396184,13676,4309429,146301
3,Los Angeles International Airport,Raleigh Durham International Airport,Hartsfield Jackson Atlanta International Airport,4122228,133546,648403,21655,4770631,160601


## Density-based clustering (DBSCAN)

In [153]:
# Sweep DBSCAN's eps and record how many clusters each value produces.
#
# eps is expressed in radians: the haversine metric is applied to coordinates
# that have been converted with np.radians.

# Build the grid from integers and scale it. np.arange with a float step can
# gain or lose its last element through rounding, which would make the number
# of fits unpredictable. This yields 0.00001, 0.00002, ..., 0.05499.
epsilonList = np.arange(1, 5500) * 0.00001

# The conversion does not depend on eps, so do it once rather than per fit.
coordsRadians = np.radians(airportsCoords)

itemList = []
for e in epsilonList:
    dbscan = DBSCAN(eps = e, min_samples = 10, metric = 'haversine').fit(coordsRadians)
    # Label -1 marks noise points, not a cluster, so it must not be counted.
    clusterLabels = dbscan.labels_[dbscan.labels_ != -1]
    itemList.append(len(np.unique(clusterLabels)))

In [154]:
# Cluster count as a function of eps, used to pick the DBSCAN radius.
# The column key keeps its original (misspelt) name so anything that reads
# itemDF still works; `labels` only overrides the text shown on the axis.
itemDF = pd.DataFrame({'Epislon': epsilonList, 'Clusters': itemList})
fig = px.scatter(
    itemDF,
    x = 'Epislon',
    y = 'Clusters',
    labels = {'Epislon': 'Epsilon (radians)'},
)
fig.show()

In [168]:
# Cluster the rows with DBSCAN (haversine distance on radian coordinates).
# DBSCAN labels noise points -1; adding 1 moves noise to 0 and the real
# clusters to 1..n, so every label is a valid position in the colour palette.
airportsData['densityCluster'] = (
    DBSCAN(eps=0.02666, min_samples=10, metric='haversine')
    .fit(np.radians(airportsCoords))
    .labels_
    + 1
)

# Look up each shifted label's colour; noise rows (label 0) get the first palette entry.
airportsData['densityClusterColor'] = airportsData['densityCluster'].map(
    dict(enumerate(px.colors.qualitative.Alphabet))
)

# Disabled: uncomment to drop the noise rows (label 0) from airportsData.
#airportsData = airportsData[airportsData['densityCluster'] != 0]

In [170]:
# Map of the US airport rows, each marker coloured by its DBSCAN cluster
# (noise rows use the first palette colour).
fig = go.Figure()

fig.add_trace(go.Scattergeo(
    # Legend entry for the trace; the previous value was the literal
    # placeholder text 'string', which is what the legend displayed.
    name = 'US airports (DBSCAN clusters)',
    lat = airportsData['LAT'].tolist(),
    lon = airportsData['LONG'].tolist(),
    mode = 'markers',
    opacity=0.70,
    marker = dict(
        size = 3,
        color = airportsData['densityClusterColor'].tolist(),
    )
))

# NOTE(review): this title is identical to the one on the K-means map and does
# not mention clustering - confirm it is the intended caption.
fig.update_layout(
    title_text='All US Airports have Incoming and Outgoing Traffic',
    showlegend=True,
    geo=dict(
        scope = 'usa',
        showland = True,
        landcolor = 'lightgray',
    )
)

fig.show()