In [27]:
import pandas as pd
import plotly.graph_objects as go
import numpy as np



In [28]:
# loading the dataframes
df = pd.read_csv("../data/CleanedFlights.csv")

dfAirports = pd.read_csv("../data/Airports.csv")

# Dropping the cancelled Flights, because they are not relevant for this visualization

df = df[df["CANCELLED"] == 0]
dfAirports.head()


Columns (7,8) have mixed types.Specify dtype option on import or set low_memory=False.



Unnamed: 0,IATA_CODE,AIRPORT,CITY,STATE,COUNTRY,LATITUDE,LONGITUDE
0,ABE,Lehigh Valley International Airport,Allentown,PA,USA,40.65236,-75.4404
1,ABI,Abilene Regional Airport,Abilene,TX,USA,32.41132,-99.6819
2,ABQ,Albuquerque International Sunport,Albuquerque,NM,USA,35.04022,-106.60919
3,ABR,Aberdeen Regional Airport,Aberdeen,SD,USA,45.44906,-98.42183
4,ABY,Southwest Georgia Regional Airport,Albany,GA,USA,31.53552,-84.19447


In [29]:
# creating dictionaries from the IATA_CODE to the Airport, Latitude and Longiture

AirportDict = pd.Series(dfAirports.AIRPORT.values, index = dfAirports.IATA_CODE).to_dict()
LatitudeDict = pd.Series(dfAirports.LATITUDE.values, index = dfAirports.IATA_CODE).to_dict()
LongitudeDict = pd.Series(dfAirports.LONGITUDE.values, index = dfAirports.IATA_CODE).to_dict()


In [30]:
# sorting the dataframe by airport and their total flights

df = df.groupby(["ORIGIN_AIRPORT", "DESTINATION_AIRPORT"]).count().sort_values("YEAR", ascending = False)

# dropping not needet columns and renaming

df.drop(df.columns[[0,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17]], axis = 1, inplace = True)
df.rename(columns = {df.columns[0]: "COUNT"}, inplace= True)

# adding columns for the full airport name, latitude and longitude

df.insert(column= "ORIGIN_AIRPORT_SHORT", loc = 0, value = df.index.get_level_values(0))
df.insert(column= "DESTINATION_AIRPORT_SHORT", loc = 0, value = df.index.get_level_values(1))

df.insert(column = "ORIGIN_AIRPORT_LONG", loc = 3, value = "")
df.insert(column = "ORIGIN_LATITUDE", loc = 4, value = "")
df.insert(column = "ORIGIN_LONGITUDE", loc = 5, value = "")

df.insert(column = "DESTINATION_AIRPORT_LONG", loc = 6, value = "")
df.insert(column = "DESTINATION_LATITUDE", loc = 7, value = "")
df.insert(column = "DESTINATION_LONGITUDE", loc = 8, value = "")


df["ORIGIN_AIRPORT_LONG"] = df["ORIGIN_AIRPORT_SHORT"].map(AirportDict)
df["ORIGIN_LATITUDE"] = df["ORIGIN_AIRPORT_SHORT"].map(LatitudeDict)
df["ORIGIN_LONGITUDE"] = df["ORIGIN_AIRPORT_SHORT"].map(LongitudeDict)

df["DESTINATION_AIRPORT_LONG"] = df["DESTINATION_AIRPORT_SHORT"].map(AirportDict)
df["DESTINATION_LATITUDE"] = df["DESTINATION_AIRPORT_SHORT"].map(LatitudeDict)
df["DESTINATION_LONGITUDE"] = df["DESTINATION_AIRPORT_SHORT"].map(LongitudeDict)

df = df.reset_index()
df.drop(df.columns[[0,1]], axis = 1, inplace = True)
df = df[:25]
df.head(50)

Unnamed: 0,DESTINATION_AIRPORT_SHORT,ORIGIN_AIRPORT_SHORT,COUNT,ORIGIN_AIRPORT_LONG,ORIGIN_LATITUDE,ORIGIN_LONGITUDE,DESTINATION_AIRPORT_LONG,DESTINATION_LATITUDE,DESTINATION_LONGITUDE
0,LAX,SFO,13406,San Francisco International Airport,37.619,-122.37484,Los Angeles International Airport,33.94254,-118.40807
1,SFO,LAX,13115,Los Angeles International Airport,33.94254,-118.40807,San Francisco International Airport,37.619,-122.37484
2,LAX,JFK,11875,John F. Kennedy International Airport (New Yor...,40.63975,-73.77893,Los Angeles International Airport,33.94254,-118.40807
3,JFK,LAX,11872,Los Angeles International Airport,33.94254,-118.40807,John F. Kennedy International Airport (New Yor...,40.63975,-73.77893
4,LAX,LAS,9652,McCarran International Airport,36.08036,-115.15233,Los Angeles International Airport,33.94254,-118.40807
5,LAS,LAX,9528,Los Angeles International Airport,33.94254,-118.40807,McCarran International Airport,36.08036,-115.15233
6,ORD,LGA,9204,LaGuardia Airport (Marine Air Terminal),40.77724,-73.87261,Chicago O'Hare International Airport,41.9796,-87.90446
7,LGA,ORD,9167,Chicago O'Hare International Airport,41.9796,-87.90446,LaGuardia Airport (Marine Air Terminal),40.77724,-73.87261
8,JFK,SFO,8324,San Francisco International Airport,37.619,-122.37484,John F. Kennedy International Airport (New Yor...,40.63975,-73.77893
9,SFO,JFK,8323,John F. Kennedy International Airport (New Yor...,40.63975,-73.77893,San Francisco International Airport,37.619,-122.37484


In [31]:
# exporting the re-organized dataframe as csv to use it in an html doc
df.to_json("DataFrameForMostCommonRoutes.json", orient= "index")

In [109]:
import random
color = "%06x" % random.randint(0, 0xFFFFFF)

accessToken = 'pk.eyJ1IjoiZmxvcmFpbmsiLCJhIjoiY2wxbjh1cGZyMDl6YjNlcGd4MXZ2bDNoeSJ9.FzxEZ_wKMQ-sZn9L9vBKWQ'


fig = go.Figure(go.Scattermapbox(
    mode = "markers+lines",
    marker = {'size': 10}))


fig.update_layout(
    margin ={'l':0,'t':0,'b':0,'r':0},
    mapbox = {
        'center': {'lon': -100, 'lat': 38},
        'style': "dark",
        'zoom': 3,
        'accesstoken': accessToken})
for i, row in df.iterrows():
    fig.add_trace(go.Scattermapbox(
        mode = "markers+lines",
        lat = [row.ORIGIN_LATITUDE, row.DESTINATION_LATITUDE ],
        lon = [row.ORIGIN_LONGITUDE, row.DESTINATION_LONGITUDE],
        name = row.ORIGIN_AIRPORT_SHORT + " to " +  row.DESTINATION_AIRPORT_SHORT,
        line = dict(
            width = row.COUNT / 3000,
        ),
        opacity = 0.75,
        customdata = row.ORIGIN_AIRPORT_LONG + row.DESTINATION_AIRPORT_LONG,
        hovertemplate = "%{customdata}"
    ))
    if i >= 20:
        break

fig.show()

ValueError: 
    Invalid value of type 'builtins.str' received for the 'customdata' property of scattermapbox
        Received value: 'San Francisco International AirportLos Angeles International Airport'

    The 'customdata' property is an array that may be specified as a tuple,
    list, numpy array, or pandas Series