In [1]:
%load_ext autoreload
%autoreload 2

## Useful links
---
[Article: "Crowdsourced air traffic data from the OpenSky Network 2019–2020" (Earth System Science Data)](https://essd.copernicus.org/articles/13/357/2021/essd-13-357-2021.html)  
[Dataset accompanying the article above (Zenodo)](https://zenodo.org/record/6078268#.YhYYyRvjJhE)

In [6]:
from pathlib import Path
import pandas as pd


In [7]:
# Every compressed flight-list CSV found in the data directory
flightlist_files = Path("../../data/").glob("flightlist_*.csv.gz")

# Parse the three timestamp columns while reading, then stack all files
# into a single table
flightlist = pd.concat(
    [
        pd.read_csv(csv_path, parse_dates=["firstseen", "lastseen", "day"])
        for csv_path in flightlist_files
    ]
)

In [2]:
from traffic.data import airports
import altair as alt

# Airports to study, as ICAO codes grouped by region.
#
# The cells below rely on this exact shape: a list of lists, flattened with
# `sum(airports_subset, [])` to build one column per airport, and zipped with
# four region titles to draw one panel per group. A flat list of strings (as
# produced from the `country_wikipedia_link` column) makes the flattening
# raise a TypeError and cannot be paired with the panel titles.
#
# NOTE(review): the first and last ten codes match the columns of the saved
# `data` output; RJTT, RKSI, RCTP, RPLL, YSSY and YMML are hidden behind the
# "..." of that output and follow the traffic library's COVID-19 example --
# confirm they are the intended airports.
airports_subset = [
    # Europe
    ["LFPG", "EGLL", "EHAM", "EDDF", "LEMD", "LIRF", "LSZH", "UUEE"],
    # Eastern Asia
    ["VHHH", "RJBB", "RJTT", "RKSI", "RCTP", "RPLL"],
    # Asia (other) and Australia
    ["YSSY", "YMML", "OMDB", "VABB", "VIDP", "WSSS"],
    # Americas
    ["CYYZ", "KSFO", "KLAX", "KATL", "KJFK", "SBGR"],
]

In [37]:
# One single-column frame per airport: the number of departures recorded
# for each day, with the column named after the airport it describes.
daily_departures = [
    flightlist.query(f'origin == "{airport}"')
    .groupby("day")
    .agg({"callsign": "count"})
    .rename(columns={"callsign": airport})
    # the subset is a list of groups; flatten it to visit every airport
    for airport in sum(airports_subset, [])
]

# Put the per-airport columns side by side, aligned on the shared "day" index
data = pd.concat(daily_departures, axis=1)

In [38]:
# Show the daily departure counts as a rich table: one row per day, one column per airport
display(data)

Unnamed: 0_level_0,LFPG,EGLL,EHAM,EDDF,LEMD,LIRF,LSZH,UUEE,VHHH,RJBB,...,OMDB,VABB,VIDP,WSSS,CYYZ,KSFO,KLAX,KATL,KJFK,SBGR
day,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
2019-01-01 00:00:00+00:00,486,568,466,399,405,323,266,423,,228,...,590,376,516,,307,439,685,,431,268
2019-01-02 00:00:00+00:00,537,592,510,521,492,361,316,470,,225,...,599,355,579,,312,472,777,,497,327
2019-01-03 00:00:00+00:00,551,590,538,519,506,375,305,466,,227,...,624,369,559,,341,489,808,,501,351
2019-01-04 00:00:00+00:00,553,595,523,533,492,382,325,503,,213,...,634,413,590,,348,499,806,,492,333
2019-01-05 00:00:00+00:00,491,541,464,505,427,323,296,492,,230,...,620,392,571,,346,443,743,,501,331
2019-01-06 00:00:00+00:00,542,587,527,540,484,391,324,482,,222,...,584,395,581,,330,404,716,,487,304
2019-01-07 00:00:00+00:00,511,598,538,543,503,377,300,470,,211,...,559,386,573,,327,462,730,,503,340
2019-01-08 00:00:00+00:00,506,580,358,522,477,334,295,482,,223,...,579,376,579,,342,466,783,,490,360
2019-01-09 00:00:00+00:00,507,594,475,532,485,362,316,479,,209,...,568,416,580,,331,409,695,,478,349
2019-01-10 00:00:00+00:00,509,543,487,498,490,368,311,486,,231,...,597,386,588,,362,479,733,,511,342


In [39]:
# Reshape to long format (one row per day/airport pair), the layout altair works with
departures_long = data.reset_index().melt(
    "day", var_name="airport", value_name="count"
)

# Airport metadata used to attach a city and a country to each airport code
airport_info = airports.data[["icao", "municipality", "iso_country"]]

# Left join keeps every day/airport row even when no metadata matches
departures_annotated = departures_long.merge(
    airport_info,
    left_on="airport",
    right_on="icao",
    how="left",
)

# Keep only the columns used by the plots and expose the municipality as 'city'
chart_columns = ["day", "airport", "count", "municipality", "iso_country"]
chart = alt.Chart(
    departures_annotated[chart_columns].rename(columns={"municipality": "city"})
)

In [13]:
# Preview of the long-format table with the city attached (no country column).
# NOTE(review): the KeyError about 'country' saved under this cell does not
# match this code, which never selects a 'country' column -- the stored output
# looks stale; re-run the cell to confirm.
(
    data.reset_index()
    .melt("day", var_name="airport", value_name="count")
    .merge(
        airports.data[["icao", "municipality"]],
        left_on="airport",
        right_on="icao",
        how="left",
    )[["day", "airport", "count", "municipality"]]
    .rename(columns={"municipality": "city"})
)

KeyError: "['country'] not in index"

In [13]:
def full_chart(source, subset, subset_name):
    """Draw the departures of one group of airports as points plus a trend line.

    Parameters
    ----------
    source
        Chart to start from; it is filtered on its ``airport`` field and must
        also provide the ``day``, ``count`` and ``city`` fields used below.
    subset
        Airport codes to keep in this chart.
    subset_name
        Title shown on the colour legend.

    Returns
    -------
    The trend-line layer and the point layer combined with ``+``.
    """
    # Hovering selects the airport of the nearest point so its trend can be thickened
    hover = alt.selection(
        type="single", nearest=True, on="mouseover", fields=["airport"]
    )

    # The source holds many airports; keep only those of this group
    filtered = source.transform_filter(
        alt.FieldOneOfPredicate(field="airport", oneOf=subset)
    )

    # Raw daily counts, drawn half-transparent to limit the visual noise
    legend = alt.Legend(title=subset_name)
    scatter = filtered.mark_point().encode(
        x="day",
        y=alt.Y("count", title="# of departing flights"),
        color=alt.Color("airport", legend=legend),
        # details displayed next to the hovered point
        tooltip=["day", "airport", "city", "count"],
        opacity=alt.value(0.5),
    )
    scatter = scatter.add_selection(hover)

    # Line width is 1 by default and 3 for the airport currently hovered
    line_width = alt.condition(~hover, alt.value(1), alt.value(3))
    trend = filtered.mark_line().encode(
        x="day",
        y="count",
        color="airport",
        size=line_width,
    )
    # Smooth each airport's cloud of points into a LOESS trend
    trend = trend.transform_loess(
        "day", "count", groupby=["airport"], bandwidth=0.2
    )

    return trend + scatter


# Legend titles, one per group of airports and in the same order as the groups
subset_titles = [
    "European airports",
    "East-Asian airports",
    "Asian/Australian airports",
    "American airports",
]

# One fixed-size panel per (title, group of airports) pair
panels = [
    full_chart(chart, codes, title).properties(width=600, height=150)
    for title, codes in zip(subset_titles, airports_subset)
]

# Stack the panels vertically; each panel keeps its own colour scale
result = alt.vconcat(*panels).resolve_scale(color="independent")

result