In [1]:
import glob
import pandas as pd
from pathlib import Path

In [2]:
# Collect every CSV file found directly under ./data (order is whatever glob yields).
data_files = list(Path('./data').glob('*.csv'))

In [3]:
# Peek at the first three discovered files.
data_files[:3]

[PosixPath('data/flightlist_20200201_20200229.csv'),
 PosixPath('data/flightlist_20200101_20200131.csv'),
 PosixPath('data/flightlist_20200301_20200331.csv')]

In [4]:
# Select the January file by name rather than by position: glob() returns
# paths in arbitrary, filesystem-dependent order (the listing above shows
# February first), so a positional index can silently load the wrong month.
df_january = pd.read_csv(Path("./data") / "flightlist_20200101_20200131.csv")

In [5]:
# Preview the first five rows of the January flight list.
df_january.head(n=5)

Unnamed: 0,callsign,number,icao24,registration,typecode,origin,destination,firstseen,lastseen,day
0,SVA816,,710064,HZ-AK24,B77W,,,2019-12-31 00:19:47+00:00,2020-01-01 13:27:28+00:00,2020-01-01 00:00:00+00:00
1,THY183,,4ba9c1,TC-JNA,A332,,LTBW,2019-12-31 00:25:26+00:00,2020-01-01 07:48:41+00:00,2020-01-01 00:00:00+00:00
2,CES738,MU738,781418,,,YMML,YSSY,2019-12-31 01:08:54+00:00,2020-01-01 01:45:29+00:00,2020-01-01 00:00:00+00:00
3,CES771,MU771,781858,,,YSSY,EHAM,2019-12-31 01:29:25+00:00,2020-01-01 04:08:28+00:00,2020-01-01 00:00:00+00:00
4,HAL5,HA5,a455ea,N379HA,A332,KLAS,,2019-12-31 02:23:05+00:00,2020-01-01 08:52:18+00:00,2020-01-01 00:00:00+00:00


In [6]:
# Select the February file by name rather than by position: glob() returns
# paths in arbitrary, filesystem-dependent order, so a positional index can
# silently load the wrong month.
df_february = pd.read_csv(Path("./data") / "flightlist_20200201_20200229.csv")

In [7]:
# Preview the first five rows of the February flight list.
df_february.head(n=5)

Unnamed: 0,callsign,number,icao24,registration,typecode,origin,destination,firstseen,lastseen,day
0,CES297,MU297,780d9e,B-2022,B77W,,KJFK,2020-01-31 00:00:08+00:00,2020-02-01 02:11:45+00:00,2020-02-01 00:00:00+00:00
1,LAN576,LA576,e80214,CC-BGA,B789,,SKBO,2020-01-31 00:00:08+00:00,2020-02-01 00:03:33+00:00,2020-02-01 00:00:00+00:00
2,SVA870,,7100bf,HZ-AK31,B77W,,RPLL,2020-01-31 00:00:11+00:00,2020-02-01 08:45:10+00:00,2020-02-01 00:00:00+00:00
3,CBJ429,,78131e,,,,LPAR,2020-01-31 01:16:27+00:00,2020-02-01 06:28:50+00:00,2020-02-01 00:00:00+00:00
4,KQA304,KQ304,04c14a,5Y-CYD,B738,FAOR,OMDB,2020-01-31 01:18:57+00:00,2020-02-01 01:44:59+00:00,2020-02-01 00:00:00+00:00


In [8]:
# Load every monthly flight list into a single frame, parsing the three
# timestamp columns as datetimes.
#
# The directory listing earlier in this notebook shows plain ".csv" files, so
# fall back to those when no gzip-compressed ".csv.gz" files are present;
# otherwise the glob is empty and pd.concat fails with the unhelpful
# "No objects to concatenate". sorted() makes the concatenation order
# deterministic, since glob order is arbitrary.
flightlist_files = sorted(Path("./data").glob("flightlist_*.csv.gz")) or sorted(
    Path("./data").glob("flightlist_*.csv")
)
if not flightlist_files:
    raise FileNotFoundError(
        "no flightlist_*.csv.gz or flightlist_*.csv files found in ./data"
    )

flightlist = pd.concat(
    pd.read_csv(file, parse_dates=["firstseen", "lastseen", "day"])
    for file in flightlist_files
)

In [11]:
import altair as alt
from traffic.data import airports

# ICAO codes of the airports to chart, one list per region.
# The groups are paired by position with the legend titles used when the
# panels are built, so the order of the groups matters.
european = ["LFPG", "EGLL", "EHAM", "EDDF", "LEMD", "LIRF", "LSZH", "UUEE"]
east_asian = ["VHHH", "RJBB", "RJTT", "RKSI", "RCTP", "RPLL"]
# Rest of Asia, plus Australia
asian_australian = ["YSSY", "YMML", "OMDB", "VABB", "VIDP", "WSSS"]
american = ["CYYZ", "KSFO", "KLAX", "KATL", "KJFK", "SBGR"]

airports_subset = [european, east_asian, asian_australian, american]

# Build a wide table indexed by day with one column per airport, holding the
# number of flights (non-null callsigns) whose origin is that airport.
all_airports = [code for group in airports_subset for code in group]

per_airport_counts = [
    flightlist.query(f'origin == "{code}"')
    .groupby("day")
    .agg({"callsign": "count"})
    .rename(columns={"callsign": code})
    for code in all_airports
]

# Align the per-airport columns side by side on the shared "day" index.
data = pd.concat(per_airport_counts, axis=1)

# Reshape the wide per-airport table into long form: one row per (day, airport).
daily_long = data.reset_index().melt(
    "day", var_name="airport", value_name="count"
)

# Attach the municipality of each airport from traffic's `airports` table;
# a left join keeps airports that have no match there.
with_city = daily_long.merge(
    airports.data[["icao", "municipality"]],
    left_on="airport",
    right_on="icao",
    how="left",
)

# Base chart shared by every panel; "municipality" is exposed as "city".
chart = alt.Chart(
    with_city[["day", "airport", "count", "municipality"]].rename(
        columns={"municipality": "city"}
    )
)

def full_chart(source, subset, subset_name):
    """Build the layered departures chart for one group of airports.

    Parameters
    ----------
    source : alt.Chart
        Chart whose data exposes the ``day``, ``airport``, ``count`` and
        ``city`` fields referenced by the encodings below.
    subset : list of str
        Values of the ``airport`` field to keep in this chart.
    subset_name : str
        Title shown on the colour legend.

    Returns
    -------
    A layered chart: one LOESS trend line per airport with the raw daily
    points drawn on top.
    """
    only_subset = alt.FieldOneOfPredicate(field="airport", oneOf=subset)
    base = source.transform_filter(only_subset)

    # Hovering near a point selects the airport that point belongs to.
    hover = alt.selection(
        type="single", nearest=True, on="mouseover", fields=["airport"]
    )

    scatter = base.mark_point().encode(
        x="day",
        y=alt.Y("count", title="# of departing flights"),
        color=alt.Color("airport", legend=alt.Legend(title=subset_name)),
        tooltip=["day", "airport", "city", "count"],
        opacity=alt.value(0.5),
    )
    points = scatter.add_selection(hover)

    # Thin line by default, thicker for the airport currently hovered.
    line_width = alt.condition(~hover, alt.value(1), alt.value(3))
    trend = base.mark_line().encode(
        x="day",
        y="count",
        color="airport",
        size=line_width,
    )
    # The point cloud is noisy, so draw a smoothed trend per airport through it.
    lines = trend.transform_loess(
        "day", "count", groupby=["airport"], bandwidth=0.2
    )

    return alt.layer(lines, points)


# Legend titles, paired by position with the groups in `airports_subset`.
subset_titles = [
    "European airports",
    "East-Asian airports",
    "Asian/Australian airports",
    "American airports",
]

# One fixed-size panel per region.
panels = [
    full_chart(chart, codes, title).properties(width=600, height=150)
    for title, codes in zip(subset_titles, airports_subset)
]

# Stack the panels vertically; each keeps its own colour scale and legend.
result = alt.vconcat(*panels).resolve_scale(color="independent")

result