In [29]:
import altair as alt
from vega_datasets import data
import pandas as pd
import glob

# Altair refuses to embed datasets larger than its max-rows limit (5000 rows by
# default) and raises MaxRowsError. Disable that check so any number of records
# can be visualized.
alt.data_transformers.disable_max_rows()

# Read the user input date
d = input("""Enter the date for visualization in YYYYMMDD format. 
          Example: 20200115 (for January 15th 2020)
          Range: January 1st 2019 till August 31st 2020: """).strip()
# Fail early on malformed input instead of globbing with a garbage pattern
# and silently ending up with an empty chart.
if len(d) != 8 or not d.isdigit():
    raise ValueError("Expected a date in YYYYMMDD format, got {!r}".format(d))
input_date = d
d = d[:-2]  # YYYYMM part of the date; data files are matched on it below

# Specify the path where data is present
path = r'C:/Users/Chethan/Desktop/TUD/TUD Sem 3/Research Project/DataSet/'
all_files = glob.glob(path + "*" + d + "*.csv")
if not all_files:
    raise FileNotFoundError(
        "No CSV file matching '*{}*.csv' found in {}".format(d, path)
    )

# Load every matching file and combine them into one frame. Previously only
# the last file read was kept, so the other matching files were ignored.
li = [pd.read_csv(filename, index_col=None, header=0) for filename in all_files]
df = pd.concat(li, axis=0, ignore_index=True)

# Drop incomplete records
df = df.dropna()

# Rewrite YYYYMMDD as the timestamp text stored in the "day" column
input_date = input_date[0:4] + "-" + input_date[4:6] + "-" + input_date[6:] + " 00:00:00+00:00"

# Dataframe of that particular date
df = df[df["day"] == input_date]


# Hover selection keyed on the "origin" field: the record nearest to the
# pointer is selected, and an empty selection matches no records.
select_city = alt.selection_single(
    fields=["origin"],
    on="mouseover",
    nearest=True,
    empty="none",
)

# Preprocessing of data for visualization
vertices = {}  # Airport ID -> number of flights flying in and out of it
positions = []  # [airport ID, latitude, longitude], in first-seen order

# Walk the six needed columns in lockstep rather than using df.iterrows(),
# which builds a full Series object for every row.
for origin, lat_1, lon_1, destination, lat_2, lon_2 in zip(
    df["origin"], df["latitude_1"], df["longitude_1"],
    df["destination"], df["latitude_2"], df["longitude_2"],
):
    # Each flight counts once for its origin and once for its destination.
    for airport, latitude, longitude in (
        (origin, lat_1, lon_1),
        (destination, lat_2, lon_2),
    ):
        if airport not in vertices:
            # First sighting: remember where the airport is located
            vertices[airport] = 0
            positions.append([airport, latitude, longitude])
        vertices[airport] += 1

# Attach the final flight count to every airport record
for row in positions:
    row.append(vertices[row[0]])

# Make a dataframe of processed data
airports = pd.DataFrame(
    positions, columns=['airport', 'latitude', 'longitude', 'flights_count']
)

# Country outlines from the vega_datasets world_110m TopoJSON file
source = alt.topo_feature(data.world_110m.url, 'countries')

# Base layer: light gray countries with white borders on a 750x500
# equirectangular map
background = alt.Chart(source).mark_geoshape(fill="lightgray", stroke='white')
background = background.properties(width=750, height=500)
background = background.project("equirectangular")

# One row per (origin, destination) pair with the number of flights on it
edges = (
    df.groupby(["origin", "destination"])
    .size()
    .reset_index(name='counts')
)

# Airport attributes that the charts join onto the edge table by airport ID
lookup_data = alt.LookupData(
    airports,
    key="airport",
    fields=["flights_count", "latitude", "longitude"],
)

# Edges
# Colour each route by its flight count, light red to dark red
route_colour = alt.Color(
    'counts:Q',
    scale=alt.Scale(range=['#FF6666', '#330000']),
    legend=alt.Legend(title="Connectivity"),
)
connections = alt.Chart(edges).mark_rule(opacity=1.0, color="red")
connections = connections.encode(
    latitude="latitude:Q",
    longitude="longitude:Q",
    latitude2="lat2:Q",
    longitude2="lon2:Q",
    color=route_colour,
)
# First join: coordinates of the origin airport (latitude / longitude)
connections = connections.transform_lookup(
    lookup="origin",
    from_=lookup_data,
)
# Second join: coordinates of the destination airport, renamed to lat2 / lon2
connections = connections.transform_lookup(
    lookup="destination",
    from_=lookup_data,
    as_=["flights_count", "lat2", "lon2"],
)
# Only draw the routes picked out by the hover selection
connections = connections.transform_filter(select_city)

# Nodes
points = alt.Chart(edges).mark_circle()
points = points.encode(
    latitude="latitude:Q",
    longitude="longitude:Q",
    size=alt.Size("flights_count:Q", scale=alt.Scale(range=[0, 1000]), legend=None),
    tooltip=["origin", "flights_count:Q"],
)
# Collapse the edge table to one row per origin airport ...
points = points.transform_aggregate(
    routes="count()",
    groupby=["origin"],
)
# ... then attach that airport's coordinates and flight count
points = points.transform_lookup(
    lookup="origin",
    from_=lookup_data,
)
# The circles drive the hover selection that filters the edges
points = points.add_selection(select_city)


# Layer map, routes and airports; stroke=None drops the view's border stroke
(background + connections + points).configure_view(stroke=None)


Enter the date for visualization in YYYYMMDD format. 
          Example: 20200115 (for January 15th 2020)
          Range: January 1st 2019 till August 31st 2020: 20200303
