In [1]:
import altair as alt
import pandas as pd
import numpy as np

## Interactive Visualization

In [2]:
# Load the raw landings statistics and preview the first rows.
landing_csv = 'Air_Traffic_Landings_Statistics.csv'
landing = pd.read_csv(landing_csv)
landing.head()

Unnamed: 0,Activity Period,Operating Airline,Operating Airline IATA Code,Published Airline,Published Airline IATA Code,GEO Summary,GEO Region,Landing Aircraft Type,Aircraft Body Type,Aircraft Manufacturer,Aircraft Model,Aircraft Version,Landing Count,Total Landed Weight
0,200507,ABX Air,GB,ABX Air,GB,Domestic,US,Freighter,Narrow Body,McDonnell Douglas,DC-9,30,40,4066000
1,200507,ABX Air,GB,ABX Air,GB,Domestic,US,Freighter,Narrow Body,McDonnell Douglas,DC-9,41,1,102000
2,200507,ATA Airlines,TZ,ATA Airlines,TZ,Domestic,US,Passenger,Narrow Body,Boeing,757,200,2,396000
3,200507,ATA Airlines,TZ,ATA Airlines,TZ,Domestic,US,Passenger,Narrow Body,Boeing,757,300,167,37408000
4,200507,Air Canada,AC,Air Canada,AC,International,Canada,Passenger,Wide Body,Boeing,767,333,1,320000


In [3]:
# Flag rows whose YYYYMM activity period is later than 201912, then keep only
# the flagged rows for the interactive chart.
# NOTE(review): the flag column is called 'After 2022', but the 201912 cutoff
# keeps everything from 202001 onward -- confirm which of the two is intended.
landing['After 2022'] = landing['Activity Period'].gt(201912)
landing_sample = landing[landing['After 2022']]
landing_sample.head()

Unnamed: 0,Activity Period,Operating Airline,Operating Airline IATA Code,Published Airline,Published Airline IATA Code,GEO Summary,GEO Region,Landing Aircraft Type,Aircraft Body Type,Aircraft Manufacturer,Aircraft Model,Aircraft Version,Landing Count,Total Landed Weight,After 2022
24429,202001,ABC Aerolineas S.A. de C.V. dba Interjet,4O,ABC Aerolineas S.A. de C.V. dba Interjet,4O,International,Mexico,Passenger,Narrow Body,Airbus,A320,-,30,4282475,True
24430,202001,ABC Aerolineas S.A. de C.V. dba Interjet,4O,ABC Aerolineas S.A. de C.V. dba Interjet,4O,International,Mexico,Passenger,Narrow Body,Airbus,A321,-,1,171520,True
24431,202001,"Aer Lingus, Ltd.",EI,"Aer Lingus, Ltd.",EI,International,Europe,Passenger,Wide Body,Airbus,A332,-,5,2001796,True
24432,202001,"Aer Lingus, Ltd.",EI,"Aer Lingus, Ltd.",EI,International,Europe,Passenger,Wide Body,Airbus,A333,-,26,10604230,True
24433,202001,Aeromexico,AM,Aeromexico,AM,International,Mexico,Passenger,Narrow Body,Boeing,B738,-,102,14706400,True


In [4]:
# Count missing values per column in the filtered landings data.
missing_per_column = landing_sample.isna().sum()
missing_per_column

Activity Period                 0
Operating Airline               0
Operating Airline IATA Code    50
Published Airline               0
Published Airline IATA Code    50
GEO Summary                     0
GEO Region                      0
Landing Aircraft Type           0
Aircraft Body Type              0
Aircraft Manufacturer           6
Aircraft Model                  0
Aircraft Version                0
Landing Count                   0
Total Landed Weight             0
After 2022                      0
dtype: int64

In [5]:
# Interval brush over both axes of the heatmap; the same selection filters
# the pie chart below, linking the two views.
brush = alt.selection_interval(encodings=['x','y'])

# Heatmap: number of landing records per (binned landed weight, region) cell.
rect_alt = alt.Chart(landing_sample).mark_rect().encode(
    alt.X("Total Landed Weight:Q",bin=alt.Bin(maxbins=30)),
    alt.Y("GEO Region:O"),
    alt.Color("count()")
).properties(
    height=400
).add_selection(
    brush
)

# Pie chart: share of records per aircraft body type within the brushed area.
# The slice angle must be a quantitative aggregate (record count); binding
# theta to the nominal body-type field itself does not size slices by how
# many records fall into each category.
pie_alt = alt.Chart(landing_sample).mark_arc().encode(
    theta=alt.Theta("count():Q"),
    color=alt.Color(field="Aircraft Body Type", type="nominal"),
).transform_filter(
    brush
)

# Place the heatmap and the linked pie chart side by side.
dashboard = rect_alt.properties(width=400) | pie_alt.properties(width=300)
dashboard

In [6]:
# Write the linked heatmap/pie dashboard to a JSON file.
interactive_spec_path = 'FinalProject_interactive_plot.json'
dashboard.save(interactive_spec_path)

## Contextual Visualization

In [7]:
# Load the raw passenger statistics and preview the first rows.
passenger_csv = 'Air_Traffic_Passenger_Statistics.csv'
passenger = pd.read_csv(passenger_csv)
passenger.head()

Unnamed: 0,Activity Period,Operating Airline,Operating Airline IATA Code,Published Airline,Published Airline IATA Code,GEO Summary,GEO Region,Activity Type Code,Price Category Code,Terminal,Boarding Area,Passenger Count
0,200507,ATA Airlines,TZ,ATA Airlines,TZ,Domestic,US,Deplaned,Low Fare,Terminal 1,B,27271
1,200507,ATA Airlines,TZ,ATA Airlines,TZ,Domestic,US,Enplaned,Low Fare,Terminal 1,B,29131
2,200507,ATA Airlines,TZ,ATA Airlines,TZ,Domestic,US,Thru / Transit,Low Fare,Terminal 1,B,5415
3,200507,Air Canada,AC,Air Canada,AC,International,Canada,Deplaned,Other,Terminal 1,B,35156
4,200507,Air Canada,AC,Air Canada,AC,International,Canada,Enplaned,Other,Terminal 1,B,34090


In [8]:
# Flag rows whose YYYYMM activity period is later than 201912, then keep only
# the flagged rows for the contextual plots.
# NOTE(review): the flag column is called 'After 2022', but the 201912 cutoff
# keeps everything from 202001 onward -- confirm which of the two is intended.
passenger['After 2022'] = passenger['Activity Period'].gt(201912)
passenger_sample = passenger[passenger['After 2022']]
passenger_sample.head()

Unnamed: 0,Activity Period,Operating Airline,Operating Airline IATA Code,Published Airline,Published Airline IATA Code,GEO Summary,GEO Region,Activity Type Code,Price Category Code,Terminal,Boarding Area,Passenger Count,After 2022
21685,202001,ABC Aerolineas S.A. de C.V. dba Interjet,4O,ABC Aerolineas S.A. de C.V. dba Interjet,4O,International,Mexico,Deplaned,Other,International,A,4367,True
21686,202001,ABC Aerolineas S.A. de C.V. dba Interjet,4O,ABC Aerolineas S.A. de C.V. dba Interjet,4O,International,Mexico,Enplaned,Other,International,A,3923,True
21687,202001,"Aer Lingus, Ltd.",EI,"Aer Lingus, Ltd.",EI,International,Europe,Deplaned,Other,International,G,6857,True
21688,202001,"Aer Lingus, Ltd.",EI,"Aer Lingus, Ltd.",EI,International,Europe,Enplaned,Other,International,G,5790,True
21689,202001,Aeromexico,AM,Aeromexico,AM,International,Mexico,Deplaned,Other,International,A,15964,True


In [9]:
# Total passenger count per geographic region over the filtered period.
passengers_by_region = passenger_sample.groupby("GEO Region")
count_passenger = passengers_by_region['Passenger Count'].sum()
count_passenger

GEO Region
Asia                    2848866
Australia / Oceania      533035
Canada                  1152644
Central America          373331
Europe                  3081252
Mexico                  1865000
Middle East              664724
US                     48857688
Name: Passenger Count, dtype: int64

In [10]:
# Turn the per-region totals Series into a two-column frame
# ('GEO Region', 'Passenger Count') with a fresh 0..n-1 index for plotting.
first_data = (
    count_passenger
    .rename_axis('GEO Region')
    .reset_index(name='Passenger Count')
)
first_data

Unnamed: 0,GEO Region,Passenger Count
0,Asia,2848866
1,Australia / Oceania,533035
2,Canada,1152644
3,Central America,373331
4,Europe,3081252
5,Mexico,1865000
6,Middle East,664724
7,US,48857688


In [11]:
# Bar chart of total passengers per region; x labels are kept horizontal.
region_axis = alt.X("GEO Region:O", axis=alt.Axis(labelAngle=0))
passenger_axis = alt.Y("Passenger Count:Q")

first_plot = (
    alt.Chart(first_data)
    .mark_bar()
    .encode(x=region_axis, y=passenger_axis)
    .properties(width=600, height=400)
)
first_plot

In [12]:
# Export a copy of the bar chart whose width follows its container.
responsive_first_plot = first_plot.properties(width='container')
responsive_first_plot.save('FinalProject_first_plot.json')

In [13]:
# Total passenger count for every (region, activity period) pair; the result
# is a Series indexed by a two-level MultiIndex.
region_period_groups = passenger_sample.groupby(["GEO Region","Activity Period"])
sample_time = region_period_groups['Passenger Count'].sum()
sample_time

GEO Region  Activity Period
Asia        202001              539551
            202002              292718
            202003              127355
            202004               11212
            202005               15026
                                ...   
US          202202             1899004
            202203             2559920
            202204             2720760
            202205             2840779
            202206             3048863
Name: Passenger Count, Length: 232, dtype: int64

In [14]:
#sample_time.values

In [15]:
# Flatten the (region, period) MultiIndex of `sample_time` into three parallel
# lists. The index levels are read in one vectorized call each instead of
# indexing the MultiIndex row by row in a Python loop.
region = sample_time.index.get_level_values(0).tolist()   # level 0: GEO Region
period = sample_time.index.get_level_values(1).tolist()   # level 1: Activity Period
count = sample_time.tolist()                              # summed Passenger Count

In [16]:
# Assemble the flattened region / period / total lists into one tidy frame
# with a row per (region, period) pair.
time_columns = {
    'GEO Region': region,
    'Activity Period': period,
    'Passenger Count': count,
}
time_data = pd.DataFrame(time_columns)
time_data

Unnamed: 0,GEO Region,Activity Period,Passenger Count
0,Asia,202001,539551
1,Asia,202002,292718
2,Asia,202003,127355
3,Asia,202004,11212
4,Asia,202005,15026
...,...,...,...
227,US,202202,1899004
228,US,202203,2559920
229,US,202204,2720760
230,US,202205,2840779


In [17]:
# Line chart of passenger totals over time, one line per region.
second_plot = alt.Chart(time_data).mark_line().encode(
    # Activity Period is a YYYYMM code, so it is treated as ordered categories.
    alt.X("Activity Period:O", axis=alt.Axis(labelAngle=-45)),
    alt.Y("Passenger Count:Q"),
    # Regions have no inherent order: encode them as nominal so each line gets
    # a distinct categorical colour instead of a shade on a sequential ramp.
    alt.Color("GEO Region:N")
).properties(
    width=600,
    height=400
)
second_plot

In [18]:
# Export a copy of the line chart whose width follows its container.
responsive_second_plot = second_plot.properties(width='container')
responsive_second_plot.save('FinalProject_second_plot.json')