In [3]:
# Loading in necessary packages
import numpy as np
import pandas as pd
import plotly.graph_objects as go
import plotly.offline as pyo
import plotly.express as px

# Switch plotly's offline module into notebook mode so figures render inline.
# NOTE(review): connected=True is understood to load plotly.js from a CDN instead of
# embedding it in the notebook, which would need internet access — confirm in the plotly docs.
pyo.init_notebook_mode(connected=True)

In [4]:
# Relative path of the airport-traffic dataset
DATA_FILE = "covid_impact_on_airport_traffic.csv"

# Load the whole CSV into a DataFrame
df = pd.read_csv(DATA_FILE)

# Preview the first rows to get a feel for the columns
df.head()

Unnamed: 0,AggregationMethod,Date,Version,AirportName,PercentOfBaseline,Centroid,City,State,ISO_3166_2,Country,Geography
0,Daily,2020-04-03,1.0,Kingsford Smith,64,POINT(151.180087713813 -33.9459774986125),Sydney,New South Wales,AU,Australia,"POLYGON((151.164354085922 -33.9301772341877, 1..."
1,Daily,2020-04-13,1.0,Kingsford Smith,29,POINT(151.180087713813 -33.9459774986125),Sydney,New South Wales,AU,Australia,"POLYGON((151.164354085922 -33.9301772341877, 1..."
2,Daily,2020-07-10,1.0,Kingsford Smith,54,POINT(151.180087713813 -33.9459774986125),Sydney,New South Wales,AU,Australia,"POLYGON((151.164354085922 -33.9301772341877, 1..."
3,Daily,2020-09-02,1.0,Kingsford Smith,18,POINT(151.180087713813 -33.9459774986125),Sydney,New South Wales,AU,Australia,"POLYGON((151.164354085922 -33.9301772341877, 1..."
4,Daily,2020-10-31,1.0,Kingsford Smith,22,POINT(151.180087713813 -33.9459774986125),Sydney,New South Wales,AU,Australia,"POLYGON((151.164354085922 -33.9301772341877, 1..."


In [5]:
# Print each column's non-null count and dtype to check for missing values
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 7247 entries, 0 to 7246
Data columns (total 11 columns):
 #   Column             Non-Null Count  Dtype  
---  ------             --------------  -----  
 0   AggregationMethod  7247 non-null   object 
 1   Date               7247 non-null   object 
 2   Version            7247 non-null   float64
 3   AirportName        7247 non-null   object 
 4   PercentOfBaseline  7247 non-null   int64  
 5   Centroid           7247 non-null   object 
 6   City               7247 non-null   object 
 7   State              7247 non-null   object 
 8   ISO_3166_2         7247 non-null   object 
 9   Country            7247 non-null   object 
 10  Geography          7247 non-null   object 
dtypes: float64(1), int64(1), object(9)
memory usage: 622.9+ KB


In [6]:
# Summary statistics (count, mean, std, min, quartiles, max) for the numeric columns
df.describe()

Unnamed: 0,Version,PercentOfBaseline
count,7247.0,7247.0
mean,1.0,66.651442
std,0.0,22.134433
min,1.0,0.0
25%,1.0,53.0
50%,1.0,67.0
75%,1.0,84.0
max,1.0,100.0


In [5]:
# Count rows that are exact copies of an earlier row, an easy-to-miss data quality problem
duplicate_total = df.duplicated().sum()
print(f"Number of duplicates found: {duplicate_total}")

Number of duplicates found: 0


In [7]:
# Normalise the column names (trimmed, lower-case), drop the dataset-version
# column and parse the date strings into datetimes for easier plotting.
# The result is rebound to ``df`` rather than mutated in place, and
# errors="ignore" keeps the cell re-runnable: on a second run "version" is
# already gone and a strict drop would raise KeyError.
df = (
    df.rename(columns=lambda name: name.strip().lower())
      .drop(columns=["version"], errors="ignore")
      .assign(date=lambda frame: pd.to_datetime(frame["date"]))
)
df.head()

Unnamed: 0,aggregationmethod,date,airportname,percentofbaseline,centroid,city,state,iso_3166_2,country,geography
0,Daily,2020-04-03,Kingsford Smith,64,POINT(151.180087713813 -33.9459774986125),Sydney,New South Wales,AU,Australia,"POLYGON((151.164354085922 -33.9301772341877, 1..."
1,Daily,2020-04-13,Kingsford Smith,29,POINT(151.180087713813 -33.9459774986125),Sydney,New South Wales,AU,Australia,"POLYGON((151.164354085922 -33.9301772341877, 1..."
2,Daily,2020-07-10,Kingsford Smith,54,POINT(151.180087713813 -33.9459774986125),Sydney,New South Wales,AU,Australia,"POLYGON((151.164354085922 -33.9301772341877, 1..."
3,Daily,2020-09-02,Kingsford Smith,18,POINT(151.180087713813 -33.9459774986125),Sydney,New South Wales,AU,Australia,"POLYGON((151.164354085922 -33.9301772341877, 1..."
4,Daily,2020-10-31,Kingsford Smith,22,POINT(151.180087713813 -33.9459774986125),Sydney,New South Wales,AU,Australia,"POLYGON((151.164354085922 -33.9301772341877, 1..."


In [8]:
# Lookup table from weekday number to weekday name.
# The numbering follows datetime's weekday() convention: Monday is 0, Sunday is 6.
days = dict(enumerate([
    "Monday",
    "Tuesday",
    "Wednesday",
    "Thursday",
    "Friday",
    "Saturday",
    "Sunday",
]))

In [9]:
# Count how many records fall on each weekday. No column is added to df: the
# result is a Series indexed by weekday name (English names, the default of
# day_name()). The vectorised .dt accessor is used instead of a per-row lambda.
weekday_records = df.date.dt.day_name().value_counts()
print(weekday_records)

# Number of records per calendar month, again as a value-count Series
monthly_records = df.date.dt.month.value_counts()

Wednesday    1064
Tuesday      1064
Monday       1040
Thursday     1036
Friday       1034
Saturday     1015
Sunday        994
Name: date, dtype: int64


In [19]:

import plotly.graph_objects as go

# weekday_records is expected to be the per-weekday value-count Series built earlier

# Fill colour applied to every bar
bar_color = 'violet'

# One bar per weekday, with height equal to that weekday's record count
weekday_trace = go.Bar(
    x=weekday_records.index,
    y=weekday_records.values,
    marker_color=bar_color,
)
daily_bar = go.Figure(data=[weekday_trace])

# Zoom the y-axis onto the band the counts occupy (80 below the smallest,
# 20 above the largest) so the differences between weekdays are visible
counts = weekday_records.values
y_low, y_high = min(counts) - 80, max(counts) + 20
daily_bar.update_layout(
    title_text="Total Records by Weekday",
    yaxis_range=[y_low, y_high],
)
daily_bar.update_yaxes(title_text="Total Number of Records")

# Render the figure
daily_bar.show()


In [20]:

import plotly.express as px

# monthly_records is expected to be the per-month value-count Series built earlier

# Fill colour applied to every bar
bar_color = 'violet'

# Axis captions, keyed by the px argument they describe
axis_labels = {"x": "Months", "y": "Total Number of Records"}

# One bar per month, with height equal to that month's record count
monthly_bar = px.bar(
    x=monthly_records.index,
    y=monthly_records.values,
    labels=axis_labels,
    title="Total Number of Daily records by Month",
    color_discrete_sequence=[bar_color],
)

# Render the figure
monthly_bar.show()


In [21]:
# Distribution of records by the percentage of pre-COVID baseline traffic they report
import plotly.express as px

# df is expected to be the cleaned DataFrame with lower-cased column names

# Fill colour applied to the histogram bars
bar_color = 'violet'

# Keyword settings for the histogram, collected in one place
hist_options = dict(
    x=df.percentofbaseline,
    labels={"percentofbaseline": "Percent of Baseline"},
    title="Total Records of Operation: Running at (%) of Their Pre-COVID Baseline",
    nbins=30,
    marginal="box",  # adds a box plot in the margin above the histogram
    color_discrete_sequence=[bar_color],
)
hist = px.histogram(df, **hist_options)

# Render the figure
hist.show()


In [22]:
import plotly.express as px

# df is expected to be the cleaned DataFrame with lower-cased column names

# Colours cycled through for the countries; named colours, RGB or HEX codes are all accepted
color_palette = ['red', 'blue', 'green', 'orange', 'purple', 'yellow']

# Human-readable captions for the columns shown on the axes, legend and hover box
display_names = {
    "percentofbaseline": "Percent of Baseline",
    "airportname": "Airport Name",
    "country": "Country",
    "city": "City",
}

# One point per record: baseline percentage against airport, coloured by country
airport_scatter = px.scatter(
    df,
    x='percentofbaseline',
    y='airportname',
    labels=display_names,
    color='country',
    hover_data=["country", "city"],
    title="Operation Record Distribution by Airport",
    color_discrete_sequence=color_palette,
)

# Render the figure
airport_scatter.show()


In [23]:
# Record counts grouped by country, by state and by city (one value-count Series each)
country_records, state_records, city_records = (
    df[column].value_counts() for column in ("country", "state", "city")
)

In [25]:
import plotly.express as px

# country_records is expected to be the per-country value-count Series built earlier

# Slice colours, used in order
colors = ['darkgreen', 'salmon', 'skyblue', 'gold', 'mediumorchid', 'tomato']

# Keyword settings for the pie chart: slice sizes are the counts, slice names the countries
pie_options = dict(
    values=country_records,
    names=country_records.index,
    title="Flight Records by Country",
    color_discrete_sequence=colors,
)
country_piechart = px.pie(country_records, **pie_options)

# Render the figure
country_piechart.show()



In [26]:
# Splits the "POINT(<longitude> <latitude>)" centroid strings into numeric coordinate columns
def centroid_parser(centroid):
    """Add float ``longitude`` and ``latitude`` columns to the global ``df``.

    Parameters
    ----------
    centroid : pandas.Series of str, or None
        Values shaped like ``"POINT(<longitude> <latitude>)"``. When ``None``
        is passed, ``df.centroid`` is used instead.

    Returns
    -------
    str
        A fixed completion message.

    Raises
    ------
    ValueError
        If a captured coordinate is not a valid number.

    Notes
    -----
    The coordinates are stored as floats with no surrounding whitespace.
    Rows that do not match the ``POINT(...)`` pattern become NaN.
    """
    source = df.centroid if centroid is None else centroid

    # Capture the two whitespace-separated tokens between the parentheses
    coords = source.str.extract(
        r"POINT\s*\(\s*(?P<longitude>\S+)\s+(?P<latitude>[^\s)]+)\s*\)"
    ).astype(float)

    # Assignment aligns on the index, so each row gets its own coordinates
    df["longitude"] = coords["longitude"]
    df["latitude"] = coords["latitude"]
    return "Centroid to coordinates translation complete!"
    
# Run the parser on the centroid column; it adds "longitude" and "latitude" columns to df
centroid_parser(df.centroid)

'Centroid to coordinates translation complete!'

In [27]:

import plotly.express as px

# One marker per record, positioned by the latitude/longitude columns of df
geo_options = dict(
    lat=df.latitude,
    lon=df.longitude,
    hover_data=["airportname", "city"],
    title="Airports Reporting Operating Activity During On-going COVID-19 Pandemic",
)
scatter_baseline = px.scatter_geo(df, **geo_options)

# Draw every marker as a red cross
marker_style = {"symbol": "cross", "color": "red"}
scatter_baseline.update_traces(marker=marker_style)

# Switch the map to the "eckert4" projection
scatter_baseline.update_geos(projection_type="eckert4")

# Render the figure
scatter_baseline.show()
