In [1]:
import pandas as pd
import numpy as np

In [2]:
# Load the raw flight records (one row per flight: date, origin, destination).
raw_flights_csv = '../../data/raw/date_origin_dest.csv'
busyness_by_week_month = pd.read_csv(raw_flights_csv)
busyness_by_week_month.head()

Unnamed: 0,fl_date,origin,dest
0,2019-07-12,MSP,LSE
1,2019-07-12,MSP,LSE
2,2019-07-12,DTW,LSE
3,2019-07-12,LSE,DTW
4,2019-07-12,DTW,FWA


In [3]:
# Split the flight date into weekday / day / month / year columns.
# The date strings are parsed a single time and the resulting index is reused
# for every derived column, instead of re-parsing the column once per field.
flight_dates = pd.DatetimeIndex(busyness_by_week_month['fl_date'])
busyness_by_week_month['Flight Weekday'] = flight_dates.weekday   # 0: Monday, 1: Tuesday, etc.
busyness_by_week_month['Flight Day'] = flight_dates.day
busyness_by_week_month['Flight Month'] = flight_dates.month
busyness_by_week_month['Flight Year'] = flight_dates.year

# The raw date column is redundant once its parts have been extracted.
# NOTE: the frame is reassigned without 'fl_date', so re-running this cell
# without reloading the CSV first raises a KeyError on 'fl_date'.
busyness_by_week_month = busyness_by_week_month.drop(labels = 'fl_date', axis = 1)
busyness_by_week_month.head()

Unnamed: 0,origin,dest,Flight Weekday,Flight Day,Flight Month,Flight Year
0,MSP,LSE,4,12,7,2019
1,MSP,LSE,4,12,7,2019
2,DTW,LSE,4,12,7,2019
3,LSE,DTW,4,12,7,2019
4,DTW,FWA,4,12,7,2019


## Create Busyness Scores for Departures

In [4]:
# Re-state the expected column names; the assignment fails with a length
# mismatch if the frame does not have exactly these six columns.
busyness_by_week_month.columns = ['origin', 'dest', 'Flight Weekday', 'Flight Day', 'Flight Month', 'Flight Year']

# Departure view of the data: everything except the destination airport.
# An explicit copy is taken so that columns added to `departures` later are
# written to an independent frame rather than to a slice of the parent
# (which triggers pandas' SettingWithCopyWarning).
departures = busyness_by_week_month[['origin', 'Flight Weekday', 'Flight Day', 'Flight Month', 'Flight Year']].copy()
departures.head()

Unnamed: 0,origin,Flight Weekday,Flight Day,Flight Month,Flight Year
0,MSP,4,12,7,2019
1,MSP,4,12,7,2019
2,DTW,4,12,7,2019
3,LSE,4,12,7,2019
4,DTW,4,12,7,2019


In [5]:
# Every row is one flight, so tag each with a count of 1 to be summed later.
# `assign` returns a new frame, which avoids writing into what may be a slice
# of the source frame (the cause of pandas' SettingWithCopyWarning).
departures = departures.assign(num_flights=1)
departures.head(2)

Unnamed: 0,origin,Flight Weekday,Flight Day,Flight Month,Flight Year,num_flights
0,MSP,4,12,7,2019,1
1,MSP,4,12,7,2019,1


In [6]:
# Average number of departures per day for each origin airport.

# The total is spread over two 365-day years.
# NOTE(review): assumes the data covers exactly two full non-leap years
# (the previews show 2018 and 2019 dates) - confirm against the raw file.
days_in_sample = 365 * 2

# Total departures per origin airport across the whole sample.
departure_totals = pd.pivot_table(
    data=departures,
    index='origin',
    values='num_flights',
    aggfunc='sum',
).reset_index()
departure_totals['num_flights'] = departure_totals['num_flights'] / days_in_sample

# Give the columns their presentation names.
avg_dep_per_day = departure_totals.rename(
    columns={
        'origin': "Origin Airport (IATA Code)",
        'num_flights': "Average Flights Per Day",
    }
)

avg_dep_per_day.head()

Unnamed: 0,Origin Airport (IATA Code),Average Flights Per Day
0,ABE,16.475342
1,ABI,5.878082
2,ABQ,74.469863
3,ABR,2.043836
4,ABY,2.767123


In [7]:
# Total departures for every airport on every calendar date.
dep_date_keys = ['origin', 'Flight Year', 'Flight Month', 'Flight Day', 'Flight Weekday']
dep_busyness_scores_by_date = (
    pd.pivot_table(data=departures, index=dep_date_keys, values='num_flights', aggfunc='sum')
    .reset_index()
    .rename(columns={'num_flights': "Total Daily Departures"})
)

# Merge key for later joins, shaped as 'D-<origin>-<month>-<weekday>'.
dep_month_text = dep_busyness_scores_by_date['Flight Month'].astype(str)
dep_weekday_text = dep_busyness_scores_by_date['Flight Weekday'].astype(str)
dep_busyness_scores_by_date['departure_busyness_id'] = (
    'D-' + dep_busyness_scores_by_date['origin'] + '-' + dep_month_text + '-' + dep_weekday_text
)

dep_busyness_scores_by_date.head()


Unnamed: 0,origin,Flight Year,Flight Month,Flight Day,Flight Weekday,Total Daily Departures,departure_busyness_id
0,ABE,2018,1,1,0,10,D-ABE-1-0
1,ABE,2018,1,2,1,12,D-ABE-1-1
2,ABE,2018,1,3,2,13,D-ABE-1-2
3,ABE,2018,1,4,3,12,D-ABE-1-3
4,ABE,2018,1,5,4,12,D-ABE-1-4


In [8]:
# Mean of the daily departure totals for each airport / busyness-id pair.
# pivot_table averages by default; the aggregation is spelled out here.
avg_dep_busyness = (
    pd.pivot_table(
        data=dep_busyness_scores_by_date,
        index=['origin', 'departure_busyness_id'],
        values='Total Daily Departures',
        aggfunc='mean',
    )
    .reset_index()
    .rename(
        columns={
            'origin': 'Origin Airport (IATA Code)',
            'departure_busyness_id': 'Departure Busyness ID',
            'Total Daily Departures': 'Avg Daily Departures (Month and Weekday)',
        }
    )
)
avg_dep_busyness.head(3)

Unnamed: 0,Origin Airport (IATA Code),Departure Busyness ID,Avg Daily Departures (Month and Weekday)
0,ABE,D-ABE-1-0,13.888889
1,ABE,D-ABE-1-1,11.8
2,ABE,D-ABE-1-2,12.9


In [9]:
# Attach each airport's overall daily average to its month/weekday averages.
departure_busyness_scores = avg_dep_busyness.merge(
    avg_dep_per_day,
    on='Origin Airport (IATA Code)',
    how='left',
)

# Score = month/weekday average divided by the airport's overall daily average.
dep_bucket_avg = departure_busyness_scores['Avg Daily Departures (Month and Weekday)']
dep_overall_avg = departure_busyness_scores['Average Flights Per Day']
departure_busyness_scores['Departure Busyness Score'] = dep_bucket_avg / dep_overall_avg

# Only the id and its score are kept for the output table.
departure_busyness_scores = departure_busyness_scores.drop(
    columns=['Origin Airport (IATA Code)', 'Avg Daily Departures (Month and Weekday)', 'Average Flights Per Day']
)
departure_busyness_scores.head()

Unnamed: 0,Departure Busyness ID,Departure Busyness Score
0,D-ABE-1-0,0.843011
1,D-ABE-1-1,0.716222
2,D-ABE-1-2,0.782988
3,D-ABE-1-3,0.856499
4,D-ABE-1-4,0.857342


In [10]:
# Longer preview of the departure scores (first 30 rows).
departure_busyness_scores.head(n=30)

Unnamed: 0,Departure Busyness ID,Departure Busyness Score
0,D-ABE-1-0,0.843011
1,D-ABE-1-1,0.716222
2,D-ABE-1-2,0.782988
3,D-ABE-1-3,0.856499
4,D-ABE-1-4,0.857342
5,D-ABE-1-5,0.644903
6,D-ABE-1-6,0.781471
7,D-ABE-10-0,1.112774
8,D-ABE-10-1,0.965079
9,D-ABE-10-2,1.080402


In [11]:
# Persist the departure busyness scores for use by later notebooks.
departure_scores_csv = '../../data/processed/departure_busyness_scores.csv'
departure_busyness_scores.to_csv(departure_scores_csv)

## Create Busyness Scores for Arrivals

In [12]:
# Arrival view of the data: everything except the origin airport.
# An explicit copy is taken so that columns added to `arrivals` later are
# written to an independent frame rather than to a slice of the parent
# (which triggers pandas' SettingWithCopyWarning).
arrivals = busyness_by_week_month[['dest', 'Flight Weekday', 'Flight Day', 'Flight Month', 'Flight Year']].copy()
arrivals.head()

Unnamed: 0,dest,Flight Weekday,Flight Day,Flight Month,Flight Year
0,LSE,4,12,7,2019
1,LSE,4,12,7,2019
2,LSE,4,12,7,2019
3,DTW,4,12,7,2019
4,FWA,4,12,7,2019


In [13]:
# Every row is one flight, so tag each with a count of 1 to be summed later.
# `assign` returns a new frame, which avoids writing into what may be a slice
# of the source frame (the cause of pandas' SettingWithCopyWarning).
arrivals = arrivals.assign(num_flights=1)
arrivals.head(2)

Unnamed: 0,dest,Flight Weekday,Flight Day,Flight Month,Flight Year,num_flights
0,LSE,4,12,7,2019,1
1,LSE,4,12,7,2019,1


In [14]:
# Average number of arrivals per day for each destination airport.

# The total is spread over two 365-day years.
# NOTE(review): assumes the data covers exactly two full non-leap years
# (the previews show 2018 and 2019 dates) - confirm against the raw file.
days_in_sample = 365 * 2

# Total arrivals per destination airport across the whole sample.
arrival_totals = pd.pivot_table(
    data=arrivals,
    index='dest',
    values='num_flights',
    aggfunc='sum',
).reset_index()
arrival_totals['num_flights'] = arrival_totals['num_flights'] / days_in_sample

# Give the columns their presentation names.
avg_arr_per_day = arrival_totals.rename(
    columns={
        'dest': "Destination Airport (IATA Code)",
        'num_flights': "Average Flights Arriving Per Day",
    }
)

avg_arr_per_day.head()

Unnamed: 0,Destination Airport (IATA Code),Average Flights Arriving Per Day
0,ABE,16.469863
1,ABI,5.878082
2,ABQ,74.479452
3,ABR,2.046575
4,ABY,2.767123


In [15]:
# Total arrivals for every airport on every calendar date.
arr_date_keys = ['dest', 'Flight Year', 'Flight Month', 'Flight Day', 'Flight Weekday']
arr_busyness_scores_by_date = (
    pd.pivot_table(data=arrivals, index=arr_date_keys, values='num_flights', aggfunc='sum')
    .reset_index()
    .rename(columns={'num_flights': "Total Daily Arrivals"})
)

# Merge key for later joins, shaped as 'A-<dest>-<month>-<weekday>'.
arr_month_text = arr_busyness_scores_by_date['Flight Month'].astype(str)
arr_weekday_text = arr_busyness_scores_by_date['Flight Weekday'].astype(str)
arr_busyness_scores_by_date['arrivals_busyness_id'] = (
    'A-' + arr_busyness_scores_by_date['dest'] + '-' + arr_month_text + '-' + arr_weekday_text
)

arr_busyness_scores_by_date.head()


Unnamed: 0,dest,Flight Year,Flight Month,Flight Day,Flight Weekday,Total Daily Arrivals,arrivals_busyness_id
0,ABE,2018,1,1,0,10,A-ABE-1-0
1,ABE,2018,1,2,1,12,A-ABE-1-1
2,ABE,2018,1,3,2,13,A-ABE-1-2
3,ABE,2018,1,4,3,12,A-ABE-1-3
4,ABE,2018,1,5,4,11,A-ABE-1-4


In [16]:
# Mean of the daily arrival totals for each airport / busyness-id pair.
# pivot_table averages by default; the aggregation is spelled out here.
avg_arr_busyness = (
    pd.pivot_table(
        data=arr_busyness_scores_by_date,
        index=['dest', 'arrivals_busyness_id'],
        values='Total Daily Arrivals',
        aggfunc='mean',
    )
    .reset_index()
    .rename(
        columns={
            'dest': 'Destination Airport (IATA Code)',
            'arrivals_busyness_id': 'Arrivals Busyness ID',
            'Total Daily Arrivals': 'Avg Daily Arrivals (Month and Weekday)',
        }
    )
)
avg_arr_busyness.head(3)

Unnamed: 0,Destination Airport (IATA Code),Arrivals Busyness ID,Avg Daily Arrivals (Month and Weekday)
0,ABE,A-ABE-1-0,13.777778
1,ABE,A-ABE-1-1,12.0
2,ABE,A-ABE-1-2,13.0


In [17]:
# Attach each airport's overall daily average to its month/weekday averages.
arrival_busyness_scores = avg_arr_busyness.merge(
    avg_arr_per_day,
    on='Destination Airport (IATA Code)',
    how='left',
)

# Score = month/weekday average divided by the airport's overall daily average.
arr_bucket_avg = arrival_busyness_scores['Avg Daily Arrivals (Month and Weekday)']
arr_overall_avg = arrival_busyness_scores['Average Flights Arriving Per Day']
arrival_busyness_scores['Arrivals Busyness Score'] = arr_bucket_avg / arr_overall_avg

# Only the id and its score are kept for the output table.
arrival_busyness_scores = arrival_busyness_scores.drop(
    columns=['Destination Airport (IATA Code)', 'Avg Daily Arrivals (Month and Weekday)', 'Average Flights Arriving Per Day']
)
arrival_busyness_scores.head()

Unnamed: 0,Arrivals Busyness ID,Arrivals Busyness Score
0,A-ABE-1-0,0.836545
1,A-ABE-1-1,0.728604
2,A-ABE-1-2,0.78932
3,A-ABE-1-3,0.86353
4,A-ABE-1-4,0.850037


In [18]:
# Longer preview of the arrival scores (first 30 rows).
arrival_busyness_scores.head(n=30)

Unnamed: 0,Arrivals Busyness ID,Arrivals Busyness Score
0,A-ABE-1-0,0.836545
1,A-ABE-1-1,0.728604
2,A-ABE-1-2,0.78932
3,A-ABE-1-3,0.86353
4,A-ABE-1-4,0.850037
5,A-ABE-1-5,0.652707
6,A-ABE-1-6,0.774141
7,A-ABE-10-0,1.113144
8,A-ABE-10-1,0.959328
9,A-ABE-10-2,1.086834


In [19]:
# Persist the arrival busyness scores for use by later notebooks.
arrival_scores_csv = '../../data/processed/arrival_busyness_scores.csv'
arrival_busyness_scores.to_csv(arrival_scores_csv)

## Commented Out Cells

In [20]:
# avg_dep_busyness = pd.pivot_table(data = dep_busyness_scores_by_date, index = 'departure_busyness_id', values = 'Total Daily Departures')
# avg_dep_busyness = dep_busyness_scores_by_date.reset_index()
# avg_dep_busyness.head(20)

In [21]:
# avg_dep_busyness.columns = ['departure_busyness_id','Average Daily Departures']
# avg_dep_busyness.head()

In [22]:
# dep_busyness_scores_by_date = dep_busyness_scores_by_date.drop(labels=['Flight Month', 'Flight Day', 'Flight Weekday'], axis=1)
# dep_busyness_scores_by_date.head()

In [23]:
# dep_busyness_scores_by_date = dep_busyness_scores_by_date.drop(labels='origin', axis=1)
# dep_busyness_scores_by_date.head()

In [24]:
# dep_busyness_scores_by_date = pd.merge(dep_busyness_scores_by_date, avg_dep_busyness, on = 'departure_busyness_id', how = 'left')

In [25]:
# dep_busyness_scores_by_date.head()

In [26]:
# dep_busyness_scores_by_date.columns

In [27]:
# #rearranage columns
# dep_busyness_scores_by_date = dep_busyness_scores_by_date[['Flight Date', 'origin', 'departure_busyness_id', 'Total Daily Departures', 'Average Daily Departures']]
# dep_busyness_scores_by_date.head()

In [28]:
# dep_busyness_scores_by_date['dep_busyness_score'] = dep_busyness_scores_by_date['Total Daily Departures'] / dep_busyness_scores_by_date['Average Daily Departures']
# dep_busyness_scores_by_date.head()

In [29]:
# dep_busyness_scores_by_date = dep_busyness_scores_by_date.drop(labels = ['Total Daily Departures', 'Average Daily Departures', 'departure_busyness_id'], axis=1)

In [30]:
# dep_busyness_scores_by_date.head()

In [31]:
# dep_busyness_scores_by_date.columns = ['Flight Date', 'Origin Airport (IATA Code)', 'Departure Busyness Score']
# dep_busyness_scores_by_date.head()

ValueError: Length mismatch: Expected axis has 7 elements, new values have 3 elements

In [None]:
# Disabled: this wrote the per-date table `dep_busyness_scores_by_date` to the
# same path that the departure busyness scores are saved to earlier in this
# notebook, so a full top-to-bottom run would overwrite that file with a
# different table. Kept commented out like the other cells in this section.
# dep_busyness_scores_by_date.to_csv('../../data/processed/departure_busyness_scores.csv')

In [None]:
# departures_by_day = pd.pivot_table(data = departures, index = ["origin", "Flight Year", "Flight Month", "Flight Day", "Flight Weekday"], values = 'num_flights', aggfunc='count')
# departures_by_day = departures_by_day.reset_index()
# departures_by_day

In [None]:
# departures_by_day.sort_values("num_flights", ascending=False)

In [None]:
# departures_by_day.info()

In [None]:
# avg_departures_by_day = pd.pivot_table(data = departures_by_day, index = ["origin", "Flight Month", "Flight Weekday"], values = "num_flights")
# avg_departures_by_day = avg_departures_by_day.reset_index()
# avg_departures_by_day.sort_values("num_flights", ascending=False)

In [None]:
# avg_departures_by_day['departure_busyness_id'] = avg_departures_by_day['origin'] + '-' + avg_departures_by_day['Flight Month'].astype(str)+ '-' + avg_departures_by_day['Flight Weekday'].astype(str)
# avg_departures_by_day.head()

In [None]:
# arrivals.head()

In [None]:
# arrivals['num_flights'] = 1
# arrivals.head()