In [35]:
import pandas as pd
import numpy as np

In [36]:
# Load the raw flight records; the preview shows flight date, origin and destination columns.
raw_flights_csv = '../../data/raw/date_origin_dest.csv'
busyness_by_week_month = pd.read_csv(raw_flights_csv)
busyness_by_week_month.head()

Unnamed: 0,fl_date,origin,dest
0,2019-07-12,MSP,LSE
1,2019-07-12,MSP,LSE
2,2019-07-12,DTW,LSE
3,2019-07-12,LSE,DTW
4,2019-07-12,DTW,FWA


In [37]:
# Split the flight date into weekday / day / month / year columns.
# The date strings are parsed once and reused for all four parts, instead of
# building a separate DatetimeIndex (and re-parsing every row) per column.
flight_dates = pd.to_datetime(busyness_by_week_month['fl_date'])

# Build the result in one expression so the name is only rebound after every
# step has succeeded (no half-updated frame if one step raises).
busyness_by_week_month = (
    busyness_by_week_month
    .assign(**{
        'Flight Weekday': flight_dates.dt.weekday,   # 0: Monday, 1: Tuesday, etc.
        'Flight Day': flight_dates.dt.day,
        'Flight Month': flight_dates.dt.month,
        'Flight Year': flight_dates.dt.year,
    })
    # The raw date string is dropped once its parts have been extracted.
    .drop(columns='fl_date')
)
busyness_by_week_month.head()

Unnamed: 0,origin,dest,Flight Weekday,Flight Day,Flight Month,Flight Year
0,MSP,LSE,4,12,7,2019
1,MSP,LSE,4,12,7,2019
2,DTW,LSE,4,12,7,2019
3,LSE,DTW,4,12,7,2019
4,DTW,FWA,4,12,7,2019


## Create Busyness Scores for Departures

In [96]:
# Keep only the columns needed to count departures per airport and date.
# .copy() makes `departures` an independent frame, so adding a column to it
# later does not write into a slice of `busyness_by_week_month` (which is what
# triggers pandas' SettingWithCopyWarning).
# Columns are selected by name; no positional `.columns = [...]` relabelling is
# done here, since that only re-applies names the frame already carries and
# would silently mislabel data if the column order ever changed.
departure_columns = ['origin', 'Flight Weekday', 'Flight Day', 'Flight Month', 'Flight Year']
departures = busyness_by_week_month[departure_columns].copy()
departures.head()

Unnamed: 0,origin,Flight Weekday,Flight Day,Flight Month,Flight Year
0,MSP,4,12,7,2019
1,MSP,4,12,7,2019
2,DTW,4,12,7,2019
3,LSE,4,12,7,2019
4,DTW,4,12,7,2019


In [69]:
# Tag every row with a count of 1 so rows can be summed into daily totals.
# assign() returns a new frame instead of writing into what may be a slice of
# another DataFrame, which avoids pandas' SettingWithCopyWarning.
departures = departures.assign(num_flights=1)
departures.head(2)

Unnamed: 0,origin,Flight Weekday,Flight Day,Flight Month,Flight Year,num_flights
0,MSP,4,12,7,2019,1
1,MSP,4,12,7,2019,1


In [88]:
# Total departures per airport per calendar day: sum the per-flight marker
# within each (origin, year, month, day, weekday) group.
daily_departure_keys = ['origin', 'Flight Year', 'Flight Month', 'Flight Day', 'Flight Weekday']
dep_busyness_scores_by_date = (
    pd.pivot_table(data=departures, index=daily_departure_keys, values='num_flights', aggfunc='sum')
    .reset_index()
    # Only the aggregated column needs a new label; the key columns keep their names.
    .rename(columns={'num_flights': 'Total Daily Departures'})
)

# ID for a future merge, built as "D-<origin>-<month>-<weekday>".
dep_month_text = dep_busyness_scores_by_date['Flight Month'].astype(str)
dep_weekday_text = dep_busyness_scores_by_date['Flight Weekday'].astype(str)
dep_busyness_scores_by_date['departure_busyness_id'] = (
    'D-' + dep_busyness_scores_by_date['origin'] + '-' + dep_month_text + '-' + dep_weekday_text
)

dep_busyness_scores_by_date.head()


Unnamed: 0,origin,Flight Year,Flight Month,Flight Day,Flight Weekday,Total Daily Departures,departure_busyness_id
0,ABE,2018,1,1,0,10,D-ABE-1-0
1,ABE,2018,1,2,1,12,D-ABE-1-1
2,ABE,2018,1,3,2,13,D-ABE-1-2
3,ABE,2018,1,4,3,12,D-ABE-1-3
4,ABE,2018,1,5,4,12,D-ABE-1-4


In [109]:
# Average the daily departure totals within each departure busyness ID.
# pivot_table aggregates with the mean when no aggfunc is passed.
avg_dep_busyness = (
    pd.pivot_table(
        data=dep_busyness_scores_by_date,
        index='departure_busyness_id',
        values='Total Daily Departures',
    )
    .reset_index()
    # Give both columns their presentation names.
    .rename(columns={
        'departure_busyness_id': 'Departure Busyness ID',
        'Total Daily Departures': 'Avg Daily Departures (Month and Weekday)',
    })
)
avg_dep_busyness.head(3)

Unnamed: 0,Departure Busyness ID,Avg Daily Departures (Month and Weekday)
0,D-ABE-1-0,13.888889
1,D-ABE-1-1,11.8
2,D-ABE-1-2,12.9


In [111]:
# Save to CSV.
# to_csv() returns None when it is given a path, so its result must not be
# assigned back to avg_dep_busyness -- that would replace the DataFrame with
# None and break any later cell that still needs the averages.
avg_dep_busyness.to_csv('../../data/processed/departure_busyness_scores.csv')

## Create Busyness Scores for Arrivals

In [101]:
# Keep only the columns needed to count arrivals per airport and date.
# .copy() makes `arrivals` an independent frame, so adding a column to it
# later does not write into a slice of `busyness_by_week_month` (which is what
# triggers pandas' SettingWithCopyWarning).
arrival_columns = ['dest', 'Flight Weekday', 'Flight Day', 'Flight Month', 'Flight Year']
arrivals = busyness_by_week_month[arrival_columns].copy()
arrivals.head()

Unnamed: 0,dest,Flight Weekday,Flight Day,Flight Month,Flight Year
0,LSE,4,12,7,2019
1,LSE,4,12,7,2019
2,LSE,4,12,7,2019
3,DTW,4,12,7,2019
4,FWA,4,12,7,2019


In [102]:
# Tag every row with a count of 1 so rows can be summed into daily totals.
# assign() returns a new frame instead of writing into what may be a slice of
# another DataFrame, which avoids pandas' SettingWithCopyWarning.
arrivals = arrivals.assign(num_flights=1)
arrivals.head(2)

Unnamed: 0,dest,Flight Weekday,Flight Day,Flight Month,Flight Year,num_flights
0,LSE,4,12,7,2019,1
1,LSE,4,12,7,2019,1


In [107]:
# Total arrivals per airport per calendar day: sum the per-flight marker
# within each (dest, year, month, day, weekday) group.
daily_arrival_keys = ['dest', 'Flight Year', 'Flight Month', 'Flight Day', 'Flight Weekday']
arr_busyness_scores_by_date = (
    pd.pivot_table(data=arrivals, index=daily_arrival_keys, values='num_flights', aggfunc='sum')
    .reset_index()
    # Only the aggregated column needs a new label; the key columns keep their names.
    .rename(columns={'num_flights': 'Total Daily Arrivals'})
)

# ID for a future merge, built as "A-<dest>-<month>-<weekday>".
arr_month_text = arr_busyness_scores_by_date['Flight Month'].astype(str)
arr_weekday_text = arr_busyness_scores_by_date['Flight Weekday'].astype(str)
arr_busyness_scores_by_date['arrivals_busyness_id'] = (
    'A-' + arr_busyness_scores_by_date['dest'] + '-' + arr_month_text + '-' + arr_weekday_text
)

arr_busyness_scores_by_date.head()


Unnamed: 0,dest,Flight Year,Flight Month,Flight Day,Flight Weekday,Total Daily Arrivals,arrivals_busyness_id
0,ABE,2018,1,1,0,10,A-ABE-1-0
1,ABE,2018,1,2,1,12,A-ABE-1-1
2,ABE,2018,1,3,2,13,A-ABE-1-2
3,ABE,2018,1,4,3,12,A-ABE-1-3
4,ABE,2018,1,5,4,11,A-ABE-1-4


In [108]:
# Average the daily arrival totals within each arrivals busyness ID.
# pivot_table aggregates with the mean when no aggfunc is passed.
avg_arr_busyness = (
    pd.pivot_table(
        data=arr_busyness_scores_by_date,
        index='arrivals_busyness_id',
        values='Total Daily Arrivals',
    )
    .reset_index()
    # Give both columns their presentation names.
    .rename(columns={
        'arrivals_busyness_id': 'Arrivals Busyness ID',
        'Total Daily Arrivals': 'Avg Daily Arrivals (Month and Weekday)',
    })
)
avg_arr_busyness.head(3)

Unnamed: 0,Arrivals Busyness ID,Avg Daily Arrivals (Month and Weekday)
0,A-ABE-1-0,13.777778
1,A-ABE-1-1,12.0
2,A-ABE-1-2,13.0


In [112]:
# Save to CSV.
# to_csv() returns None when it is given a path, so its result must not be
# assigned back to avg_arr_busyness -- that would replace the DataFrame with
# None and break any later cell that still needs the averages.
avg_arr_busyness.to_csv('../../data/processed/arrival_busyness_scores.csv')

## Commented Out Cells

In [84]:
# avg_dep_busyness = pd.pivot_table(data = dep_busyness_scores_by_date, index = 'departure_busyness_id', values = 'Total Daily Departures')
# avg_dep_busyness = dep_busyness_scores_by_date.reset_index()
# avg_dep_busyness.head(20)

In [None]:
# avg_dep_busyness.columns = ['departure_busyness_id','Average Daily Departures']
# avg_dep_busyness.head()

In [None]:
# dep_busyness_scores_by_date = dep_busyness_scores_by_date.drop(labels=['Flight Month', 'Flight Day', 'Flight Weekday'], axis=1)
# dep_busyness_scores_by_date.head()

In [17]:
# dep_busyness_scores_by_date = dep_busyness_scores_by_date.drop(labels='origin', axis=1)
# dep_busyness_scores_by_date.head()

In [18]:
# dep_busyness_scores_by_date = pd.merge(dep_busyness_scores_by_date, avg_dep_busyness, on = 'departure_busyness_id', how = 'left')

In [53]:
# dep_busyness_scores_by_date.head()

In [54]:
# dep_busyness_scores_by_date.columns

In [55]:
# #rearranage columns
# dep_busyness_scores_by_date = dep_busyness_scores_by_date[['Flight Date', 'origin', 'departure_busyness_id', 'Total Daily Departures', 'Average Daily Departures']]
# dep_busyness_scores_by_date.head()

In [56]:
# dep_busyness_scores_by_date['dep_busyness_score'] = dep_busyness_scores_by_date['Total Daily Departures'] / dep_busyness_scores_by_date['Average Daily Departures']
# dep_busyness_scores_by_date.head()

In [57]:
# dep_busyness_scores_by_date = dep_busyness_scores_by_date.drop(labels = ['Total Daily Departures', 'Average Daily Departures', 'departure_busyness_id'], axis=1)

In [58]:
# dep_busyness_scores_by_date.head()

In [None]:
# NOTE: disabled, like the other cells in this section. The cells that reduced
# dep_busyness_scores_by_date to three columns are commented out, so assigning
# three labels to the frame as built in the departures section (seven columns)
# would raise a length-mismatch ValueError and stop a top-to-bottom run.
# dep_busyness_scores_by_date.columns = ['Flight Date', 'Origin Airport (IATA Code)', 'Departure Busyness Score']
# dep_busyness_scores_by_date.head()

In [33]:
# NOTE: disabled, like the other cells in this section. This targets the same
# path as the averaged departure scores saved in the departures section and
# would overwrite that file with the per-date frame instead.
# dep_busyness_scores_by_date.to_csv('../../data/processed/departure_busyness_scores.csv')

In [25]:
# departures_by_day = pd.pivot_table(data = departures, index = ["origin", "Flight Year", "Flight Month", "Flight Day", "Flight Weekday"], values = 'num_flights', aggfunc='count')
# departures_by_day = departures_by_day.reset_index()
# departures_by_day

In [26]:
# departures_by_day.sort_values("num_flights", ascending=False)

In [27]:
# departures_by_day.info()

In [28]:
# avg_departures_by_day = pd.pivot_table(data = departures_by_day, index = ["origin", "Flight Month", "Flight Weekday"], values = "num_flights")
# avg_departures_by_day = avg_departures_by_day.reset_index()
# avg_departures_by_day.sort_values("num_flights", ascending=False)

In [29]:
# avg_departures_by_day['departure_busyness_id'] = avg_departures_by_day['origin'] + '-' + avg_departures_by_day['Flight Month'].astype(str)+ '-' + avg_departures_by_day['Flight Weekday'].astype(str)
# avg_departures_by_day.head()

In [30]:
# arrivals.head()

In [31]:
# arrivals['num_flights'] = 1
# arrivals.head()