In [2]:
import pandas as pd

In [7]:
def add_taxi_Ndays_rolling(df, days):
    """
    Add per-airport rolling-mean taxi time columns to a flights DataFrame.

    For each origin airport the mean 'taxi_out' per 'fl_date' is computed and a
    rolling mean of those daily values is taken; the same is done with 'taxi_in'
    per destination airport. Both results are left-joined back onto every flight
    row as '<days>d taxi_out' and '<days>d taxi_in'.

    Args:
        df - flights DataFrame with the columns 'fl_date', 'origin_airport_id',
             'dest_airport_id', 'taxi_out' and 'taxi_in'. It is not modified.
        days - window handed to pandas ``rolling``. With an integer the window
               is a number of daily rows of the airport (dates on which the
               airport appears in df), not calendar days, and pandas returns
               NaN until ``days`` non-NaN daily values are available.
    Output:
        New DataFrame sorted by 'fl_date', carrying the fresh index produced by
        ``merge``, with the two rolling columns added. Rolling columns of the
        same name already present in df (e.g. from re-running a notebook cell)
        are recomputed and replaced instead of being duplicated with
        '_x'/'_y' suffixes.
    """
    # (airport id column, taxi time column) pairs to process, origin first.
    targets = (
        ('origin_airport_id', 'taxi_out'),
        ('dest_airport_id', 'taxi_in'),
    )

    # Chronological order; this is also the row order of the returned frame.
    df = df.sort_values(['fl_date'])

    for airport_col, taxi_col in targets:
        rolling_col = str(days) + 'd ' + taxi_col

        # Keep the function idempotent: a stale column from an earlier call would
        # otherwise collide in the merge below and come back as '<name>_x'/'<name>_y'.
        if rolling_col in df.columns:
            df = df.drop(columns=rolling_col)

        # Average taxi time per airport per day (groupby sorts by airport, then date).
        daily_mean = (
            df[[airport_col, 'fl_date', taxi_col]]
            .groupby([airport_col, 'fl_date'])
            .mean()
            .reset_index()
        )

        # Rolling mean of the daily averages within each airport, renamed so it
        # does not clash with the raw taxi column when merged back.
        rolling_mean = (
            daily_mean
            .groupby([airport_col])
            .rolling(days, on='fl_date')
            .agg({taxi_col: 'mean'})
            .reset_index()
            .rename(columns={taxi_col: rolling_col})
        )

        # Broadcast the per-airport/per-day value to every flight of that day.
        df = df.merge(rolling_mean, on=[airport_col, 'fl_date'], how='left')
    return df

In [5]:
# For the purpose of this analysis we use the 2019 flights of the UA carrier.
# The first CSV column is parsed as dates.
df = pd.read_csv(
    'UA flights 2019.csv',
    parse_dates=[0],
    low_memory=False,
)


In [8]:
# Add the per-airport rolling taxi_out / taxi_in means with a window of 30.
df = add_taxi_Ndays_rolling(df, days=30)

In [9]:
# Show the resulting frame; '30d taxi_out' and '30d taxi_in' are its last two columns.
df

Unnamed: 0,fl_date,mkt_carrier,mkt_carrier_fl_num,tail_num,op_carrier_fl_num,origin_airport_id,origin,origin_city_name,dest_airport_id,dest,...,weather_delay,nas_delay,security_delay,late_aircraft_delay,first_dep_time,total_add_gtime,longest_add_gtime,no_name,30d taxi_out,30d taxi_in
0,2019-01-01,UA,5611,N947SW,5611,14689,SBA,"Santa Barbara, CA",12892,LAX,...,,,,,,,,,,
1,2019-01-01,UA,4902,N11181,4902,11618,EWR,"Newark, NJ",13485,MSN,...,,,,,,,,,,
2,2019-01-01,UA,4901,N14179,4901,12264,IAD,"Washington, DC",10990,CHO,...,,,,,,,,,,
3,2019-01-01,UA,4900,N16183,4900,11618,EWR,"Newark, NJ",12264,IAD,...,,,,,,,,,,
4,2019-01-01,UA,4880,N16147,4880,11618,EWR,"Newark, NJ",11066,CMH,...,,,,,,,,,,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
1571399,2019-12-31,UA,3534,N861RW,3534,12339,IND,"Indianapolis, IN",12266,IAH,...,,,,,,,,,18.367877,8.386432
1571400,2019-12-31,UA,3535,N862RW,3535,13871,OMA,"Omaha, NE",13930,ORD,...,,,,,,,,,20.426397,11.688253
1571401,2019-12-31,UA,3536,N732YX,3536,11278,DCA,"Washington, DC",11618,EWR,...,,,,,,,,,20.009218,13.432760
1571402,2019-12-31,UA,3523,N650RW,3523,11618,EWR,"Newark, NJ",13198,MCI,...,,,,,,,,,28.253805,5.892857
