In [17]:
import plotly.express as px
import numpy as np
import pandas as pd
basepath = 'your_path_here'

In [6]:
ontime = pd.read_parquet(basepath + 'Data/OnTime/OnTimeData.snappy.parquet')
ontime['Greater Than 15'] = np.where(ontime['DepDel15']== 1, "Yes", "No")
ontime_delay = ontime[(ontime['DepDelayMinutes'] > 0) & (ontime['DepDel15'].isin([0,1]))]
ontime_del15 = ontime[(ontime['DepDelayMinutes'] > 0) & (ontime['DepDel15'].isin([1]))]

In [15]:
Flights_By_Year = ontime.groupby(['Year']).agg({'Flights': "count"})
Flights_By_Year.reset_index(inplace=True)
Delay_By_Year_df = ontime_delay.groupby(['Year','Greater Than 15']).agg({"DepDelayMinutes": "count"}).rename(columns={"DepDelayMinutes":'Count'})
Delay_By_Year_df.reset_index(inplace=True)

In [14]:
Flights_By_Year = ontime.groupby(['Year']).agg({'Flights': "count"})
Flights_By_Year.reset_index(inplace=True)
Delay_By_Year_df = ontime_delay.groupby(['Year', 'Greater Than 15']).agg({"DepDelayMinutes": "count"}).rename(columns={"DepDelayMinutes":'Count'})
Delay_By_Year_df.reset_index(inplace=True)

#inner works because we know they all have matches anyway
Delay_By_Year_Pct1 = Delay_By_Year_df.merge(Flights_By_Year, how = 'inner', on= 'Year', sort= True)
Delay_By_Year_Pct1['Percent of Delayed Flights'] = Delay_By_Year_Pct1['Count'] / Delay_By_Year_Pct1['Flights']

#'Delays - 15 Minutes or More Vs Less than 15 - ALL Flights',
fig = px.bar(data_frame= Delay_By_Year_Pct1, y= 'Year', x= 'Percent of Delayed Flights', color = 'Greater Than 15', orientation = 'h',
                      height = 600, width = 600)
fig.update_xaxes(showgrid=False)
fig.write_html(basepath + "Code/VisualWork/Delay_15Flag.html")
fig.write_image(basepath + "/Code/VisualWork/Delay_15Flag.png")
fig


## Now just DFW

In [16]:
ontime_dfw = ontime[ontime['Origin'] == 'DFW']
Flights_By_Year = ontime_dfw.groupby(['Year']).agg({'Flights': "count"})
Flights_By_Year.reset_index(inplace=True)

ontime_delay_dfw = ontime_delay[ontime_delay['Origin'] == 'DFW']
Delay_By_Year_df = ontime_delay_dfw.groupby(['Year','Greater Than 15']).agg({"DepDelayMinutes": "count"}).rename(columns={"DepDelayMinutes":'Count'})
Delay_By_Year_df.reset_index(inplace=True)

#inner works because we know they all have matches anyway
Delay_By_Year_Pct1 = Delay_By_Year_df.merge(Flights_By_Year, how = 'inner', on= 'Year', sort= True)
Delay_By_Year_Pct1['Percent of Delayed Flights'] = Delay_By_Year_Pct1['Count'] / Delay_By_Year_Pct1['Flights']

# Delays - 15 Minutes or More Vs Less than 15 - DFW only
fig = px.bar(data_frame = Delay_By_Year_Pct1, y = 'Year', x = 'Percent of Delayed Flights', color = 'Greater Than 15', orientation = 'h',
                     height = 600, width = 600)
fig.update_xaxes(showgrid=False)
fig.write_html(basepath + "Code/VisualWork/Delay_15Flag_DFW.html")
fig.write_image(basepath + "Code/VisualWork/Delay_15Flag_DFW.png")
fig

## Now breakdown the DepDel15's into delay categories based on minutes

Create dataframe for reporting the provide a column indicating the type of delay for each flight. There will be duplicative rows for flights depending on how many categories the delay falls in.

In [19]:
delay_type_df = pd.DataFrame()
delay_types = ['CarrierDelay', 'WeatherDelay', 'SecurityDelay', 'LateAircraftDelay']
ontime_del15['TotDelayMinutes'] = ontime_del15['CarrierDelay'] + ontime_del15['WeatherDelay'] + \
                                  ontime_del15['SecurityDelay'] + ontime_del15['LateAircraftDelay']

for type in delay_types:
    row_df = ontime_del15[ontime_del15[type] >= 0][['Year', type,'FlightDate', 'CRSDepTime', 'Origin', 'Dest', 'TotDelayMinutes' ]]
    row_df['Type of Delay'] = type
    row_df.rename(columns={type: 'type_minutes'}, inplace= True)
    delay_type_df = pd.concat([delay_type_df, row_df], axis= 0)

delay_type_df['Percent of Delay'] = (delay_type_df['type_minutes'] / delay_type_df['TotDelayMinutes']) * 100
delay_type_df['Percent of Delay'].fillna(0)

delay_type_df['DFW']  = np.where(delay_type_df['Origin'] == 'DFW', True, False)
delay_type_df['TypeCount']  = np.where(delay_type_df['type_minutes'] > 0, 1, 0)

delay_type_df.sort_values(by= ['Year', 'FlightDate', 'CRSDepTime', 'Origin', 'Dest', 'Type of Delay'], inplace= True)
delay_type_df.reset_index(drop= True, inplace= True)

# ALL FLIGHTS
Flights_By_Year = ontime.groupby(['Year']).agg({'Flights': "count"})
Flights_By_Year.reset_index(inplace=True)

DelayTypePct_By_Year_df = delay_type_df.groupby(['Year', 'Type of Delay']).agg({"Percent of Delay": "mean", 'TypeCount': 'sum'}).rename(columns=\
                            {"Percent of Delay":'Mean Percent of Delay', 'TypeCount': 'Count'})
DelayTypePct_By_Year_df.reset_index(inplace=True)

DelayTypePct_By_Year_Pct1 = DelayTypePct_By_Year_df.merge(Flights_By_Year, how = 'inner', on= 'Year', sort= True)
DelayTypePct_By_Year_Pct1['Percent of Flights'] = round((DelayTypePct_By_Year_Pct1['Count'] / DelayTypePct_By_Year_Pct1['Flights']) * 100, 2)

# Type of Delay - Mean Percentage - Delays 15 Minutes or More - ALL Flights
fig = px.bar(data_frame= DelayTypePct_By_Year_Pct1, y= 'Year', x= 'Percent of Flights', color='Type of Delay', orientation = 'h',
                     height = 600, width = 600)
fig.update_xaxes(showgrid=False)
fig.write_html(basepath + "Code/VisualWork/DelayTypePct.html")
fig.write_image(basepath + "Code/VisualWork/DelayTypePct.png")
fig.show()


# JUST DFW
ontime_dfw = ontime[ontime['Origin'] == 'DFW']
Flights_By_Year = ontime_dfw.groupby(['Year']).agg({'Flights': "count"})
Flights_By_Year.reset_index(inplace=True)

delay_type_df_dfw = delay_type_df[delay_type_df['DFW'] == True]
DelayTypePct_By_Year_df_dfw = delay_type_df_dfw.groupby(['Year', 'Type of Delay']).agg({"Percent of Delay": "mean", 'TypeCount': 'sum'}).rename(columns=\
                            {"Percent of Delay":'Mean Percent of Delay', 'TypeCount': 'Count'})
DelayTypePct_By_Year_df_dfw.reset_index(inplace=True)

DelayTypePct_By_Year_Pct1 = DelayTypePct_By_Year_df_dfw.merge(Flights_By_Year, how = 'inner', on= 'Year', sort= True)
DelayTypePct_By_Year_Pct1['Percent of Flights'] = round((DelayTypePct_By_Year_Pct1['Count'] / DelayTypePct_By_Year_Pct1['Flights']) * 100 ,2)

# Type of Delay - Mean Percentage - Delays 15 Minutes or More - DFW
fig = px.bar(data_frame= DelayTypePct_By_Year_Pct1, y= 'Year', x= 'Percent of Flights', color='Type of Delay', orientation = 'h',
                     height = 600, width = 600)
fig.update_xaxes(showgrid=False)
fig.write_html(basepath + "Code/VisualWork/DelayTypePct_DFW.html")
fig.write_image(basepath + "Code/VisualWork/DelayTypePct_DFW.png")
fig.show()



A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy

