In [None]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns

In [None]:
# Read the headway export (file name covers 8-1-2023 to 9-30-2023) into `wego`.
wego = pd.read_csv(
    filepath_or_buffer='../data/Headway Data, 8-1-2023 to 9-30-2023.csv',
)
# Bare last expression: the notebook renders the frame as this cell's output.
wego

In [None]:
# Lift the column cap so every column is shown when a frame is rendered.
pd.options.display.max_columns = None

In this project, you'll be analyzing the bus spacing to look for patterns and try to identify correlations to controllable or external factors. Specifically, you'll be using a dataset containing information on the headway, or amount of time between vehicle arrivals at a stop.

There are two main variables you will be studying in this project, headway deviation and adherence.

**Headway** is the amount of time between a bus and the prior bus at the same stop. In the dataset, the amount of headway scheduled is contained in the SCHEDULED_HDWY column and indicates the difference between the scheduled time for a particular stop and the scheduled time for the previous bus on that same stop.

This dataset contains a column HDWY_DEV, which shows the amount of deviation from the scheduled headway. **Bunching** occurs when there is shorter headway than scheduled, which would appear as a negative HDWY_DEV value. **Gapping** is when there is more headway than scheduled and appears as a positive value in the HDWY_DEV column. Note that you can calculate headway deviation percentage as HDWY_DEV/SCHEDULED_HDWY. The generally accepted range for the actual headway is 50% to 150% of the scheduled headway, so if the scheduled headway is 10 minutes, a headway deviation of up to 5 minutes in either direction would be acceptable (but not ideal).

Another important variable is **adherence**, which compares the actual departure time to the scheduled time and is included in the ADHERENCE column. A negative adherence value means that a bus left a time point late and a positive adherence indicates that the bus left the time point early. Buses with adherence values beyond negative 6 are generally considered late and beyond positive 1 are considered early. However, the staff applies some additional logic: waivers allow early departures in certain cases, such as an express bus that has already picked up everyone at a park-and-ride lot and is only dropping people off at the remaining stops. Early departures are also allowed for all records where TRIP_EDGE = 2 (end of trip), since it is not a problem if a bus ends its trip early as long as it didn't pass other timepoints early along the way.

4. How much of an effect does the driver have on headway and on-time performance? The driver is indicated by the OPERATOR variable.

6. How much impact does being late or too spaced out at the first stop have downstream?

8. What is the relationship between distance or time travelled since the start of a given trip and the headway deviation? Does headway become less stable the further along the route the bus has travelled?

In [None]:
# Peek at a fixed window of rows; `.loc` slices by index label and keeps both
# endpoints, so labels 269830 through 269850 are all included.
wego.loc[269830:269850, :]

In [None]:
# Distinct ROUTE_ABBR values present in the data, in order of first appearance.
wego.loc[:, 'ROUTE_ABBR'].unique()

2. How does direction of travel, route, or location affect the headway and on-time performance?

In [None]:
# Average headway deviation and adherence for each direction of travel.
direction = (
    wego
    .groupby(['ROUTE_DIRECTION_NAME'])[['HDWY_DEV', 'ADHERENCE']]
    .agg('mean')
)
direction

In [None]:
# Grouped bars: one group per direction, one bar per averaged metric.
direction.plot(kind='bar');

In [None]:
# Mean headway deviation per route, computed inside this cell so it runs on a
# fresh kernel. The original read `wg3`, which is only defined in a later cell.
# NOTE(review): this assumes the plot was meant to show the per-route means,
# which is what `wg3` holds where it is defined — confirm.
route_hdwy_dev = wego.groupby('ROUTE_ABBR')[['HDWY_DEV']].mean()

fig, ax = plt.subplots(figsize=(10, 4))
sns.boxplot(data=route_hdwy_dev, x='HDWY_DEV', ax=ax)
ax.set_title('Mean headway deviation per route');

In [None]:
# Histogram of the per-route mean headway deviation. The means are derived
# directly from `wego` here because `wg3` is not defined until a later cell,
# so the original line fails on Restart & Run All.
wego.groupby('ROUTE_ABBR')['HDWY_DEV'].mean().hist(edgecolor='magenta');

In [None]:
# Mean headway deviation per route, as a one-column frame indexed by ROUTE_ABBR.
# The column is selected explicitly: groupby `.mean()` takes `numeric_only` as
# its first positional argument, so the original `.mean('HDWY_DEV')` only
# worked because a non-empty string is truthy.
wg3 = wego.groupby('ROUTE_ABBR')[['HDWY_DEV']].mean()
wg3

In [None]:
# Bar chart of the per-route mean headway deviation held in `wg3`.
wg3.plot(kind='bar',
         figsize=(10, 6))

# Keep the route labels horizontal so they stay readable.
plt.xticks(rotation=0,
           fontsize=12)
plt.title('avg headway dev',
          fontsize=14,
          fontweight='bold')
# Fixed: the original called `plot.show()`, but the pyplot module is imported
# as `plt`, so that line raised NameError.
plt.show();

ROUTE_ABBR    |    ROUTE_DIRECTION_NAME	  |    
overload_id = 0


In [None]:
# Narrow the raw frame to the columns used in the direction / route analysis.
wg2 = wego.loc[:, [
    'DATE',
    'CALENDAR_ID',
    'TRIP_ID',
    'ROUTE_ABBR',
    'TIME_POINT_ABBR',
    'TRIP_EDGE',
    'SCHEDULED_TIME',
    'ROUTE_DIRECTION_NAME',
    'OVERLOAD_ID',
    'ADHERENCE',
    'SCHEDULED_HDWY',
    'ACTUAL_HDWY',
    'HDWY_DEV',
    'ADJUSTED_ONTIME_COUNT',
]]

In [None]:
# OVERLOAD_ID == 0 marks a record from a trip that was part of the original
# schedule for the day, i.e. not one added by the dispatcher.
reg_wg2 = wg2.loc[wg2['OVERLOAD_ID'].eq(0)]

# Split the regularly scheduled trips by direction of travel.
fdt_wg2 = reg_wg2.loc[reg_wg2['ROUTE_DIRECTION_NAME'].eq('FROM DOWNTOWN')]
tdt_wg2 = reg_wg2.loc[reg_wg2['ROUTE_DIRECTION_NAME'].eq('TO DOWNTOWN')]

In [None]:
# Display the TO DOWNTOWN subset of the regularly scheduled trips.
tdt_wg2

In [None]:
# Route 22 records heading to downtown.
tdt_wg22 = tdt_wg2.loc[tdt_wg2['ROUTE_ABBR'].eq(22)]

# Which time points appear on that route/direction?
tdt_wg22.loc[:, 'TIME_POINT_ABBR'].unique()

In [None]:
# Route 22, to downtown, restricted to the 'MHSP' time point.
# The index is reset in the same statement that builds the frame: the original
# called `mhsp22.reset_index()` on its own line and discarded the result, so
# the displayed frame was never reindexed.
# NOTE(review): drop=True discards the original row labels; use
# `.reset_index()` instead if they should be kept as a column.
mhsp22 = tdt_wg22[tdt_wg22['TIME_POINT_ABBR'] == 'MHSP'].reset_index(drop=True)
mhsp22

In [None]:
# Display the FROM DOWNTOWN subset of the regularly scheduled trips.
fdt_wg2

In [None]:
# Route 22 records heading away from downtown.
fdt_wg22 = fdt_wg2.loc[fdt_wg2['ROUTE_ABBR'].eq(22)]
fdt_wg22

In [None]:
# Route 22, from downtown, restricted to the 'MHSP' time point.
# The index is reset in the same statement that builds the frame: the original
# called `mhsp221.reset_index()` on its own line and discarded the result, so
# the displayed frame was never reindexed.
# NOTE(review): drop=True discards the original row labels; use
# `.reset_index()` instead if they should be kept as a column.
mhsp221 = fdt_wg22[fdt_wg22['TIME_POINT_ABBR'] == 'MHSP'].reset_index(drop=True)
mhsp221

In [None]:
# To downtown: summary statistics of headway deviation over non-missing values.
tdt_wg2.loc[:, 'HDWY_DEV'].dropna().describe()

In [None]:
# From downtown: summary statistics of headway deviation over non-missing values.
fdt_wg2.loc[:, 'HDWY_DEV'].dropna().describe()

In [None]:
# To downtown: summary statistics of adherence over non-missing values.
tdt_wg2.loc[:, 'ADHERENCE'].dropna().describe()

In [None]:
# From downtown: summary statistics of adherence over non-missing values.
fdt_wg2.loc[:, 'ADHERENCE'].dropna().describe()

In [None]:
# To downtown: non-missing headway deviations, smallest to largest, to eyeball
# the extremes at both ends.
tdt_wg2.loc[:, 'HDWY_DEV'].dropna().sort_values(ascending=True)

In [None]:
# From downtown: non-missing headway deviations, smallest to largest, to eyeball
# the extremes at both ends.
fdt_wg2.loc[:, 'HDWY_DEV'].dropna().sort_values(ascending=True)

In [None]:
# List the dtype pandas assigned to each column of the raw frame.
wego.dtypes