In [1]:
import pandas as pd
from IPython.display import display
from pandas.tseries.holiday import USFederalHolidayCalendar

In [2]:
wego = pd.read_csv("../data/Route 50 Timepoint and Headway Data, 1-1-2023 through 5-12-2025.csv")

# WeGo Public Transit
[WeGo Public Transit](https://www.wegotransit.com/) is a public transit system serving the Greater Nashville and Davidson County area. WeGo provides local and regional bus routes, the WeGo Star train service connecting Lebanon to downtown Nashville, along with several other transit services.

The data for this project can be downloaded from [here](https://drive.google.com/drive/folders/1L8d3xEaPD13BMz_k-3G8XRRLvPIbNRq9?usp=sharing).

Since 2019, WeGo has been using [**Transit Signal Priority (TSP)**](https://www.wegotransit.com/projects/transit-signal-priority/), a technology that helps to manage traffic flow more efficiently. For buses it reduces wait times at traffic signals by holding green lights longer, shortening red lights or in some cases allowing buses to bypass traffic. 

The data that you have been provided was collected for trips on Route 50, Charlotte Pike. TSP has been used on portions of this route, with different periods of being on or off, either conditionally or unconditionally. For these timespans, TSP was used between White Bridge and MCC, including all intervening timepoints, in both directions.
The important dates are as follows:

* February 3rd @ 12 noon: TSP Turned On (Unconditional)

* February 10th @ 12 noon: TSP Schedule-Conditional Priority Begins (Only buses more than 2 minutes late receive priority)

* April 28th @ 12 noon: TSP Turned Off

* May 5th @ 12 noon: TSP Turned On (Unconditional)

* May 12th @ 12 noon: TSP Headway-Conditional TSP Priority Begins (Only gapped buses with actual leading headway more than 120% of scheduled headway receive priority)


In [3]:
wego.head()

Unnamed: 0,CALENDAR_ID,SERVICE_ABBR,ADHERENCE_ID,DATE,ROUTE_ABBR,BLOCK_ABBR,OPERATOR,TRIP_ID,OVERLOAD_ID,ROUTE_DIRECTION_NAME,...,ACTUAL_HDWY,HDWY_DEV,ADJUSTED_EARLY_COUNT,ADJUSTED_LATE_COUNT,ADJUSTED_ONTIME_COUNT,STOP_CANCELLED,PREV_SCHED_STOP_CANCELLED,IS_RELIEF,BLOCK_STOP_ORDER,DWELL_IN_MINS
0,120230101,3,93549161,2023-01-01,50,5000,2355,332422,0,TO DOWNTOWN,...,,,0,0,1,0,0.0,0,2,8.133333
1,120230101,3,93549162,2023-01-01,50,5000,2355,332422,0,TO DOWNTOWN,...,,,0,0,1,0,0.0,0,5,0.0
2,120230101,3,93549163,2023-01-01,50,5000,2355,332422,0,TO DOWNTOWN,...,,,0,0,1,0,0.0,0,11,0.0
3,120230101,3,93549164,2023-01-01,50,5000,2355,332422,0,TO DOWNTOWN,...,,,0,0,1,0,0.0,0,13,0.0
4,120230101,3,93549165,2023-01-01,50,5000,2355,332422,0,TO DOWNTOWN,...,,,0,0,1,0,0.0,0,18,2.15


In [4]:
(
    wego
    .loc[wego['CALENDAR_ID'] == 120240203]
    .loc[wego['TRIP_ID'] == 371878]
    [[
        'DATE', 'CALENDAR_ID', 'TRIP_ID', 'ROUTE_ABBR',
        'TIME_POINT_ABBR', 'TRIP_EDGE',
        'SCHEDULED_TIME', 'ACTUAL_DEPARTURE_TIME', 'ADHERENCE',
        'ADJUSTED_EARLY_COUNT', 'ADJUSTED_LATE_COUNT', 'ADJUSTED_ONTIME_COUNT'
    ]]
)

Unnamed: 0,DATE,CALENDAR_ID,TRIP_ID,ROUTE_ABBR,TIME_POINT_ABBR,TRIP_EDGE,SCHEDULED_TIME,ACTUAL_DEPARTURE_TIME,ADHERENCE,ADJUSTED_EARLY_COUNT,ADJUSTED_LATE_COUNT,ADJUSTED_ONTIME_COUNT
282333,2024-02-03,120240203,371878,50,WALM,1,05:34:00,05:34:45,-0.75,0,0,1
282334,2024-02-03,120240203,371878,50,HLWD,0,05:40:00,05:40:04,-0.066666,0,0,1
282335,2024-02-03,120240203,371878,50,WHBG,0,05:47:00,05:45:43,1.283333,1,0,0
282336,2024-02-03,120240203,371878,50,CH46,0,05:50:00,05:50:40,-0.666666,0,0,1
282337,2024-02-03,120240203,371878,50,28&CHARL,0,05:54:00,05:54:52,-0.866666,0,0,1
282338,2024-02-03,120240203,371878,50,MCC5_1,2,06:05:00,06:04:18,0.7,0,0,1


In [5]:
wego['DAY_TYPE'] = wego['SERVICE_ABBR'].replace({1: 'Weekday', 2: 'Saturday', 3: 'Sunday'}).astype('category')

In [6]:
holiday_cal = USFederalHolidayCalendar()
holidays = holiday_cal.holidays(start=wego['DATE'].min(), end=wego['DATE'].max())

In [7]:
holidays

DatetimeIndex(['2023-01-02', '2023-01-16', '2023-02-20', '2023-05-29',
               '2023-06-19', '2023-07-04', '2023-09-04', '2023-10-09',
               '2023-11-10', '2023-11-23', '2023-12-25', '2024-01-01',
               '2024-01-15', '2024-02-19', '2024-05-27', '2024-06-19',
               '2024-07-04', '2024-09-02', '2024-10-14', '2024-11-11',
               '2024-11-28', '2024-12-25', '2025-01-01', '2025-01-20',
               '2025-02-17'],
              dtype='datetime64[ns]', freq=None)

In [8]:
wego['IS_HOLIDAY'] = wego['DATE'].isin(holidays)
wego

Unnamed: 0,CALENDAR_ID,SERVICE_ABBR,ADHERENCE_ID,DATE,ROUTE_ABBR,BLOCK_ABBR,OPERATOR,TRIP_ID,OVERLOAD_ID,ROUTE_DIRECTION_NAME,...,ADJUSTED_EARLY_COUNT,ADJUSTED_LATE_COUNT,ADJUSTED_ONTIME_COUNT,STOP_CANCELLED,PREV_SCHED_STOP_CANCELLED,IS_RELIEF,BLOCK_STOP_ORDER,DWELL_IN_MINS,DAY_TYPE,IS_HOLIDAY
0,120230101,3,93549161,2023-01-01,50,5000,2355,332422,0,TO DOWNTOWN,...,0,0,1,0,0.0,0,2,8.133333,Sunday,False
1,120230101,3,93549162,2023-01-01,50,5000,2355,332422,0,TO DOWNTOWN,...,0,0,1,0,0.0,0,5,0.000000,Sunday,False
2,120230101,3,93549163,2023-01-01,50,5000,2355,332422,0,TO DOWNTOWN,...,0,0,1,0,0.0,0,11,0.000000,Sunday,False
3,120230101,3,93549164,2023-01-01,50,5000,2355,332422,0,TO DOWNTOWN,...,0,0,1,0,0.0,0,13,0.000000,Sunday,False
4,120230101,3,93549165,2023-01-01,50,5000,2355,332422,0,TO DOWNTOWN,...,0,0,1,0,0.0,0,18,2.150000,Sunday,False
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
618993,120250512,1,119531049,2025-05-12,50,8401,3077,432121,0,FROM DOWNTOWN,...,1,0,0,0,,0,23,4.500000,Weekday,False
618994,120250512,1,119531638,2025-05-12,50,9302,3246,432353,0,TO DOWNTOWN,...,0,0,1,0,0.0,0,2,27.283333,Weekday,False
618995,120250512,1,119531639,2025-05-12,50,9302,3246,432353,0,TO DOWNTOWN,...,0,0,1,0,,0,3,0.133333,Weekday,False
618996,120250512,1,119531770,2025-05-12,50,9950,2448,432387,0,TO DOWNTOWN,...,0,0,1,0,0.0,0,2,8.166666,Weekday,False


In [9]:
cln_wego = wego[(wego['OVERLOAD_ID'] == 0) &
(wego['TRIP_EDGE'] == 0) &
(wego['STOP_CANCELLED'] == 0) &
(~wego['TIME_POINT_ABBR'].str.contains('HLWD', na=False))]

In [28]:
cln_wego

Unnamed: 0,CALENDAR_ID,SERVICE_ABBR,ADHERENCE_ID,DATE,ROUTE_ABBR,BLOCK_ABBR,OPERATOR,TRIP_ID,OVERLOAD_ID,ROUTE_DIRECTION_NAME,...,ADJUSTED_EARLY_COUNT,ADJUSTED_LATE_COUNT,ADJUSTED_ONTIME_COUNT,STOP_CANCELLED,PREV_SCHED_STOP_CANCELLED,IS_RELIEF,BLOCK_STOP_ORDER,DWELL_IN_MINS,DAY_TYPE,IS_HOLIDAY
2,120230101,3,93549163,2023-01-01,50,5000,2355,332422,0,TO DOWNTOWN,...,0,0,1,0,0.0,0,11,0.00,Sunday,False
3,120230101,3,93549164,2023-01-01,50,5000,2355,332422,0,TO DOWNTOWN,...,0,0,1,0,0.0,0,13,0.00,Sunday,False
4,120230101,3,93549165,2023-01-01,50,5000,2355,332422,0,TO DOWNTOWN,...,0,0,1,0,0.0,0,18,2.15,Sunday,False
7,120230101,3,93549168,2023-01-01,50,5000,2355,332423,0,FROM DOWNTOWN,...,0,0,1,0,0.0,0,33,0.00,Sunday,False
8,120230101,3,93549169,2023-01-01,50,5000,2355,332423,0,FROM DOWNTOWN,...,0,0,1,0,0.0,0,38,0.00,Sunday,False
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
618979,120250512,1,119524761,2025-05-12,50,5006,754,429696,0,TO DOWNTOWN,...,0,0,1,0,0.0,0,67,0.00,Weekday,False
618980,120250512,1,119524762,2025-05-12,50,5006,754,429696,0,TO DOWNTOWN,...,0,0,1,0,0.0,0,72,0.00,Weekday,False
618983,120250512,1,119524765,2025-05-12,50,5006,754,429697,0,FROM DOWNTOWN,...,0,0,1,0,0.0,0,87,0.00,Weekday,False
618984,120250512,1,119524766,2025-05-12,50,5006,754,429697,0,FROM DOWNTOWN,...,0,0,1,0,0.0,0,92,0.00,Weekday,False


The first main variable you will be studying in this project is **adherence**, which compares the actual departure time to the scheduled time and is included in the ADHERENCE column. A negative adherence value means that a bus left a time point late and a positive adherence indicates that the bus left the time point early. Buses with adherence values beyond negative 6 are generally considered late and beyond positive 1 are considered early. However, there is some additional logic where the staff applies waivers to allow early departures. For example, express buses that have already picked up everyone at a park-and-ride lot and are only dropping off passengers may be allowed to leave early.  Early departures are also permitted at the end of a trip (when TRIP_EDGE = 2), since they do not affect upstream passengers. **Note:** When determining whether a bus is early or late, it is advised that you use the 'ADJUSTED_EARLY_COUNT', 'ADJUSTED_LATE_COUNT', and 'ADJUSTED_ONTIME_COUNT' columns in order to account for the adjustments.

In [29]:
adjusted_counts = cln_wego[['DATE', 'DAY_TYPE', 'CALENDAR_ID', 'IS_HOLIDAY', 'OPERATOR', 'TRIP_ID', 'TIME_POINT_ABBR', 'TRIP_EDGE', 'SCHEDULED_TIME', 'ACTUAL_ARRIVAL_TIME', 'ADHERENCE', 'DWELL_IN_MINS', 'SCHEDULED_HDWY', 'ACTUAL_HDWY', 'HDWY_DEV', 'ROUTE_DIRECTION_NAME', 'ADJUSTED_EARLY_COUNT', 'ADJUSTED_LATE_COUNT', 'ADJUSTED_ONTIME_COUNT']]

In [11]:
#adjusted_counts

In [12]:
# adjusted_counts.loc[(adjusted_counts['CALENDAR_ID'] == 120250203) & (adjusted_counts['ADHERENCE'] < -6)]

In [13]:
# adjusted_counts.loc[(adjusted_counts['CALENDAR_ID'] == 120250210) & (adjusted_counts['ADHERENCE'] < -6)]

In [14]:
# adjusted_counts.loc[(adjusted_counts['CALENDAR_ID'] == 120250428) & (adjusted_counts['ADHERENCE'] < -6)]

In [15]:
# adjusted_counts.loc[(adjusted_counts['CALENDAR_ID'] == 120250505) & (adjusted_counts['ADHERENCE'] < -6)]

In [30]:
adjusted_counts.loc[
    (adjusted_counts['CALENDAR_ID'] == 120250512) & 
    (adjusted_counts['ADHERENCE'] < -6) & 
    (adjusted_counts['TIME_POINT_ABBR'] == 'WHBG')]

Unnamed: 0,DATE,DAY_TYPE,CALENDAR_ID,IS_HOLIDAY,OPERATOR,TRIP_ID,TIME_POINT_ABBR,TRIP_EDGE,SCHEDULED_TIME,ACTUAL_ARRIVAL_TIME,ADHERENCE,DWELL_IN_MINS,SCHEDULED_HDWY,ACTUAL_HDWY,HDWY_DEV,ROUTE_DIRECTION_NAME,ADJUSTED_EARLY_COUNT,ADJUSTED_LATE_COUNT,ADJUSTED_ONTIME_COUNT
618450,2025-05-12,Weekday,120250512,False,2109,429504,WHBG,0,10:16:00,10:25:23,-9.383333,0.0,15.0,22.933333,7.933333,TO DOWNTOWN,0,1,0
618457,2025-05-12,Weekday,120250512,False,2109,429505,WHBG,0,11:03:00,11:09:50,-6.833333,0.0,15.0,20.633333,5.633333,FROM DOWNTOWN,0,1,0
618774,2025-05-12,Weekday,120250512,False,1587,429628,WHBG,0,12:46:00,12:50:21,-6.533333,2.183333,15.0,20.216666,5.216666,TO DOWNTOWN,0,1,0
618793,2025-05-12,Weekday,120250512,False,1764,429631,WHBG,0,15:07:00,15:13:20,-6.333333,0.0,15.0,20.2,5.2,FROM DOWNTOWN,0,1,0
618810,2025-05-12,Weekday,120250512,False,2362,429634,WHBG,0,17:46:00,17:56:24,-12.983333,2.583333,17.0,26.3,9.3,TO DOWNTOWN,0,1,0
618817,2025-05-12,Weekday,120250512,False,2362,429635,WHBG,0,18:33:00,18:39:07,-6.116666,0.0,15.0,19.55,4.55,FROM DOWNTOWN,0,1,0
618889,2025-05-12,Weekday,120250512,False,3328,429681,WHBG,0,09:18:00,09:25:51,-7.85,0.0,15.0,22.183333,7.183333,FROM DOWNTOWN,0,1,0
618937,2025-05-12,Weekday,120250512,False,2706,429689,WHBG,0,15:22:00,15:30:01,-8.016666,0.0,15.0,16.683333,1.683333,FROM DOWNTOWN,0,1,0
618942,2025-05-12,Weekday,120250512,False,2706,429690,WHBG,0,16:10:00,16:14:17,-9.3,5.016666,15.0,22.85,7.85,TO DOWNTOWN,0,1,0


In [38]:
cln_wego['TIME_POINT_ABBR'].head(12)

2         WHBG
3         CH46
4     28&CHARL
7     28&CHARL
8         CH46
9         WHBG
14        WHBG
15        CH46
16    28&CHARL
19    28&CHARL
20        CH46
21        WHBG
Name: TIME_POINT_ABBR, dtype: object

In [39]:
adjusted_counts[(adjusted_counts['ADHERENCE'] < -6)].sort_values('ADHERENCE')

Unnamed: 0,DATE,DAY_TYPE,CALENDAR_ID,IS_HOLIDAY,OPERATOR,TRIP_ID,TIME_POINT_ABBR,TRIP_EDGE,SCHEDULED_TIME,ACTUAL_ARRIVAL_TIME,ADHERENCE,DWELL_IN_MINS,SCHEDULED_HDWY,ACTUAL_HDWY,HDWY_DEV,ROUTE_DIRECTION_NAME,ADJUSTED_EARLY_COUNT,ADJUSTED_LATE_COUNT,ADJUSTED_ONTIME_COUNT
434647,2024-08-30,Weekday,120240830,False,3339,394699,CH46,0,16:02:00,17:14:22,-451.866666,379.500000,15.0,36.716666,21.716666,FROM DOWNTOWN,0,1,0
583793,2025-03-25,Weekday,120250325,False,3296,423752,WHBG,0,18:33:00,23:00:29,-267.483333,0.000000,15.0,29.533333,14.533333,FROM DOWNTOWN,0,1,0
433464,2024-08-29,Weekday,120240829,False,1705,394524,CH46,0,13:49:00,14:05:18,-236.733333,220.433333,14.0,11.750000,-2.250000,TO DOWNTOWN,0,1,0
582402,2025-03-24,Weekday,120250324,False,2094,423507,28&CHARL,0,09:24:00,12:24:24,-192.233333,11.833333,15.0,24.350000,9.350000,TO DOWNTOWN,0,1,0
582400,2025-03-24,Weekday,120250324,False,2094,423507,WHBG,0,09:16:00,12:15:58,-179.966666,0.000000,14.0,13.733333,-0.266667,TO DOWNTOWN,0,1,0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
546065,2025-02-01,Saturday,120250201,False,3358,417815,28&CHARL,0,12:34:00,12:40:01,-6.016666,0.000000,20.0,12.316666,-7.683334,TO DOWNTOWN,0,1,0
433816,2024-08-29,Weekday,120240829,False,2033,394695,28&CHARL,0,12:41:00,12:47:01,-6.016666,0.000000,15.0,13.516666,-1.483334,FROM DOWNTOWN,0,1,0
566567,2025-03-02,Sunday,120250302,False,357,423549,CH46,0,13:30:00,13:36:01,-6.016666,0.000000,20.0,26.683333,6.683333,TO DOWNTOWN,0,1,0
248536,2023-12-16,Saturday,120231216,False,3222,366961,28&CHARL,0,19:24:00,19:27:58,-6.016666,2.050000,30.0,34.050000,4.050000,TO DOWNTOWN,0,1,0


In [33]:
april28_data = adjusted_counts.loc[
    (adjusted_counts['CALENDAR_ID'] == 120250428)]
    

all_april28_avg_adherence = april28_data['ADHERENCE'].mean()
all_april28_max_adherence = april28_data['ADHERENCE'].max()
all_april28_min_adherence = april28_data['ADHERENCE'].min()

print(f"April 28th, 2025 Adherence")
print(f"Average Adherence: {all_april28_avg_adherence}")
print(f"Maximum Adherence: {all_april28_max_adherence}")
print(f"Minimum Adherence: {all_april28_min_adherence}")

April 28th, 2025 Adherence
Average Adherence: -3.378156277777778
Maximum Adherence: 1.616666
Minimum Adherence: -18.883333


In [34]:
april28_data_whbg = adjusted_counts.loc[
    (adjusted_counts['CALENDAR_ID'] == 120250428) & 
    (adjusted_counts['TIME_POINT_ABBR'] == 'WHBG')]

april28_avg_adherence = april28_data_whbg['ADHERENCE'].mean()
april28_max_adherence = april28_data_whbg['ADHERENCE'].max()
april28_min_adherence = april28_data_whbg['ADHERENCE'].min()

print(f"White Bridge, April 28th, 2025 Adherence")
print(f"Average Adherence: {april28_avg_adherence}")
print(f"Maximum Adherence: {april28_max_adherence}")
print(f"Minimum Adherence: {april28_min_adherence}")

White Bridge, April 28th, 2025 Adherence
Average Adherence: -3.5010098257575755
Maximum Adherence: 1.616666
Minimum Adherence: -18.883333


In [35]:
may5_data_whbg = adjusted_counts.loc[
    (adjusted_counts['CALENDAR_ID'] == 120250505) & 
    (adjusted_counts['TIME_POINT_ABBR'] == 'WHBG')]

may5_avg_adherence = may5_data_whbg['ADHERENCE'].mean()
may5_max_adherence = may5_data_whbg['ADHERENCE'].max()
may5_min_adherence = may5_data_whbg['ADHERENCE'].min()

print(f"White Bridge, May 5th, 2025 Adherence")
print(f"Average Adherence: {may5_avg_adherence}")
print(f"Maximum Adherence: {may5_max_adherence}")
print(f"Minimum Adherence: {may5_min_adherence}")

White Bridge, May 5th, 2025 Adherence
Average Adherence: -3.554615123076923
Maximum Adherence: 1.95
Minimum Adherence: -18.133333


In [36]:
may12_data_whbg = adjusted_counts.loc[
    (adjusted_counts['CALENDAR_ID'] == 120250512) & 
    (adjusted_counts['TIME_POINT_ABBR'] == 'WHBG')]

may12_avg_adherence = may12_data_whbg['ADHERENCE'].mean()
may12_max_adherence = may12_data_whbg['ADHERENCE'].max()
may12_min_adherence = may12_data_whbg['ADHERENCE'].min()

print(f"White Bridge, May 12th, 2025 Adherence")
print(f"Average Adherence: {may12_avg_adherence}")
print(f"Maximum Adherence: {may12_max_adherence}")
print(f"Minimum Adherence: {may12_min_adherence}")

White Bridge, May 12th, 2025 Adherence
Average Adherence: -2.2264628396946566
Maximum Adherence: 1.916666
Minimum Adherence: -12.983333


In [37]:
early_mean = wego['ADJUSTED_EARLY_COUNT'].mean()
late_mean = wego['ADJUSTED_LATE_COUNT'].mean()
ontime_mean = wego['ADJUSTED_ONTIME_COUNT'].mean()

total = early_mean + late_mean + ontime_mean

early_percent = (early_mean / total) * 100
late_percent = (late_mean / total) * 100
ontime_percent = (ontime_mean / total) * 100

print(f"Overall Percentage")
print(f"Early: {early_percent:.2f}%")
print(f"Late: {late_percent:.2f}%")
print(f"Ontime: {ontime_percent:.2f}%")

Overall Percentage
Early: 3.02%
Late: 12.19%
Ontime: 84.80%


The second main variable you'll be looking at is **headway**.  This is the amount of time between a bus and the prior bus at the same stop. In the dataset, the amount of headway scheduled is contained in the SCHEDULED_HDWY column and indicates the difference between the scheduled time for a particular stop and the scheduled time for the previous bus on that same stop.
This dataset contains a column HDWY_DEV, which shows the amount of deviation from the scheduled headway. **Bunching** occurs when there is shorter headway than scheduled, which would appear as a negative HDWY_DEV value. **Gapping** is when there is more headway than scheduled and appears as a positive value in the HDWY_DEV column. Note that you can calculate headway deviation percentage as HDWY_DEV/SCHEDULED_HDWY. The generally accepted range of headway deviation is 50% to 150% of the scheduled headway, so if scheduled headway is 10 minutes, a headway deviation of up to 5 minutes would be acceptable (but not ideal).


In [40]:
non_nan_values = cln_wego['SCHEDULED_HDWY'].dropna()
non_nan_values

14        30.0
15        30.0
16        30.0
19        30.0
20        30.0
          ... 
618979    15.0
618980    15.0
618983    15.0
618984    15.0
618985    15.0
Name: SCHEDULED_HDWY, Length: 297736, dtype: float64

In [41]:
non_null_headway = cln_wego[['DATE', 'DAY_TYPE', 'IS_HOLIDAY', 'SCHEDULED_TIME', 'ACTUAL_ARRIVAL_TIME', 'TIME_POINT_ABBR', 'SCHEDULED_HDWY', 'ACTUAL_HDWY', 'HDWY_DEV']].dropna()
non_null_headway 

Unnamed: 0,DATE,DAY_TYPE,IS_HOLIDAY,SCHEDULED_TIME,ACTUAL_ARRIVAL_TIME,TIME_POINT_ABBR,SCHEDULED_HDWY,ACTUAL_HDWY,HDWY_DEV
14,2023-01-01,Sunday,False,07:17:00,07:20:48,WHBG,30.0,34.016666,4.016666
15,2023-01-01,Sunday,False,07:20:00,07:24:18,CH46,30.0,34.066666,4.066666
16,2023-01-01,Sunday,False,07:24:00,07:26:41,28&CHARL,30.0,32.516666,2.516666
19,2023-01-01,Sunday,False,07:55:00,07:56:33,28&CHARL,30.0,31.233333,1.233333
20,2023-01-01,Sunday,False,07:59:00,07:59:04,CH46,30.0,32.166666,2.166666
...,...,...,...,...,...,...,...,...,...
618979,2025-05-12,Weekday,False,16:44:00,16:49:25,CH46,15.0,15.900000,0.900000
618980,2025-05-12,Weekday,False,16:48:00,16:53:47,28&CHARL,15.0,15.983333,0.983333
618983,2025-05-12,Weekday,False,17:27:00,17:28:46,28&CHARL,15.0,11.383333,-3.616667
618984,2025-05-12,Weekday,False,17:32:00,17:33:19,CH46,15.0,9.816666,-5.183334


In [42]:
non_null_headway['HDWY_DEV_PCT'] = (non_null_headway['HDWY_DEV'] / non_null_headway['SCHEDULED_HDWY']) * 100
non_null_headway.sort_values(['HDWY_DEV'])

Unnamed: 0,DATE,DAY_TYPE,IS_HOLIDAY,SCHEDULED_TIME,ACTUAL_ARRIVAL_TIME,TIME_POINT_ABBR,SCHEDULED_HDWY,ACTUAL_HDWY,HDWY_DEV,HDWY_DEV_PCT
155958,2023-08-10,Weekday,False,06:32:00,06:54:56,WHBG,35.0,1.750000,-33.250000,-95.000000
354702,2024-05-11,Saturday,False,23:54:00,23:53:08,28&CHARL,30.0,0.016666,-29.983334,-99.944447
354263,2024-05-11,Saturday,False,22:01:00,22:06:28,WHBG,30.0,0.233333,-29.766667,-99.222223
134741,2023-07-11,Weekday,False,19:47:00,20:21:16,WHBG,31.0,1.733333,-29.266667,-94.408603
155957,2023-08-10,Weekday,False,06:28:00,06:35:30,CH46,35.0,5.816666,-29.183334,-83.380954
...,...,...,...,...,...,...,...,...,...,...
284054,2024-02-05,Weekday,False,19:20:00,19:45:57,CH46,30.0,129.366666,99.366666,331.222220
284053,2024-02-05,Weekday,False,19:16:00,19:41:02,WHBG,30.0,130.233333,100.233333,334.111110
269020,2024-01-15,Saturday,False,19:47:00,19:47:04,WHBG,30.0,139.300000,109.300000,364.333333
71119,2023-04-11,Weekday,False,23:54:00,23:54:13,28&CHARL,30.0,146.500000,116.500000,388.333333


In [81]:
non_null_headway.sort_values(['HDWY_DEV_PCT'])

Unnamed: 0,DATE,DAY_TYPE,IS_HOLIDAY,SCHEDULED_TIME,ACTUAL_ARRIVAL_TIME,TIME_POINT_ABBR,SCHEDULED_HDWY,ACTUAL_HDWY,HDWY_DEV,HDWY_DEV_PCT
90619,2023-05-09,Weekday,False,14:42:00,14:44:02,28&CHARL,16.0,0.000000,-16.000000,-100.000000
142738,2023-07-22,Saturday,False,12:09:00,12:11:35,CH46,20.0,0.000000,-20.000000,-100.000000
524475,2025-01-02,Weekday,False,15:48:00,15:45:33,28&CHARL,14.0,0.000000,-14.000000,-100.000000
13044,2023-01-19,Weekday,False,16:50:00,16:53:36,CH46,15.0,0.000000,-15.000000,-100.000000
65974,2023-04-04,Weekday,False,17:42:00,17:42:18,28&CHARL,15.0,0.000000,-15.000000,-100.000000
...,...,...,...,...,...,...,...,...,...,...
71119,2023-04-11,Weekday,False,23:54:00,23:54:13,28&CHARL,30.0,146.500000,116.500000,388.333333
529637,2025-01-09,Weekday,False,16:29:00,17:14:42,CH46,15.0,73.633333,58.633333,390.888887
529638,2025-01-09,Weekday,False,16:33:00,17:20:39,28&CHARL,15.0,75.183333,60.183333,401.222220
529636,2025-01-09,Weekday,False,16:25:00,17:13:12,WHBG,15.0,76.966666,61.966666,413.111107


In [90]:
hdwy_dev_filtered= non_null_headway[non_null_headway['HDWY_DEV'].between(15, 30, inclusive='both')]

In [89]:
hdwy_per_filtered= non_null_headway[non_null_headway['HDWY_DEV_PCT'].between(150, 500, inclusive='both')]

In [91]:
hdwy_dev_filtered.sort_values(['ACTUAL_HDWY'])

Unnamed: 0,DATE,DAY_TYPE,IS_HOLIDAY,SCHEDULED_TIME,ACTUAL_ARRIVAL_TIME,TIME_POINT_ABBR,SCHEDULED_HDWY,ACTUAL_HDWY,HDWY_DEV,HDWY_DEV_PCT
337257,2024-04-18,Weekday,False,18:04:00,18:06:39,WHBG,12.0,27.366666,15.366666,128.055550
301265,2024-02-28,Weekday,False,15:06:00,15:24:02,28&CHARL,13.0,28.033333,15.033333,115.641023
523075,2024-12-31,Weekday,False,18:00:00,17:59:55,CH46,13.0,28.133333,15.133333,116.410254
482561,2024-11-04,Weekday,False,15:06:00,15:06:24,28&CHARL,13.0,28.200000,15.200000,116.923077
527042,2025-01-06,Weekday,False,15:30:00,15:28:09,CH46,13.0,28.283333,15.283333,117.564100
...,...,...,...,...,...,...,...,...,...,...
560223,2025-02-21,Weekday,False,06:24:00,06:28:02,28&CHARL,35.0,63.966666,28.966666,82.761903
181967,2023-09-16,Saturday,False,05:54:00,06:16:44,28&CHARL,55.0,77.883333,22.883333,41.606060
181966,2023-09-16,Saturday,False,05:50:00,06:13:03,CH46,55.0,78.333333,23.333333,42.424242
73620,2023-04-15,Saturday,False,06:25:00,06:43:09,28&CHARL,62.0,79.650000,17.650000,28.467742


How has TSP affected these two metrics? Keep in mind that there are many other factors that could also be contributing, so be sure to take into account things like day of the week, time of day, time of year (school in session or not), or other factors that may also be affecting adherence or headway deviation.