In [1]:
import pandas as pd

## Find weekend-only and weekday-only stops

In [None]:
# Read the five GTFS tables used by this analysis (paths are relative to the notebook).
calendar, calendar_dates, trips, stop_times, stops = map(
    pd.read_csv,
    [
        "./data/calendar.txt",
        "./data/calendar_dates.txt",
        "./data/trips.txt",
        "./data/stop_times.txt",
        "./data/stops.txt",
    ],
)

# Bring the calendar columns onto each trip via its service_id.
# Left join: trips whose service_id is absent from calendar.txt are kept,
# with NaN in the columns that come from calendar.
trips = pd.merge(trips, calendar, how="left", on="service_id")

# Determine days active for each trip
def get_trip_type(row):
    """Classify one trip row by the days of the week its service runs.

    Parameters
    ----------
    row : mapping-like (e.g. a row of the trips/calendar join)
        Must provide 'service_id' and the seven day flags
        'monday' .. 'sunday'. Flags that are NaN (no calendar match)
        compare unequal to both 0 and 1.

    Returns
    -------
    str
        "weekday_only"     - runs Monday-Friday and on neither weekend day.
        "weekend_only"     - runs Saturday and Sunday and on no weekday.
        "holiday_adjusted" - neither of the above, and the service_id is
                             listed in the module-level ``calendar_dates``.
        "other"            - everything else (e.g. a seven-day service
                             with no calendar_dates entry).

    Notes
    -----
    Reads the module-level ``calendar_dates`` frame; it must be loaded
    before this function is applied.
    """
    weekday_names = ('monday', 'tuesday', 'wednesday', 'thursday', 'friday')
    weekend_names = ('saturday', 'sunday')

    runs_every_weekday = all(row[day] == 1 for day in weekday_names)
    runs_no_weekday = all(row[day] == 0 for day in weekday_names)
    runs_whole_weekend = all(row[day] == 1 for day in weekend_names)
    runs_no_weekend_day = all(row[day] == 0 for day in weekend_names)

    # "Only" must be exclusive: a service that also runs on the other part of
    # the week is not weekday-only / weekend-only.
    if runs_every_weekday and runs_no_weekend_day:
        return "weekday_only"
    if runs_whole_weekend and runs_no_weekday:
        return "weekend_only"
    if row['service_id'] in calendar_dates['service_id'].values:
        return "holiday_adjusted"
    return "other"

# Label every trip by applying get_trip_type row by row.
trips['trip_type'] = trips.apply(get_trip_type, axis='columns')

# Carry each trip's label onto its stop_times rows (stop_times rows are all kept).
trip_labels = trips[['trip_id', 'trip_type']]
trip_stops = pd.merge(stop_times, trip_labels, how='left', on='trip_id')

# Count stop_times rows per (stop, trip type), one column per trip type;
# combinations that never occur are filled with 0.
visits_per_stop_and_type = trip_stops.groupby(['stop_id', 'trip_type']).size()
summary = visits_per_stop_and_type.unstack(fill_value=0)

# Keep only stops that have a non-zero count in more than one trip-type column.
trip_types_per_stop = summary.gt(0).sum(axis=1)
multi_behavior_stops = summary.loc[trip_types_per_stop > 1].reset_index()

# Add the stop name for each remaining stop_id.
stop_names = stops[['stop_id', 'stop_name']]
multi_behavior_stops = pd.merge(multi_behavior_stops, stop_names, how='left', on='stop_id')

In [13]:
# Preview the first 20 stops that are served by more than one trip type.
print(
    "Test-worthy stops where behavior varies by date:\n",
    multi_behavior_stops.head(20),
    sep="\n",
)

# Write every intermediate table to CSV: (frame, destination, keep the index?).
# NOTE(review): the summary goes to the working directory while every other
# export goes to ./results - confirm whether that is intended.
csv_exports = [
    (multi_behavior_stops, "./results/multi_behavior_stops.csv", False),
    (summary, "stop_trip_summary.csv", True),
    (trips, "./results/enriched_trips.csv", False),
    (trip_stops, "./results/enriched_stop_times.csv", False),
    (calendar, "./results/calendar.csv", False),
    (calendar_dates, "./results/calendar_dates.csv", False),
]
for frame, destination, keep_index in csv_exports:
    frame.to_csv(destination, index=keep_index)

Test-worthy stops where behavior varies by date:

    stop_id  holiday_adjusted  other  weekday_only  \
0       100               254    166             0   
1       101               254    166             0   
2       102               362    326             0   
3       103               254    166             0   
4       104               254    166             0   
5       105               254    166             0   
6       106               254    166             0   
7       107               357    217             0   
8       108               254    166             0   
9       109               503    285             0   
10      111               504    285             0   
11      120               502    349            72   
12      121               254    166             0   
13      122               254    166             0   
14      123               357    217             0   
15      124               254    166             0   
16      125               254   

In [17]:
import pandas as pd

# Load GTFS core files
calendar = pd.read_csv("./data/calendar.txt")
calendar_dates = pd.read_csv("./data/calendar_dates.txt")
trips = pd.read_csv("./data/trips.txt")
stop_times = pd.read_csv("./data/stop_times.txt")
stops = pd.read_csv("./data/stops.txt")
routes = pd.read_csv("./data/routes.txt")
agency = pd.read_csv("./data/agency.txt")

# Attach the calendar columns (including the monday..sunday flags) to each trip.
# Left join: trips without a calendar row keep NaN flags.
trips = trips.merge(calendar, on="service_id", how="left")
# Give calendar_dates exceptions a readable label (1 -> 'added', 2 -> 'removed').
calendar_dates['type'] = calendar_dates['exception_type'].map({1: 'added', 2: 'removed'})

# One row per (trip, stop visit), enriched with stop, route and agency names.
trip_stops = trips.merge(stop_times, on="trip_id", how="inner")
trip_stops = trip_stops.merge(stops[['stop_id', 'stop_name']], on='stop_id', how='left')
trip_stops = trip_stops.merge(routes[['route_id', 'route_short_name', 'agency_id']], on='route_id', how='left')
trip_stops = trip_stops.merge(agency[['agency_id', 'agency_name']], on='agency_id', how='left')

# Day-of-week masks. Comparisons against NaN flags are False, so trips with
# no calendar row land in neither group.
weekday_columns = ['monday', 'tuesday', 'wednesday', 'thursday', 'friday']
no_weekday_service = (trip_stops[weekday_columns] == 0).all(axis=1)
no_weekend_service = (trip_stops['saturday'] == 0) & (trip_stops['sunday'] == 0)
full_weekend_service = (trip_stops['saturday'] == 1) & (trip_stops['sunday'] == 1)

# Weekday-only: runs on Monday and on neither weekend day.
# NOTE(review): Monday is used as the stand-in for "runs on weekdays", so a
# Tue-Fri-only service is not picked up here - confirm that is intended.
# .assign() returns a new frame, so the 'type' label is never written into a
# slice of trip_stops (avoids SettingWithCopyWarning).
weekday_only = trip_stops[(trip_stops['monday'] == 1) & no_weekend_service].assign(type='weekday_only')

# Weekend-only: runs Saturday and Sunday and on NO weekday. Checking Monday
# alone would mislabel services that also run Tuesday-Friday.
weekend_only = trip_stops[full_weekend_service & no_weekday_service].assign(type='weekend_only')

# Services added on specific dates via calendar_dates (exception_type 1),
# enriched the same way as trip_stops. Inner joins drop rows with no match.
# NOTE(review): if a service has several added dates, each trip/stop row is
# repeated once per date, and 'date' is not among the exported columns.
holiday_services = calendar_dates[calendar_dates['type'] == 'added'].merge(trips, on='service_id')
holiday_services = holiday_services.merge(stop_times, on='trip_id')
holiday_services = holiday_services.merge(stops[['stop_id', 'stop_name']], on='stop_id')
holiday_services = holiday_services.merge(routes[['route_id', 'route_short_name', 'agency_id']], on='route_id')
holiday_services = holiday_services.merge(agency[['agency_id', 'agency_name']], on='agency_id')
# Replace the 'added' label with the group name used in the combined table.
holiday_services['type'] = 'holiday'

# Select useful columns
useful_columns = [
    'trip_id', 'stop_id', 'stop_name', 'arrival_time', 'departure_time',
    'route_id', 'route_short_name', 'agency_name', 'service_id', 'type'
]

# Combine all testable cases
testable = pd.concat([
    weekday_only[useful_columns],
    weekend_only[useful_columns],
    holiday_services[useful_columns]
])

# Sort by stop_name and type for easier testing
testable = testable.sort_values(['stop_name', 'type'])

# Show a sample
print(testable.head(20))

# Optionally, save to CSV for inspection
testable.to_csv("testable_arrivals_by_day.csv", index=False)


: 

In [15]:
# Preview the first 20 stops that are served by more than one trip type.
# Relies on multi_behavior_stops having been built by an earlier cell.
print(
    "Test-worthy stops where behavior varies by date:\n",
    multi_behavior_stops.head(20),
    sep="\n",
)


Test-worthy stops where behavior varies by date:

    stop_id  holiday_adjusted  other  weekday_only  \
0       100               254    166             0   
1       101               254    166             0   
2       102               362    326             0   
3       103               254    166             0   
4       104               254    166             0   
5       105               254    166             0   
6       106               254    166             0   
7       107               357    217             0   
8       108               254    166             0   
9       109               503    285             0   
10      111               504    285             0   
11      120               502    349            72   
12      121               254    166             0   
13      122               254    166             0   
14      123               357    217             0   
15      124               254    166             0   
16      125               254   