In [3]:
import csv
from collections import defaultdict
import os

# Path to your GTFS files
GTFS_DIR = r"C:\Users\matth\Code\Python\Projects\nycmta-station-display\assets\gtfs_subway"


def _iter_gtfs_rows(filename):
    """Yield each data row of a GTFS text file in GTFS_DIR as a dict keyed by column name.

    The file is opened with newline='' as the csv module requires, so quoted
    fields containing line breaks are parsed correctly, and decoded as
    'utf-8-sig' so that a leading byte-order mark (if any) does not end up
    glued to the first column name.

    Raises:
        FileNotFoundError: if the file does not exist under GTFS_DIR.
    """
    path = os.path.join(GTFS_DIR, filename)
    with open(path, 'r', encoding='utf-8-sig', newline='') as f:
        yield from csv.DictReader(f)


# Step 1: Load stops.txt (stop_id -> stop_name; a repeated stop_id keeps its last row)
stop_id_to_name = {row['stop_id']: row['stop_name'] for row in _iter_gtfs_rows('stops.txt')}

# Step 2: Load trips.txt (trip_id -> route_id; a repeated trip_id keeps its last row)
trip_to_route = {row['trip_id']: row['route_id'] for row in _iter_gtfs_rows('trips.txt')}

# Step 3: Map route_id to stop_ids via stop_times.txt
route_to_stop_ids = defaultdict(set)
for row in _iter_gtfs_rows('stop_times.txt'):
    route_id = trip_to_route.get(row['trip_id'])
    # Skip rows whose trip_id is missing from trips.txt (None) or whose route_id is empty.
    if route_id:
        route_to_stop_ids[route_id].add(row['stop_id'])

# Step 4: Print each route and its unique stations
for route_id, stop_ids in sorted(route_to_stop_ids.items()):
    # Fall back to the raw stop_id when stops.txt has no name for it; the set
    # collapses different stop_ids that share the same stop_name.
    station_names = sorted({stop_id_to_name.get(stop_id, stop_id) for stop_id in stop_ids})
    print(f"\nRoute {route_id}:")
    for name in station_names:
        print(f"  - {name}")



Route 1:
  - 103 St
  - 116 St-Columbia University
  - 125 St
  - 137 St-City College
  - 14 St
  - 145 St
  - 157 St
  - 168 St-Washington Hts
  - 18 St
  - 181 St
  - 191 St
  - 207 St
  - 215 St
  - 23 St
  - 231 St
  - 238 St
  - 28 St
  - 34 St-Penn Station
  - 50 St
  - 59 St-Columbus Circle
  - 66 St-Lincoln Center
  - 72 St
  - 79 St
  - 86 St
  - 96 St
  - Canal St
  - Cathedral Pkwy (110 St)
  - Chambers St
  - Christopher St-Stonewall
  - Dyckman St
  - Franklin St
  - Houston St
  - Marble Hill-225 St
  - Rector St
  - South Ferry
  - Times Sq-42 St
  - Van Cortlandt Park-242 St
  - WTC Cortlandt

Route 2:
  - 116 St
  - 125 St
  - 135 St
  - 14 St
  - 149 St-Grand Concourse
  - 174 St
  - 18 St
  - 219 St
  - 225 St
  - 23 St
  - 233 St
  - 28 St
  - 3 Av-149 St
  - 34 St-Penn Station
  - 50 St
  - 59 St-Columbus Circle
  - 66 St-Lincoln Center
  - 72 St
  - 79 St
  - 86 St
  - 96 St
  - Allerton Av
  - Atlantic Av-Barclays Ctr
  - Bergen St
  - Beverly Rd
  - Borough Hal

In [5]:
import pandas as pd
import os

# Path to GTFS files
GTFS_DIR = r"C:\Users\matth\Code\Python\Projects\nycmta-station-display\assets\gtfs_subway"

# Load GTFS files into dataframes.
# The ID columns are pinned to str: left to inference, read_csv can parse
# numeric-looking IDs as integers (or as mixed types when it processes a large
# file in chunks), which drops leading zeros and makes the merges and the
# route_id sort below fail or mismatch.
stops_df = pd.read_csv(os.path.join(GTFS_DIR, 'stops.txt'), dtype={'stop_id': str})
trips_df = pd.read_csv(os.path.join(GTFS_DIR, 'trips.txt'), dtype={'trip_id': str, 'route_id': str})
stop_times_df = pd.read_csv(
    os.path.join(GTFS_DIR, 'stop_times.txt'),
    dtype={'trip_id': str, 'stop_id': str},
)
routes_df = pd.read_csv(os.path.join(GTFS_DIR, 'routes.txt'), dtype={'route_id': str})

# Merge trips with stop_times to associate stop sequences with routes
trip_stop_times = pd.merge(stop_times_df, trips_df[['trip_id', 'route_id']], on='trip_id')

# To avoid duplications and inconsistencies, select a single trip per route as representative.
# NOTE(review): this takes whichever trip_id appears first for each route in the
# merged frame; if that trip does not serve the full line, the station list for
# the route will be partial - confirm this is acceptable for the display.
representative_trips = trip_stop_times.groupby('route_id')['trip_id'].first().reset_index()

# Filter only the stop_times for those representative trips
rep_trip_ids = representative_trips['trip_id'].tolist()

# Attach stop names (left join keeps stops that have no entry in stops.txt),
# then order each route's stops by their position within the trip.
# Built as one chain with no in-place mutation so re-running the cell is idempotent.
rep_stop_times = (
    trip_stop_times[trip_stop_times['trip_id'].isin(rep_trip_ids)]
    .merge(stops_df[['stop_id', 'stop_name']], on='stop_id', how='left')
    .astype({'stop_sequence': int})
    .sort_values(['route_id', 'stop_sequence'])
)

# Keep relevant columns, with a clean 0..n-1 index
route_stations_df = (
    rep_stop_times[['route_id', 'stop_sequence', 'stop_id', 'stop_name']]
    .reset_index(drop=True)
)

# Display or use the DataFrame
print(route_stations_df)

# Example: Save to CSV
# route_stations_df.to_csv('mta_route_stations.csv', index=False)


    route_id  stop_sequence stop_id                  stop_name
0          1              1    101S  Van Cortlandt Park-242 St
1          1              2    103S                     238 St
2          1              3    104S                     231 St
3          1              4    106S         Marble Hill-225 St
4          1              5    107S                     215 St
..       ...            ...     ...                        ...
778        Z             20    M19S                     Bowery
779        Z             21    M20S                   Canal St
780        Z             22    M21S                Chambers St
781        Z             23    M22S                  Fulton St
782        Z             24    M23S                   Broad St

[783 rows x 4 columns]


In [6]:
# Bare last expression: the notebook renders route_stations_df with its rich DataFrame display.
route_stations_df

Unnamed: 0,route_id,stop_sequence,stop_id,stop_name
0,1,1,101S,Van Cortlandt Park-242 St
1,1,2,103S,238 St
2,1,3,104S,231 St
3,1,4,106S,Marble Hill-225 St
4,1,5,107S,215 St
...,...,...,...,...
778,Z,20,M19S,Bowery
779,Z,21,M20S,Canal St
780,Z,22,M21S,Chambers St
781,Z,23,M22S,Fulton St
