In [1]:
import requests, zipfile, io, pandas as pd, networkx as nx

# 2 ── read the three GTFS tables we need
stops       = pd.read_csv("data/gtfs_subway/stops.txt")          # stop_id, stop_name …
trips       = pd.read_csv("data/gtfs_subway/trips.txt")          # trip_id, route_id …
stop_times  = pd.read_csv("data/gtfs_subway/stop_times.txt")     # trip_id, stop_id, stop_sequence …


In [None]:

# 3 ── map trip → route so we know which line each edge belongs to
trip2route = dict(zip(trips.trip_id, trips.route_id))

# 4 ── build a graph whose
#      • nodes = stop_id with attribute stop_name
#      • edges = consecutive stop pairs, attribute “lines” = list of route_ids serving that edge
G = nx.DiGraph()

# add all stations once
for row in stops.itertuples():
    G.add_node(row.stop_id, stop_name=row.stop_name)

# iterate through every trip’s ordered stops
for trip_id, group in stop_times.groupby("trip_id"):
    route = trip2route[trip_id]
    ordered = group.sort_values("stop_sequence").stop_id.to_list()
    for u, v in zip(ordered[:-1], ordered[1:]):
        if G.has_edge(u, v):
            G[u][v]["lines"].add(route)
        else:
            G.add_edge(u, v, lines={route})

# 5 ── convert each edge’s set to a sorted list for easier JSON serialisation later
for u, v, data in G.edges(data=True):
    data["lines"] = sorted(data["lines"])

In [3]:
G.nodes

NodeView(('101', '101N', '101S', '103', '103N', '103S', '104', '104N', '104S', '106', '106N', '106S', '107', '107N', '107S', '108', '108N', '108S', '109', '109N', '109S', '110', '110N', '110S', '111', '111N', '111S', '112', '112N', '112S', '113', '113N', '113S', '114', '114N', '114S', '115', '115N', '115S', '116', '116N', '116S', '117', '117N', '117S', '118', '118N', '118S', '119', '119N', '119S', '120', '120N', '120S', '121', '121N', '121S', '122', '122N', '122S', '123', '123N', '123S', '124', '124N', '124S', '125', '125N', '125S', '126', '126N', '126S', '127', '127N', '127S', '128', '128N', '128S', '129', '129N', '129S', '130', '130N', '130S', '131', '131N', '131S', '132', '132N', '132S', '133', '133N', '133S', '134', '134N', '134S', '135', '135N', '135S', '136', '136N', '136S', '137', '137N', '137S', '138', '138N', '138S', '139', '139N', '139S', '140', '140N', '140S', '142', '142N', '142S', '201', '201N', '201S', '204', '204N', '204S', '205', '205N', '205S', '206', '206N', '206S', '

In [5]:
lookup_sid   = dict(zip(stops.stop_name, stops.stop_id))
lookup_name  = dict(zip(stops.stop_id, stops.stop_name))

In [67]:
lookup_name["L01S"]

'8 Av'

In [9]:
print(lookup_sid["1 Av"])
print(lookup_sid["Bedford Av"])

L06S
L08S


In [63]:
u = "L01S"
G[u]

AtlasView({'L02S': {'lines': ['L']}})

In [27]:
lookup_name["L06N"]

'1 Av'

In [83]:
def ordered_stops(route: str, direction: int) -> list[str]:
    """Return the list of stop_ids (SIDs) for this line and direction (0=north/east, 1=south/west)."""
    # 1 – pick any trip that matches
    trip_id = trips.query("route_id==@route and direction_id==@direction").trip_id.iloc[0]
    # 2 – grab its ordered stop_ids
    return (stop_times.query("trip_id==@trip_id")
                       .sort_values("stop_sequence")
                       .stop_id
                       .tolist())

In [91]:
route = "L"
direction = 1

trips.query(
            "route_id==@route and direction_id==@direction"
        )

Unnamed: 0,route_id,trip_id,service_id,trip_headsign,direction_id,shape_id
10736,L,BFA24GEN-L026-Saturday-00_005400_L..S01R,Saturday,Canarsie-Rockaway Pkwy,1,L..S01R
10738,L,BFA24GEN-L026-Saturday-00_006600_L..S01R,Saturday,Canarsie-Rockaway Pkwy,1,L..S01R
10739,L,BFA24GEN-L026-Saturday-00_007800_L..S01R,Saturday,Canarsie-Rockaway Pkwy,1,L..S01R
10741,L,BFA24GEN-L026-Saturday-00_009250_L..S01R,Saturday,Canarsie-Rockaway Pkwy,1,L..S01R
10743,L,BFA24GEN-L026-Saturday-00_011250_L..S01R,Saturday,Canarsie-Rockaway Pkwy,1,L..S01R
...,...,...,...,...,...,...
12217,L,BFA24GEN-L049-Weekday-00_144000_L..S01R,Weekday,Canarsie-Rockaway Pkwy,1,L..S01R
12218,L,BFA24GEN-L049-Weekday-00_145000_L..S01R,Weekday,Canarsie-Rockaway Pkwy,1,L..S01R
12219,L,BFA24GEN-L049-Weekday-00_146000_L..S01R,Weekday,Canarsie-Rockaway Pkwy,1,L..S01R
12220,L,BFA24GEN-L049-Weekday-00_147000_L..S01R,Weekday,Canarsie-Rockaway Pkwy,1,L..S01R


In [92]:
ordered_stops("L", 1)

['L01S',
 'L02S',
 'L03S',
 'L05S',
 'L06S',
 'L08S',
 'L10S',
 'L11S',
 'L12S',
 'L13S',
 'L14S',
 'L15S',
 'L16S',
 'L17S',
 'L19S',
 'L20S',
 'L21S',
 'L22S',
 'L24S',
 'L25S',
 'L26S',
 'L27S',
 'L28S',
 'L29S']

In [40]:
list(G.successors("L06N"))

['L05N']

In [43]:
nx.get_node_attributes(G, "stop_name") 

{'101': 'Van Cortlandt Park-242 St',
 '101N': 'Van Cortlandt Park-242 St',
 '101S': 'Van Cortlandt Park-242 St',
 '103': '238 St',
 '103N': '238 St',
 '103S': '238 St',
 '104': '231 St',
 '104N': '231 St',
 '104S': '231 St',
 '106': 'Marble Hill-225 St',
 '106N': 'Marble Hill-225 St',
 '106S': 'Marble Hill-225 St',
 '107': '215 St',
 '107N': '215 St',
 '107S': '215 St',
 '108': '207 St',
 '108N': '207 St',
 '108S': '207 St',
 '109': 'Dyckman St',
 '109N': 'Dyckman St',
 '109S': 'Dyckman St',
 '110': '191 St',
 '110N': '191 St',
 '110S': '191 St',
 '111': '181 St',
 '111N': '181 St',
 '111S': '181 St',
 '112': '168 St-Washington Hts',
 '112N': '168 St-Washington Hts',
 '112S': '168 St-Washington Hts',
 '113': '157 St',
 '113N': '157 St',
 '113S': '157 St',
 '114': '145 St',
 '114N': '145 St',
 '114S': '145 St',
 '115': '137 St-City College',
 '115N': '137 St-City College',
 '115S': '137 St-City College',
 '116': '125 St',
 '116N': '125 St',
 '116S': '125 St',
 '117': '116 St-Columbia Un

In [46]:
names = [G.nodes[s]["stop_name"] for s in ordered_stops("L", 0)]
names

['Canarsie-Rockaway Pkwy',
 'East 105 St',
 'New Lots Av',
 'Livonia Av',
 'Sutter Av',
 'Atlantic Av',
 'Broadway Junction',
 'Bushwick Av-Aberdeen St',
 'Wilson Av',
 'Halsey St',
 'Myrtle-Wyckoff Avs',
 'DeKalb Av',
 'Jefferson St',
 'Morgan Av',
 'Montrose Av',
 'Grand St',
 'Graham Av',
 'Lorimer St',
 'Bedford Av',
 '1 Av',
 '3 Av',
 '14 St-Union Sq',
 '6 Av',
 '8 Av']

In [66]:
from collections import defaultdict

route_graphs = defaultdict(nx.Graph)
for u, v, data in G.edges(data=True):
    for route in data["route_id"]:            # edge may be served by many lines
        route_graphs[route].add_edge(u, v)

def has_same_line_path(src, dst, route):
    """True iff a train on `route` can carry you from src → dst without transfer."""
    g = route_graphs.get(route)
    return g and nx.has_path(g, src, dst)

KeyError: 'route_id'

In [69]:
G.nodes

NodeView(('101', '101N', '101S', '103', '103N', '103S', '104', '104N', '104S', '106', '106N', '106S', '107', '107N', '107S', '108', '108N', '108S', '109', '109N', '109S', '110', '110N', '110S', '111', '111N', '111S', '112', '112N', '112S', '113', '113N', '113S', '114', '114N', '114S', '115', '115N', '115S', '116', '116N', '116S', '117', '117N', '117S', '118', '118N', '118S', '119', '119N', '119S', '120', '120N', '120S', '121', '121N', '121S', '122', '122N', '122S', '123', '123N', '123S', '124', '124N', '124S', '125', '125N', '125S', '126', '126N', '126S', '127', '127N', '127S', '128', '128N', '128S', '129', '129N', '129S', '130', '130N', '130S', '131', '131N', '131S', '132', '132N', '132S', '133', '133N', '133S', '134', '134N', '134S', '135', '135N', '135S', '136', '136N', '136S', '137', '137N', '137S', '138', '138N', '138S', '139', '139N', '139S', '140', '140N', '140S', '142', '142N', '142S', '201', '201N', '201S', '204', '204N', '204S', '205', '205N', '205S', '206', '206N', '206S', '

In [82]:
nx.shortest_path(G, 'L01S', '128')

NetworkXNoPath: No path between L01S and 128.

In [76]:
lookup_sid

{'Van Cortlandt Park-242 St': '101S',
 '238 St': '103S',
 '231 St': '104S',
 'Marble Hill-225 St': '106S',
 '215 St': '107S',
 '207 St': '108S',
 'Dyckman St': 'A03S',
 '191 St': '110S',
 '181 St': 'A06S',
 '168 St-Washington Hts': '112S',
 '157 St': '113S',
 '145 St': 'D13S',
 '137 St-City College': '115S',
 '125 St': 'A15S',
 '116 St-Columbia University': '117S',
 'Cathedral Pkwy (110 St)': 'A17S',
 '103 St': 'A18S',
 '96 St': 'Q05S',
 '86 St': 'R44S',
 '79 St': 'B18S',
 '72 St': 'Q03S',
 '66 St-Lincoln Center': '124S',
 '59 St-Columbus Circle': 'A24S',
 '50 St': 'B14S',
 'Times Sq-42 St': 'R16S',
 '34 St-Penn Station': 'A28S',
 '28 St': 'R18S',
 '23 St': 'R19S',
 '18 St': '131S',
 '14 St': 'D19S',
 'Christopher St-Stonewall': '133S',
 'Houston St': '134S',
 'Canal St': 'R23S',
 'Franklin St': '136S',
 'Chambers St': 'M21S',
 'WTC Cortlandt': '138S',
 'Rector St': 'R26S',
 'South Ferry Loop': '140S',
 'South Ferry': '142S',
 'Wakefield-241 St': '201S',
 'Nereid Av': '204S',
 '233 St'