In [2]:
from mta_graph import SubwayGraph
from complexes import ComplexesData
import pandas as pd

# Station-complex lookup object; used further down to translate GTFS stop ids
# into complex ids and station names.
complexes = ComplexesData()
# Subway network wrapper; queried further down for ordered stops, connecting
# lines and shortest paths. `build_graph()` is called on it in the next cell.
mta = SubwayGraph()

In [3]:
# Build the graph on the shared `mta` instance (presumably required before the
# route queries made by the helper functions below -- confirm in mta_graph).
mta.build_graph()

In [4]:
def get_onboards_data(complex_id, year=2025, month=2, day_of_week="Tuesday", hour_of_day="9"):
    """Fetch origin-destination ridership for one origin complex, busiest destinations first.

    The defaults reproduce the query this notebook was written around
    (Tuesdays in February 2025 at hour "9"); pass other values to look at a
    different period without editing the function.

    Args:
        complex_id: Origin station complex id, forwarded as
            ``origin_station_complex_id``.
        year: Year forwarded to ``get_ridership_data``.
        month: Month forwarded to ``get_ridership_data``.
        day_of_week: Day name forwarded to ``get_ridership_data``.
        hour_of_day: Hour forwarded to ``get_ridership_data`` (the notebook
            passes it as a string).

    Returns:
        The frame returned by ``get_ridership_data``, sorted by
        ``estimated_average_ridership`` in descending order.
    """
    # Imported lazily, as in the original cell, so the client module is only
    # loaded when ridership data is actually requested.
    from socrata_od_client import get_ridership_data

    ridership = get_ridership_data(
        year=year,
        month=month,
        day_of_week=day_of_week,
        hour_of_day=hour_of_day,
        origin_station_complex_id=complex_id
    )

    return ridership.sort_values('estimated_average_ridership', ascending=False)

# Origin complex "120" is Bedford Av (L), as the origin_station_complex_name
# column of the displayed table shows.
df_onboards_120 = get_onboards_data("120")
df_onboards_120

Unnamed: 0,year,month,day_of_week,hour_of_day,origin_station_complex_id,origin_station_complex_name,destination_station_complex_id,destination_station_complex_name,estimated_average_ridership
157,2025,2,Tuesday,9,120,Bedford Av (L),602,"14 St-Union Sq (L,N,Q,R,W,4,5,6)",324.6645
209,2025,2,Tuesday,9,120,Bedford Av (L),610,"Grand Central-42 St (S,4,5,6,7)",192.6793
29,2025,2,Tuesday,9,120,Bedford Av (L),618,"14 St (A,C,E)/8 Av (L)",179.8185
110,2025,2,Tuesday,9,120,Bedford Av (L),611,"Times Sq-42 St (N,Q,R,W,S,1,2,3,7)/42 St (A,C,E)",116.7937
212,2025,2,Tuesday,9,120,Bedford Av (L),164,"34 St-Penn Station (A,C,E)",107.6208
...,...,...,...,...,...,...,...,...,...
189,2025,2,Tuesday,9,120,Bedford Av (L),603,"149 St-Grand Concourse (2,4,5)",0.2763
77,2025,2,Tuesday,9,120,Bedford Av (L),340,"Grand Army Plaza (2,3)",0.2763
117,2025,2,Tuesday,9,120,Bedford Av (L),3,"30 Av (N,W)",0.2733
119,2025,2,Tuesday,9,120,Bedford Av (L),71,8 Av (N),0.2733


In [5]:
# 0 = north/east-bound, 1 = south/west-bound
def get_ordered_stops(line: str, direction: int, return_type: int = 0):
    """Return the stops of ``line`` in travel order for ``direction``.

    Each stop id returned by ``mta.ordered_stops`` has its last character
    removed (presumably the direction suffix of the GTFS stop id -- confirm
    against ``mta_graph``) before it is resolved through ``complexes``.

    Args:
        line: Line name, e.g. ``"L"``.
        direction: 0 for north/east-bound, 1 for south/west-bound.
        return_type: Selects the shape of the result:
            0 -> dict mapping station name to integer complex id,
            1 -> list of integer complex ids,
            anything else -> list of station names.

    Returns:
        A dict or list as selected by ``return_type``, in the order given by
        ``mta.ordered_stops``. Note that the dict form keeps only one entry
        per station name.
    """
    # Query the graph once and reuse the result for both lookups; the ids and
    # names must describe the same sequence of stops.
    base_stop_ids = [stop[:-1] for stop in mta.ordered_stops(line, direction)]

    complex_ids = [
        int(complexes.get_complex_id_by_gtfs_stop_id(stop_id))
        for stop_id in base_stop_ids
    ]
    complex_names = [
        complexes.get_station_name_by_gtfs_id(stop_id)
        for stop_id in base_stop_ids
    ]

    if return_type == 0:
        return dict(zip(complex_names, complex_ids))
    if return_type == 1:
        return complex_ids
    return complex_names

In [43]:
def get_onboardings(origin_complex_id: str, line: str, direction: int, df_: pd.DataFrame, verbose: bool = True):
    """Estimate how many riders from ``origin_complex_id`` board ``line`` in ``direction``.

    Every row of ``df_`` is one origin-destination pair with an
    ``estimated_average_ridership`` value. A row contributes as follows:

    * If at least one line connects origin and destination directly, the row
      counts only when ``line`` is one of those lines and the destination comes
      after the origin in the ordered stop list. Its ridership is divided
      evenly by the number of connecting lines.
    * If no line connects them directly, every path from
      ``mta.all_shortest_paths`` is inspected. The number of lines connecting
      the first hop of each path is added to a total; a path is a match when
      ``line`` serves that first hop and the hop ends at a stop after the
      origin. The row's ridership is scaled by ``matches / total``.

    Args:
        origin_complex_id: Complex id of the boarding station, as a string.
        line: Line name, e.g. ``"L"``.
        direction: Direction code passed to ``get_ordered_stops``.
        df_: Frame with ``destination_station_complex_id`` and
            ``estimated_average_ridership`` columns.
        verbose: When True (default) print the per-destination trace.

    Returns:
        ``(onboardings, boardings_df)`` where ``onboardings`` is the summed
        estimate and ``boardings_df`` holds the contributing rows of ``df_``
        with their ridership replaced by the attributed share.

    Raises:
        ValueError: If ``origin_complex_id`` is not a stop of ``line`` in
            ``direction``.
    """
    log = print if verbose else (lambda *args, **kwargs: None)
    ridership_col = "estimated_average_ridership"

    stops_on_line = get_ordered_stops(line, direction, 1)
    origin_id = int(origin_complex_id)
    if origin_id not in stops_on_line:
        raise ValueError(
            f"Complex {origin_complex_id} is not a stop on line {line} in direction {direction}"
        )
    # Loop-invariant, and needed by BOTH branches below: computing it up front
    # avoids reading an unassigned name when the first row needs a transfer.
    stops_after = stops_on_line[stops_on_line.index(origin_id) + 1:]

    onboardings = 0
    boarding_rows = []

    # Read the two columns directly: taking a whole row with .iloc[i] upcasts
    # integer ids to float when every column of the frame is numeric.
    row_values = zip(df_["destination_station_complex_id"], df_[ridership_col])
    for i, (destination, ridership) in enumerate(row_values):
        log()
        destination_complex_id = str(destination)
        connecting_lines = mta.connecting_lines(origin_complex_id, destination_complex_id)

        log(f"Destination complex id: {destination_complex_id}")
        log(f"Connecting lines: {connecting_lines}")

        boarded = None  # share of this row attributed to `line`, if any

        if len(connecting_lines) > 0:
            if line in connecting_lines:
                log(f"Stops after stop {origin_complex_id}: {stops_after}")

                if int(destination_complex_id) in stops_after:
                    log(f"{destination_complex_id} is in {stops_after}")
                    boarded = ridership / len(connecting_lines)
                else:
                    log(f"{destination_complex_id} is not in stops_after")
        else:
            shortest_paths = mta.all_shortest_paths(origin_complex_id, destination_complex_id)
            log(f"From {origin_complex_id} to {destination_complex_id}, shortest paths: {shortest_paths}")

            total_paths = 0
            num_paths = 0
            for path in shortest_paths:
                if len(path) < 2:
                    # No first hop to inspect (e.g. origin equals destination).
                    continue
                first_stop, second_stop = path[0], path[1]
                connections = mta.connecting_lines(first_stop, second_stop)
                # The distance is only reported in the trace; a hop that leaves
                # `line` has no position on it and must not abort the run.
                try:
                    distance = abs(
                        stops_on_line.index(int(first_stop)) - stops_on_line.index(int(second_stop))
                    )
                except ValueError:
                    distance = None
                log(f"From {first_stop} to {second_stop}, connections: {connections}, distance: {distance}")
                total_paths += len(connections)
                if line in connections and int(second_stop) in stops_after:
                    num_paths += 1
            log(f"Total paths: {total_paths}, num paths: {num_paths}")
            if total_paths > 0 and num_paths > 0:
                boarded = ridership * (num_paths / total_paths)

        if boarded is not None:
            onboardings += boarded
            attributed_row = df_.iloc[[i]].copy()
            attributed_row[ridership_col] = boarded
            boarding_rows.append(attributed_row)

    # Concatenate once: appending to an empty frame row by row is quadratic and
    # triggers pandas' FutureWarning about empty entries in concat.
    if boarding_rows:
        boardings_df = pd.concat(boarding_rows, ignore_index=True)
    else:
        boardings_df = pd.DataFrame(columns=df_.columns)

    return onboardings, boardings_df

In [44]:
# Estimate direction-1 (south/west-bound) L boardings at complex "120" from the
# first 50 rows of the ridership table, which is sorted busiest-first.
obs, df = get_onboardings("120", "L", 1, df_onboards_120.iloc[:50])


Destination complex id: 602
Connecting lines: ['L']
Stops after stop 120: [629, 122, 123, 124, 125, 126, 127, 630, 129, 130, 131, 621, 133, 134, 135, 136, 137, 138]
602 is not in stops_after

Destination complex id: 610
Connecting lines: []
From 120 to 610, shortest paths: [['120', '602', '610']]
From 120 to 602, connections: ['L'], distance: 3
Total paths: 1, num paths: 0

Destination complex id: 618
Connecting lines: ['L']
Stops after stop 120: [629, 122, 123, 124, 125, 126, 127, 630, 129, 130, 131, 621, 133, 134, 135, 136, 137, 138]
618 is not in stops_after

Destination complex id: 611
Connecting lines: []
From 120 to 611, shortest paths: [['120', '602', '611'], ['120', '601', '611'], ['120', '618', '611'], ['120', '621', '611']]
From 120 to 602, connections: ['L'], distance: 3
From 120 to 601, connections: ['L'], distance: 4
From 120 to 618, connections: ['L'], distance: 5
From 120 to 621, connections: ['L'], distance: 12
Total paths: 4, num paths: 1

Destination complex id: 164


  boardings_df =  pd.concat([boardings_df, df_tmp], ignore_index=True)


In [35]:
# Show the summed boarding estimate, then the per-destination rows that
# contributed to it (ridership already scaled to the attributed share).
print("Onboardings: ", obs)
df

Onboardings:  2290.068751515151


Unnamed: 0,year,month,day_of_week,hour_of_day,origin_station_complex_id,origin_station_complex_name,destination_station_complex_id,destination_station_complex_name,estimated_average_ridership
0,2025,2,Tuesday,9,120,Bedford Av (L),602,"14 St-Union Sq (L,N,Q,R,W,4,5,6)",324.6645
1,2025,2,Tuesday,9,120,Bedford Av (L),610,"Grand Central-42 St (S,4,5,6,7)",192.6793
2,2025,2,Tuesday,9,120,Bedford Av (L),618,"14 St (A,C,E)/8 Av (L)",179.8185
3,2025,2,Tuesday,9,120,Bedford Av (L),611,"Times Sq-42 St (N,Q,R,W,S,1,2,3,7)/42 St (A,C,E)",87.595275
4,2025,2,Tuesday,9,120,Bedford Av (L),164,"34 St-Penn Station (A,C,E)",53.8104
5,2025,2,Tuesday,9,120,Bedford Av (L),607,"34 St-Herald Sq (B,D,F,M,N,Q,R,W)",102.56
6,2025,2,Tuesday,9,120,Bedford Av (L),601,"14 St (F,M,1,2,3)/6 Av (L)",99.8065
7,2025,2,Tuesday,9,120,Bedford Av (L),225,"47-50 Sts-Rockefeller Ctr (B,D,F,M)",87.4332
8,2025,2,Tuesday,9,120,Bedford Av (L),628,"Fulton St (A,C,J,Z,2,3,4,5)",65.56575
9,2025,2,Tuesday,9,120,Bedford Av (L),119,1 Av (L),72.1755


In [9]:
# Sanity check: lines serving complex 621, one of the transfer points that
# appears in the shortest paths from 120 to 611 in the trace above.
mta.lines_at_complex_id("621")

['A', 'C', 'J', 'L', 'Z']

In [None]:
# Complex ids of the L stops in direction 1 (return_type=1 -> list of ids).
l_stops = get_ordered_stops("L", 1, 1)

# Number of stops between complex 120 and complex 621 along that ordering;
# cross-checks the "distance" value printed by get_onboardings.
abs(l_stops.index(120) - l_stops.index(621))

12