In [1]:
from pydantic import BaseModel


class ServerConfig(BaseModel):
    """Settings parsed from the server configuration JSON file."""
    # Name of the OSM area; interpolated into the Overpass query as area["name"="..."].
    area_name: str
    # URL of the GTFS feed archive (not referenced by the code visible in this notebook).
    gtfs_url: str
    # Line numbers, compared against the GTFS route_long_name, that are skipped when mapping.
    ignored_lines: list[str]
    # Manual overrides: GTFS stop id -> OSM node id. These replace any derived mapping
    # for the stop and the node ids are also requested explicitly in the Overpass query.
    custom_stop_nodes: dict[str, int]


# The configuration contains non-ASCII text (e.g. the area name), so decode it
# explicitly as UTF-8 instead of relying on the platform's default locale encoding.
with open("krakow_server_config.json", encoding="utf-8") as file:
    server_config = ServerConfig.model_validate_json(file.read())

# Display the parsed configuration as the cell output.
server_config

ServerConfig(area_name='Kraków', gtfs_url='https://gtfs.ztp.krakow.pl/GTFS_KRK_T.zip', ignored_lines=['99'], custom_stop_nodes={'stop_272_46419': 1770194211, 'stop_273_46529': 2163355814, 'stop_274_46619': 2756848361, 'stop_294_61419': 3114829955, 'stop_314_92239': 629106153})

In [2]:
from zipfile import ZipFile
import pandas as pd


def _read_gtfs_table(archive: ZipFile, member: str, index_column: str | None = None) -> pd.DataFrame:
    """Read one CSV member of the GTFS archive, optionally indexed by ``index_column``."""
    with archive.open(member) as table_file:
        table = pd.read_csv(table_file)
    return table if index_column is None else table.set_index(index_column)


# Load the four GTFS tables used by the rest of the notebook from the feed archive.
with ZipFile("GTFS_KRK_T_test.zip") as gtfs_archive:
    gtfs_stops = _read_gtfs_table(gtfs_archive, "stops.txt", "stop_id")
    gtfs_routes = _read_gtfs_table(gtfs_archive, "routes.txt", "route_id")
    gtfs_trips = _read_gtfs_table(gtfs_archive, "trips.txt", "trip_id")
    # stop_times keeps its default positional index.
    gtfs_stop_times = _read_gtfs_table(gtfs_archive, "stop_times.txt")

In [3]:
gtfs_stops.head()

Unnamed: 0_level_0,stop_code,stop_name,stop_desc,stop_lat,stop_lon,zone_id,stop_url,location_type,parent_station,stop_timezone,wheelchair_boarding,platform_code
stop_id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1
stop_1_1,,Grodzki Urząd Pracy,,50.0873,20.0596,,,0,,,,
stop_1_2,,Grodzki Urząd Pracy,,50.0874,20.0597,,,0,,,,
stop_2_4,,Kościuszkowców,,50.016651,19.929831,,,0,,,,
stop_2_5,,Kościuszkowców,,50.015661,19.929342,,,0,,,,
stop_3_6,,Solvay,,50.014561,19.927837,,,0,,,,


In [4]:
gtfs_routes.head()

Unnamed: 0_level_0,agency_id,route_short_name,route_long_name,route_desc,route_type,route_url,route_color,route_text_color
route_id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1
route_31,agency_1,1,1,,900,,B60000,FFFFFF
route_43,agency_1,19,19,,900,,B60000,FFFFFF
route_46,agency_1,22,22,,900,,B60000,FFFFFF
route_53,agency_1,62,62,,900,,232323,FFFFFF
route_54,agency_1,64,64,,900,,232323,FFFFFF


In [5]:
gtfs_trips.head()

Unnamed: 0_level_0,route_id,service_id,trip_headsign,trip_short_name,direction_id,block_id,shape_id,wheelchair_accessible
trip_id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1
block_4_trip_1_service_4,route_31,service_4,Salwator,,0,block_4,shape_56,0
block_4_trip_2_service_4,route_31,service_4,Jarzębiny,,0,block_4,shape_23346,0
block_4_trip_3_service_4,route_31,service_4,Salwator,,1,block_4,shape_23347,0
block_4_trip_4_service_4,route_31,service_4,Jarzębiny,,0,block_4,shape_23346,0
block_4_trip_5_service_4,route_31,service_4,Salwator,,1,block_4,shape_23347,0


In [6]:
gtfs_stop_times.head()

Unnamed: 0,trip_id,arrival_time,departure_time,stop_id,stop_sequence,stop_headsign,pickup_type,drop_off_type,shape_dist_traveled,timepoint
0,block_4_trip_1_service_4,05:58:00,05:58:00,stop_293_61329,2,,1,0,0.3,1
1,block_4_trip_1_service_4,05:59:00,05:59:00,stop_292_61229,3,,1,0,0.487,1
2,block_4_trip_1_service_4,06:01:00,06:01:00,stop_285_58429,4,,1,0,1.104,1
3,block_4_trip_1_service_4,06:03:00,06:03:00,stop_284_57729,5,,1,0,1.812,1
4,block_4_trip_1_service_4,06:04:00,06:04:00,stop_283_57629,6,,1,0,2.314,1


In [7]:
from collections import defaultdict

# Column-oriented dict of stop_times keyed by (trip_id, stop_sequence).
gtfs_stop_times_dict = gtfs_stop_times.set_index(["trip_id", "stop_sequence"]).to_dict()
gtfs_stop_times_dict_for_stop_ids: dict[tuple[str, int], str] = gtfs_stop_times_dict["stop_id"]

# For every trip, collect its stop ids ordered by stop_sequence.
# Sorting the (key, value) pairs orders by the unique (trip_id, stop_sequence) key.
gtfs_stop_ids_by_trip_id: defaultdict[str, list[str]] = defaultdict(list)
for (trip_id, stop_sequence), stop_id in sorted(gtfs_stop_times_dict_for_stop_ids.items()):
    gtfs_stop_ids_by_trip_id[trip_id].append(stop_id)

In [8]:
import pickle
import overpy as op


# Overpass QL query restricted to the area named in the server config. It requests
# tram route relations, tram stop-position nodes, and the OSM nodes listed as
# manual overrides in `custom_stop_nodes`.
OVERPASS_QUERY = f"""
[out:json];
area["name"="{server_config.area_name}"]->.search_area;
(
    relation["route"="tram"](area.search_area);
  	node["railway"="tram_stop"]["public_transport"="stop_position"](area.search_area);
    node(id:{", ".join(map(str, server_config.custom_stop_nodes.values()))})(area.search_area);
);
out geom;
"""

# Run the query and store the result on disk; the next cell reads it back from this file.
query_result = op.Overpass().query(OVERPASS_QUERY)
with open("overpass_query_test.pkl", "wb") as file:
    pickle.dump(query_result, file)

In [9]:
# Reload the Overpass result written by the previous cell.
# NOTE(review): pickle.load executes arbitrary code from the file — only load a
# pickle that this notebook produced itself, never one from an untrusted source.
with open("overpass_query_test.pkl", "rb") as file:
    query_result: op.Result = pickle.load(file)

# Relations and nodes contained in the query result.
osm_relations = query_result.get_relations()
osm_nodes = query_result.get_nodes()

In [10]:
osm_relations

[<overpy.Relation id=172969>,
 <overpy.Relation id=172971>,
 <overpy.Relation id=172972>,
 <overpy.Relation id=172973>,
 <overpy.Relation id=175220>,
 <overpy.Relation id=175221>,
 <overpy.Relation id=175262>,
 <overpy.Relation id=175322>,
 <overpy.Relation id=175329>,
 <overpy.Relation id=176851>,
 <overpy.Relation id=176853>,
 <overpy.Relation id=223307>,
 <overpy.Relation id=223310>,
 <overpy.Relation id=943055>,
 <overpy.Relation id=956738>,
 <overpy.Relation id=960169>,
 <overpy.Relation id=968203>,
 <overpy.Relation id=1155838>,
 <overpy.Relation id=1155971>,
 <overpy.Relation id=2375052>,
 <overpy.Relation id=2459143>,
 <overpy.Relation id=2465242>,
 <overpy.Relation id=3147141>,
 <overpy.Relation id=3147263>,
 <overpy.Relation id=3147285>,
 <overpy.Relation id=3153214>,
 <overpy.Relation id=3153809>,
 <overpy.Relation id=3154308>,
 <overpy.Relation id=3154332>,
 <overpy.Relation id=3154922>,
 <overpy.Relation id=3154923>,
 <overpy.Relation id=3154965>,
 <overpy.Relation id=3154

In [11]:
osm_nodes

[<overpy.Node id=213578731 lat=50.0791355 lon=20.0650412>,
 <overpy.Node id=213585855 lat=50.0853912 lon=20.0385224>,
 <overpy.Node id=213585864 lat=50.0824060 lon=20.0329435>,
 <overpy.Node id=213585881 lat=50.0790506 lon=20.0264899>,
 <overpy.Node id=213585893 lat=50.0755265 lon=20.0201847>,
 <overpy.Node id=213585922 lat=50.0743017 lon=20.0046383>,
 <overpy.Node id=213585931 lat=50.0740094 lon=19.9989789>,
 <overpy.Node id=213592007 lat=50.0588320 lon=19.9623647>,
 <overpy.Node id=213593804 lat=50.0596288 lon=19.9762713>,
 <overpy.Node id=213593828 lat=50.0601810 lon=19.9673743>,
 <overpy.Node id=213597071 lat=50.0673967 lon=20.0053735>,
 <overpy.Node id=213602155 lat=50.0723553 lon=20.0285576>,
 <overpy.Node id=213605085 lat=50.0745301 lon=20.0462495>,
 <overpy.Node id=213643707 lat=50.0845139 lon=20.0206008>,
 <overpy.Node id=213644494 lat=50.0802621 lon=20.0265424>,
 <overpy.Node id=213645106 lat=50.0900802 lon=20.0214207>,
 <overpy.Node id=213645256 lat=50.0943601 lon=20.0111263

In [12]:
import string


def gtfs_stop_name_to_comparable(stop_name: str):
    """Normalize a GTFS stop name for comparison with OSM stop names.

    The name is lower-cased, then every dot, every space and every "(nż)"
    marker is removed, in that order.
    """
    comparable = stop_name.lower()
    for fragment in (".", " ", "(nż)"):
        comparable = comparable.replace(fragment, "")
    return comparable


def stop_name_to_comparable(stop_name: str):
    """Normalize an OSM stop name for comparison with GTFS stop names.

    The name is lower-cased, trailing digits are stripped (before spaces are
    removed, so only digits at the very end of the name are affected), and
    finally every dot and space is dropped.
    """
    without_trailing_digits = stop_name.lower().rstrip(string.digits)
    return "".join(
        character
        for character in without_trailing_digits
        if character not in ". "
    )


# Lookup of every node returned by the Overpass query, keyed by OSM node id.
osm_node_by_id = {
    item.id: item
    for item in osm_nodes
}

# For each relation, the member nodes that are part of the query result, in member order.
# The member's `ref` is compared directly instead of calling `member.resolve()`:
# resolve() raises for members that are not in the result, so it could never be used
# to filter them out, and it was also being evaluated twice per member.
stops_by_relation = {
    relation: [
        osm_node_by_id[member.ref]
        for member in relation.members
        if isinstance(member, op.RelationNode)
        and member.ref in osm_node_by_id
    ]
    for relation in osm_relations
}

# Normalized stop names per relation, aligned index-by-index with stops_by_relation.
# Nodes without a "name" tag are treated as having an empty name rather than
# crashing on None.
comparable_stop_names_by_relation = {
    relation: [
        stop_name_to_comparable(item.tags.get("name", ""))
        for item in stops
    ]
    for relation, stops in stops_by_relation.items()
}

def line_name_key_sort(item: str):
    """Sort key that orders relation names such as "Tram 10: A => B" by line number.

    The second whitespace-separated word is taken as the line number, with one
    trailing colon removed.

    Returns:
        The line number as an int, or -1 when the name has fewer than two words
        or the second word is not an integer. Always returning an int keeps the
        keys mutually comparable, so ``sorted`` cannot fail on mixed input, and
        names without a usable line number sort first.
    """
    words = item.split()
    if len(words) < 2:
        return -1

    line_number = words[1].removesuffix(":")

    try:
        return int(line_number)
    except ValueError:
        # Non-numeric designation (e.g. a letter suffix): no numeric position available.
        return -1


# Display the names of all tram relations ordered by line number (sanity check of the OSM data).
sorted((item.tags["name"] for item in stops_by_relation), key=line_name_key_sort)

['Tram 1: Jarzębiny => Salwator',
 'Tram 1: Salwator => Jarzębiny',
 'Tram 2: Cmentarz Rakowicki => Salwator',
 'Tram 2: Salwator => Cmentarz Rakowicki',
 'Tram 3: Nowy Bieżanów P+R → Krowodrza Górka P+R',
 'Tram 3: Krowodrza Górka P+R → Nowy Bieżanów P+R',
 'Tram 4: Elektromontaż => Bronowice Małe',
 'Tram 4: Bronowice Małe => Elektromontaż',
 'Tram 5: Jarzębiny → Krowodrza Górka P+R',
 'Tram 5: Krowodrza Górka P+R → Jarzębiny',
 'Tram 6: Mały Płaszów P+R => Cichy Kącik',
 'Tram 6: Cichy Kącik => Mały Płaszów P+R',
 'Tram 8: Bronowice Małe => Borek Fałęcki',
 'Tram 8: Borek Fałęcki => Bronowice Małe',
 'Tram 9: Mistrzejowice => Nowy Bieżanów P+R',
 'Tram 9: Nowy Bieżanów P+R => Mistrzejowice',
 'Tram 10: Pleszów => Kurdwanów P+R',
 'Tram 10: Kurdwanów P+R => Pleszów',
 'Tram 11: Czerwone Maki P+R -> Mały Płaszów',
 'Tram 11: Mały Płaszów => Czerwone Maki P+R',
 'Tram 13: Nowy Bieżanów => Bronowice',
 'Tram 13: Bronowice => Nowy Bieżanów P+R',
 'Tram 14: Bronowice => Mistrzejowice',
 '

In [13]:
import difflib
from collections import defaultdict


def is_longer_match(
    longest_match: difflib.Match,
    longest_relation: op.Relation,
    match_result: difflib.Match,
    match_relation: op.Relation,
):
    """Decide whether a candidate match should replace the current best match.

    Args:
        longest_match: Best match found so far.
        longest_relation: Relation that produced ``longest_match``.
        match_result: Candidate match.
        match_relation: Relation that produced ``match_result``.

    Returns:
        False when the candidate covers fewer than two stops or is shorter than
        the current best. Otherwise True when the candidate covers at least as
        large a fraction of its relation's stops as the current best covers of
        its own relation.
    """
    if match_result.size < 2 or longest_match.size > match_result.size:
        return False

    longest_relation_stop_count = len(stops_by_relation[longest_relation])
    match_relation_stop_count = len(stops_by_relation[match_relation])
    # Compare the two coverage fractions by cross-multiplication. This gives the
    # same answer as dividing when both counts are positive, and does not raise
    # ZeroDivisionError when the current best relation has no resolved stops
    # (possible for the initial placeholder relation).
    return (
        longest_match.size * match_relation_stop_count
        <= match_result.size * longest_relation_stop_count
    )


def find_longest_matching_relation(relations: list[op.Relation], gtfs_trip_stop_names: list[str]):
    """Pick the relation whose stop-name sequence best matches a GTFS trip.

    For every relation the longest contiguous run of equal names between the
    trip and the relation is computed; `is_longer_match` decides whether it
    replaces the current best.

    Args:
        relations: Candidate relations; must not be empty, the first one is the
            initial best.
        gtfs_trip_stop_names: Normalized stop names of the trip, in order.

    Returns:
        Tuple ``(best_match, best_relation)``. ``best_match`` is a zero-size
        match when no candidate qualified.
    """
    best_match = difflib.Match(0, 0, 0)
    best_relation = relations[0]
    trip_stop_count = len(gtfs_trip_stop_names)

    for candidate in relations:
        matcher = difflib.SequenceMatcher(
            None,
            gtfs_trip_stop_names,
            comparable_stop_names_by_relation[candidate],
        )
        candidate_match = matcher.find_longest_match(
            0, trip_stop_count, 0, len(stops_by_relation[candidate])
        )

        if not is_longer_match(best_match, best_relation, candidate_match, candidate):
            continue

        best_match = candidate_match
        best_relation = candidate

    return best_match, best_relation

 
def add_trip_to_mapping(
    gtfs_trip_id: str,
    relations: list[op.Relation],
    stop_mapping: dict[str, set[int]],
    start_stop_mapping: defaultdict[str, set[int]],
    end_stop_mapping: defaultdict[str, set[int]],
):
    """Record the GTFS stop -> OSM node pairs implied by one trip.

    The trip is aligned with its best matching relation. Stops strictly inside
    the matched run are added to ``stop_mapping``; the first and last stop of
    the run are left out. When the trip and the relation agree on all interior
    stop names, the trip's first and last stops are additionally recorded in
    ``start_stop_mapping`` / ``end_stop_mapping`` against the relation's first
    and last nodes.

    Returns:
        Tuple ``(match_size, relation)`` for the best matching relation.
    """
    trip_stop_ids = gtfs_stop_ids_by_trip_id[gtfs_trip_id]
    trip_stop_rows = gtfs_stops.loc[trip_stop_ids]
    trip_stop_names = list(map(gtfs_stop_name_to_comparable, trip_stop_rows["stop_name"]))

    best_match, best_relation = find_longest_matching_relation(relations, trip_stop_names)
    relation_nodes = stops_by_relation[best_relation]

    # Aligned slices of the trip and the relation covered by the match.
    matched_stop_ids = trip_stop_rows.iloc[best_match.a:best_match.a + best_match.size].index
    matched_nodes = relation_nodes[best_match.b:best_match.b + best_match.size]

    for position, (gtfs_stop_id, osm_node) in enumerate(zip(matched_stop_ids, matched_nodes)):
        is_interior = 0 < position < best_match.size - 1
        if not is_interior:
            continue
        stop_mapping[gtfs_stop_id].add(osm_node.id)

    relation_stop_names = comparable_stop_names_by_relation[best_relation]
    if trip_stop_names[1:-1] == relation_stop_names[1:-1]:
        first_stop_id, last_stop_id = trip_stop_ids[0], trip_stop_ids[-1]
        start_stop_mapping[first_stop_id].add(relation_nodes[0].id)
        end_stop_mapping[last_stop_id].add(relation_nodes[-1].id)

    return best_match.size, best_relation


def update_relations_for_route(
    route_number: str,
    gtfs_route_id: str,
    gtfs_stop_id_to_osm_node_id_mapping: dict[str, set[int]],
    start_gtfs_stop_id_to_osm_node_id_mapping: defaultdict[str, set[int]],
    end_gtfs_stop_id_to_osm_node_id_mapping: defaultdict[str, set[int]],
    longest_match_by_relation: dict[op.Relation, int],
    longest_relation_by_trip_id: dict[str, op.Relation],
    missing_relation_lines: list[str],
):
    """Process every trip of one GTFS route and update the shared accumulators.

    Relations are selected by comparing their "ref" tag with ``route_number``.
    If none exists, the route number is appended to ``missing_relation_lines``
    and nothing else happens. Otherwise each trip of the route is passed to
    `add_trip_to_mapping`, and the best relation per trip as well as the
    largest match size per relation are recorded. All results are written into
    the passed-in containers; the function returns None.
    """
    candidate_relations = list(
        filter(
            lambda relation: relation.tags.get("ref") == route_number,
            stops_by_relation,
        )
    )

    if len(candidate_relations) == 0:
        missing_relation_lines.append(route_number)
        return

    belongs_to_route = gtfs_trips["route_id"] == gtfs_route_id
    for gtfs_trip_id in gtfs_trips.loc[belongs_to_route].index:
        match_size, matched_relation = add_trip_to_mapping(
            str(gtfs_trip_id),
            candidate_relations,
            gtfs_stop_id_to_osm_node_id_mapping,
            start_gtfs_stop_id_to_osm_node_id_mapping,
            end_gtfs_stop_id_to_osm_node_id_mapping,
        )

        # Keep the largest match size seen so far for this relation.
        previous_best = longest_match_by_relation.get(matched_relation, 0)
        longest_match_by_relation[matched_relation] = max(previous_best, match_size)

        longest_relation_by_trip_id[gtfs_trip_id] = matched_relation


def detect_node_mapping_errors():
    gtfs_stop_id_to_osm_node_id_mapping: dict[str, set[int]] = {
        str(stop_id): set()
        for stop_id in gtfs_stops.index
    }
    start_gtfs_stop_id_to_osm_node_id_mapping: dict[str, set[int]] = defaultdict(set)
    end_gtfs_stop_id_to_osm_node_id_mapping: dict[str, set[int]] = defaultdict(set)

    longest_match_by_relation: dict[op.Relation, int] = {}
    longest_relation_by_trip_id: dict[str, op.Relation] = {}
    missing_relation_lines: list[str] = []
    for gtfs_route_id, gtfs_route_row in gtfs_routes.iterrows():
        route_number = str(gtfs_route_row["route_long_name"])
        if route_number in server_config.ignored_lines:
            continue

        update_relations_for_route(
            route_number,
            gtfs_route_id,
            gtfs_stop_id_to_osm_node_id_mapping,
            start_gtfs_stop_id_to_osm_node_id_mapping,
            end_gtfs_stop_id_to_osm_node_id_mapping,
            longest_match_by_relation,
            longest_relation_by_trip_id,
            missing_relation_lines
        )

    for gtfs_stop_id, node_id in server_config.custom_stop_nodes.items():
        gtfs_stop_id_to_osm_node_id_mapping[gtfs_stop_id] = {node_id}

    nodes_without_mapping: set[str] = set()
    gtfs_stop_id_to_node_id: dict[str, int] = {}
    nodes_with_conflict: dict[str, list[tuple[str | None, int]]] = {}
    for gtfs_stop_id, osm_node_ids in gtfs_stop_id_to_osm_node_id_mapping.items():
        match len(osm_node_ids):
            case 0:
                nodes_without_mapping.add(gtfs_stop_id)
            case 1:
                gtfs_stop_id_to_node_id[gtfs_stop_id] = next(iter(osm_node_ids))
            case _:
                nodes_with_conflict[gtfs_stop_id] = [
                    (osm_node_by_id[node_id].tags.get("name"), node_id)
                    for node_id in osm_node_ids
                ]

    start_gtfs_stop_id_to_node_ids = {
        gtfs_stop_id: list(node_ids)
        for gtfs_stop_id, node_ids in start_gtfs_stop_id_to_osm_node_id_mapping.items()
    }

    end_gtfs_stop_id_to_node_ids = {
        gtfs_stop_id: list(node_ids)
        for gtfs_stop_id, node_ids in end_gtfs_stop_id_to_osm_node_id_mapping.items()
    }

    nodes_without_mapping = (
        nodes_without_mapping
        .difference(start_gtfs_stop_id_to_osm_node_id_mapping)
        .difference(end_gtfs_stop_id_to_osm_node_id_mapping)
    )

    underutilized_relations = [
        relation
        for relation, stops in stops_by_relation.items()
        if longest_match_by_relation[relation] < len(stops)
    ]

    return (
        gtfs_stop_id_to_node_id,
        start_gtfs_stop_id_to_node_ids,
        end_gtfs_stop_id_to_node_ids,
        missing_relation_lines,
        nodes_with_conflict,
        nodes_without_mapping,
        underutilized_relations,
        longest_relation_by_trip_id
    )


# Run the matching once and unpack the results into notebook-level names;
# the following cells display them and later cells read them as globals.
(
    gtfs_stop_id_to_node_id,
    start_gtfs_stop_id_to_node_ids,
    end_gtfs_stop_id_to_node_ids,
    missing_relation_lines,
    nodes_with_conflict,
    nodes_without_mapping,
    underutilized_relations,
    longest_relation_by_trip_id,
) = detect_node_mapping_errors()

In [14]:
missing_relation_lines

['75']

In [15]:
start_gtfs_stop_id_to_node_ids

{'stop_213_31119': [2419732952],
 'stop_342_268529': [12297835419],
 'stop_186_7029': [2420286329],
 'stop_310_74719': [321429867],
 'stop_273_46519': [2163355813],
 'stop_353_274419': [321437391],
 'stop_572_303819': [2017955951],
 'stop_212_13519': [2419986540],
 'stop_238_37819': [213646624],
 'stop_1486_372439': [11872059166],
 'stop_840_317519': [287586332],
 'stop_1552_378519': [11498423077],
 'stop_329_253929': [1770978364],
 'stop_952_331019': [1889519517],
 'stop_841_317619': [5728702340],
 'stop_266_45819': [1764757832],
 'stop_272_46429': [1369031767],
 'stop_198_8919': [6527877110],
 'stop_196_8719': [2419831874, 2423298435],
 'stop_328_253829': [2427246149],
 'stop_574_304039': [4069652993],
 'stop_202_12419': [321458378],
 'stop_841_317630': [9955186239]}

In [16]:
end_gtfs_stop_id_to_node_ids

{'stop_342_268529': [12297835418],
 'stop_213_31119': [2419732952],
 'stop_310_74719': [2431859270],
 'stop_186_7029': [4559858147],
 'stop_273_46519': [2163355814],
 'stop_572_303819': [2017955948],
 'stop_353_274419': [321437391],
 'stop_238_37819': [2424134783],
 'stop_212_13519': [2419986541],
 'stop_840_317519': [287442350],
 'stop_1486_372439': [10180183263],
 'stop_329_253929': [213645106],
 'stop_952_331019': [2431520659],
 'stop_841_317619': [289685340],
 'stop_1552_378519': [11498423079],
 'stop_266_45819': [2427003514],
 'stop_272_46429': [1770194211],
 'stop_198_8919': [6527877110],
 'stop_196_8719': [2419831874, 2423298435],
 'stop_328_253829': [213648418],
 'stop_574_304039': [1768224723],
 'stop_202_12419': [2425718627],
 'stop_841_317630': [9955186238]}

In [17]:
gtfs_stop_id_to_node_id

{'stop_1_1': 11884770961,
 'stop_1_2': 11884770962,
 'stop_2_4': 629106146,
 'stop_2_5': 2422550518,
 'stop_3_6': 629106144,
 'stop_3_7': 2422538507,
 'stop_4_8': 2419959763,
 'stop_4_9': 2427903882,
 'stop_5_10': 2426058892,
 'stop_5_11': 213593828,
 'stop_6_12': 2426068337,
 'stop_6_13': 638606093,
 'stop_8_15': 2163355802,
 'stop_8_16': 213585931,
 'stop_183_6119': 10172286936,
 'stop_183_6129': 652730396,
 'stop_186_7019': 2420286330,
 'stop_186_7029': 2420286329,
 'stop_186_7039': 2423804378,
 'stop_187_7119': 2423804379,
 'stop_187_7129': 2420286331,
 'stop_188_7219': 2423789754,
 'stop_188_7229': 2423789750,
 'stop_189_7319': 5018097188,
 'stop_189_7329': 2429438683,
 'stop_189_7339': 2420153435,
 'stop_189_7349': 2424198631,
 'stop_191_7719': 2419894822,
 'stop_191_7729': 470643346,
 'stop_191_7739': 2419959769,
 'stop_193_7919': 2419959766,
 'stop_193_7929': 2419959767,
 'stop_194_8319': 2419986545,
 'stop_194_8329': 2419986546,
 'stop_195_8419': 2419986538,
 'stop_195_8429': 

In [18]:
nodes_with_conflict

{}

In [19]:
nodes_without_mapping

set()

In [20]:
underutilized_relations

[]

In [21]:
import random


def get_node_id_for_trip_stop(
    gtfs_stop_id: str, gtfs_stop_sequence: int, total_stops: int
):
    """Resolve the OSM node id for one stop of a trip.

    A stop with an unambiguous mapping uses that node. Otherwise a node is
    drawn at random from the start or end candidates of the stop. When the stop
    has both, the start candidates are used in the first half of the trip
    (``gtfs_stop_sequence < total_stops / 2``) and the end candidates in the
    second half.

    Returns:
        The node id, or None when the stop has no mapping of any kind.
    """
    if gtfs_stop_id in gtfs_stop_id_to_node_id:
        return gtfs_stop_id_to_node_id[gtfs_stop_id]

    start_candidates = start_gtfs_stop_id_to_node_ids.get(gtfs_stop_id)
    end_candidates = end_gtfs_stop_id_to_node_ids.get(gtfs_stop_id)

    if start_candidates is None and end_candidates is None:
        return None

    if end_candidates is None:
        candidates = start_candidates
    elif start_candidates is None:
        candidates = end_candidates
    elif gtfs_stop_sequence < total_stops / 2:
        candidates = start_candidates
    else:
        candidates = end_candidates

    return random.choice(candidates)


def get_stop_nodes_by_gtfs_trip_id():
    """Produce the ordered OSM node ids for every matched GTFS trip.

    A trip whose interior stop names equal those of its best relation takes the
    relation's node list as is. Any other trip is resolved stop by stop through
    `get_node_id_for_trip_stop`.

    Returns:
        Tuple ``(stop_nodes_by_gtfs_trip_id, gtfs_trips_with_missing_node_ids)``.
        The second element lists ``(trip id, node ids)`` for trips where at
        least one stop could not be resolved (its entry is None).
    """
    resolved_trips: dict[str, list[int]] = {}
    unresolved_trips: list[tuple[str, list[int | None]]] = []

    for gtfs_trip_id, relation in longest_relation_by_trip_id.items():
        relation_nodes = stops_by_relation[relation]
        relation_names = comparable_stop_names_by_relation[relation]

        trip_stop_ids = gtfs_stop_ids_by_trip_id[gtfs_trip_id]
        trip_stop_rows = gtfs_stops.loc[trip_stop_ids]
        trip_names = list(map(gtfs_stop_name_to_comparable, trip_stop_rows["stop_name"]))

        if trip_names[1:-1] == relation_names[1:-1]:
            resolved_trips[gtfs_trip_id] = [node.id for node in relation_nodes]
            continue

        stop_count = len(trip_stop_ids)
        node_ids = [
            get_node_id_for_trip_stop(stop_id, position, stop_count)
            for position, stop_id in enumerate(trip_stop_ids)
        ]

        if None not in node_ids:
            resolved_trips[gtfs_trip_id] = node_ids
        else:
            unresolved_trips.append((gtfs_trip_id, node_ids))

    return (
        resolved_trips,
        unresolved_trips,
    )
    

# Resolve node sequences for all trips; trips with unresolved stops are kept separately.
(
    stop_nodes_by_gtfs_trip_id,
    gtfs_trips_with_missing_node_ids
) = get_stop_nodes_by_gtfs_trip_id()

In [22]:
stop_nodes_by_gtfs_trip_id

{'block_4_trip_1_service_4': [1882247981,
  2421481611,
  2052183690,
  2052183691,
  2052183688,
  2418596863,
  630945148,
  630945156,
  2419768568,
  2992273250,
  2419768586,
  2419768565,
  2419768561,
  2419720286,
  2419720287,
  4557118642,
  2419732952],
 'block_4_trip_2_service_4': [2419732952,
  2419732954,
  2425631194,
  2419720285,
  2419768560,
  2419768564,
  2419768566,
  2420263459,
  2424088455,
  2424076384,
  2426058893,
  2426058892,
  1764579350,
  2426058898,
  2426058890,
  3623688656,
  2426058897,
  1811875207,
  2426068337,
  3629965034,
  2426068335,
  2386355261,
  1769087965,
  1769087786,
  1769088034,
  10727973614,
  12297835418],
 'block_4_trip_3_service_4': [12297835419,
  10727973597,
  777395555,
  213585855,
  213585864,
  213585881,
  213585893,
  3629965037,
  638606093,
  213597071,
  2426058896,
  3623688655,
  638606061,
  213593804,
  638606056,
  213593828,
  213592007,
  2420246141,
  2424088456,
  2423003234,
  2424015724,
  2419768565,


In [23]:
gtfs_trips_with_missing_node_ids

[]

In [25]:
# Distinct node sequences across all trips, each stored as a tuple of OSM nodes.
trips = set()
trips.update(
    tuple(map(osm_node_by_id.__getitem__, node_ids))
    for node_ids in stop_nodes_by_gtfs_trip_id.values()
)

# Iterator consumed one trip at a time by the map cell below.
unusual_trips_iter = iter(trips)

In [None]:
import folium

m = folium.Map(location=(50.05, 19.95), zoom_start=13)

# Every run of this cell takes the next trip from the iterator and draws it.
trip_nodes = next(unusual_trips_iter)
trip_coordinates = [(node.lat, node.lon) for node in trip_nodes]
tram_route = folium.PolyLine(trip_coordinates)
tram_route.add_to(m)

m