In [None]:
import overpy

import pathlib  # local import: needed only to create the output directory below

# Read-only client for the public Overpass API (OpenStreetMap data).
api = overpy.Overpass()

# Unique organization names collected from the query result.
organization_names = set()

# Overpass QL query: every *named* node inside Germany that carries one of the
# organization-like keys (office, shop, craft, club, industrial).
# `out tags;` is enough because only the "name" tag is read afterwards.
overpass_query = """
[out:json];
area["ISO3166-1"="DE"][admin_level=2]->.de;
(
  node["office"]["name"](area.de);
  node["shop"]["name"](area.de);
  node["craft"]["name"](area.de);
  node["club"]["name"](area.de);
  node["industrial"]["name"](area.de);
);
out tags;
"""

# Execute the Overpass query and report how many nodes came back.
result = api.query(overpass_query)
print(f" {len(result.nodes)}\n")

# Use a dedicated loop variable so `result` keeps referring to the query
# result after the loop (it used to be overwritten by the last node).
for node in result.nodes:
    # `or ""` guards against a missing/empty name tag before stripping.
    name = (node.tags.get("name") or "").strip()
    # Skip empty names and the "no name" placeholder.
    if name and name.lower() != "no name":
        organization_names.add(name)

# Save all organization names to a text file, one name per line.
output_file = './data/OpenStreetMap_data/organizations_office_shop_craft_club_industrial.txt'
# Create the target directory first; open(..., 'w') does not create parents.
pathlib.Path(output_file).parent.mkdir(parents=True, exist_ok=True)
with open(output_file, 'w', encoding='utf-8') as f:
    for name in sorted(organization_names):
        f.write(name + '\n')
        
        
# office -> Company headquarters, government departments, NGOs, professional practices (e.g. company, ngo, government, it, lawyer, accountant, …)
            # Reutlinger Stadtverkehr
            # Evangelische Hoffnungsgemeinde Karlsruhe
            # Agentur für Arbeit / Nordanlage
            # Sofort Sanierung 24
            # Bürgeramt Dornbusch
            # Institut für Erziehungswissenschaften
            # Ludwig-Uhland-Institut
            # Kone
            # Wilhelm-Schickard-Institut für Informatik
            # Stadtmobil CarSharing GmbH & Co. KG
# amenity (not part of the query in this cell) -> Public-facing services and institutions (e.g. school, university, townhall, courthouse, library, bank, …)
            # Papa-Pizza
            # STARCAR
            # Hartis Cafe
            # Mainhausen
            # Grundmühle
            # Toilette der Technischen Universität Darmstadt
            # Shell
            # Rheinfähre Neuburg
            # Rheinfähre Neuburg
            # Wanderparkplatz
# shop -> Retail outlets and chain stores (e.g. supermarket, department_store, car, clothes, …)
            # Lidl
            # EDEKA
            # Backkultur
            # Eiscafe Pandolfo
            # Bäckerei Grimminger
            # Mein Markt
            # ABC Anneroses Beauty Center
            # Getränkemarkt Fuchs
            # Shell
            # PENNY
# craft -> Small workshops and tradespeople (e.g. plumber, electrician, tailor, brewery, …)
            # Textil Perle
            # Brauerei Gasthaus Friedel
            # Weingut Peter
            # Autohaus Smagon
            # Brauerei Gasthaus Seitz (Elch-Bräu)
            # Damm
            # Tischlerei Kahl
            # Niedersächsisches Staatstheater Werkstätten
            # Studioline Photography
            # Fotostudio Bremen
# club -> Clubhouses and regular meeting venues for associations (e.g. sports, youth, chess, rotary, …)
            # Fußballclub Lichtental
            # Musikverein Schwabegg
            # KW76
            # Landessportbund Hessen
            # Forschungsgemeinschaft elektronische Medien e.V.
            # Tennisclub Lüchow
            # Geschäftsstelle TV Unterboigingen
            # Kirchheimer Sport-Club e.V.
            # FSV Wehringen - Fußballplatz
            # Neues Nöschenröder Schützenhaus
# industrial -> Type of industry carried out at an industrial site (e.g. food, electronics, automotive, …)
            # Herforder Brauerei
            # TEAM - PACK GmbH
            # Schwarzer Rührtechnik
            # RSM KFZ-Service & Tuning
            # Haumann
            # Mauroschat & Rieche GbR
            # Ruhlamat
            # Getir
            # Stadtwerke Lübeck Mobil
            # Oberelbe GmbH


# Location organization dataset

In [28]:
import overpy, pathlib

# OSM keys harvested by this cell. "shop" is currently left out of the run;
# note that the output file name further down still mentions it.
TAGS = ["office", "craft", "club", "industrial"]
# TAGS = ["office", "shop", "craft", "club", "industrial"]

# Overpass QL template: named nodes carrying `{tag}` inside the Germany area,
# returned with their tags only.
QUERY_TMPL = """
[out:json];
area["ISO3166-1"="DE"][admin_level=2]->.de;
node[{tag}]["name"](area.de);
out tags;
"""

api = overpy.Overpass()
names = set()

# One request per key; the progress line is completed once the answer arrives.
for tag in TAGS:
    print(f"→ querying {tag} …", end=" ", flush=True)
    result = api.query(QUERY_TMPL.format(tag=tag))
    print(f"{len(result.nodes):,} nodes")
    stripped_names = ((node.tags.get("name") or "").strip() for node in result.nodes)
    # Keep everything that is neither empty nor the "no name" placeholder.
    names.update(
        candidate
        for candidate in stripped_names
        if candidate and candidate.lower() != "no name"
    )

print(f"\nTotal unique names: {len(names):,}")

# Write the names case-insensitively sorted, one per line.
out_path = pathlib.Path("./data/OpenStreetMap_data")
out_path.mkdir(parents=True, exist_ok=True)
target_file = out_path / "organizations_office_shop_craft_club_industrial.txt"
with target_file.open("w", encoding="utf-8") as fh:
    fh.writelines(entry + "\n" for entry in sorted(names, key=str.casefold))


→ querying office … 

98,323 nodes
→ querying craft … 36,853 nodes
→ querying club … 7,912 nodes
→ querying industrial … 300 nodes

Total unique names: 128,136


# Playground

In [2]:
import spacy

# German spaCy pipeline used for this NER / POS experiment.
# Install it with: python -m spacy download de_core_news_lg
nlp = spacy.load("de_core_news_lg")

# Sample input and its analysed form.
text = "Allgemeinarztpraxis Dr. Killer"
doc = nlp(text)

# Named entities found in the sample, with their labels.
for entity in doc.ents:
    print(f"{entity.text} -> {entity.label_}")

# Part-of-speech tag per token (NOUN = common noun, PROPN = proper noun).
for tok in doc:
    print(f"Token: {tok.text}, POS: {tok.pos_}")

Allgemeinarztpraxis Dr. Killer -> MISC
Token: Allgemeinarztpraxis, POS: NOUN
Token: Dr., POS: NOUN
Token: Killer, POS: NOUN


# Location other 

In [None]:
import overpy
import pathlib
import time
import re

# Overpass API client used for every query issued by this cell.
api = overpy.Overpass()

# Query plan: maps each OSM primary key to the list of values to query for it.
#   * dict key   -> an OSM primary feature key (e.g. "amenity").
#   * list items -> specific values for that key; each one becomes its own
#                   key=value query (e.g. amenity=restaurant).
#   * None       -> query for the key itself with any value (e.g. amenity=*).
# Keys and values follow the OSM wiki "Map Features" page:
# https://wiki.openstreetmap.org/wiki/Map_features
tags_and_their_values_to_query = {
    "aerialway": [
        None, "cable_car", "gondola", "mixed_lift", "chair_lift", "drag_lift",
        "t-bar", "j-bar", "platter", "rope_tow", "magic_carpet",
        "zip_line", "goods", "pylon", "station"
    ],
    "aeroway": [
        None, "aerodrome", "aircraft_crossing", "apron", "gate", "hangar",
        "helipad", "heliport", "navigationaid", "runway", "spaceport",
        "taxiway", "terminal", "windsock"
    ],
    "amenity": [
        None, "bar", "biergarten", "cafe", "fast_food", "food_court", "ice_cream", "pub", "restaurant",
        "college", "dancing_school", "driving_school", "first_aid_school", "kindergarten",
        "language_school", "library", "surf_school", "toy_library", "research_institute",
        "training", "music_school", "school", "traffic_park", "university",
        "bicycle_parking", "bicycle_repair_station", "bicycle_rental", "bicycle_wash",
        "boat_rental", "boat_sharing", "bus_station", "car_rental", "car_sharing",
        "car_wash", "compressed_air", "vehicle_inspection", "charging_station",
        "driver_training", "ferry_terminal", "fuel", "grit_bin", "motorcycle_parking",
        "parking", "parking_entrance", "parking_space", "taxi", "weighbridge",
        "atm", "payment_terminal", "bank", "bureau_de_change", "money_transfer", "payment_centre",
        "baby_hatch", "clinic", "dentist", "doctors", "hospital", "nursing_home",
        "pharmacy", "social_facility", "veterinary",
        "arts_centre", "brothel", "casino", "cinema", "community_centre", "conference_centre",
        "events_venue", "exhibition_centre", "fountain", "gambling", "love_hotel",
        "music_venue", "nightclub", "planetarium", "public_bookcase", "social_centre",
        "stage", "stripclub", "studio", "swingerclub", "theatre",
        "courthouse", "fire_station", "police", "post_box", "post_depot", "post_office",
        "prison", "ranger_station", "townhall",
        "bbq", "bench", "dog_toilet", "dressing_room", "drinking_water", "give_box",
        "lounge", "mailroom", "parcel_locker", "shelter", "shower", "telephone", "toilets",
        "water_point", "watering_place",
        "sanitary_dump_station", "recycling", "waste_basket", "waste_disposal", "waste_transfer_station",
        "animal_boarding", "animal_breeding", "animal_shelter", "animal_training",
        "baking_oven", "clock", "crematorium", "dive_centre", "funeral_hall",
        "grave_yard", "hunting_stand", "internet_cafe", "kitchen", "kneipp_water_cure",
        "lounger", "marketplace", "monastery", "mortuary", "photo_booth",
        "place_of_mourning", "place_of_worship", "public_bath", "public_building",
        "refugee_site", "vending_machine"
    ],
    "barrier": [
        None, "cable_barrier", "city_wall", "ditch", "fence", "guard_rail", "handrail",
        "hedge", "kerb", "retaining_wall", "wall",
        "block", "bollard", "border_control", "bump_gate", "bus_trap", "cattle_grid",
        "chain", "cycle_barrier", "debris", "entrance", "full-height_turnstile", "gate",
        "hampshire_gate", "height_restrictor", "horse_stile", "jersey_barrier",
        "kissing_gate", "lift_gate", "log", "motorcycle_barrier", "rope", "sally_port",
        "spikes", "stile", "sump_buster", "swing_gate", "toll_booth", "turnstile", "yes"
    ],
    "boundary": [
        None, "aboriginal_lands", "administrative", "border_zone", "census", "forest",
        "forest_compartment", "hazard", "health", "historic", "limited_traffic_zone",
        "local_authority", "low_emission_zone", "maritime", "marker", "national_park",
        "place", "political", "postal_code", "protected_area", "religious_administration",
        "special_economic_zone", "statistical", "disputed", "timezone"
    ],
    "building": [
        None, "apartments", "barracks", "bungalow", "cabin", "detached", "annexe", "dormitory",
        "farm", "ger", "hotel", "house", "houseboat", "residential", "semidetached_house",
        "static_caravan", "stilt_house", "terrace", "tree_house", "trullo",
        "commercial", "industrial", "kiosk", "office", "retail", "supermarket", "warehouse",
        "religious", "cathedral", "chapel", "church", "kingdom_hall", "monastery", "mosque",
        "presbytery", "shrine", "synagogue", "temple",
        "bakehouse", "bridge", "civic", "college", "fire_station", "government", "gatehouse",
        "hospital", "kindergarten", "museum", "public", "school", "toilets", "train_station",
        "transportation", "university",
        "barn", "conservatory", "cowshed", "farm_auxiliary", "greenhouse", "slurry_tank",
        "stable", "sty", "livestock",
        "grandstand", "pavilion", "riding_hall", "sports_hall", "sports_centre", "stadium",
        "allotment_house", "boathouse", "hangar", "hut", "shed",
        "carport", "garage", "garages", "parking",
        "digester", "service", "tech_cab", "transformer_tower", "water_tower", "storage_tank", "silo",
        "beach_hut", "bunker", "castle", "construction", "container", "guardhouse", "military",
        "outbuilding", "pagoda", "quonset_hut", "roof", "ruins", "ship", "tent", "tower",
        "triumphal_arch", "windmill", "yes"
    ],
    "craft": [
        None, "agricultural_engines", "atelier", "bag_repair", "bakery", "basket_maker", "beekeeper",
        "blacksmith", "boatbuilder", "bookbinder", "brewery", "builder", "cabinet_maker",
        "candlemaker", "car_painter", "carpenter", "carpet_cleaner", "carpet_layer", "caterer",
        "chimney_sweeper", "cleaning", "clockmaker", "clothes_mending", "confectionery", "cooper",
        "dental_technician", "distillery", "door_construction", "dressmaker", "electrician",
        "electronics_repair", "elevator", "embroiderer", "engraver", "fence_maker", "floorer",
        "gardener", "glassblower", "glaziery", "goldsmith", "grinding_mill", "handicraft",
        "hvac", "insulation", "interior_decorator", "interior_work", "jeweller", "joiner",
        "key_cutter", "laboratory", "lapidary", "leather", "locksmith", "luthier",
        "metal_construction", "mint", "musical_instrument", "oil_mill", "optician",
        "organ_builder", "painter", "paperhanger", "parquet_layer", "paver", "pest_control",
        "photographer", "photographic_laboratory", "photovoltaic", "piano_tuner", "plasterer",
        "plumber", "pottery", "printer", "printmaker", "restoration", "rigger", "roofer",
        "saddler", "sailmaker", "sawmill", "scaffolder", "sculptor", "shoemaker", "signmaker",
        "stand_builder", "stonemason", "stove_fitter", "sun_protection", "tailor", "tatami",
        "tiler", "tinsmith", "toolmaker", "turner", "upholsterer", "watchmaker",
        "water_well_drilling", "weaver", "welder", "window_construction", "winery"
    ],
    "emergency": [
        None, "ambulance_station", "defibrillator", "landing_site", "emergency_ward_entrance",
        "fire_service_inlet", "fire_alarm_box", "fire_extinguisher", "fire_hose", "fire_hydrant",
        "water_tank", "suction_point", "lifeguard", "life_ring", "assembly_point",
        "phone", "siren", "drinking_water"
    ],
    "geological": [
        None, "moraine", "outcrop", "volcanic_caldera_rim", "fault", "fold", "palaeontological_site",
        "volcanic_lava_field", "volcanic_vent", "glacial_erratic", "rock_glacier", "giants_kettle",
        "meteor_crater", "hoodoo", "columnar_jointing", "dyke", "monocline", "tor", "unconformity",
        "cone", "sinkhole", "pingo", "inselberg", "limestone_pavement"
    ],
    "healthcare": [
        None, "alternative", "audiologist", "birthing_centre", "blood_bank", "blood_donation",
        "counselling", "dialysis", "hospice", "laboratory", "midwife", "nurse",
        "occupational_therapist", "optometrist", "physiotherapist", "podiatrist",
        "psychotherapist", "rehabilitation", "sample_collection", "speech_therapist", "vaccination_centre"
    ],
    "highway": [
        None, "motorway_junction", "bus_stop", "crossing", "elevator", "emergency_access_point",
        "give_way", "mini_roundabout", "passing_place", "rest_area", "services", "speed_camera",
        "stop", "street_lamp", "toll_gantry", "traffic_signals", "trailhead", "turning_circle",
        "turning_loop", "milestone", "emergency_bay", "platform", "cyclist_waiting_aid", "ladder"
    ],
    "historic": [
        None, "aircraft", "anchor", "aqueduct", "archaeological_site", "battlefield", "bomb_crater",
        "boundary_stone", "building", "bullaun_stone", "cannon", "castle", "castle_wall",
        "charcoal_pile", "church", "city_gate", "citywalls", "creamery",
        "district", "epigraph", "farm", "fort", "gallows", "house", "high_cross",
        "highwater_mark", "lavoir", "lime_kiln", "locomotive", "machine", "manor",
        "memorial", "milestone", "millstone", "mine", "minecart", "monastery", "monument",
        "mosque", "ogham_stone", "optical_telegraph", "pa", "pillory", "pound",
        "railway_car", "road", "round_tower", "ruins", "rune_stone", "shieling", "ship",
        "stećak", "stone", "tank", "temple", "tomb", "tower", "vehicle", "wayside_cross",
        "wayside_shrine", "wreck", "yes"
    ],
    "landuse": [
        None, "commercial", "construction", "education", "fairground", "industrial", "residential", "retail", "institutional",
        "aquaculture", "allotments", "farmland", "farmyard", "paddy", "animal_keeping", "flowerbed",
        "forest", "logging", "greenhouse_horticulture", "meadow", "orchard", "plant_nursery", "vineyard",
        "basin", "reservoir", "salt_pond",
        "brownfield", "cemetery", "conservation", "depot", "garages", "grass", "greenfield",
        "landfill", "military", "port", "quarry", "railway", "recreation_ground", "religious",
        "village_green", "greenery", "winter_sports"
    ],
    "leisure": [
        None, "adult_gaming_centre", "amusement_arcade", "beach_resort", "bandstand", "bird_hide",
        "common", "dance", "disc_golf_course", "dog_park", "escape_game", "firepit",
        "fishing", "fitness_centre", "fitness_station", "garden", "hackerspace", "horse_riding",
        "ice_rink", "marina", "miniature_golf", "nature_reserve", "park", "picnic_table",
        "pitch", "playground", "slipway", "sports_centre", "stadium", "summer_camp",
        "swimming_area", "swimming_pool", "track", "water_park"
    ],
    "man_made": [
        None, "adit", "beacon", "breakwater", "bridge", "bunker_silo", "carpet_hanger", "chimney",
        "column", "communications_tower", "crane", "cross", "cutline", "clearcut", "dovecote",
        "dyke", "embankment", "flagpole", "gasometer", "goods_conveyor", "groyne", "guard_stone",
        "kiln", "lighthouse", "mast", "mineshaft", "monitoring_station", "obelisk", "observatory",
        "offshore_platform", "petroleum_well", "pier", "pipeline", "pump", "pumping_station",
        "reservoir_covered", "sewer_vent", "silo", "snow_fence", "snow_net", "storage_tank",
        "street_cabinet", "stupa", "surveillance", "survey_point", "tailings_pond", "telescope",
        "tower", "video_wall", "wastewater_plant", "watermill", "water_tower", "water_well",
        "water_tap", "water_works", "wildlife_crossing", "windmill", "works", "yes"
    ],
    "military": [
        None, "academy", "airfield", "base", "bunker", "barracks", "checkpoint", "danger_area",
        "nuclear_explosion_site", "obstacle_course", "office", "range", "school",
        "training_area", "trench"
    ],
    "natural": [
        None, "fell", "grassland", "heath", "moor", "scrub", "shrubbery", "tree", "tree_row", "tundra", "wood",
        "bay", "beach", "blowhole", "cape", "coastline", "crevasse", "geyser", "glacier",
        "hot_spring", "isthmus", "mud", "peninsula", "reef", "shingle", "shoal", "spring",
        "strait", "water", "wetland",
        "arch", "arete", "bare_rock", "blockfield", "cave_entrance", "cliff", "dune",
        "earth_bank", "fumarole", "hill", "peak", "ridge", "rock", "saddle", "sand",
        "scree", "sinkhole", "stone", "valley", "volcano"
    ],
    "office": [
        None, "accountant", "advertising_agency", "airline", "architect", "association", "broadcaster",
        "chamber", "charity", "company", "construction_company", "consulting", "courier",
        "coworking", "diplomatic", "educational_institution", "employment_agency", "energy_supplier",
        "engineer", "estate_agent", "event_management", "financial", "financial_advisor",
        "forestry", "foundation", "geodesist", "government", "graphic_design", "guide",
        "harbour_master", "insurance", "it", "lawyer", "logistics", "moving_company",
        "newspaper", "ngo", "notary", "politician", "political_party", "property_management",
        "publisher", "quango", "religion", "research", "security", "surveyor", "tax_advisor",
        "telecommunication", "transport", "travel_agent", "tutoring", "union", "university",
        "visa", "water_utility", "yes"
    ],
    "place": [
        None, "country", "state", "region", "province", "district", "county", "subdistrict", "municipality",
        "city", "borough", "suburb", "quarter", "neighbourhood", "city_block", "plot",
        "town", "village", "hamlet", "isolated_dwelling", "farm", "allotments",
        "continent", "archipelago", "island", "islet", "square", "locality", "polder", "sea", "ocean"
    ],
    "power": [
        None, "cable", "catenary_mast", "compensator", "connection", "converter", "generator",
        "heliostat", "insulator", "inverter", "line", "minor_line", "plant", "pole", "portal",
        "substation", "switch", "switchgear", "terminal", "tower", "transformer"
    ],
    "public_transport": [
        None, "stop_position", "platform", "station", "stop_area", "stop_area_group"
    ],
    "railway": [
        None, "halt", "station", "stop", "subway_entrance", "tram_stop", "platform",
        "buffer_stop", "crossing", "derail", "level_crossing", "railway_crossing", "roundhouse",
        "signal", "switch", "tram_level_crossing", "traverser", "turntable", "ventilation_shaft",
        "wash", "water_crane"
    ],
    "route": [
        None, "bicycle", "bus", "canoe", "detour", "ferry", "foot", "hiking", "horse",
        "inline_skates", "light_rail", "mtb", "piste", "railway", "road", "running",
        "ski", "subway", "train", "tracks", "tram", "trolleybus"
    ],
    "shop": [
        None, "alcohol", "bakery", "beverages", "brewing_supplies", "butcher", "cheese", "chocolate",
        "coffee", "confectionery", "convenience", "dairy", "deli", "farm", "food", "frozen_food",
        "greengrocer", "health_food", "ice_cream", "nuts", "pasta", "pastry", "seafood",
        "spices", "tea", "tortilla", "water", "wine",
        "department_store", "general", "kiosk", "mall", "supermarket", "wholesale",
        "baby_goods", "bag", "boutique", "clothes", "fabric", "fashion_accessories", "fashion",
        "jewelry", "leather", "sewing", "shoes", "shoe_repair", "tailor", "watches", "wool",
        "charity", "second_hand", "variety_store",
        "beauty", "chemist", "cosmetics", "erotic", "hairdresser", "hairdresser_supply",
        "hearing_aids", "herbalist", "massage", "medical_supply", "nutrition_supplements",
        "optician", "perfumery", "tattoo",
        "agrarian", "appliance", "bathroom_furnishing", "country_store", "doityourself",
        "electrical", "energy", "fireplace", "florist", "garden_centre", "garden_furniture",
        "gas", "glaziery", "groundskeeping", "hardware", "houseware", "locksmith", "paint",
        "pottery", "security", "tool_hire", "trade",
        "antiques", "bed", "candles", "carpet", "curtain", "doors", "flooring", "furniture",
        "household_linen", "interior_decoration", "kitchen", "lighting", "tiles", "window_blind",
        "computer", "electronics", "hifi", "mobile_phone", "printer_ink", "radiotechnics",
        "telecommunication", "vacuum_cleaner",
        "atv", "bicycle", "boat", "car", "car_parts", "car_repair", "caravan", "fishing", "fuel",
        "golf", "hunting", "military_surplus", "motorcycle", "motorcycle_repair", "outdoor",
        "scuba_diving", "ski", "snowmobile", "sports", "surf", "swimming_pool", "trailer",
        "truck", "tyres",
        "art", "camera", "collector", "craft", "frame", "games", "model", "music",
        "musical_instrument", "photo", "trophy", "video", "video_games",
        "anime", "books", "gift", "lottery", "newsagent", "stationery", "ticket",
        "bookmaker", "cannabis", "copyshop", "dry_cleaning", "e-cigarette", "funeral_directors",
        "laundry", "money_lender", "outpost", "party", "pawnbroker", "pest_control", "pet",
        "pet_grooming", "pyrotechnics", "religion", "rental", "storage_rental", "tobacco",
        "toys", "travel_agency", "vacant", "weapons", "yes"
    ],
    "telecom": [
        None, "exchange", "connection_point", "distribution_point", "service_device", "data_center", "line"
    ],
    "tourism": [
        None, "alpine_hut", "apartment", "aquarium", "artwork", "attraction", "camp_pitch",
        "camp_site", "caravan_site", "chalet", "gallery", "guest_house", "hostel",
        "hotel", "information", "motel", "museum", "picnic_site", "theme_park",
        "viewpoint", "wilderness_hut", "zoo", "yes"
    ],
    "water": [
        None, "river", "oxbow", "canal", "ditch", "lock", "fish_pass", "lake", "reservoir",
        "pond", "basin", "lagoon", "stream_pool", "reflecting_pool", "moat", "wastewater"
    ],
    "waterway": [
        None, "dam", "weir", "waterfall", "lock_gate", "soakhole", "turning_point", "water_point", "fuel",
        "dock", "boatyard"
    ]
}



all_unique_names = set()
QUERY_LIMIT = 100  # cap used in `out body <n>`: at most this many elements per query
MAX_ATTEMPTS = 3  # how often one query is tried when the server reports rate limiting
RATE_LIMIT_SLEEP_S = 60  # pause (seconds) after an OverpassTooManyRequests response
# Lower-cased placeholder values that are treated as "no real name".
PLACEHOLDER_NAMES = {"no name", "noname", "unbenannt", "(unbenannt)"}

# --- Iterate and Query ---
# One Overpass request per (key, value) pair of the query plan. Failures are
# logged and the loop moves on, so a single bad query does not abort the run.
for osm_key, specific_values_list in tags_and_their_values_to_query.items():
    for specific_osm_value in specific_values_list:
        # Build the node filter and a matching description for log messages.
        # The description is set in BOTH branches so that error messages always
        # name the query that actually failed.
        if specific_osm_value is None:
            # Query for the primary key only, e.g., node["amenity"]["name"]
            query_tag_filter_on_node = f'node["{osm_key}"]["name"]'
            current_query_description = f"{osm_key}"
        else:
            # Query for key=value, e.g., node["amenity"="restaurant"]["name"]
            query_tag_filter_on_node = f'node["{osm_key}"="{specific_osm_value}"]["name"]'
            current_query_description = f"{osm_key}={specific_osm_value}"

        # Overpass QL query restricted to named nodes inside the Germany area.
        overpass_query = f"""
        [out:json];
        area["ISO3166-1"="DE"][admin_level=2]->.searchArea;
        (
          {query_tag_filter_on_node}(area.searchArea);
        );
        out body {QUERY_LIMIT};
        """

        # Only a rate-limit response leads to another attempt; every other
        # outcome (success or a different error) leaves the attempt loop.
        for attempt in range(1, MAX_ATTEMPTS + 1):
            try:
                result = api.query(overpass_query)
                nodes_found_count = len(result.nodes)

                new_names_this_query = 0
                for n in result.nodes:
                    name = (n.tags.get("name") or "").strip()
                    # Skip empty names and common "no name" placeholders.
                    if not name or name.lower() in PLACEHOLDER_NAMES:
                        continue
                    # Require at least one letter so that names made only of
                    # digits / special characters are dropped.
                    # NOTE(review): the pattern is ASCII-only, so a name made
                    # solely of non-ASCII letters is dropped as well - confirm
                    # that this is intended.
                    if not re.search('[a-zA-Z]', name):
                        continue
                    if name not in all_unique_names:
                        new_names_this_query += 1
                        all_unique_names.add(name)

                print(f"  Found {nodes_found_count:,} nodes. Added {new_names_this_query:,} new unique names. Total unique names: {len(all_unique_names):,}")
                break

            except overpy.exception.OverpassTooManyRequests as e:
                print(f"  ERROR for '{current_query_description}': Too many requests. {e}. Sleeping for {RATE_LIMIT_SLEEP_S}s.")
                # Back off, then let the attempt loop retry this same query
                # instead of silently dropping it.
                time.sleep(RATE_LIMIT_SLEEP_S)
            except overpy.exception.OverpassGatewayTimeout as e:
                print(f"  ERROR for '{current_query_description}': Gateway Timeout. {e}. Query might have been too broad or server busy.")
                break
            except overpy.exception.OverpassBadRequest as e:
                print(f"  ERROR for '{current_query_description}': Bad Request. {e}")
                # For debugging the query if needed:
                print(f"  Problematic Query:\n{overpass_query}")
                break
            except Exception as e:
                # Deliberate best-effort: log anything unexpected and continue
                # with the next query.
                print(f"  An UNEXPECTED ERROR occurred for '{current_query_description}': {type(e).__name__} - {e}")
                break


print(f"\nTotal unique names collected: {len(all_unique_names):,}")

# --- Save to File ---
# NOTE(review): absolute, user-specific path; the other cells write below
# ./data/OpenStreetMap_data - consider a shared, configurable data directory.
out_path_dir = pathlib.Path("/home/mseiferling/vector_search/data/location_other")
# Create the directory first; opening the file for writing does not create parents.
out_path_dir.mkdir(parents=True, exist_ok=True)
# Changed filename slightly to reflect it covers specific values now
output_file = out_path_dir.joinpath("location_other__osm_primary_map_features.txt")

# One name per line, sorted case-insensitively.
with output_file.open("w", encoding="utf-8") as fh:
    for name in sorted(all_unique_names, key=str.casefold):
        fh.write(name + "\n")

  Found 100 nodes. Added 84 new unique names. Total unique names: 84
  Found 0 nodes. Added 0 new unique names. Total unique names: 84
  Found 0 nodes. Added 0 new unique names. Total unique names: 84
  Found 0 nodes. Added 0 new unique names. Total unique names: 84
  Found 0 nodes. Added 0 new unique names. Total unique names: 84
  Found 2 nodes. Added 2 new unique names. Total unique names: 86
  Found 0 nodes. Added 0 new unique names. Total unique names: 86
  ERROR for 'aerialway': Too many requests. Too many requests. Sleeping for 60s.
  Found 0 nodes. Added 0 new unique names. Total unique names: 86
  Found 0 nodes. Added 0 new unique names. Total unique names: 86
  Found 0 nodes. Added 0 new unique names. Total unique names: 86
  Found 0 nodes. Added 0 new unique names. Total unique names: 86
  Found 0 nodes. Added 0 new unique names. Total unique names: 86
  Found 40 nodes. Added 20 new unique names. Total unique names: 106
  Found 100 nodes. Added 1 new unique names. Total uniq

In [None]:
import overpy, pathlib

# Client for the public Overpass endpoint.
api = overpy.Overpass()

# Top-level feature keys taken from the OSM wiki "Map Features" page.
osm_primary_keys = [
    "aerialway", "aeroway", "amenity", "barrier",
    "boundary", "building", "craft", "emergency",
    "geological", "healthcare", "highway", "historic",
    "landuse", "leisure", "man_made", "military",
    "natural", "office", "place", "power",
    "public_transport", "railway", "route", "shop",
    "telecom", "tourism", "water", "waterway",
]

names = set()
for osm_key in osm_primary_keys:
    # Named nodes in Germany carrying this key; the output is capped at 500 elements.
    overpass_query = f"""
    [out:json];
    area["ISO3166-1"="DE"][admin_level=2]->.de;
    (
      node["{osm_key}"]["name"](area.de);
    );
    out body 500;
    """

    result = api.query(overpass_query)
    print(f"{osm_key}:{len(result.nodes):,} nodes")

    # Collect every usable name from the returned nodes.
    for node in result.nodes:
        if "name" not in node.tags:
            continue
        cleaned = node.tags["name"].strip()
        if cleaned and cleaned.lower() != "no name":
            names.add(cleaned)


print(f"\nTotal unique names: {len(names):,}")

# Write the names case-insensitively sorted, one per line.
out_path = pathlib.Path("./data/OpenStreetMap_data")
out_path.mkdir(parents=True, exist_ok=True)
target_file = out_path / "location_other_osm_primary_features.txt"
with target_file.open("w", encoding="utf-8") as fh:
    fh.writelines(entry + "\n" for entry in sorted(names, key=str.casefold))