# Organization Data Collection
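This notebook queries OpenStreetMap through the Overpass API to collect the names of organizations located in Germany. An organization is any named node that carries one of the tag keys `office`, `craft`, `club`, or `industrial`.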

In [None]:
import overpy, pathlib

# OSM tag keys whose presence marks an element as an organization
TAGS = [
    "office",
    "craft",
    "club",
    "industrial",
]
# Single Overpass client instance used for the queries in this notebook
API = overpy.Overpass()


In [None]:
# Distinct organization names gathered across all tags; a set drops names
# that are returned for more than one tag.
names = set()

# Overpass QL template. `{tag}` is replaced by one tag key, so the filter
# node[<tag>]["name"] matches nodes that carry that key (with any value)
# together with a `name` tag, restricted to the area tagged
# ISO3166-1=DE at admin_level 2.
QUERY_TMPL = """
[out:json];
area["ISO3166-1"="DE"][admin_level=2]->.de;
node[{tag}]["name"](area.de);
out body;
"""

for tag in TAGS:
    q = QUERY_TMPL.format(tag=tag)
    # flush=True makes the progress text appear before the blocking request,
    # since the line has no trailing newline to trigger a flush.
    print(f"→ querying {tag} …", end=" ", flush=True)
    result = API.query(q)
    print(f"{len(result.nodes):,} nodes")
    for n in result.nodes:
        # A missing or empty name collapses to "" and is skipped below.
        name = (n.tags.get("name") or "").strip()
        # "no name" (in any letter case) is a placeholder, not a real name.
        if name and name.lower() != "no name":
            names.add(name)

# The leading newline must be the escape sequence "\n": a single-quoted
# f-string cannot contain a raw line break.
print(f"\nTotal unique names: {len(names):,}")


In [None]:
# Write the collected names to a UTF-8 text file, one name per line.
out_path = pathlib.Path("data/organization")
out_path.mkdir(parents=True, exist_ok=True)  # create missing parent directories

# The file name lists the queried tag keys; with the current TAGS it is
# "organizations_office_craft_club_industrial.txt".
out_file = out_path / ("organizations_" + "_".join(TAGS) + ".txt")

with out_file.open("w", encoding="utf-8") as fh:
    # str.casefold as the sort key gives a case-insensitive ordering.
    for name in sorted(names, key=str.casefold):
        # The line terminator must be the escape sequence "\n"; a raw line
        # break inside the quotes is a syntax error.
        fh.write(name + "\n")
print("Saved to", out_file)
