# # OSM etymology data for verification

Test your Overpass query for the required OSM information at: https://overpass-turbo.osm.ch/

Data model for streets (key: highway): https://wiki.openstreetmap.org/wiki/Key%3ahighway

Query all streets with existing etymology designation in Switzerland

In [1]:
#Load all needed libraries
import requests
import pandas as pd
import osm2geojson
import geopandas as gpd
from time import process_time_ns

In [2]:
# Overpass API endpoint to query (alternative endpoint kept for reference).
#overpass_url = "http://overpass-api.de/api/interpreter"
overpass_url = "https://lz4.overpass-api.de/api/interpreter"

# Overpass QL: every node/way/relation inside Switzerland that carries both a
# "highway" and a "name:etymology:wikidata" tag, plus (via the recursion
# statement) the member elements referenced by those results.
overpass_query = """
[out:json];
area["ISO3166-1"="CH"];
(
nwr["name:etymology:wikidata"]["highway"](area);
);
(._;>;);
out body;
"""

# Send the query as the form field "data" of a POST request rather than as the
# body of a GET request. The timeout (seconds) keeps the cell from hanging
# forever if the server stalls.
response = requests.post(overpass_url, data={"data": overpass_query}, timeout=300)

# Fail here with a clear HTTP error (e.g. rate limiting or a server-side
# timeout) instead of later with an opaque JSON decoding error.
response.raise_for_status()
print(response)

<Response [200]>


In [3]:
# Decode the JSON body of the Overpass response into Python objects
data = response.json()
data

{'version': 0.6,
 'generator': 'Overpass API 0.7.59 e21c39fe',
 'osm3s': {'timestamp_osm_base': '2022-10-15T12:35:40Z',
  'timestamp_areas_base': '2022-10-15T11:41:38Z',
  'copyright': 'The data included in this document is from www.openstreetmap.org. The data is made available under ODbL.'},
 'elements': [{'type': 'node',
   'id': 172271,
   'lat': 46.5241373,
   'lon': 6.5943654},
  {'type': 'node', 'id': 172272, 'lat': 46.5240114, 'lon': 6.5954457},
  {'type': 'node', 'id': 172273, 'lat': 46.5234607, 'lon': 6.5978601},
  {'type': 'node', 'id': 172274, 'lat': 46.5231984, 'lon': 6.5993696},
  {'type': 'node', 'id': 280590, 'lat': 46.5169062, 'lon': 6.6152151},
  {'type': 'node', 'id': 280591, 'lat': 46.5168753, 'lon': 6.6158445},
  {'type': 'node', 'id': 280592, 'lat': 46.5169596, 'lon': 6.6143229},
  {'type': 'node', 'id': 280595, 'lat': 46.5179349, 'lon': 6.6170132},
  {'type': 'node', 'id': 280596, 'lat': 46.5183436, 'lon': 6.6163655},
  {'type': 'node', 'id': 280597, 'lat': 46.518

In [4]:
# One row per returned OSM element; keys an element does not have are left empty in its row
dfp = pd.DataFrame(data["elements"])
dfp

Unnamed: 0,type,id,lat,lon,tags,nodes,members
0,node,172271,46.524137,6.594365,,,
1,node,172272,46.524011,6.595446,,,
2,node,172273,46.523461,6.597860,,,
3,node,172274,46.523198,6.599370,,,
4,node,280590,46.516906,6.615215,,,
...,...,...,...,...,...,...,...
31237,relation,3400183,,,"{'highway': 'pedestrian', 'layer': '5', 'name'...",,"[{'type': 'way', 'ref': 68687544, 'role': 'inn..."
31238,relation,4800640,,,"{'area': 'yes', 'highway': 'pedestrian', 'lit'...",,"[{'type': 'way', 'ref': 86090007, 'role': 'out..."
31239,relation,8437762,,,"{'alt_name': 'Le Corbusier-Platz', 'highway': ...",,"[{'type': 'way', 'ref': 606423418, 'role': 'ou..."
31240,relation,8688199,,,"{'bicycle': 'yes', 'highway': 'pedestrian', 'l...",,"[{'type': 'way', 'ref': 343989936, 'role': 'ou..."


In [5]:
# Convert the raw Overpass JSON into a GeoJSON FeatureCollection
geojson = osm2geojson.json2geojson(data)
geojson

{'type': 'FeatureCollection',
 'features': [{'type': 'Feature',
   'properties': {'type': 'node',
    'id': 40128715,
    'tags': {'highway': 'motorway_junction',
     'name': 'Sarnen Nord',
     'name:etymology:wikidata': 'Q63964',
     'ref': '36'}},
   'geometry': {'type': 'Point', 'coordinates': [8.2569925, 46.89458]}},
  {'type': 'Feature',
   'properties': {'type': 'node',
    'id': 249282757,
    'tags': {'highway': 'motorway_junction',
     'name': 'Buochs',
     'name:etymology:wikidata': 'Q64567',
     'ref': '34'}},
   'geometry': {'type': 'Point', 'coordinates': [8.4359813, 46.9722376]}},
  {'type': 'Feature',
   'properties': {'type': 'node',
    'id': 292003425,
    'tags': {'highway': 'motorway_junction',
     'name': 'Stans Nord',
     'name:etymology:wikidata': 'Q63931',
     'ref': '32'}},
   'geometry': {'type': 'Point', 'coordinates': [8.3491521, 46.9680282]}},
  {'type': 'Feature',
   'properties': {'type': 'node',
    'id': 443705798,
    'tags': {'addr:city': 'Be

In [6]:
# Build a GeoDataFrame from the GeoJSON features, declaring the coordinates as EPSG:4326 (WGS 84 lon/lat)
geom = gpd.GeoDataFrame.from_features(geojson, crs="EPSG:4326")
geom

Unnamed: 0,geometry,type,id,tags,nodes
0,POINT (8.25699 46.89458),node,40128715,"{'highway': 'motorway_junction', 'name': 'Sarn...",
1,POINT (8.43598 46.97224),node,249282757,"{'highway': 'motorway_junction', 'name': 'Buoc...",
2,POINT (8.34915 46.96803),node,292003425,"{'highway': 'motorway_junction', 'name': 'Stan...",
3,POINT (7.43698 46.94589),node,443705798,"{'addr:city': 'Bern', 'addr:country': 'CH', 'a...",
4,POINT (7.43679 46.94590),node,443705802,"{'addr:city': 'Bern', 'addr:country': 'CH', 'a...",
...,...,...,...,...,...
4317,"MULTILINESTRING ((7.42169 46.94472, 7.42177 46...",relation,7757,"{'highway': 'secondary', 'name': 'Effingerstra...",
4318,"MULTIPOLYGON (((6.63465 46.52260, 6.63461 46.5...",relation,1715790,"{'area': 'yes', 'highway': 'pedestrian', 'name...",
4319,"MULTIPOLYGON (((6.63061 46.52095, 6.63049 46.5...",relation,4800640,"{'area': 'yes', 'highway': 'pedestrian', 'lit'...",
4320,"MULTIPOLYGON (((7.37492 46.94708, 7.37492 46.9...",relation,8437762,"{'alt_name': 'Le Corbusier-Platz', 'highway': ...",


In [7]:
# Raise the display limits so the very wide tag table can be inspected
pd.set_option("display.max_rows", 50)
pd.set_option("display.max_columns", 300)

# Expand the per-feature tag dictionaries into one column per OSM tag key
info = pd.json_normalize(geom.to_dict(orient='list'), record_path=['tags'])
info.head(n=50)

Unnamed: 0,highway,name,name:etymology:wikidata,ref,addr:city,addr:country,addr:housenumber,addr:postcode,addr:street,entrance,public_transport,trolleybus,alt_name,brand,bus,ele,network,operator,shelter,source,uic_name,uic_ref,website,wheelchair,bench,bin,lit,tactile_paving,wlan,covered,network:wikidata,cycleway:both,lane_markings,maxspeed,sidewalk,surface,trolley_wire,motor_vehicle,oneway,cycleway,oneway:bicycle,lanes,lcn,note,owner,smoothness,wikidata,foot,name:etymology:wikipedia,bicycle:backward,bicycle:left,bus:lanes:backward,cycleway:right,lanes:backward,lanes:forward,old_name,vehicle:lanes:backward,turn:lanes:backward,turn:lanes:forward,bicycle:forward,bridge,cycleway:left,layer,maxweight:signed,motorcar:conditional,motorcycle:conditional,access,vehicle,sidewalk:both:surface,incline,noexit,shoulder,psv,sidewalk:left,sidewalk:right,check_date:cycleway,cycleway:left:lane,cycleway:left:oneway,image,name:source,name:start_date,wikimedia_commons,rcrc_ref,access:lanes:backward,bicycle:lanes:backward,cycleway:lanes:backward,official_name,junction,cycleway:right:lane,lanes:unmarked,mapillary,service,bicycle,dog,maxspeed:type,source:maxspeed,fixme,source:width,width,lanes:psv:backward,tunnel,cycleway:both:lane,access:conditional,wikipedia,lanes:psv:forward,source:name,maxheight,maxweight,turn:lanes,nat_ref,reg_ref,bicycle:lanes:forward,cycleway:lanes:forward,motor_vehicle:lanes:forward,motorcar,motorcycle,destination:backward,destination:lanes,day_off,day_on,hour_off,hour_on,tracktype,oneway:bus,oneway:moped,oneway:psv,access:lanes:forward,parking:lane:both,segregated,taxi,horse,handrail:left,ramp,step_count,sidewalk:left:surface,parking:lane:right,parking:lane:right:parallel,loc_ref,mofa,mofa:backward,motor_vehicle:backward,psv:backward,psv:lanes:forward,area,busway:left,lanes:psv,psv:lanes,parking:lane:left,parking:lane:left:parallel,cyclestreet,old_name:-1934,sac_scale,vehicle:lanes,sidewalk:right:surface,railway,hgv,handrail,busway:right,cycleway:right:oneway,onew
ay:foot,embankment,crossing,cycleway:surface,footway:surface,motor_vehicle:conditional,access:backward,short_name,crossing:island,footway,maxlength,sidewalk:both,destination:forward,destination:symbol:forward,start_date,disabled,destination,check_date:surface,agricultural,placement,oneway:hgv,bridge:name,parking:condition:right,parking:condition:right:maxstay,parking:condition:right:residents,level,check_date,proposed:trolley_wire,car,traffic_calming,vehicle:lanes:forward,oneway:mofa,postal_code,old_name:-2006,parking:lane,name:etymology,start_date:name,ramp:wheelchair,moped,handrail:right,trolley_wire:backward,psv:lanes:backward,parking:condition:both,parking:condition:both:maxstay,parking:condition:both:residents,avz,cs_dir:backward,cs_dir:forward,destination:symbol,maxspeed:backward,maxspeed:forward,lanes:directions,handrail:center,ford,mtb:scale,mtb:scale:imba,mtb:scale:uphill,trail_visibility,place,turn,loc_name,parking:condition:right:2,parking:condition:right:2:maxstay,parking:condition:right:2:time_interval,parking:condition:right:3,parking:condition:right:3:time_interval,parking:condition:right:default,parking:condition:right:time_interval,parking:lane:right:capacity,description,departures_board,overtaking,destination:lanes:backward,kerb:left,noname,cycleway:lane,pedestrian_lane,check_date:ramp,parking:condition:left,parking:lane:left:capacity,traffic_sign,trailer,cycleway:left:segregated,was:foot,bus:backward,type
0,motorway_junction,Sarnen Nord,Q63964,36.0,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,
1,motorway_junction,Buochs,Q64567,34.0,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,
2,motorway_junction,Stans Nord,Q63931,32.0,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,
3,,,Q1296211,,Bern,CH,2,3011.0,Effingerstrasse,yes,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,
4,,,Q1296211,,Bern,CH,4,3011.0,Effingerstrasse,yes,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,
5,,,Q1296211,,Bern,CH,4a,3011.0,Effingerstrasse,yes,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,
6,,,Q1296211,,Bern,CH,6,3011.0,Effingerstrasse,yes,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,
7,,,Q1296211,,Bern,CH,6a,3011.0,Effingerstrasse,yes,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,
8,,,Q1296211,,Bern,CH,8,3011.0,Effingerstrasse,yes,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,
9,,,Q1296211,,Bern,CH,12,3011.0,Effingerstrasse,yes,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,


In [8]:
# Non-null counts of every tag column per highway type; the "name" column then
# gives the number of named features for each type (top 30 shown)
most_common = info.groupby(by="highway").count()
most_common["name"].nlargest(n=30)

highway
residential          1562
footway               533
secondary             471
tertiary              381
primary               304
service               273
living_street         209
unclassified          198
pedestrian            134
steps                  96
path                   39
track                  24
trunk                  21
cycleway               18
bus_stop               18
motorway_junction       8
platform                7
primary_link            3
tertiary_link           3
trunk_link              1
Name: name, dtype: int64

In [9]:
# Keep only the street name and its etymology Wikidata id(s) for the verification set
verif = info.loc[:, ['name', 'name:etymology:wikidata']]
verif

Unnamed: 0,name,name:etymology:wikidata
0,Sarnen Nord,Q63964
1,Buochs,Q64567
2,Stans Nord,Q63931
3,,Q1296211
4,,Q1296211
...,...,...
4317,Effingerstrasse,Q1296211
4318,Place de la Cathédrale,Q669544
4319,Place de l'Europe,Q46
4320,Le-Corbusier-Platz,Q4724


In [10]:
# Report how many values are missing per column ...
missing_data = verif.isna().sum().to_frame(name='Missing Values')

print(missing_data)

# ... then drop every row in which at least one of the two columns is missing
verif = verif.dropna(axis=0, how='any')

                         Missing Values
name                                 19
name:etymology:wikidata               0


In [11]:
# Remove fully identical (name, etymology) rows, keeping the first occurrence of each pair
uniqueValues = verif[~verif.duplicated()]
uniqueValues

Unnamed: 0,name,name:etymology:wikidata
0,Sarnen Nord,Q63964
1,Buochs,Q64567
2,Stans Nord,Q63931
12,Tavelweg,Q124667
15,Willadingweg,Q1693700
...,...,...
3991,Hintere Erlachstrasse,Q123886
3992,Hardungweg,Q1495926
3993,Spühlweg,Q78071721;Q106809423
3994,Spühlstrasse,Q78071721;Q106809423


In [12]:
# Rows whose name already occurred earlier in the table, i.e. the same name
# paired with a different etymology value
name_repeated = uniqueValues.duplicated(subset=['name'], keep='first')
duplicateRows = uniqueValues.loc[name_repeated]
duplicateRows

Unnamed: 0,name,name:etymology:wikidata
509,Avenue de la Gare,Q669678
894,Weberstrasse,Q55903732
964,Schillerstrasse,Q107030305;Q22670
979,Place de la Gare,Q669678
1111,Simonstrasse,Q23063169
1351,Fröbelstrasse,Q76679
1758,Rue de la Tour,Q3533300
1775,Schlatterstrasse,Q43136553
2173,Hiltystrasse,Q55196805;Q120960


Drop these 9 rows with non-unique names (the first occurrence of each name is kept).

In [13]:
# Make names unique: for a name listed more than once only its first row is kept
uniqueValues = uniqueValues.drop_duplicates(subset='name', keep='first')

## Merge verification data with street names via name

In [14]:
# Street name designations: semicolon-separated CSV, decoded with the UTF-8-SIG codec
streetnames = pd.read_csv('../Datapreparation/streetnames.csv', encoding='UTF-8-SIG', sep=';')

<font color='red'>WARNING</font>: The ambiguity of names is not considered. It could be handled by a spatial restriction, but this was left out here.

In [15]:
# Inner join on the exact name string: every street-name record whose STN_LABEL
# equals an OSM name receives that name's etymology value
merge = uniqueValues.merge(
    streetnames,
    how='inner',
    left_on='name',
    right_on='STN_LABEL',
)
merge

Unnamed: 0.1,name,name:etymology:wikidata,Unnamed: 0,STR_ESID,STN_LABEL,COM_FOSNR,COM_NAME,COM_CANTON,STR_OFFICIAL,STR_EASTING,STR_NORTHING,STN_LABEL_NO_BI,STN_LABEL_NO_TERMS,STR_TERMS,STR_PREPS,STN_LABEL_FINAL
0,Tavelweg,Q124667,3313,10022177,Tavelweg,2275,Murten,FR,True,2576509.0,1197547.0,Tavelweg,Tavel,weg,,Tavel
1,Tavelweg,Q124667,78725,10089404,Tavelweg,387,Lengnau (BE),BE,True,2594575.0,1225189.0,Tavelweg,Tavel,weg,,Tavel
2,Tavelweg,Q124667,80161,10117105,Tavelweg,356,Muri bei Bern,BE,True,2603562.0,1197746.0,Tavelweg,Tavel,weg,,Tavel
3,Tavelweg,Q124667,83217,10012974,Tavelweg,979,Herzogenbuchsee,BE,True,2620038.0,1225679.0,Tavelweg,Tavel,weg,,Tavel
4,Tavelweg,Q124667,83516,10198126,Tavelweg,880,Rüeggisberg,BE,True,2602847.0,1187735.0,Tavelweg,Tavel,weg,,Tavel
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
4276,Hintere Erlachstrasse,Q123886,153401,10225984,Hintere Erlachstrasse,3203,St. Gallen,SG,True,2743189.0,1252886.0,Hintere Erlachstrasse,Erlach,strasse,hinter,Erlach
4277,Hardungweg,Q1495926,152103,10225832,Hardungweg,3203,St. Gallen,SG,True,2748092.0,1254605.0,Hardungweg,Hardung,weg,,Hardung
4278,Spühlweg,Q78071721;Q106809423,165667,10246955,Spühlweg,3203,St. Gallen,SG,True,2749581.0,1256161.0,Spühlweg,Spühl,weg,,Spühl
4279,Spühlstrasse,Q78071721;Q106809423,165964,10246954,Spühlstrasse,3203,St. Gallen,SG,True,2749624.0,1256261.0,Spühlstrasse,Spühl,strasse,,Spühl


In [17]:
# Persist the merged verification table (semicolon-separated, UTF-8 with BOM).
# The target is given relative to the working directory, mirroring the relative
# '../Datapreparation/streetnames.csv' input path, instead of an absolute
# Windows path that only exists on one machine.
# NOTE(review): assumes Datapreparation/ and Evaluation/ share the same parent
# folder and that Evaluation/data/ already exists — confirm.
merge.to_csv('../Evaluation/data/verif.csv', encoding='UTF-8-SIG', sep=';')