In [35]:
import pandas as pd
import numpy as np

streetnames = pd.read_csv('streetnames.csv', encoding='UTF-8-SIG', sep=';')
streetnames.head()

Unnamed: 0.1,Unnamed: 0,STR_ESID,STN_LABEL,COM_FOSNR,COM_NAME,COM_CANTON,STR_OFFICIAL,STR_EASTING,STR_NORTHING,STN_LABEL_NO_BI,STN_LABEL_NO_TERMS,STR_TERMS,STR_PREPS,STN_LABEL_FINAL
0,1,10023770,Wiedenweg,2786,Grellingen,BL,True,2610733.0,1254311.0,Wiedenweg,Wieden,weg,,Wieden
1,2,10179192,Wuhrbärgli,2788,Liesberg,BL,True,2598709.0,1249640.0,Wuhrbärgli,Wuhrbärgli,,,Wuhrbärgli
2,9,10140563,Emanuelenweg,2829,Liestal,BL,True,2623078.0,1257558.0,Emanuelenweg,Emanuelen,weg,,Emanuelen
3,13,10069457,Löhrweg,2850,Känerkinden,BL,True,2630229.0,1251411.0,Löhrweg,Löhr,weg,,Löhr
4,15,10096235,Brunngasse,2833,Seltisberg,BL,True,2621406.0,1256852.0,Brunngasse,Brunn,gasse,,Brunn


In [8]:
from SPARQLWrapper import SPARQLWrapper, JSON

wdUrl = "https://query.wikidata.org/sparql"
sparql = SPARQLWrapper(wdUrl)

def queryWd(sparql, subject):
    #print(wdKey)
    query = """
    SELECT ?subject ?subjectLabel ?instanceLabel WHERE {
      ?subject rdfs:label "%s"@de;
               wdt:P31 ?instance.
      SERVICE wikibase:label { bd:serviceParam wikibase:language "de" . }   
    }
    """ % (subject)
    #print(query)
    
    sparql.setQuery(query)
    sparql.setReturnFormat(JSON)
    return sparql

In [77]:
def queryPeople(sparql, subject):
    #print(wdKey)
    query = """
    SELECT DISTINCT ?item ?itemLabel ?dateOfBirth ?sex
    WHERE {
      hint:Query hint:optimizer "None".
      SERVICE wikibase:mwapi {
        bd:serviceParam wikibase:api "Search";
                        wikibase:endpoint "www.wikidata.org";
                        mwapi:srsearch "%s haswbstatement:P31=Q5".
        ?item wikibase:apiOutputItem mwapi:title .
      }
      OPTIONAL {?item wdt:P569 ?dateOfBirth  . }
      OPTIONAL {?item wdt:P21 ?sex  . }
      SERVICE wikibase:label { bd:serviceParam wikibase:language "[AUTO_LANGUAGE]". }
    }
    """ % (subject)
    #print(query)
    
    sparql.setQuery(query)
    sparql.setReturnFormat(JSON)
    return sparql

In [None]:
#Query streets

SELECT ?subject ?subjectLabel ?instanceLabel  ?namedAfter

WHERE {?subject rdfs:label 'Dufourstrasse'@de;
                wdt:P31 ?instance .
       SERVICE wikibase:label { bd:serviceParam wikibase:language "[AUTO_LANGUAGE]" . }  
       
      OPTIONAL {?subject wdt:P138 ?namedAfter  . }
      
      
      }

In [73]:
import requests
  
def get_retry_after():
    response = requests.get(wdUrl)
    retry_after = response.getheader("Retry-After")

    if retry_after is None:
        return None

    print(retry_after)

In [87]:
i = 0
dfr = pd.DataFrame(columns=["Name", "wikiQLabel", "wikiQ", "instance"])

maxresults = 10

for x in only_more_2_words.index:
    i+=1
    #print(i)
    subject = only_more_2_words['STN_LABEL_FINAL'][x]
    subjectStr = only_more_2_words['STN_LABEL'][x]
    #print(f"Subjekt: {subject}")
    sparql = queryWd(sparql, subject)
    #print(sparql)
    try:
        results = sparql.query()
        #print(results.info())
    except Exception as e:
        #Prüfen auf Statuscode 429 (Too many Requests). Leider kann der Statuscode nicht abgerufen werden..
        print("Sollte ein Statuscode 429 auftreten: Anfrage in ca. 30sec wiederholen")
        print(e)
        get_retry_after()
        break

    result = results.convert()
    #print(result)

    results_df = pd.json_normalize(result['results']['bindings'])
    #print(results_df)

    if not results_df.empty:
        wikiQ = results_df['subject.value'][0]
        wikiQLabel = results_df['subjectLabel.value'][0]
        instance = results_df['instanceLabel.value'][0]

        #print(f"{subjectStr} | {wikiQLabel}: {wikiQ} -> {instance}")
        dfrtemp = pd.DataFrame([[subjectStr, wikiQLabel, wikiQ, instance]], columns=dfr.columns)
        dfr = pd.concat([dfr, dfrtemp])

    else:
        #print(f"{subjectStr} | {subject}: Kein Eintrag in Wikidata gefunden")
        dfrtemp = pd.DataFrame([[subjectStr, subject, np.nan, np.nan]], columns=dfr.columns)
        dfr = pd.concat([dfr, dfrtemp])

    #print(x)
    if i > maxresults:
        break

In [142]:
i = 0
dfr = pd.DataFrame(columns=["Name", "wikiQLabel", "wikiQ", "dateBirth", "sex"])

maxresults = 50000

for x in only_more_2_words.index:
    i+=1
    #print(i)
    subject = only_more_2_words['STN_LABEL_FINAL'][x]
    subjectStr = only_more_2_words['STN_LABEL'][x]
    #print(f"Subjekt: {subject}")
    sparql = queryPeople(sparql, subject)
    #print(sparql)
    try:
        results = sparql.query()
        #print(results.info())
    except Exception as e:
        #Prüfen auf Statuscode 429 (Too many Requests). Leider kann der Statuscode nicht abgerufen werden..
        print("Sollte ein Statuscode 429 auftreten: Anfrage in ca. 30sec wiederholen")
        print(e)
        get_retry_after()
        break

    result = results.convert()
    #print(result)

    results_df = pd.json_normalize(result['results']['bindings'])
    #print(results_df)

    if not results_df.empty:
        wikiQ = results_df['item.value'][0]
        wikiQLabel = results_df['itemLabel.value'][0]
        try:
            dateBirth = results_df['dateOfBirth.value'][0]
        except:
            dateBirth = '1798-06-08T00:00:00Z'
        try:
            sex = results_df['sex.type'][0]
        except:
            sex = 'uri'


        print(f"{subjectStr} | {subject}: {wikiQ} -> {instance} / {dateBirth} ")
        dfrtemp = pd.DataFrame([[subjectStr, wikiQLabel, wikiQ, dateBirth, sex]], columns=dfr.columns)
        dfr = pd.concat([dfr, dfrtemp])

    else:
        #print(f"{subjectStr} | {subject}: Kein Eintrag in Wikidata gefunden")
        dfrtemp = pd.DataFrame([[subjectStr, subject, np.nan, np.nan, np.nan]], columns=dfr.columns)
        dfr = pd.concat([dfr, dfrtemp])

    #print(x)
    if i > maxresults:
        break

Anton von Blarerweg | Anton von Blarer: http://www.wikidata.org/entity/Q596128 -> Mensch / 1798-06-08T00:00:00Z 
General Guisan-Strasse | General Guisan: http://www.wikidata.org/entity/Q123497 -> Mensch / 1874-10-21T00:00:00Z 
Oskar Biderstrasse | Oskar Bider: http://www.wikidata.org/entity/Q688419 -> Mensch / 1891-07-12T00:00:00Z 
Rue Pierre-Nicolas-Chenaux | Pierre Nicolas Chenaux: http://www.wikidata.org/entity/Q3383407 -> Mensch / 1740-02-26T00:00:00Z 
Rue Antoine-de-Saint-Exupéry | Antoine Exupery: http://www.wikidata.org/entity/Q111354246 -> Mensch / 1951-07-18T00:00:00Z 
Charrière-des-Morts | Charriere Morts: http://www.wikidata.org/entity/Q98768351 -> Mensch / 1795-07-21T00:00:00Z 
Route de Villaz-St-Pierre | Villaz Pierre: http://www.wikidata.org/entity/Q99305387 -> Mensch / 1908-08-11T00:00:00Z 
Rue Robert-Stalder | Robert Stalder: http://www.wikidata.org/entity/Q93253625 -> Mensch / 1940-01-01T00:00:00Z 
Place de l'Hôtel-de-Ville | Hotel Ville: http://www.wikidata.org/entity

Rue Jacques-Gachoud | Jacques Gachoud: http://www.wikidata.org/entity/Q96281495 -> Mensch / 1657-12-31T00:00:00Z 
Sentier Jules-Daler | Jules Daler: http://www.wikidata.org/entity/Q96298242 -> Mensch / 1824-11-04T00:00:00Z 
Impasse du Champ des Pierres | Champ Pierres: http://www.wikidata.org/entity/Q89423874 -> Mensch / 1604-01-01T00:00:00Z 
Route Wilhelm-Kaiser | Wilhelm Kaiser: http://www.wikidata.org/entity/Q99395186 -> Mensch / 1917-01-01T00:00:00Z 
Chemin du Vieux Moulin | Vieux Moulin: http://www.wikidata.org/entity/Q98256153 -> Mensch / 1952-06-12T00:00:00Z 
Rue Pierre-Sciobéret | Pierre Scioberet: http://www.wikidata.org/entity/Q99982623 -> Mensch / 1830-01-13T00:00:00Z 
Vive Fontaine | Vive Fontaine: http://www.wikidata.org/entity/Q33264339 -> Mensch / 1959-08-31T00:00:00Z 
Route de Prin-Né | Prin Ne: http://www.wikidata.org/entity/Q7243864 -> Mensch / 1971-09-04T00:00:00Z 
Avenue Louis-Weck-Reynold | Louis Weck Reynold: http://www.wikidata.org/entity/Q15663400 -> Mensch / 18

AttributeError: 'Response' object has no attribute 'getheader'

In [141]:
dfr.head(10)

Unnamed: 0,Name,wikiQLabel,wikiQ,dateBirth,sex
0,Anton von Blarerweg,Q596128,http://www.wikidata.org/entity/Q596128,1798-06-08T00:00:00Z,uri
0,Ober-Aesch-Bannackerweg,Aesch Bannacker,,,
0,Lärchenweg Häuli,Lärchen Häuli,,,
0,Bois de Pouche,Bois Pouche,,,
0,General Guisan-Strasse,Q123497,http://www.wikidata.org/entity/Q123497,1874-10-21T00:00:00Z,uri
0,Oskar Biderstrasse,Q688419,http://www.wikidata.org/entity/Q688419,1891-07-12T00:00:00Z,uri
0,Im Froloo-Eichli,Froloo Eichli,,,
0,Chlei Bruederhölzli,Chlei Bruederhölzli,,,
0,Chêne des Croix,Chene Croix,,,
0,Gewerbezone Brüggfeld,Gewerbezone Brüggfeld,,,


In [70]:
labelna = streetnames.dropna(subset=['STN_LABEL_FINAL'])
only_more_2_words = labelna.loc[(labelna.STN_LABEL_FINAL.str.contains(" "))]
only_more_2_words


Unnamed: 0.1,Unnamed: 0,STR_ESID,STN_LABEL,COM_FOSNR,COM_NAME,COM_CANTON,STR_OFFICIAL,STR_EASTING,STR_NORTHING,STN_LABEL_NO_BI,STN_LABEL_NO_TERMS,STR_TERMS,STR_PREPS,STN_LABEL_FINAL
20,38,10155431,Anton von Blarerweg,2761,Aesch (BL),BL,True,2612208.0,1257402.0,Anton von Blarerweg,Anton von Blarer,weg,,Anton von Blarer
44,73,10140443,Ober-Aesch-Bannackerweg,2785,Duggingen,BL,True,2613642.0,1256887.0,Ober-Aesch-Bannackerweg,Aesch Bannacker,weg,ober,Aesch Bannacker
54,88,10140888,Lärchenweg Häuli,2829,Liestal,BL,True,2618237.0,1259445.0,Lärchenweg Häuli,Lärchen Häuli,weg,,Lärchen Häuli
71,115,10249007,Bois de Pouche,6771,Alle,JU,True,2578140.0,1253863.0,Bois de Pouche,Bois de Pouche,,,Bois Pouche
87,138,10069811,General Guisan-Strasse,2833,Seltisberg,BL,True,2621236.0,1256549.0,General Guisan-Strasse,General Guisan,strasse,,General Guisan
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
171786,221154,10129197,Salita Rocchetta,5239,Tresa,TI,True,2710007.0,1091934.0,Salita Rocchetta,Salita Rocchetta,,,Salita Rocchetta
171788,221156,10139453,Cascine di Barico,5239,Tresa,TI,True,2709010.0,1093237.0,Cascine di Barico,Cascine di Barico,,,Cascine Barico
171817,221198,10070574,Altersheim Brünnliacker,4891,Berg (TG),TG,True,2731480.0,1270685.0,Altersheim Brünnliacker,Altersheim Brünnliacker,,,Altersheim Brünnliacker
171825,221210,10090221,Waldhof-Haldenstrasse,4501,Kradolf-Schönenberg,TG,True,2732847.0,1263896.0,Waldhof-Haldenstrasse,Waldhof Halden,strasse,,Waldhof Halden


In [76]:
dfr.to_csv('C:\CAS_Arbeit\cassda-zertifikatsarbeit\Datapreparation\wiki.csv', encoding='UTF-8-SIG', sep=';')