## Querying for a person object
Example query structure for a person with three name elements, a date of birth (dOB) and a date of death (dOD).

### urllib version (Python 3)

In [3]:
import urllib.parse
import urllib.request
import time

import json

url = 'https://lobid.org/gnd/reconcile/'

# Name-element combinations to search for, in the order the queries are numbered
# (q0-q3, q4-q7, q8-q11, q12-q15).
name_combinations = [
    ("Philipp", "Heinrich"),
    ("Heinrich", "Hopf"),
    ("Philipp", "Hopf"),
    ("Philipp", "Heinrich", "Hopf"),
]

date_of_birth = {"pid": "dateOfBirth", "v": "1747"}
date_of_death = {"pid": "dateOfDeath", "v": "1804"}

# Every name combination is queried four times:
# name only, name + birth date, name + death date, name + both dates.
property_filters = [
    [],
    [date_of_birth],
    [date_of_death],
    [date_of_birth, date_of_death],
]

# Build the query batch from data instead of hand-writing the JSON text, so a
# missing quote or comma cannot silently produce an invalid request.
queries = {}
for names in name_combinations:
    # Each name element carries a trailing "~" (presumably the fuzzy-match
    # operator of the service's query syntax - confirm against the lobid docs).
    query_string = " ".join(name + "~" for name in names)
    for properties in property_filters:
        query = {"query": query_string, "type": "DifferentiatedPerson"}
        if properties:
            query["properties"] = properties
        query["limit"] = 100
        queries["q" + str(len(queries))] = query

# The batch is sent as one form field named "queries" holding the JSON text.
data = {'queries': json.dumps(queries)}

# Form-encode the payload; passing a body to Request makes urllib send a POST.
encoded_data = urllib.parse.urlencode(data).encode('utf-8')

start_time = time.time()
request = urllib.request.Request(url, encoded_data)
# The context manager closes the HTTP connection even if reading fails, and the
# timeout keeps a stalled server from blocking the kernel indefinitely.
# The body is read inside the timed section so the measurement covers the full
# download, like requests.post() does.
with urllib.request.urlopen(request, timeout=30) as response:
    response_body = response.read()
end_time = time.time()

response_time = (end_time - start_time)
print(f"Response time: {response_time} seconds")
# The status attribute stays readable after the response has been closed.
print(response.status)
#print(response_body)

Response time: 1.7906429767608643 seconds
200


### requests version

In [4]:
import requests
import time

import json

url = 'https://lobid.org/gnd/reconcile/'

# other reconciliation services do not work without tweaking the queries
#url = ' https://wikidata.reconci.link/en/api'

# Philipp Heinrich Hopf (1747–1804)
# The queries cover all combinations of at least two name elements, each sent as
# 1. name only, 2. name + dOB, 3. name + dOD, 4. name + dOB + dOD

# Name-element combinations, in the order the queries are numbered
# (q0-q3, q4-q7, q8-q11, q12-q15).
name_combinations = [
    ("Philipp", "Heinrich"),
    ("Heinrich", "Hopf"),
    ("Philipp", "Hopf"),
    ("Philipp", "Heinrich", "Hopf"),
]

date_of_birth = {"pid": "dateOfBirth", "v": "1747"}
date_of_death = {"pid": "dateOfDeath", "v": "1804"}

property_filters = [
    [],
    [date_of_birth],
    [date_of_death],
    [date_of_birth, date_of_death],
]

# Build the query batch from data instead of hand-writing the JSON text, so a
# missing quote or comma cannot silently produce an invalid request.
queries = {}
for names in name_combinations:
    # Each name element carries a trailing "~" (presumably the fuzzy-match
    # operator of the service's query syntax - confirm against the lobid docs).
    query_string = " ".join(name + "~" for name in names)
    for properties in property_filters:
        query = {"query": query_string, "type": "DifferentiatedPerson"}
        if properties:
            query["properties"] = properties
        query["limit"] = 100
        queries["q" + str(len(queries))] = query

# The batch is sent as one form field named "queries" holding the JSON text.
data = {'queries': json.dumps(queries)}

# It might be OK to query for all additional values in one query using OR, as it gives the same result - but if the limit is hit, some results might fall outside the returned set
#data = {'queries': '{"q0":{"query":"Philipp Heinrich","type":"DifferentiatedPerson", "limit":100}, "q3":{"query":"Philipp Heinrich","type":"DifferentiatedPerson","properties":[{"pid":"dateOfBirth","v":"1747"}, {"pid":"dateOfDeath","v":"1804"}], "limit":100}, "q4":{"query":"Heinrich Hopf","type":"DifferentiatedPerson", "limit":100}, "q7":{"query":"Heinrich Hopf","type":"DifferentiatedPerson","properties":[{"pid":"dateOfBirth","v":"1747"}, {"pid":"dateOfDeath","v":"1804"}], "limit":100},"q8":{"query":"Philipp Hopf","type":"DifferentiatedPerson", "limit":100}, "q11":{"query":"Philipp Hopf","type":"DifferentiatedPerson","properties":[{"pid":"dateOfBirth","v":"1747"}, {"pid":"dateOfDeath","v":"1804"}], "limit":100}, "q12":{"query":"Philipp Heinrich Hopf","type":"DifferentiatedPerson", "limit":100}, "q15":{"query":"Philipp Heinrich Hopf","type":"DifferentiatedPerson","properties":[{"pid":"dateOfBirth","v":"1747"}, {"pid":"dateOfDeath","v":"1804"}], "limit":100}}'}
#data = {'queries': '{"q3":{"query":"Philipp Heinrich","type":"DifferentiatedPerson","properties":[{"pid":"dateOfBirth","v":"1747"}, {"pid":"dateOfDeath","v":"1804"}], "limit":40}, "q7":{"query":"Heinrich Hopf","type":"DifferentiatedPerson","properties":[{"pid":"dateOfBirth","v":"1747"}, {"pid":"dateOfDeath","v":"1804"}], "limit":40}, "q11":{"query":"Philipp Hopf","type":"DifferentiatedPerson","properties":[{"pid":"dateOfBirth","v":"1747"}, {"pid":"dateOfDeath","v":"1804"}], "limit":40}, "q15":{"query":"Philipp Heinrich Hopf","type":"DifferentiatedPerson","properties":[{"pid":"dateOfBirth","v":"1747"}, {"pid":"dateOfDeath","v":"1804"}], "limit":40}}'}

# test for damerau-levenshtein
#data = {'queries': '{"q0":{"query":"Johann~ Adolp~ Sliovogt~", "limit":100}}'}

# Send the query batch as a form-encoded POST and time the round trip.
start_time = time.time()
# requests applies no timeout by default; bound the wait so a stalled server
# cannot block the kernel indefinitely.
response = requests.post(url, data=data, timeout=30)
end_time = time.time()

response_time = (end_time - start_time)
print(f"Response time: {response_time} seconds")
print(response.status_code)
#print(response.text)

Response time: 1.7163090705871582 seconds
200


## Writing all GND-IDs to a list

In [183]:
import json

# Expects `response` to be the requests response of a reconcile query.
# The parsed body maps each query key to an object whose 'result' list holds
# the candidate entries; every candidate carries its GND-ID under 'id'.
data = response.json()

# All IDs across all queries, duplicates included.
id_list = [
    result['id']
    for query_result in data.values()
    for result in query_result['result']
]

# dict.fromkeys() removes duplicates while keeping first-seen order, so the
# list (and every request built from it) is identical on each run;
# list(set(...)) would reorder it arbitrarily between kernel sessions.
clean_list = list(dict.fromkeys(id_list))

print(f"The responses contain {len(clean_list)} ({len(id_list)} if including duplicates) elements in total.")
print(clean_list)

All responses contain 182(740 if counting duplicates) elements in total.
['122823400', '106907523X', '1067644539', '124438741', '136093272', '116989262', '1034903500', '123757304', '1083041762', '128361204', '128473363', '116082151', '120057476', '115387056', '102542236', '1113884657', '1102958778', '1052804624', '136740391', '119787121', '1038460417', '100358241', '1166822990', '1058351974', '1034669540', '13352325X', '124648010', '1106580141', '118705814', '136062660', '132747979', '1052431518', '1052462464', '1025280725', '12344117X', '108495175', '1035535882', '1016945108', '118565265', '116935413', '122445465', '118084658', '1050113985', '1161169342', '1053422377', '124191479', '1052725783', '132196867', '120588994', '1052714897', '1037516184', '106136190X', '131916866', '104304413', '134013492', '131723618', '122979540', '120702819', '1131285751', '1244416983', '1021396974', '101441928X', '121033104', '124648088', '1024826449', '142933732', '1220647233', '136642667', '1016608357'

## Generating datasets via reconciliation_extend
For all available properties, see https://lobid.org/gnd/reconcile/properties?type=Person

In [184]:
import requests
import time

url = 'https://lobid.org/gnd/reconcile/'

# Properties to fetch for every collected GND-ID.
extend_properties = [
    "preferredName",
    "variantName",
    "dateOfBirth",
    "placeOfBirth",
    "dateOfDeath",
    "placeOfDeath",
    "professionOrOccupation",
]

# Serialise the extend request with json.dumps instead of concatenating
# strings, so IDs are always quoted and escaped correctly.
extend_request = {
    "ids": [str(gnd_id) for gnd_id in clean_list],
    "properties": [{"id": property_id} for property_id in extend_properties],
}
data = {'extend': json.dumps(extend_request)}

start_time = time.time()
# requests applies no timeout by default; bound the wait so a stalled server
# cannot block the kernel indefinitely.
response = requests.post(url, data=data, timeout=30)
end_time = time.time()

response_time = (end_time - start_time)
print(f"Response time: {response_time} seconds")
#print(response.status_code)
# ensure_ascii=False keeps umlauts and other non-ASCII characters readable.
# The former encode('utf-8').decode('unicode-escape') round trip also
# unescaped \" \\ and \n inside JSON strings (yielding invalid JSON) and split
# characters outside the BMP into lone surrogates.
results = json.dumps(response.json(), indent=2, ensure_ascii=False)
print(results)

Response time: 2.168267011642456 seconds
200
{
  "meta": [
    {
      "id": "preferredName",
      "name": "Bevorzugter Name"
    },
    {
      "id": "variantName",
      "name": "Varianter Name"
    },
    {
      "id": "dateOfBirth",
      "name": "Geburtsdatum"
    },
    {
      "id": "placeOfBirth",
      "name": "Geburtsort",
      "type": {
        "id": "PlaceOrGeographicName",
        "name": "Geografikum"
      }
    },
    {
      "id": "dateOfDeath",
      "name": "Sterbedatum"
    },
    {
      "id": "placeOfDeath",
      "name": "Sterbeort",
      "type": {
        "id": "PlaceOrGeographicName",
        "name": "Geografikum"
      }
    },
    {
      "id": "professionOrOccupation",
      "name": "Beruf oder Beschäftigung",
      "type": {
        "id": "SubjectHeading",
        "name": "Schlagwort"
      }
    }
  ],
  "rows": {
    "122823400": {
      "preferredName": [
        {
          "str": "Eckestein, Philipp Heinrich"
        }
      ],
      "variantName": 

## Alternative: Generating datasets from the regular lobid API - needs one request per GND-ID

In [161]:
# Fetch the full lobid record for every collected GND-ID, one GET per ID.
members = []
# A Session reuses the underlying connection across the many requests instead
# of opening a new one for each ID.
with requests.Session() as session:
    for gnd in clean_list:
        # https like the reconcile endpoint used elsewhere in this notebook
        url = 'https://lobid.org/gnd/' + gnd + '.json'
        # Bounded wait: requests applies no timeout by default.
        response = session.get(url, timeout=30).json()
        members.append(response)

# ensure_ascii=False keeps umlauts readable without the lossy
# encode('utf-8').decode('unicode-escape') round trip, which also unescaped
# \" \\ and \n inside JSON strings and so produced invalid JSON.
results_dict = json.dumps({"member": members}, indent=2, ensure_ascii=False)
#print(results_dict)

{
  "member": [
    {
      "professionOrOccupation": [
        {
          "id": "https://d-nb.info/gnd/4029050-5",
          "label": "Jurist"
        }
      ],
      "placeOfBirth": [
        {
          "id": "https://d-nb.info/gnd/4024025-3",
          "label": "Heilbronn"
        }
      ],
      "gender": [
        {
          "id": "https://d-nb.info/standards/vocab/gnd/gender#male",
          "label": "Männlich"
        }
      ],
      "dateOfDeath": [
        "1682-04-06"
      ],
      "dateOfBirth": [
        "1653-08-03"
      ],
      "placeOfDeath": [
        {
          "id": "https://d-nb.info/gnd/4024025-3",
          "label": "Heilbronn"
        }
      ],
      "variantNameEntityForThePerson": [
        {
          "forename": [
            "Philippus Henricus"
          ],
          "surname": [
            "Eckestein"
          ]
        },
        {
          "forename": [
            "Philipp Heinrich"
          ],
          "surname": [
            "Eckstein"