In [11]:
import json
import requests
import pandas as pd
import rdflib

In [12]:
# create url with person-variable
person = 'Julius II.'
url = f"https://lobid.org/gnd/search?q={person}&format=json"
print(url)


https://lobid.org/gnd/search?q=Julius II.&format=json


In [13]:
# fetch url
# returns first 10 search results from url
response = requests.get(url)

# if fetching is successful (status_code 200)
if response.status_code == 200:
    #convert to json
    data = response.json()

# "pretty" print data with intent
print(json.dumps(data, indent=2))

{
  "@context": "https://lobid.org/gnd/context.jsonld",
  "id": "https://lobid.org/gnd/search?q=Julius%20II.&format=json",
  "totalItems": 213,
  "member": [
    {
      "professionOrOccupation": [
        {
          "id": "https://d-nb.info/gnd/4044561-6",
          "label": "Papst"
        }
      ],
      "placeOfBirth": [
        {
          "id": "https://d-nb.info/gnd/4428342-8",
          "label": "Albisola Superior"
        }
      ],
      "gender": [
        {
          "id": "https://d-nb.info/standards/vocab/gnd/gender#male",
          "label": "M\u00e4nnlich"
        }
      ],
      "dateOfDeath": [
        "1513"
      ],
      "dateOfBirth": [
        "1443"
      ],
      "placeOfDeath": [
        {
          "id": "https://d-nb.info/gnd/4050471-2",
          "label": "Rom"
        }
      ],
      "variantNameEntityForThePerson": [
        {
          "counting": [
            "II"
          ],
          "nameAddition": [
            "Pape"
          ],
          "pe

In [14]:
# get gnd-number from first member in results
# acces key with get() --> safer, does not return KeyError if empty instead of data['member']
members = data.get('member', [])

In [25]:
# Fetch person by name,
# filter by type:Person and professionOrOccupation

#person = 'Julius II.'
#profession = 'Papst'
person = 'Marquard Bermann'
profession = 'Bischof'

url = f"https://lobid.org/gnd/search?q=preferredName:{person}" \
      f"&filter=professionOrOccupation.label:{profession} " \
      f"&filter=type:Person" \
      f"&format=json"

#url = f"https://lobid.org/gnd/search?q={person}&format=json"

print(url)

https://lobid.org/gnd/search?q=preferredName:Marquard Bermann&filter=professionOrOccupation.label:Bischof OR professionOrOccupationAsLiteral:Bischof&filter=type:Person&format=json


In [16]:
member = members[0]
member_gnd = member.get('gndIdentifier', [])
print(member_gnd)

118714090


In [17]:
# get label of gender
gender_labels = [g.get('label', "") for g in member.get('gender', [])]
gender_labels = ",".join(gender_labels)
print(gender_labels)

Männlich


In [18]:
# Create a list of person names
# Bonifatius ist ein gutes Beispiel für: Einschränken nach Person
# Julius II mehrere Ergebnisse
names = ['Marquard Bermann', 'Bonifatius IX.', 'Julius II.']
professions = ['Bischof', 'Papst', 'Papst']

# Combine into a dictionary
persons = dict(zip(names, professions))

In [26]:
# initialize empty list
persons_data = []

# loop over persons and fetch data
for person, profession in persons.items():

    print(f"fetch data for person: {person}, profession: {profession}")

    # build url
    url = f"https://lobid.org/gnd/search?q=preferredName:{person}" \
          f"&filter=professionOrOccupation.label:{profession} " \
          f"OR professionOrOccupationAsLiteral:{profession}" \
          f"&filter=type:Person" \
          f"&format=json"

    # fetch url
    response = requests.get(url)

    # if response is available: extract data
    if response.status_code == 200:

        # convert to json
        data = response.json()

        # process results for personname,
        # note: one person can have multiple results (member-entries)
        members = data.get('member', [])

        if len(members) > 0:
            # process only first 5 results (by default, 10 are returned)
            for member in members:

                # append fetched data
                persons_data.append({
                    'input_name' : person,
                    'person_name':member.get('preferredName', []),
                    'gnd': member.get('gndIdentifier', []),
                    'member_gnd_link' : member.get('id', []),
                    'gender' : ",".join([i.get('label', "") for i in member.get('gender', [])]),
                    'date_death' : ",".join(member.get('dateOfDeath', [])),
                    'date_birth' : ",".join(member.get('dateOfBirth', [])),
                    'profession': ",".join(member.get('professionOrOccupationAsLiteral', []))
                }
                )

        # if no members available: add person name to list
        else:
            # add only person name
            persons_data.append({
                'input_name': person,
            })

# convert to pandas dataframe
persons_data = pd.DataFrame(persons_data)

persons_data

fetch data for person: Marquard Bermann, profession: Bischof
fetch data for person: Bonifatius IX., profession: Papst
fetch data for person: Julius II., profession: Papst


Unnamed: 0,input_name,person_name,gnd,member_gnd_link,gender,date_death,date_birth,profession
0,Marquard Bermann,"Bermann, Marquard",1150194170,https://d-nb.info/gnd/1150194170,Männlich,1378-09-17,,"Domherr,Bischof,Propst"
1,Bonifatius IX.,"Bonifatius IX., Papst",118661469,https://d-nb.info/gnd/118661469,Männlich,1404,1350.0,
2,Julius II.,"Julius II., Papst",118714090,https://d-nb.info/gnd/118714090,Männlich,1513,1443.0,
3,Julius II.,"Julius III., Papst",118955314,https://d-nb.info/gnd/118955314,Männlich,1555,1487.0,


In [None]:
# select top entry for persons
persons_data_top = persons_data.groupby('input_name').first().reset_index()

In [74]:
# Parse json-ld as graph
graph = rdflib.Graph()
graph.parse(data=data, format="json-ld")

# Print extracted data
print(graph)

# context files?
#context_url = "https://lobid.org/gnd/context.jsonld"
#context_response = requests.get(context_url)
#context = context_response.json()


[a rdfg:Graph;rdflib:storage [a rdflib:Store;rdfs:label 'Memory']].


In [111]:
# SPARQL query to get all gndIdentifiers
query = """
    SELECT ?gndIdentifier WHERE {
        ?subject <https://d-nb.info/standards/elementset/gnd#gndIdentifier> ?gndIdentifier .
    }
"""

# Execute the query on the graph
results = graph.query(query)

# Iterate over the results and print gndIdentifiers
for row in results:
    print(f"gndIdentifier: {row[0]}")

gndIdentifier: 118714090
gndIdentifier: 1122839928
gndIdentifier: 1122850379
gndIdentifier: 116940069
gndIdentifier: 1185522085
gndIdentifier: 1122854420
gndIdentifier: 1122854447
gndIdentifier: 1073364208
gndIdentifier: 1157906265
gndIdentifier: 1251374581


In [108]:

# SPAQRL-Query
query = """
    SELECT ?personName ?gndIdentifier ?gender ?dateOfDeath ?dateOfBirth ?profession WHERE {
        ?subject a <https://d-nb.info/standards/elementset/gnd#Person> .

        # Get preferredName
        ?subject <https://d-nb.info/standards/elementset/gnd#preferredName> ?personName .

        # Get gndIdentifier
        ?subject <https://d-nb.info/standards/elementset/gnd#gndIdentifier> ?gndIdentifier .

        # Get gender (assuming it's under gender or similar property)
        ?subject <https://d-nb.info/standards/elementset/gnd#gender> ?gender .

        # Get dateOfDeath
        ?subject <https://d-nb.info/standards/elementset/gnd#dateOfDeath> ?dateOfDeath .

        # Get dateOfBirth
        ?subject <https://d-nb.info/standards/elementset/gnd#dateOfBirth> ?dateOfBirth .

        # Get profession
        ?subject <https://d-nb.info/standards/elementset/gnd#professionOrOccupation> ?profession .

    }
"""

# Execute the query on the graph
results = graph.query(query)

# Iterate over the results and collect the data
members = []
for row in results:
    member = {
        'person_name_result': row['personName'].value if row['personName'] else "",
        'gnd': row['gndIdentifier'],
        'gender': row['gender'] if row['gender'] else "",
        'date_death': row['dateOfDeath'].value if row['dateOfDeath'] else "",
        'date_birth': row['dateOfBirth'].value if row['dateOfBirth'] else "",
        'profession': row['profession'] if row['profession'] else "",
    }
    members.append(member)

# Print the extracted members' data
for member in members:
    print(member)

{'person_name_result': 'Julius II., Papst', 'gnd': rdflib.term.Literal('118714090'), 'gender': rdflib.term.URIRef('https://d-nb.info/standards/vocab/gnd/gender#male'), 'date_death': '1513', 'date_birth': '1443', 'profession': rdflib.term.URIRef('https://d-nb.info/gnd/4044561-6')}
{'person_name_result': 'Julius III., Papst', 'gnd': rdflib.term.Literal('118955314'), 'gender': rdflib.term.URIRef('https://d-nb.info/standards/vocab/gnd/gender#male'), 'date_death': '1555', 'date_birth': '1487', 'profession': rdflib.term.URIRef('https://d-nb.info/gnd/4044561-6')}
