<a href="https://colab.research.google.com/github/InTaVia/backend-presentation-ljubljana-2023/blob/main/intavia_hands_on_9_23.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

We begin by installing some libraries for interacting with SPARQL and REST endpoints.

In [None]:
!pip install httpx SPARQLWrapper rdflib matplotlib

Collecting httpx
  Downloading httpx-0.25.0-py3-none-any.whl (75 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m75.7/75.7 kB[0m [31m1.5 MB/s[0m eta [36m0:00:00[0m
[?25hCollecting SPARQLWrapper
  Downloading SPARQLWrapper-2.0.0-py3-none-any.whl (28 kB)
Collecting rdflib
  Downloading rdflib-7.0.0-py3-none-any.whl (531 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m531.9/531.9 kB[0m [31m11.0 MB/s[0m eta [36m0:00:00[0m
Collecting httpcore<0.19.0,>=0.18.0 (from httpx)
  Downloading httpcore-0.18.0-py3-none-any.whl (76 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m76.0/76.0 kB[0m [31m7.1 MB/s[0m eta [36m0:00:00[0m
Collecting isodate<0.7.0,>=0.6.0 (from rdflib)
  Downloading isodate-0.6.1-py2.py3-none-any.whl (41 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m41.7/41.7 kB[0m [31m3.7 MB/s[0m eta [36m0:00:00[0m
Collecting h11<0.15,>=0.13 (from httpcore<0.19.0,>=0.18.0->httpx)
  Downloading h

Next we define a list of queries we are interested in. In our case we use the person we already saw in the presentation: "Giuseppe Acerbi"

In [None]:
# Names to search for, written as "family name, given name".
queries = ["Acerbi, Giuseppe"]

Next we define a function that takes a query parameter and runs that query against the InTaVia REST endpoint.

In [62]:
import httpx
def query_intavia_rest(query_param: str, **kwargs) -> list:
  """Search the InTaVia REST API for entities and return the list of hits.

  Args:
    query_param: Search string, sent as the ``q`` query parameter.
    **kwargs: Additional query parameters, forwarded unchanged
      (for example ``kind=['group']``).

  Returns:
    The value stored under the ``results`` key of the JSON response.

  Raises:
    httpx.HTTPStatusError: If the server does not answer with a success
      status. Failing here avoids silently returning ``None``, which would
      only surface later as a confusing ``TypeError`` when the result is
      indexed.
  """
  params = {'q': query_param, **kwargs}
  # Echo the effective query parameters so the request is visible in the cell output.
  print(params)
  res = httpx.get('https://intavia-backend.acdh-dev.oeaw.ac.at/v2/api/entities/search', params=params)
  res.raise_for_status()
  return res.json()['results']

In [None]:
# Search for 'Acerbi' and display the raw list of hits returned by the API.
query_intavia_rest('Acerbi')

[{'id': 'aHR0cDovL3d3dy5pbnRhdmlhLmV1L3Byb3ZpZGVkX3BlcnNvbi82Njg3',
  'label': {'default': 'Acerbi, Enrico'},
  'kind': 'person',
  'linkedIds': [{'label': 'Österreichische Biographische Lexikon, APIS',
    'url': 'https://apis.acdh.oeaw.ac.at/entity/90793'},
   {'label': 'Gemeinsame Normdatei (GND)',
    'url': 'https://d-nb.info/gnd/116241470'}],
  'gender': {'id': 'http://ldf.fi/schema/bioc/Male',
   'label': {'default': 'male'}},
  'occupations': [{'id': 'aHR0cDovL3d3dy5pbnRhdmlhLmV1L2FwaXMvb2NjdXBhdGlvbi8xNTQ=',
    'label': {'default': 'Medizin'}},
   {'id': 'aHR0cDovL3d3dy5pbnRhdmlhLmV1L2FwaXMvb2NjdXBhdGlvbi8yMDI=',
    'label': {'default': 'Medizin >> Mediziner'}}],
  'alternativeLabels': [{'default': 'Acerbi, Enrico'},
   {'default': 'no label provided'}],
  'biographies': ['aHR0cDovL3d3dy5pbnRhdmlhLmV1L2FwaXMvdGV4dC85MDc5My9iaW8='],
  'relations': [{'event': 'aHR0cDovL3d3dy5pbnRhdmlhLmV1L2FwaXMvZGVhdGhldmVudC85MDc5Mw==',
    'role': 'aHR0cDovL3d3dy5pbnRhdmlhLmV1L2lkbS1yb2xlL2

The response contains many unresolved references (encoded event and role ids) that we need to resolve against other endpoints.

In [57]:
def resolve_events(entity: dict) -> dict:
  """Replace the event and role ids in ``entity['relations']`` with full records.

  The ids are sent to the InTaVia ``events/retrieve`` and
  ``vocabularies/roles/retrieve`` endpoints; every relation whose id is found
  in the responses gets the id replaced by the returned record. Each event's
  own ``relations`` entry is dropped before it is embedded.

  Args:
    entity: Entity dict with a ``relations`` list whose items have ``event``
      and ``role`` keys. It is modified in place.

  Returns:
    The same ``entity`` object. If either request does not answer with
    status 200, or there is nothing left to resolve, it is returned unchanged.
  """
  relations = entity['relations']
  # Relations resolved by an earlier call already hold dicts, which are
  # unhashable and need no lookup, so only plain string ids are collected.
  event_ids = list({rel['event'] for rel in relations if isinstance(rel['event'], str)})
  role_ids = list({rel['role'] for rel in relations if isinstance(rel['role'], str)})
  print(list(event_ids))
  if not event_ids and not role_ids:
    return entity
  res_events = httpx.post('https://intavia-backend.acdh-dev.oeaw.ac.at/v2/api/events/retrieve', json={"id": event_ids})
  res_roles = httpx.post('https://intavia-backend.acdh-dev.oeaw.ac.at/v2/api/vocabularies/roles/retrieve', json={"id": role_ids})
  if res_events.status_code == 200 and res_roles.status_code == 200:
    events_by_id = {}
    for event in res_events.json()['results']:
      # Several relations can point at the same event, so strip its
      # 'relations' entry once here; popping it once per matching relation
      # raised KeyError on the second match.
      event.pop('relations', None)
      events_by_id.setdefault(event['id'], event)
    roles_by_id = {}
    for role in res_roles.json()['results']:
      roles_by_id.setdefault(role['id'], role)
    for relation in relations:
      event_id, role_id = relation['event'], relation['role']
      if isinstance(event_id, str) and event_id in events_by_id:
        relation['event'] = events_by_id[event_id]
      if isinstance(role_id, str) and role_id in roles_by_id:
        relation['role'] = roles_by_id[role_id]
  return entity



In [None]:
# Re-run the search, then resolve the second hit (index 1). In the recorded
# run that hit is 'Acerbi, Giuseppe'; index 0 is 'Acerbi, Enrico'.
# NOTE(review): the index depends on the order in which the API returns hits.
r1 = query_intavia_rest('Acerbi')
r2 = resolve_events(r1[1])

['aHR0cDovL3d3dy5pbnRhdmlhLmV1L2JzL2V2ZW50LzMwNzgtOQ==', 'aHR0cDovL3d3dy5pbnRhdmlhLmV1L2JzL2V2ZW50LzMwNzgtNQ==', 'aHR0cDovL3d3dy5pbnRhdmlhLmV1L2JzL2V2ZW50LzMwNzgtMQ==', 'aHR0cDovL3d3dy5pbnRhdmlhLmV1L2JzL2V2ZW50LzMwNzgtOA==', 'aHR0cHM6Ly93d3cuaW50YXZpYS5ldS9wcm9kdWN0aW9uX2V2ZW50L1E1MTQxNzEwOQ==', 'aHR0cDovL3d3dy5pbnRhdmlhLmV1L2JzL2V2ZW50LzMwNzgtMw==', 'aHR0cDovL3d3dy5pbnRhdmlhLmV1L2JzL2V2ZW50LzMwNzgtMTc=', 'aHR0cDovL3d3dy5pbnRhdmlhLmV1L2JzL2V2ZW50LzMwNzgtMTU=', 'aHR0cDovL3d3dy5pbnRhdmlhLmV1L2FwaXMvZGVhdGhldmVudC85MDc5Ng==', 'aHR0cDovL3d3dy5pbnRhdmlhLmV1L2JzL2V2ZW50LzMwNzgtMg==', 'aHR0cDovL3d3dy5pbnRhdmlhLmV1L2JzL2V2ZW50LzMwNzgtMTY=', 'aHR0cDovL3d3dy5pbnRhdmlhLmV1L2JzL2V2ZW50LzMwNzgtMTA=', 'aHR0cDovL3d3dy5pbnRhdmlhLmV1L2JzL2V2ZW50LzMwNzgtMTE=', 'aHR0cDovL3d3dy5pbnRhdmlhLmV1L2JzL2V2ZW50LzMwNzgtNA==', 'aHR0cDovL3d3dy5pbnRhdmlhLmV1L2JzL2V2ZW50LzMwNzgtMTQ=', 'aHR0cDovL3d3dy5pbnRhdmlhLmV1L2FwaXMvYmlydGhldmVudC85MDc5Ng==', 'aHR0cDovL3d3dy5pbnRhdmlhLmV1L2JzL2V2ZW50LzMwNzgtNw==', 'aHR0cDovL3d3dy

In [None]:
# Display the entity returned by resolve_events.
r2

{'id': 'aHR0cDovL3d3dy5pbnRhdmlhLmV1L3Byb3ZpZGVkX3BlcnNvbi85NjQ4',
 'label': {'default': 'Acerbi, Giuseppe'},
 'kind': 'person',
 'linkedIds': [{'label': 'BiographySampo', 'url': 'http://ldf.fi/nbf/p/3078'},
  {'label': 'Wikidata', 'url': 'http://www.wikidata.org/entity/Q55007624'},
  {'label': 'Österreichische Biographische Lexikon, APIS',
   'url': 'https://apis.acdh.oeaw.ac.at/entity/90796'},
  {'label': 'Gemeinsame Normdatei (GND)',
   'url': 'https://d-nb.info/gnd/119372843'}],
 'gender': {'id': 'http://ldf.fi/schema/bioc/Male',
  'label': {'default': 'male'}},
 'occupations': [{'id': 'aHR0cDovL3d3dy5pbnRhdmlhLmV1L2JzL29jY3VwYXRpb24vMzY4MzQ=',
   'label': {'default': 'composer'}},
  {'id': 'aHR0cDovL3d3dy5pbnRhdmlhLmV1L2FwaXMvb2NjdXBhdGlvbi8xMzU=',
   'label': {'default': 'Naturwissenschaft'}},
  {'id': 'aHR0cDovL3d3dy5pbnRhdmlhLmV1L2JzL29jY3VwYXRpb24vMzU3OTAzNQ==',
   'label': {'default': 'travel writer'}},
  {'id': 'aHR0cDovL3d3dy5pbnRhdmlhLmV1L2FwaXMvb2NjdXBhdGlvbi8xMjg=',
   '

# Compare data with Wikidata using SPARQL

Let's create a SPARQL query to compare the date of birth in Wikidata with the dates from InTaVia.

In [None]:
# Print the start date of every event in which the person has the
# 'Born Person' role. Each item of r2['relations'] is a relation that carries
# both the resolved role and the resolved event.
for event in r2['relations']:
  if event['role']['label']['default'] != 'Born Person':
    continue
  print(event['event']['startDate'])

1773-01-01
1773-05-03


In [None]:
# Query text for the date of birth of the Wikidata entity Q55007624, which is
# the Wikidata id linked from the InTaVia record of Giuseppe Acerbi.
# The pattern requires `wdt:P31 wd:Q5` (instance of: human in Wikidata's
# vocabulary) and binds the wdt:P569 value to ?date_of_birth.
# NOTE(review): the wdt:/wd: prefixes are not declared here; presumably the
# Wikidata endpoint predefines them — confirm when pointing at another endpoint.
sparql = """
select * where {
BIND(<http://www.wikidata.org/entity/Q55007624> AS ?acerbi)
?acerbi wdt:P31 wd:Q5 ;
        wdt:P569 ?date_of_birth
}
"""

In [None]:
from SPARQLWrapper import SPARQLWrapper, JSON
# Send the query text held in `sparql` to the Wikidata SPARQL endpoint,
# request JSON, and convert the response into Python objects.
sparql_wikidata = SPARQLWrapper("https://query.wikidata.org/sparql", returnFormat=JSON)
sparql_wikidata.setQuery(sparql)
results = sparql_wikidata.queryAndConvert()

In [None]:
# Display the converted query result; in the recorded run it is a dict with
# 'head' and 'results' keys, the bindings being under results['bindings'].
results

{'head': {'vars': ['acerbi', 'date_of_birth']},
 'results': {'bindings': [{'acerbi': {'type': 'uri',
     'value': 'http://www.wikidata.org/entity/Q55007624'},
    'date_of_birth': {'datatype': 'http://www.w3.org/2001/XMLSchema#dateTime',
     'type': 'literal',
     'value': '1773-05-03T00:00:00Z'}}]}}

# Create a simple visualization in Python using the API

In [64]:
# Search for 'Künstlerhaus'; kind=['group'] is forwarded as an extra query
# parameter (presumably restricting the hits to groups — confirm against the API).
institutions = query_intavia_rest('Künstlerhaus', kind=['group'])

{'q': 'Künstlerhaus', 'kind': ['group']}


In [78]:
# Take the entry at index 9 of the search results; in the recorded run this is
# 'Genossenschaft der Bildenden Künstler Wiens (Künstlerhaus)'.
# NOTE(review): the position depends on the order of the search results, so
# selecting by id or label would be more robust.
kuenstlerhaus = institutions[9]

In [79]:
# Display the selected group record.
kuenstlerhaus

{'id': 'aHR0cDovL3d3dy5pbnRhdmlhLmV1L3Byb3ZpZGVkX2dyb3VwLzI0NDc=',
 'label': {'default': 'Genossenschaft der Bildenden Künstler Wiens (Künstlerhaus)'},
 'kind': 'group',
 'linkedIds': [{'label': 'Österreichische Biographische Lexikon, APIS',
   'url': 'https://apis.acdh.oeaw.ac.at/entity/98141'},
  {'label': 'Gemeinsame Normdatei (GND)',
   'url': 'https://d-nb.info/gnd/3009578-5'}],
 'alternativeLabels': [{'default': 'Genossenschaft der Bildenden Künstler Wiens (Künstlerhaus)'}],
 'relations': [{'event': 'aHR0cDovL3d3dy5pbnRhdmlhLmV1L2FwaXMvY2FyZWVyLzExODgyNA==',
   'role': 'aHR0cDovL2xkZi5maS9zY2hlbWEvYmlvYy9Hcm91cF9SZWxhdGlvbnNoaXBfUm9sZQ=='},
  {'event': 'aHR0cDovL3d3dy5pbnRhdmlhLmV1L2FwaXMvY2FyZWVyLzEwOTY4MA==',
   'role': 'aHR0cDovL2xkZi5maS9zY2hlbWEvYmlvYy9Hcm91cF9SZWxhdGlvbnNoaXBfUm9sZQ=='},
  {'event': 'aHR0cDovL3d3dy5pbnRhdmlhLmV1L2FwaXMvY2FyZWVyLzExNjI4MQ==',
   'role': 'aHR0cDovL2xkZi5maS9zY2hlbWEvYmlvYy9Hcm91cF9SZWxhdGlvbnNoaXBfUm9sZQ=='},
  {'event': 'aHR0cDovL3d3dy5pbnRh