# [BiographySampo](http://biografiasampo.fi/haku/ruudukko) data to InTaVia format

Install the dependencies:

In [27]:
!pip install SPARQLWrapper numpy scipy matplotlib pandas networkx
%matplotlib inline

import codecs
import datetime
from dateutil.relativedelta import relativedelta

import matplotlib.pyplot as plt
import numpy    as np
import networkx as nx
import pandas   as pd
import re
import seaborn as sns
import sys

from collections import Counter, OrderedDict, defaultdict
from itertools import product
from rdflib.namespace import XSD, Namespace
from rdflib import URIRef
from statistics import mean
from scipy.stats import binned_statistic
from scipy.interpolate import interp1d
from scipy.optimize import curve_fit
from scipy.signal import convolve2d

from SPARQLWrapper import SPARQLWrapper, JSON, POST

import logging
LOGGER = logging.getLogger(sys.argv[0])


Looking in indexes: https://pypi.org/simple, https://us-python.pkg.dev/colab-wheels/public/simple/


Define some useful functions for data convertion

In [28]:
DATATYPECONVERTERS = {
      str(XSD.integer):  int,
      # str(XSD.date):     lambda v: datetime.datetime.strptime(v, '%Y-%m-%d').date()
      str(XSD.decimal):  float,
  }

def convertDatatype(obj):
  return DATATYPECONVERTERS.get(obj.get('datatype'), str)(obj.get('value')) 

def convertDatatypes(results):
    res = results["results"]["bindings"]
    return [dict([(k, convertDatatype(v)) for k,v in r.items()]) for r in res]

def JSON2Pandas(results):
    return pd.DataFrame(convertDatatypes(results))

from rdflib import Namespace, URIRef, Graph, Literal, XSD
from rdflib.namespace import RDF, SKOS, OWL, FOAF, RDFS, XSD, DCTERMS
FOAF = Namespace(FOAF)

BIOC = Namespace("http://ldf.fi/schema/bioc/")
CRM = Namespace("http://www.cidoc-crm.org/cidoc-crm/")
GVP = Namespace("http://vocab.getty.edu/ontology#")
# GEO = Namespace("http://www.w3.org/2003/01/geo/wgs84_pos#")
GEO = Namespace("http://www.opengis.net/ont/geosparql#")
LABEL = Namespace("http://ldf.fi/nbf/label/")
SCHEMA = Namespace("http://schema.org/")
SKOSXL = Namespace("http://www.w3.org/2008/05/skos-xl#")

NBF = Namespace("http://ldf.fi/nbf/")
IDM = Namespace("http://www.intavia.eu/")
IDMCORE = Namespace("http://www.intavia.eu/idm-core/")

IDM_PERSON = Namespace('http://www.intavia.eu/bs/personproxy/')
IDM_PROVIDED_PERSON = Namespace('http://www.intavia.eu/bs/provided_person/')
IDM_PLACE = Namespace('http://www.intavia.eu/bs/place/')
IDM_PLACEAPPELLATION = Namespace('http://www.intavia.eu/bs/placeappellation/')
IDM_SPACEPRIMITIVE = Namespace('http://www.intavia.eu/bs/spaceprimitive/')
IDM_ROLE = Namespace('http://www.intavia.eu/idm-role/')

TIMES = Namespace("http://ldf.fi/nbf/times/")
TITLES = Namespace("http://ldf.fi/nbf/titles/")
PLACES = Namespace("http://ldf.fi/nbf/places/")
SOURCES = Namespace("http://ldf.fi/nbf/sources/")

WD = Namespace("http://www.wikidata.org/entity/")

PREFIXES = """ PREFIX crm: <http://www.cidoc-crm.org/cidoc-crm/>
PREFIX bioc: <http://ldf.fi/schema/bioc/> 
PREFIX dct: <http://purl.org/dc/terms/>
PREFIX dcterms: <http://purl.org/dc/terms/>
PREFIX foaf: <http://xmlns.com/foaf/0.1/> 
PREFIX owl: <http://www.w3.org/2002/07/owl#>
PREFIX nbf: <http://ldf.fi/nbf/>
PREFIX rdfs: <http://www.w3.org/2000/01/rdf-schema#> 
PREFIX schema: <http://schema.org/>
PREFIX skos: <http://www.w3.org/2004/02/skos/core#> 
PREFIX skosxl: <http://www.w3.org/2008/05/skos-xl#>
PREFIX sources: <http://ldf.fi/nbf/sources/>
PREFIX xsd: <http://www.w3.org/2001/XMLSchema#> 
PREFIX idm-core: <http://www.intavia.eu/idm-core/>
"""

def initGraph(g = None):
    
    if not g:
        g=Graph()
    
    # bind prefixes
    g.bind("dct", DCTERMS)
    g.bind("gvp", GVP)
    g.bind("owl", OWL)
    g.bind("foaf", FOAF)
    g.bind("idm", IDM)
    g.bind("idmcore", IDMCORE)
    g.bind("idmrole", IDM_ROLE)
    g.bind("rdf", RDF)
    g.bind("rdfs", RDFS)
    g.bind("schema", SCHEMA)
    g.bind("sources", SOURCES)
    g.bind("skos",SKOS)
    g.bind("skosxl", SKOSXL)
    g.bind("xsd", XSD)
    g.bind("geo", GEO)
    g.bind("crm", CRM)
    g.bind("idnametype", Namespace('http://www.intavia.eu/nametype/'))
    
    g.bind("bioc", BIOC)
    g.bind("label", LABEL)
    g.bind("nbf", NBF)
    g.bind("times", TIMES)
    g.bind("wd", WD)
    
    return g

def saveGraphs(tuples, silent=False):
    
    for g, outfile in tuples:
        
        if len(g):
            
            args = sys.argv
            
            outf = codecs.open(outfile, encoding='utf-8', mode='w')
            
            outf.write("# Created with script {}\n".format(args[0]))
            outf.write("# in colab https://colab.research.google.com/drive/1c6hwZs1ooi1G1kfHeB-JYeywemyY-mU-?usp=sharing")
            outf.write("# python3 {}\n".format(' '.join(args)))
            
            now = datetime.datetime.now()
            outf.write("# {}\n\n".format(now.strftime("%Y-%m-%d %H:%M")))
            
            # g.serialize(destination=outfile, format='turtle')
            #  outf.write( g.serialize(format='turtle').decode("utf-8") )
            outf.write( g.serialize(format='turtle') )
            
            outf.close()
            
            if not silent:
                LOGGER.info("{} triples saved to {}".format(len(g),outfile))
            
        elif not silent:
            LOGGER.info("No output to {}".format(outfile))

# Quering NBF people data
[yasgui.com](https://api.triplydb.com/s/l_IDH5IdF)

In [29]:
sparql = SPARQLWrapper("https://ldf.fi/nbf/sparql")
sparql.setQuery(PREFIXES +
""" SELECT DISTINCT *
WHERE {
  VALUES ?source { sources:source1 }
  ?id a nbf:PersonConcept ;
   foaf:focus ?prs ;
   dct:source ?source ;
   skosxl:prefLabel/skos:prefLabel ?label .
  
  FILTER EXISTS { ?prs ^crm:P100_was_death_of [] }

  ?prs schema:gender ?gender .
  OPTIONAL { ?id nbf:wikidata ?wiki }
  OPTIONAL { ?prs nbf:image ?image . ?image dct:source sources:source9 . FILTER(ISURI(?image)) }
} """)

sparql.setReturnFormat(JSON)
sparql.setMethod('POST')
# sparql.addCustomHttpHeader()
results = sparql.query().convert()

res = results["results"]["bindings"]
print(f"{len(res)} results")

print(f"Datafields {results['head']['vars']}")
res = convertDatatypes(results)

for ob in res[:10]: # 5967 results, 5855 after removing double images(?)
  print(ob)

5855 results
Datafields ['source', 'id', 'prs', 'label', 'gender', 'wiki', 'image']
{'source': 'http://ldf.fi/nbf/sources/source1', 'id': 'http://ldf.fi/nbf/p1', 'prs': 'http://ldf.fi/nbf/p1-actor', 'label': 'Konttinen, Helena', 'gender': 'http://schema.org/Female', 'wiki': 'http://www.wikidata.org/entity/Q5703623', 'image': 'http://commons.wikimedia.org/wiki/Special:FilePath/HelenaKonttinen.png'}
{'source': 'http://ldf.fi/nbf/sources/source1', 'id': 'http://ldf.fi/nbf/p10', 'prs': 'http://ldf.fi/nbf/p10-actor', 'label': 'Jussoila, Johannes', 'gender': 'http://schema.org/Male', 'wiki': 'http://www.wikidata.org/entity/Q17381252'}
{'source': 'http://ldf.fi/nbf/sources/source1', 'id': 'http://ldf.fi/nbf/p100', 'prs': 'http://ldf.fi/nbf/p100-actor', 'label': 'Banér, Nils Eskilinpoika', 'gender': 'http://schema.org/Male', 'wiki': 'http://www.wikidata.org/entity/Q6156791'}
{'source': 'http://ldf.fi/nbf/sources/source1', 'id': 'http://ldf.fi/nbf/p1000', 'prs': 'http://ldf.fi/nbf/p1000-actor',

In [30]:
# temporal, construct a query block with values (bs_id wiki_id)
#
# arr = ["(<{}> <{}>)".format(ob.get('id'), ob.get('wiki')) for ob in res if ob.get('wiki')]
# VALUES = " ".join(arr)
# VALUES

## output person ttl

In [31]:
""" 
change url 
 http://ldf.fi/nbf/p9024 -> http://www.intavia.eu/personproxy/bs/9024

and Provided_Person in the format
 http://www.intavia.eu/personproxy/bs/10 idm-core:person_proxy_for http://www.intavia.eu/provided_person/bs/10 .
 http://www.intavia.eu/provided_person/bs/10 a idm-core:Provided_Person 
"""

def formatLabel(st):
  return re.sub(r'(\d+)\s*[-]\s*(\d+)', r'\1-\2', st)

def stripNBFUrl(v):
  return v.replace(NBF.p, '')

def nbfPerson2Intavia(v):
  return URIRef(v.replace(NBF.p, IDM_PERSON))

KEY_LOOKUP = dict(((ob.get('id'), stripNBFUrl(ob.get('id'))) for ob in res))
URL_LOOKUP = dict(((ob.get('id'), nbfPerson2Intavia(ob.get('id')) ) for ob in res))
g = initGraph()

for ob in res:
  prs = URL_LOOKUP.get(ob.get('id'))
  g.add((prs, RDF.type, CRM.E21_Person))
  g.add((prs, RDF.type, IDMCORE.Person_Proxy))

  provided = URIRef(str(prs).replace(IDM_PERSON, IDM_PROVIDED_PERSON))
  g.add((provided, RDF.type, IDMCORE.Provided_Person))
  g.add((prs, IDMCORE.person_proxy_for, provided))

  '''
  http://www.intavia.eu/personproxy/bs/10 idm-core:person_proxy_for http://www.intavia.eu/provided_person/bs/10 .
  http://www.intavia.eu/provided_person/bs/10 a idm-core:Provided_Person 
  '''

  # link to BS data
  g.add((prs, OWL.sameAs, URIRef(ob.get('id'))))

  v = ob.get('wiki')
  if v:
    g.add((prs, OWL.sameAs, URIRef(v)))
  
  v = ob.get('gender')
  if v:
    v = URIRef(v.replace(SCHEMA, BIOC))
    g.add((prs, BIOC.has_gender, v))

  v = ob.get('image')
  if v:
    img = URIRef(v)
    g.add((img, RDF.type, CRM.E36_Visual_Item))
    g.add((img, CRM.P138_represents, prs))
  
  st = Literal(formatLabel(ob.get('label')))
  g.add((prs, RDFS.label, st))
  g.add((provided, RDFS.label, st))

# KEY_LOOKUP, URL_LOOKUP...
print(g.serialize(format='turtle')[:2000], '...\n...\n', g.serialize(format='turtle')[-2000:])

@prefix bioc: <http://ldf.fi/schema/bioc/> .
@prefix crm: <http://www.cidoc-crm.org/cidoc-crm/> .
@prefix idmcore: <http://www.intavia.eu/idm-core/> .
@prefix nbf: <http://ldf.fi/nbf/> .
@prefix owl: <http://www.w3.org/2002/07/owl#> .
@prefix rdfs: <http://www.w3.org/2000/01/rdf-schema#> .
@prefix wd: <http://www.wikidata.org/entity/> .

<http://commons.wikimedia.org/wiki/Special:FilePath/%D0%92%D0%B8%D1%80%D0%B5%D0%BD%D0%B8%D1%83%D1%81%20%D0%90%D0%BD%D0%B4%D1%80%D0%B5%D0%B9%20%D0%90%D0%BD%D0%B4%D1%80%D0%B5%D0%B5%D0%B2%D0%B8%D1%87.jpg> a crm:E36_Visual_Item ;
    crm:P138_represents <http://www.intavia.eu/bs/personproxy/1179> .

<http://commons.wikimedia.org/wiki/Special:FilePath/%D0%9D%D0%B8%D0%BA%D0%B8%D1%82%D1%81%D0%BA%D0%B8%D0%B9%20%D0%B1%D0%BE%D1%82%D0%B0%D0%BD%D0%B8%D1%87%D0%B5%D1%81%D0%BA%D0%B8%D0%B9%20%D1%81%D0%B0%D0%B4.%20%D0%9F%D0%B0%D0%BC%D1%8F%D1%82%D0%BD%D0%B8%D0%BA%20%D0%A5.%20%D0%A1%D1%82%D0%B8%D0%B2%D0%B5%D0%BD%D1%83%20-%20panoramio.jpg> a crm:E36_Visual_Item ;
    crm:

##Query labels

In [32]:
IDS = ' '.join(("<{}>".format(k) for k in URL_LOOKUP.keys()))
IDS

'<http://ldf.fi/nbf/p1> <http://ldf.fi/nbf/p10> <http://ldf.fi/nbf/p100> <http://ldf.fi/nbf/p1000> <http://ldf.fi/nbf/p1001> <http://ldf.fi/nbf/p1002> <http://ldf.fi/nbf/p1003> <http://ldf.fi/nbf/p1004> <http://ldf.fi/nbf/p1005> <http://ldf.fi/nbf/p1006> <http://ldf.fi/nbf/p1007> <http://ldf.fi/nbf/p1008> <http://ldf.fi/nbf/p1009> <http://ldf.fi/nbf/p101> <http://ldf.fi/nbf/p1010> <http://ldf.fi/nbf/p1011> <http://ldf.fi/nbf/p1012> <http://ldf.fi/nbf/p1013> <http://ldf.fi/nbf/p1014> <http://ldf.fi/nbf/p1015> <http://ldf.fi/nbf/p1016> <http://ldf.fi/nbf/p1017> <http://ldf.fi/nbf/p1018> <http://ldf.fi/nbf/p1019> <http://ldf.fi/nbf/p102> <http://ldf.fi/nbf/p1020> <http://ldf.fi/nbf/p1021> <http://ldf.fi/nbf/p1022> <http://ldf.fi/nbf/p1023> <http://ldf.fi/nbf/p1024> <http://ldf.fi/nbf/p1025> <http://ldf.fi/nbf/p1026> <http://ldf.fi/nbf/p1027> <http://ldf.fi/nbf/p1028> <http://ldf.fi/nbf/p1029> <http://ldf.fi/nbf/p103> <http://ldf.fi/nbf/p1030> <http://ldf.fi/nbf/p1031> <http://ldf.fi/nbf/p

In [33]:
q = PREFIXES + """ 
SELECT DISTINCT *
WHERE {
  VALUES ?id { """ +IDS+ """ }
  VALUES ?prop { skosxl:prefLabel skosxl:altLabel }
  ?id ?prop ?label_id .
  OPTIONAL { ?label_id schema:givenName ?forename }
  OPTIONAL { ?label_id schema:familyName ?surname }
  OPTIONAL { ?label_id skos:prefLabel ?label } 
}"""
print(q)
sparql.setQuery(q)

sparql.setReturnFormat(JSON)
results = sparql.query().convert()

res = results["results"]["bindings"]
print(f"{len(res)} results")

print(f"Datafields {results['head']['vars']}")
res = convertDatatypes(results)

for ob in res[:5]:
  print(ob)

 PREFIX crm: <http://www.cidoc-crm.org/cidoc-crm/>
PREFIX bioc: <http://ldf.fi/schema/bioc/> 
PREFIX dct: <http://purl.org/dc/terms/>
PREFIX dcterms: <http://purl.org/dc/terms/>
PREFIX foaf: <http://xmlns.com/foaf/0.1/> 
PREFIX owl: <http://www.w3.org/2002/07/owl#>
PREFIX nbf: <http://ldf.fi/nbf/>
PREFIX rdfs: <http://www.w3.org/2000/01/rdf-schema#> 
PREFIX schema: <http://schema.org/>
PREFIX skos: <http://www.w3.org/2004/02/skos/core#> 
PREFIX skosxl: <http://www.w3.org/2008/05/skos-xl#>
PREFIX sources: <http://ldf.fi/nbf/sources/>
PREFIX xsd: <http://www.w3.org/2001/XMLSchema#> 
PREFIX idm-core: <http://www.intavia.eu/idm-core/>
 
SELECT DISTINCT *
WHERE {
  VALUES ?id { <http://ldf.fi/nbf/p1> <http://ldf.fi/nbf/p10> <http://ldf.fi/nbf/p100> <http://ldf.fi/nbf/p1000> <http://ldf.fi/nbf/p1001> <http://ldf.fi/nbf/p1002> <http://ldf.fi/nbf/p1003> <http://ldf.fi/nbf/p1004> <http://ldf.fi/nbf/p1005> <http://ldf.fi/nbf/p1006> <http://ldf.fi/nbf/p1007> <http://ldf.fi/nbf/p1008> <http://ldf.


```
<http://www.intavia.eu/personproxy/bs/9024> crm:P1_is_identified_by  <http://www.intavia.eu/name/1/bs_9024> .

<http://www.intavia.eu/name/1/10055_t12347> a crm:E33_E41_Linguistic_Appellation ;
   crm:P148_has_component <http://www.intavia.eu/name/2/10055_t12347>,
       <http://www.intavia.eu/name/3/10055_t12347> .
 
<http://www.intavia.eu/name/2/10055_t12347> a crm:E33_E41_Linguistic_Appellation ;
   rdf:type <http://www.intavia.eu/nametype/surname> ;
   rdfs:value "Blau_t" .
 
<http://www.intavia.eu/name/3/10055_t12347> a crm:E33_E41_Linguistic_Appellation ;
   rdf:type <http://www.intavia.eu/nametype/forename> ;
   rdfs:value "Tina_t" .
```


In [34]:
def getLabelUrl(idx, prs_id):
  return URIRef('http://www.intavia.eu/name/bs/{}/{}'.format(cn[prs_id], KEY_LOOKUP.get(prs_id)))

def addLabel(g, cn, ob, cls = CRM.E33_E41_Linguistic_Appellation):
  
  if ob.get('forename','')+ob.get('surname','')=='':
    return

  prs_id = ob.get('id')
  
  cn[prs_id] += 1
  label_url = getLabelUrl(cn[prs_id], prs_id)
  g.add((URL_LOOKUP.get(prs_id), CRM.P1_is_identified_by, label_url))
  g.add((label_url, RDF.type, cls))
  st = ob.get('label')
  if st:
    g.add((label_url, RDFS.label, Literal(st)))

  
  for f in ['forename', 'surname']:
    name = ob.get(f)
    if name:

      if f=='forename':
        # split given names
        arr = name.split(' ')
      else:
        arr = [name]

      for st in arr:
        cn[prs_id] += 1
        label_url2 = getLabelUrl(cn[prs_id], prs_id)
        g.add((label_url, CRM.P148_has_component, label_url2))
        g.add((label_url2, RDF.type, cls))
        
        g.add((label_url2, RDF.type, URIRef('http://www.intavia.eu/nametype/'+f)))
        g.add((label_url2, RDFS.label, Literal(st.strip())))

cn = Counter()
g_label = initGraph()
for ob in res:
  addLabel(g_label, cn, ob)

# print(g_label.serialize(format='turtle'))

# Get place data

In [35]:
q = PREFIXES + """ 
SELECT DISTINCT ?id ?label (COALESCE(?wiki1, ?wiki2) AS ?wiki) ?long ?lat
WHERE {
  ?id a nbf:Place ; skos:prefLabel ?label .
  OPTIONAL { ?id nbf:wikidata ?wiki1 }
  OPTIONAL { ?id owl:sameAs/nbf:wikidata ?wiki2 }
  OPTIONAL { ?id <http://www.w3.org/2003/01/geo/wgs84_pos#long> ?long ;
                 <http://www.w3.org/2003/01/geo/wgs84_pos#lat> ?lat 
  }
} """
# print(q)
sparql.setQuery(q)

sparql.setReturnFormat(JSON)
results = sparql.query().convert()

print(f"Datafields {results['head']['vars']}")
res_places = convertDatatypes(results)
print(f"{len(res_places)} results") # 5082 results -> 4969 results

for ob in res_places[:5]:
  print(ob)

Datafields ['id', 'label', 'wiki', 'long', 'lat']
4966 results
{'id': 'http://ldf.fi/nbf/places/Uukuniemi', 'label': 'Uukuniemi', 'wiki': 'http://www.wikidata.org/entity/Q2680634', 'long': 30.0075595, 'lat': 61.7884359}
{'id': 'http://ldf.fi/nbf/places/Rauma', 'label': 'Rauma', 'wiki': 'http://www.wikidata.org/entity/Q37013', 'long': 21.505314582930446, 'lat': 61.13641023938808}
{'id': 'http://ldf.fi/nbf/places/Rauma%2C%20K.Helsinki', 'label': 'Rauma', 'wiki': 'http://www.wikidata.org/entity/Q37013', 'long': 21.5063758, 'lat': 61.13290420000001}
{'id': 'http://ldf.fi/nbf/places/Helsinki', 'label': 'Helsinki', 'wiki': 'http://www.wikidata.org/entity/Q1757', 'long': 24.98899508545681, 'lat': 60.165879062998215}
{'id': 'http://ldf.fi/nbf/places/Dublin', 'label': 'Dublin', 'wiki': 'http://www.wikidata.org/entity/Q1761', 'long': -6.2603097, 'lat': 53.3498053}


```
<http://www.intavia.eu/placeproxy/bs/1009345> a crm:E53_Place,
        idm-core:Place_Proxy ;
    rdfs:label "Kaustinen" ;
    # crm:P1_is_identified_by <http://www.intavia.eu/name/4/bs1009345> ;
    crm:P168_place_is_defined_by "Point ( +016.371690 +048.208199 )"^^geo:wktLiteral ;
    owl:sameAs wd:Q1009345 .
  crm:P168_place_is_defined_by "Point ( +51.21006 -16.1619 )"^^geo:wktLiteral .
```

In [36]:
long, lat = +016.371690, -48.208199
f'Point ( {long:+g} {lat:+g} )'

'Point ( +16.3717 -48.2082 )'

In [37]:
PLACE_LOOKUP = {}
def writePlace(g, ob):
  v = ob.get('wiki')
  if v:
    url = URIRef(v.replace(WD.Q, IDM_PLACE))
    g.add((url, RDF.type, CRM.E53_Place))
    g.add((url, RDF.type, IDMCORE.Place_Proxy))
    if not (url, RDFS.label, None) in g:
      g.add((url, RDFS.label, Literal(ob.get('label'))))
    else:
      g.add((url, SKOS.altLabel, Literal(ob.get('label'))))
    g.add((url, OWL.sameAs, URIRef(v)))

    url_ap = URIRef(v.replace(WD.Q, IDM_PLACEAPPELLATION))
    g.add((url_ap, RDF.type, CRM.E33_E41_Linguistic_Appellation))
    g.add((url_ap, RDFS.label, Literal(ob.get('label'))))
    g.add((url, CRM.P1_is_identified_by, url_ap))

    lat, long = ob.get('lat'), ob.get('long')
    if lat and long and (not (url, CRM.P168_place_is_defined_by, None) in g):
      # practice described in https://github.com/InTaVia/source-dataset-conversion/issues/2
      #
      # PREFIX geo: <http://www.opengis.net/ont/geosparql#>
      # 
      # to model in format 
      # geo:hasGeometry _:node1g02elp9ox8232762 .
      # _:node1g02elp9ox8232762 crm:P168_place_is_defined_by "Point ( +016.371690 +048.208199 )"^^geo:wktLiteral; a sf:Point .
      # exmple in 
      #    g.add((URIRef(idmapis+'place/'+row['place_id']), crm.P168_place_is_defined_by, URIRef(idmapis+'spaceprimitive/'+row['place_id'])))
      #    g.add((URIRef(idmapis+'spaceprimitive/'+row['place_id']), rdf.type, crm.E94_Space_Primitive))
      #    g.add((URIRef(idmapis+'spaceprimitive/'+row['place_id']), crm.P168_place_is_defined_by, Literal(("Point " + row['place_lat']+' '+row['place_lng']), datatype=geo.wktLiteral)))

      url_sp = URIRef(v.replace(WD.Q, IDM_SPACEPRIMITIVE))
      g.add((url, CRM.P168_place_is_defined_by, url_sp))
      g.add((url_sp, RDF.type, CRM.E94_Space_Primitive))
      g.add((url_sp, CRM.P168_place_is_defined_by, Literal(f"Point ( {long:+g} {lat:+g} )", datatype=GEO.wktLiteral)))

    return url
  return None

g_places = initGraph()
for ob in res_places:
  url = writePlace(g_places, ob)
  if url:
    PLACE_LOOKUP[ob.get('id')] = url

print(f'{len(g_places)} triples in file') # 39280 triples in file
print(g_places.serialize(format='turtle')[:2500], '\n\n...\n')

38366 triples in file
@prefix crm: <http://www.cidoc-crm.org/cidoc-crm/> .
@prefix geo: <http://www.opengis.net/ont/geosparql#> .
@prefix idmcore: <http://www.intavia.eu/idm-core/> .
@prefix owl: <http://www.w3.org/2002/07/owl#> .
@prefix rdfs: <http://www.w3.org/2000/01/rdf-schema#> .
@prefix skos: <http://www.w3.org/2004/02/skos/core#> .
@prefix wd: <http://www.wikidata.org/entity/> .

<http://www.intavia.eu/bs/place/100> a crm:E53_Place,
        idmcore:Place_Proxy ;
    rdfs:label "Boston" ;
    crm:P168_place_is_defined_by <http://www.intavia.eu/bs/spaceprimitive/100> ;
    crm:P1_is_identified_by <http://www.intavia.eu/bs/placeappellation/100> ;
    owl:sameAs wd:Q100 .

<http://www.intavia.eu/bs/place/1000> a crm:E53_Place,
        idmcore:Place_Proxy ;
    rdfs:label "Gabon" ;
    crm:P168_place_is_defined_by <http://www.intavia.eu/bs/spaceprimitive/1000> ;
    crm:P1_is_identified_by <http://www.intavia.eu/bs/placeappellation/1000> ;
    owl:sameAs wd:Q1000 .

<http://www.inta

In [38]:
class Indexer():
    def __init__(self, namespace=None, prefix="", useHashing=False):
        self.__dict = {}

    def __getIndex(self, tpl):
        if isinstance(tpl, list):
            return self.__getIndex(tuple(tpl))
          
        if not tpl in self.__dict:
            idx = 1+len(list(self.__dict.values()))
            self.__dict[tpl] = str(idx)
        return self.__dict[tpl]
    
    def items(self):
        return self.__dict.items()

    def clear(self):
        self.__dict = {}

    def get(self, tpl):
        return self.__getIndex(tpl)

ix = Indexer()
ix.get('url1234'), ix.get('[1,2,3]'), ix.get('url1234')

('1', '2', '1')

# Query events 



## birth and death

In [39]:
sparql.setQuery(PREFIXES +
""" SELECT DISTINCT *
WHERE {
  VALUES ?id { """ +IDS+ """ }
  VALUES (?prop ?crmclass ?role) 
    { (crm:P98_brought_into_life crm:E67_Birth 'born_person') (crm:P100_was_death_of crm:E69_Death 'deceased_person') }
  ?id foaf:focus ?prs ; skosxl:prefLabel/skos:prefLabel ?label .
  ?evt ?prop ?prs .
  OPTIONAL { ?evt nbf:time ?tspan 
    OPTIONAL { ?tspan skos:prefLabel ?time_label }
    OPTIONAL { ?tspan <http://vocab.getty.edu/ontology#estStart> ?time_start }

    OPTIONAL { ?tspan <http://vocab.getty.edu/ontology#estEnd> ?time_end }
  }
  OPTIONAL { ?evt nbf:place ?place }
} """)

sparql.setReturnFormat(JSON)
results = sparql.query().convert()

res = results["results"]["bindings"]
print(f"{len(res)} results") # 12294 results

print(f"Datafields {results['head']['vars']}")
res = convertDatatypes(results)

for ob in res[:5]:
  print(ob)

12294 results
Datafields ['id', 'prop', 'crmclass', 'role', 'prs', 'label', 'evt', 'tspan', 'time_label', 'time_start', 'time_end', 'place']
{'id': 'http://ldf.fi/nbf/p1', 'prop': 'http://www.cidoc-crm.org/cidoc-crm/P98_brought_into_life', 'crmclass': 'http://www.cidoc-crm.org/cidoc-crm/E67_Birth', 'role': 'born_person', 'prs': 'http://ldf.fi/nbf/p1-actor', 'label': 'Konttinen, Helena', 'evt': 'http://ldf.fi/nbf/birth1', 'tspan': 'http://ldf.fi/nbf/times/t1871-06-18', 'time_label': '18.6.1871', 'time_start': '1871-06-18', 'time_end': '1871-06-18', 'place': 'http://ldf.fi/nbf/places/Uukuniemi'}
{'id': 'http://ldf.fi/nbf/p1', 'prop': 'http://www.cidoc-crm.org/cidoc-crm/P100_was_death_of', 'crmclass': 'http://www.cidoc-crm.org/cidoc-crm/E69_Death', 'role': 'deceased_person', 'prs': 'http://ldf.fi/nbf/p1-actor', 'label': 'Konttinen, Helena', 'evt': 'http://ldf.fi/nbf/death1', 'tspan': 'http://ldf.fi/nbf/times/t1916', 'time_label': '1916', 'time_start': '1916-01-01', 'time_end': '1916-12-31

```
<http://www.intavia.eu/birthevent/bs12346> a crm:E67_Birth ;
   crm:P4_has_time-span <http://www.intavia.eu/timespan/1/12346> ;
   crm:P7_took_place_at <http://www.intavia.eu/placeproxy/bs/12346> ;
   crm:P98_brought_into_life <http://www.intavia.eu/personproxy/70682/12346> .

<http://www.intavia.eu/deathevent/bs12345> a crm:E69_Death ;
    crm:P100_was_death_of <http://www.intavia.eu/personproxy/85656/12345> ;
    crm:P4_has_time-span <http://www.intavia.eu/timespan/2/12345> .

<http://www.intavia.eu/apis/deathevent/147956> rdfs:label "Death of Hedda Sauer" ;
    bioc:had_participant_in_role <http://www.intavia.eu/apis/deceased_person/147956> ;
    crm:P4_has_time-span <http://www.intavia.eu/apis/death/timespan/147956> ;
    crm:P7_took_place_at <http://www.intavia.eu/apis/place/14608> .

<http://www.intavia.eu/apis/deceased_person/147956> a idmrole:deceased_person ;
    bioc:inheres_in <http://www.intavia.eu/apis/personproxy/147956> .

<http://www.intavia.eu/apis/born_person/92720> a idmrole:born_person ;
    bioc:inheres_in <http://www.intavia.eu/apis/personproxy/92720> .

<http://www.intavia.eu/timespan/1/12345> a crm:E52_Time-Span ;
    crm:P82a_begin_of_the_begin "1850-08-25T00:00:00" ;
    crm:P82b_end_of_the_end "1850-08-25T23:59:59" ;
    rdfs:label "1904" .
```

In [40]:
timespanIndexers = defaultdict(Indexer)

def getBioEventUrl(eventtype, prs_id):
  return URIRef('http://www.intavia.eu/bs/{}/{}'.format(eventtype, prs_id))

# datetimes in format “1856-07-10T00:00:00”^^xsd:dateTime
def getTimespan(g, _id, idx, time_label, time_start, time_end):
  tspan = URIRef('http://www.intavia.eu/bs/timespan/{}/{}'.format(idx, _id))
  g.add((tspan, RDF.type, CRM["E52_Time-Span"]))
  if time_label:
    st = re.sub(r'(\d+)\s*([–-])', r'\1\2', time_label)
    st = re.sub(r'\s*([–-])\s*(\d+)', r'\1\2', st)
    g.add((tspan, RDFS.label, Literal(st)))
  if time_start:
    g.add((tspan, CRM.P82a_begin_of_the_begin, Literal(time_start+'T00:00:00', datatype=XSD.dateTime)))
  if time_end:
    g.add((tspan, CRM.P82b_end_of_the_end, Literal(time_end+'T23:59:59', datatype=XSD.dateTime)))
  return tspan
    
def addBioEvent(g, ob):
  
  prs_id = stripNBFUrl(ob.get('id'))
  prs_url = nbfPerson2Intavia(ob.get('id'))

  cls = ob.get('crmclass')
  if 'Birth' in cls:
    eventtype, eventlabel = 'birthevent', 'Birth'
  elif 'Death' in cls:
    eventtype, eventlabel = 'deathevent', 'Death'
  else:
    return
  
  evt_url = getBioEventUrl(eventtype, prs_id)
  role_url = URIRef('http://www.intavia.eu/bs/{}/{}'.format(ob.get('role'), prs_id))
  
  g.add((evt_url, RDF.type, URIRef(ob.get('crmclass'))))
  g.add((evt_url, URIRef(ob.get('prop')), prs_url))
  g.add((evt_url, BIOC.had_participant_in_role, role_url))
  g.add((evt_url, RDFS.label, Literal("{} of {}".format(eventlabel, ob.get('label')))))

  g.add((role_url, RDF.type, IDM_ROLE[ob.get('role')]))
  g.add((role_url, BIOC.inheres_in, prs_url))

  g.add((prs_url, BIOC.bearer_of, role_url))
  
  time_arr = [ob.get(k) for k in ['time_label', 'time_start', 'time_end']]
  if any(time_arr):
    idx = timespanIndexers[prs_id].get(time_arr[1:])
    tspan = getTimespan(g, prs_id, idx, *time_arr)
    g.add((evt_url, CRM['P4_has_time-span'], tspan))
  
  v = PLACE_LOOKUP.get(ob.get('place'))
  if v:
    g.add((evt_url, CRM.P7_took_place_at, v))

g_bioevent = initGraph()
for ob in res:
  addBioEvent(g_bioevent, ob)

print(g_bioevent.serialize(format='turtle')[:1500], '...\n', g_bioevent.serialize(format='turtle')[-3000:])

@prefix bioc: <http://ldf.fi/schema/bioc/> .
@prefix crm: <http://www.cidoc-crm.org/cidoc-crm/> .
@prefix idmrole: <http://www.intavia.eu/idm-role/> .
@prefix rdfs: <http://www.w3.org/2000/01/rdf-schema#> .
@prefix xsd: <http://www.w3.org/2001/XMLSchema#> .

<http://www.intavia.eu/bs/birthevent/1> a crm:E67_Birth ;
    rdfs:label "Birth of Konttinen, Helena" ;
    bioc:had_participant_in_role <http://www.intavia.eu/bs/born_person/1> ;
    crm:P4_has_time-span <http://www.intavia.eu/bs/timespan/1/1> ;
    crm:P7_took_place_at <http://www.intavia.eu/bs/place/2680634> ;
    crm:P98_brought_into_life <http://www.intavia.eu/bs/personproxy/1> .

<http://www.intavia.eu/bs/birthevent/10> a crm:E67_Birth ;
    rdfs:label "Birth of Jussoila, Johannes" ;
    bioc:had_participant_in_role <http://www.intavia.eu/bs/born_person/10> ;
    crm:P4_has_time-span <http://www.intavia.eu/bs/timespan/1/10> ;
    crm:P7_took_place_at <http://www.intavia.eu/bs/place/37013> ;
    crm:P98_brought_into_life <http

# New Section

## Occupations

```
https://api.triplydb.com/s/Ed0oKLyXk
```

In [41]:
q = PREFIXES + """ 
SELECT DISTINCT ?id ?occ 
WHERE {
  VALUES ?id { """ +IDS+ """ }
  ?id foaf:focus ?prs .
  ?prs ?oprop ?occ .
  ?occ a nbf:Title 
} """

sparql.setQuery(q)

sparql.setReturnFormat(JSON)
results = sparql.query().convert()

res = results["results"]["bindings"]
print(f"{len(res)} results")

print(f"Datafields {results['head']['vars']}") # 12692 results
res = convertDatatypes(results)

for ob in res[:5]:
  print(ob)

12689 results
Datafields ['id', 'occ']
{'id': 'http://ldf.fi/nbf/p1', 'occ': 'http://ldf.fi/nbf/titles/horrossaarnaaja'}
{'id': 'http://ldf.fi/nbf/p1', 'occ': 'http://ldf.fi/nbf/titles/uskonnollinen%20her%c3%a4tt%c3%a4j%c3%a4'}
{'id': 'http://ldf.fi/nbf/p10', 'occ': 'http://ldf.fi/nbf/titles/katolinen%20pappi'}
{'id': 'http://ldf.fi/nbf/p10', 'occ': 'http://ldf.fi/nbf/titles/vastauskonpuhdistuksen%20edustaja'}
{'id': 'http://ldf.fi/nbf/p100', 'occ': 'http://ldf.fi/nbf/titles/valtaneuvos'}


In [42]:
IDS_OCC = list(set([ob.get('occ') for ob in res]))
IDS_OCC[:10]

block = ' '.join(["<{}>".format(x) for x in IDS_OCC])

q = PREFIXES + """ 
SELECT DISTINCT *
WHERE {
  VALUES ?occ { """ +block+ """ }
  ?occ skos:prefLabel ?label1 .
  OPTIONAL { ?occ nbf:wikidata ?wiki1 . OPTIONAL { ?occ skos:hiddenLabel ?alabel1 }}
  OPTIONAL { 
    ?occ skos:broader ?occ2 . 
    ?occ2 skos:prefLabel ?label2 ; nbf:wikidata ?wiki2 .
    OPTIONAL { ?occ2 skos:hiddenLabel ?alabel2 }
    OPTIONAL {
      ?occ2 skos:broader ?occ3 . 
      ?occ3 skos:prefLabel ?label3 ; nbf:wikidata ?wiki3 
      OPTIONAL { ?occ3 skos:hiddenLabel ?alabel3 }
    }
  }
}  """

sparql.setQuery(q)

sparql.setReturnFormat(JSON)
sparql.setMethod(POST)

results = sparql.query().convert()
res_occ = convertDatatypes(results)

print(f"{len(res_occ)} results") # 13375 results
for ob in res_occ[:5]:
  print(ob)

6529 results
{'occ': 'http://ldf.fi/nbf/titles/suojeluskuntaupseeri', 'label1': 'suojeluskuntaupseeri', 'occ2': 'http://ldf.fi/nbf/titles/upseeri', 'label2': 'upseeri', 'wiki2': 'http://www.wikidata.org/entity/Q189290', 'alabel2': 'military officer'}
{'occ': 'http://ldf.fi/nbf/titles/folkh%c3%a4lsanin%20toimitusjohtaja', 'label1': 'Folkhälsanin toimitusjohtaja', 'occ2': 'http://ldf.fi/nbf/titles/toimitusjohtaja', 'label2': 'toimitusjohtaja', 'wiki2': 'http://www.wikidata.org/entity/Q484876', 'alabel2': 'chief executive officer', 'occ3': 'http://ldf.fi/nbf/titles/johtaja', 'label3': 'johtaja', 'wiki3': 'http://www.wikidata.org/entity/Q1162163', 'alabel3': 'director'}
{'occ': 'http://ldf.fi/nbf/titles/folkh%c3%a4lsanin%20toimitusjohtaja', 'label1': 'Folkhälsanin toimitusjohtaja', 'occ2': 'http://ldf.fi/nbf/titles/toimitusjohtaja', 'label2': 'toimitusjohtaja', 'wiki2': 'http://www.wikidata.org/entity/Q484876', 'alabel2': 'managing director', 'occ3': 'http://ldf.fi/nbf/titles/johtaja', 'la

In [43]:
g_occupations = initGraph()
OCC_LOOKUP = {}

for ob in res_occ:
  for s,t in [('wiki1', 'label1'), ('wiki2', 'label2'), ('wiki3', 'label3')]:
    wiki = ob.get(s)
    if wiki:
      # <'http://www.wikidata.org/entity/Q294126'> -> <http://www.intavia.eu/event/bs294126>
      url = URIRef(wiki.replace('http://www.wikidata.org/entity/Q', 'http://www.intavia.eu/bs/occupation/'))
      
      g_occupations.add((url, RDF.type, BIOC.Occupation))
      g_occupations.add((url, OWL.sameAs, URIRef(wiki)))

      st = ob.get(t)
      if st:
        g_occupations.add((url, SKOS.altLabel, Literal(st, lang='fi')))

      st = ob.get('a'+t)
      if st:
        if (url, RDFS.label, Literal(st, lang='en')) in g_occupations:
            pass
        elif (url, RDFS.label, None) in g_occupations:
            # g_occupations.preferredLabel(url, lang='en') <- doesn't work in rdflib 6.2.0
           g_occupations.add((url, SKOS.altLabel, Literal(st, lang='en')))
        else:
          g_occupations.add((url, RDFS.label, Literal(st, lang='en')))
      
      OCC_LOOKUP[ob.get('occ')] = url
      
      break

print(g_occupations.serialize(format='turtle')[:1000])

@prefix bioc: <http://ldf.fi/schema/bioc/> .
@prefix owl: <http://www.w3.org/2002/07/owl#> .
@prefix rdfs: <http://www.w3.org/2000/01/rdf-schema#> .
@prefix skos: <http://www.w3.org/2004/02/skos/core#> .
@prefix wd: <http://www.wikidata.org/entity/> .

<http://www.intavia.eu/bs/occupation/100511448> a bioc:Occupation ;
    rdfs:label "Deputy Chancellor of Justice"@en ;
    owl:sameAs wd:Q100511448 ;
    skos:altLabel "apulaisoikeuskansleri"@fi .

<http://www.intavia.eu/bs/occupation/100796092> a bioc:Occupation ;
    rdfs:label "Justice of the Supreme Administrative Court"@en ;
    owl:sameAs wd:Q100796092 ;
    skos:altLabel "korkeimman hallinto-oikeuden jäsen"@fi .

<http://www.intavia.eu/bs/occupation/100796356> a bioc:Occupation ;
    rdfs:label "President of the Supreme Administrative Court"@en ;
    owl:sameAs wd:Q100796356 ;
    skos:altLabel "korkeimman hallinto-oikeuden presidentti"@fi .

<http://www.intavia.eu/bs/occupation/101248871> a bioc:Occupation ;
    rdfs:label "insur

In [44]:
list(OCC_LOOKUP.items())[:10]

[('http://ldf.fi/nbf/titles/suojeluskuntaupseeri',
  rdflib.term.URIRef('http://www.intavia.eu/bs/occupation/189290')),
 ('http://ldf.fi/nbf/titles/folkh%c3%a4lsanin%20toimitusjohtaja',
  rdflib.term.URIRef('http://www.intavia.eu/bs/occupation/484876')),
 ('http://ldf.fi/nbf/titles/keilaaja',
  rdflib.term.URIRef('http://www.intavia.eu/bs/occupation/4951095')),
 ('http://ldf.fi/nbf/titles/eurajoen%20kunnallislautakunnan%20esimies',
  rdflib.term.URIRef('http://www.intavia.eu/bs/occupation/1240788')),
 ('http://ldf.fi/nbf/titles/kamreeri',
  rdflib.term.URIRef('http://www.intavia.eu/bs/occupation/2091370')),
 ('http://ldf.fi/nbf/titles/tulkki',
  rdflib.term.URIRef('http://www.intavia.eu/bs/occupation/183065')),
 ('http://ldf.fi/nbf/titles/viipurin%20maistraatin%20sihteeri',
  rdflib.term.URIRef('http://www.intavia.eu/bs/occupation/80687')),
 ('http://ldf.fi/nbf/titles/teatterihistorian%20tutkija',
  rdflib.term.URIRef('http://www.intavia.eu/bs/occupation/1650915')),
 ('http://ldf.fi/nb

In [45]:
for ob in res:
  url = OCC_LOOKUP.get(ob.get('occ'))
  if url:
    g.add((nbfPerson2Intavia(ob.get('id')), BIOC.has_occupation, url))

## Query lifetime events

https://api.triplydb.com/s/twyfgazfE

```
Upper class event types:
crm:E5_Event
|- intavia:Career
|- crm:E12_Production
|- intavia:Honour
```


In [46]:
q = PREFIXES + """ SELECT DISTINCT 
?id ?crmcls ?evt ?label ?time_label ?time_start ?time_end ?place ?occ
WHERE {
  VALUES ?id { """ +IDS+ """ }
  VALUES (?cls ?crmcls) { 
    (nbf:Career idm-core:Career) 
    (nbf:Product crm:E12_Production) 
    (nbf:Honour idm-core:Honour) 
    (nbf:Event crm:E5_Event) 
  }
  ?id foaf:focus ?prs .
  ?evt bioc:inheres_in ?prs ; a ?cls ; skos:prefLabel ?label .
  OPTIONAL { ?evt nbf:time ?tspan 
    OPTIONAL { ?tspan skos:prefLabel ?time_label }
    OPTIONAL { ?tspan <http://vocab.getty.edu/ontology#estStart> ?time_start }

    OPTIONAL { ?tspan <http://vocab.getty.edu/ontology#estEnd> ?time_end }
  }

  OPTIONAL { ?evt nbf:place ?place }

  OPTIONAL { ?evt nbf:has_occupation ?occ }
} """

sparql.setQuery(q)

sparql.setReturnFormat(JSON)
results = sparql.query().convert()

res = results["results"]["bindings"]
print(f"{len(res)} results") # 189780 results -> 125713

print(f"Datafields {results['head']['vars']}")
res = convertDatatypes(results)

for ob in res[:5]:
  print(ob)

125325 results
Datafields ['id', 'crmcls', 'evt', 'label', 'time_label', 'time_start', 'time_end', 'place', 'occ']
{'id': 'http://ldf.fi/nbf/p10', 'crmcls': 'http://www.cidoc-crm.org/cidoc-crm/E5_Event', 'evt': 'http://ldf.fi/history/histo/p3892', 'label': 'Kirjoitettiin ensimmäinen suomen kielioppi', 'time_label': '1500-luvun loppu - 1600-luvun alku', 'time_start': '1560-01-01', 'time_end': '1639-12-31', 'place': 'http://www.yso.fi/onto/suo/A0009_-551849'}
{'id': 'http://ldf.fi/nbf/p100', 'crmcls': 'http://www.intavia.eu/idm-core/Career', 'evt': 'http://ldf.fi/nbf/e100-1', 'label': 'Valtaneuvos 1501', 'time_label': '1501', 'time_start': '1501-01-01', 'time_end': '1501-12-31', 'occ': 'http://ldf.fi/nbf/titles/valtaneuvos'}
{'id': 'http://ldf.fi/nbf/p100', 'crmcls': 'http://www.intavia.eu/idm-core/Career', 'evt': 'http://ldf.fi/nbf/e100-2', 'label': 'Kastelholman linnanpäällikkö 1508-1514', 'time_label': '1508-1514', 'time_start': '1508-01-01', 'time_end': '1514-12-31', 'occ': 'http://l

```
<http://www.intavia.eu/event/bs125-1> a crm:E5_Event ;
    bioc:had_participant_in_role <http://www.intavia.eu/idm/role/participatingActor/bs125-1> ;
    crm:P4_has_time-span <http://www.intavia.eu/timespan/2/12345> ;
    crm:P7_took_place_at <http://www.intavia.eu/placeproxy/bs/196397>
    rdfs:label "Tapahtui niin joskus ja jossain" .

<http://www.intavia.eu/personproxy/bs/12345> bioc:bearer_of <http://www.intavia.eu/idm/role/participatingActor/bs125-1> .

<http://www.intavia.eu/idm/role/participatingActor/bs125-1> a bioc:Event_Role .
```

In [47]:
g_event = initGraph()
eventcn = defaultdict(set)

for ob in res:
  prs_id = stripNBFUrl(ob.get('id'))
  eventcn[prs_id].add(ob.get('evt'))
  evt_idx = len(eventcn[prs_id])
  evt_url = URIRef('http://www.intavia.eu/bs/event/{}-{}'.format(prs_id, evt_idx))
  role_url = URIRef('http://www.intavia.eu/idm/role/participatingActor/{}-{}'.format(prs_id, evt_idx))

  g_event.add((evt_url, RDF.type, URIRef(ob.get('crmcls'))))
  g_event.add((evt_url, BIOC.had_participant_in_role, role_url))
  g_event.add((evt_url, RDFS.label, Literal(ob.get('label'))))

  role_cls = OCC_LOOKUP.get(ob.get('occ'), BIOC.Event_Role) 
  g_event.add((role_url, RDF.type, role_cls))

  g_event.add((nbfPerson2Intavia(ob.get('id')), BIOC.bearer_of, role_url))

  time_arr = [ob.get(k) for k in ['time_label', 'time_start', 'time_end']]
  if any(time_arr):
    cn[prs_id] += 1
    idx = timespanIndexers[prs_id].get(time_arr[1:])
    tspan = getTimespan(g_event, prs_id, idx, *time_arr)
    g_event.add((evt_url, CRM['P4_has_time-span'], tspan))

  v = PLACE_LOOKUP.get(ob.get('place'))
  if v:
    g_event.add((evt_url, CRM.P7_took_place_at, v))

print(g_event.serialize(format='turtle')[:2000], '\n...\n', g_event.serialize(format='turtle')[-2000:])

@prefix bioc: <http://ldf.fi/schema/bioc/> .
@prefix crm: <http://www.cidoc-crm.org/cidoc-crm/> .
@prefix idmcore: <http://www.intavia.eu/idm-core/> .
@prefix rdfs: <http://www.w3.org/2000/01/rdf-schema#> .
@prefix xsd: <http://www.w3.org/2001/XMLSchema#> .

<http://www.intavia.eu/bs/event/10-1> a crm:E5_Event ;
    rdfs:label "Kirjoitettiin ensimmäinen suomen kielioppi" ;
    bioc:had_participant_in_role <http://www.intavia.eu/idm/role/participatingActor/10-1> ;
    crm:P4_has_time-span <http://www.intavia.eu/bs/timespan/3/10> .

<http://www.intavia.eu/bs/event/100-1> a idmcore:Career ;
    rdfs:label "Valtaneuvos 1501" ;
    bioc:had_participant_in_role <http://www.intavia.eu/idm/role/participatingActor/100-1> ;
    crm:P4_has_time-span <http://www.intavia.eu/bs/timespan/3/100> .

<http://www.intavia.eu/bs/event/100-2> a idmcore:Career ;
    rdfs:label "Kastelholman linnanpäällikkö 1508-1514" ;
    bioc:had_participant_in_role <http://www.intavia.eu/idm/role/participatingActor/100-2>

# query family relations
[Yasgui](https://api.triplydb.com/s/5hLkJ5tQd)

In [48]:
sparql.setQuery(PREFIXES +
""" PREFIX nbfrels: <http://ldf.fi/nbf/relations/>
PREFIX intavia_rel: <http://www.intavia.eu/personreltype/> 

SELECT DISTINCT *
WHERE {
  VALUES ?id { """ +IDS+ """ }
  # VALUES ?id { <http://ldf.fi/nbf/p992> }
  VALUES (?cls ?relcls) {
    (nbfrels:Spouse intavia_rel:spouse)
    (nbfrels:Wife intavia_rel:spouse)
    (nbfrels:Husband intavia_rel:spouse)
    (nbfrels:Parent intavia_rel:parent)
    (nbfrels:Father intavia_rel:parent)
    (nbfrels:Mother intavia_rel:parent)
    (nbfrels:Brother intavia_rel:sibling)
    (nbfrels:Sister intavia_rel:sibling)
    (nbfrels:Daughter intavia_rel:child)
    (nbfrels:Son intavia_rel:child)
  }
  
  VALUES ?source { sources:source1 }
  ?id bioc:has_family_relation ?rel .
  ?rel a ?cls ; bioc:inheres_in/owl:sameAs* ?relative .
  FILTER NOT EXISTS { ?relative owl:sameas [] }
  ?relative dct:source ?source .
} """)

sparql.setReturnFormat(JSON)
results = sparql.query().convert()

res = results["results"]["bindings"]
print(f"{len(res)} results") # 2751 results 

print(f"Datafields {results['head']['vars']}")
res = convertDatatypes(results)

for ob in res[:5]:
  print(ob)

2762 results
Datafields ['id', 'cls', 'relcls', 'source', 'rel', 'relative']
{'id': 'http://ldf.fi/nbf/p1001', 'cls': 'http://ldf.fi/nbf/relations/Daughter', 'relcls': 'http://www.intavia.eu/personreltype/child', 'source': 'http://ldf.fi/nbf/sources/source1', 'rel': 'http://ldf.fi/nbf/rel/daughter_3416', 'relative': 'http://ldf.fi/nbf/p3416'}
{'id': 'http://ldf.fi/nbf/p1001', 'cls': 'http://ldf.fi/nbf/relations/Son', 'relcls': 'http://www.intavia.eu/personreltype/child', 'source': 'http://ldf.fi/nbf/sources/source1', 'rel': 'http://ldf.fi/nbf/rel/son_5701', 'relative': 'http://ldf.fi/nbf/p5701'}
{'id': 'http://ldf.fi/nbf/p1003', 'cls': 'http://ldf.fi/nbf/relations/Son', 'relcls': 'http://www.intavia.eu/personreltype/child', 'source': 'http://ldf.fi/nbf/sources/source1', 'rel': 'http://ldf.fi/nbf/rel/son_2178', 'relative': 'http://ldf.fi/nbf/p2178'}
{'id': 'http://ldf.fi/nbf/p1004', 'cls': 'http://ldf.fi/nbf/relations/Son', 'relcls': 'http://www.intavia.eu/personreltype/child', 'source'



```

<http://www.intavia.eu/bs/personrelation/1/12346> a bioc:Family_Relationship_Role ;
    rdf:type <http://www.intavia.eu/personreltype/sibling> ;
    bioc:inheres_in <http://www.intavia.eu/personproxy/ErnstKlimt> .

<http://www.intavia.eu/personreltype/sibling> rdfs:label "Sibling" .
```



In [49]:
g_relations = initGraph()
cn = Counter()

for ob in res:
  v = ob.get('id')
  cn[v] += 1
  url = URIRef('http://www.intavia.eu/bs/personrelation/{}/{}'.format(cn[v],  stripNBFUrl(v)))
  
  g_relations.add((url, RDF.type, URIRef(ob.get('relcls', BIOC.Family_Relationship_Role))))
  g_relations.add((url, BIOC.inheres_in,  nbfPerson2Intavia(ob.get('relative'))))

  g_relations.add((nbfPerson2Intavia(v), BIOC.has_family_relation, url))
  
print(g_relations.serialize(format='turtle')[-2000:])

/personrelation/3/950>,
        <http://www.intavia.eu/bs/personrelation/4/950>,
        <http://www.intavia.eu/bs/personrelation/5/950>,
        <http://www.intavia.eu/bs/personrelation/6/950> .

<http://www.intavia.eu/bs/personproxy/964> bioc:has_family_relation <http://www.intavia.eu/bs/personrelation/1/964>,
        <http://www.intavia.eu/bs/personrelation/2/964>,
        <http://www.intavia.eu/bs/personrelation/3/964>,
        <http://www.intavia.eu/bs/personrelation/4/964>,
        <http://www.intavia.eu/bs/personrelation/5/964>,
        <http://www.intavia.eu/bs/personrelation/6/964> .

<http://www.intavia.eu/bs/personproxy/1181> bioc:has_family_relation <http://www.intavia.eu/bs/personrelation/1/1181>,
        <http://www.intavia.eu/bs/personrelation/2/1181>,
        <http://www.intavia.eu/bs/personrelation/3/1181>,
        <http://www.intavia.eu/bs/personrelation/4/1181>,
        <http://www.intavia.eu/bs/personrelation/5/1181>,
        <http://www.intavia.eu/bs/personrelation

# write output

In [50]:
OUTFILE, PLACEFILE, BIOEVENTFILE, EVENTFILE, LABELFILE, RELATIONFILE, OCCUPATIONFILE = 'bs2intavia.ttl', 'bs2intavia_places.ttl', 'bs2intavia_bioevents.ttl', 'bs2intavia_events.ttl', 'bs2intavia_labels.ttl', 'bs2intavia_relations.ttl', 'bs2intavia_occupations.ttl'
files = [(g, OUTFILE),
            (g_places, PLACEFILE),
            (g_bioevent, BIOEVENTFILE),
            (g_event, EVENTFILE),
            (g_label, LABELFILE),
            (g_relations, RELATIONFILE),
            (g_occupations, OCCUPATIONFILE)
            ]
saveGraphs(files)
g_all = initGraph()
for k,_ in files:
  g_all += k

saveGraphs([(g_all, 'bs2intavia.ttl')])

print("Graph contains {} triples.".format(len(g_all))) # Graph contains 1432170 triples 
! head -n 50 bs2intavia.ttl

Graph contains 1405339 triples.
# Created with script /usr/local/lib/python3.10/dist-packages/ipykernel_launcher.py
# in colab https://colab.research.google.com/drive/1c6hwZs1ooi1G1kfHeB-JYeywemyY-mU-?usp=sharing# python3 /usr/local/lib/python3.10/dist-packages/ipykernel_launcher.py -f /root/.local/share/jupyter/runtime/kernel-ff59b680-7074-4a06-a530-00a6197e09ae.json
# 2023-05-12 10:03

@prefix bioc: <http://ldf.fi/schema/bioc/> .
@prefix crm: <http://www.cidoc-crm.org/cidoc-crm/> .
@prefix geo: <http://www.opengis.net/ont/geosparql#> .
@prefix idmcore: <http://www.intavia.eu/idm-core/> .
@prefix idmrole: <http://www.intavia.eu/idm-role/> .
@prefix idnametype: <http://www.intavia.eu/nametype/> .
@prefix nbf: <http://ldf.fi/nbf/> .
@prefix owl: <http://www.w3.org/2002/07/owl#> .
@prefix rdfs: <http://www.w3.org/2000/01/rdf-schema#> .
@prefix skos: <http://www.w3.org/2004/02/skos/core#> .
@prefix wd: <http://www.wikidata.org/entity/> .
@prefix xsd: <http://www.w3.org/2001/XMLSchema#> .


In [51]:
g.serialize(format='turtle')

'@prefix bioc: <http://ldf.fi/schema/bioc/> .\n@prefix crm: <http://www.cidoc-crm.org/cidoc-crm/> .\n@prefix idmcore: <http://www.intavia.eu/idm-core/> .\n@prefix nbf: <http://ldf.fi/nbf/> .\n@prefix owl: <http://www.w3.org/2002/07/owl#> .\n@prefix rdfs: <http://www.w3.org/2000/01/rdf-schema#> .\n@prefix wd: <http://www.wikidata.org/entity/> .\n\n<http://commons.wikimedia.org/wiki/Special:FilePath/%D0%92%D0%B8%D1%80%D0%B5%D0%BD%D0%B8%D1%83%D1%81%20%D0%90%D0%BD%D0%B4%D1%80%D0%B5%D0%B9%20%D0%90%D0%BD%D0%B4%D1%80%D0%B5%D0%B5%D0%B2%D0%B8%D1%87.jpg> a crm:E36_Visual_Item ;\n    crm:P138_represents <http://www.intavia.eu/bs/personproxy/1179> .\n\n<http://commons.wikimedia.org/wiki/Special:FilePath/%D0%9D%D0%B8%D0%BA%D0%B8%D1%82%D1%81%D0%BA%D0%B8%D0%B9%20%D0%B1%D0%BE%D1%82%D0%B0%D0%BD%D0%B8%D1%87%D0%B5%D1%81%D0%BA%D0%B8%D0%B9%20%D1%81%D0%B0%D0%B4.%20%D0%9F%D0%B0%D0%BC%D1%8F%D1%82%D0%BD%D0%B8%D0%BA%20%D0%A5.%20%D0%A1%D1%82%D0%B8%D0%B2%D0%B5%D0%BD%D1%83%20-%20panoramio.jpg> a crm:E36_Visual_Ite

## test with localhost https://api.triplydb.com/s/X-eAfrLwz

In [52]:
g_test = initGraph()
g_test.add((NBF.xyz, RDF.value, Literal("true", datatype=XSD.boolean)))
print(g_test.serialize(format='turtle'))

@prefix nbf: <http://ldf.fi/nbf/> .
@prefix rdf: <http://www.w3.org/1999/02/22-rdf-syntax-ns#> .
@prefix xsd: <http://www.w3.org/2001/XMLSchema#> .

nbf:xyz rdf:value true .


