# Analyse datasets in Timbuctoo-instances

In [10]:
%%capture
%run func.ipynb

## List properties per database

In [11]:
import pandas as pd

def get_ds_props(url):
    """Build a table with one row of ResourceSync properties per dataset.

    Starting from the source description at `url`, every entry in its
    `rs_urls` is treated as a capability list of one dataset. For each
    dataset the capability list, the dataset description and the resource
    list are fetched through the helpers `get_sitemap` and `get_graph`
    (defined outside this cell).

    Parameters
    ----------
    url : str
        Location of the source description.

    Returns
    -------
    pandas.DataFrame
        Columns: ds_name, cap_loc, desc_href, desc_type, desc_len, n_rl,
        n_cl, rl_loc, cl_loc, rl_at, rl_completed, resource.
        `desc_len` is None when no description link was found; `rl_at`,
        `rl_completed` and `resource` are None when the dataset advertises
        no resource list, and `resource` is None when that list is empty.
    """
    columns = {name: [] for name in (
        'ds_name', 'cap_loc', 'desc_href', 'desc_type', 'desc_len',
        'n_rl', 'n_cl', 'rl_loc', 'cl_loc', 'rl_at', 'rl_completed', 'resource')}

    # source description
    source_desc = get_sitemap(url)
    for c_url in source_desc.rs_urls:
        # The dataset name is the 7th '/'-separated piece of the capability
        # list location, i.e. the path segment right before the file name in
        # URLs shaped like .../v5/resourcesync/<user>/<dataset>/capabilitylist.xml
        ds_name = c_url.loc.split('/')[6]
        desc_url = c_url.describedby_href
        desc_type = c_url.describedby_type

        # capability list: count resource lists / change lists and remember
        # the last location seen for each kind
        cap_list = get_sitemap(c_url.loc)
        rl_count = 0
        cl_count = 0
        rl_url = None
        cl_url = None
        for rs_url in cap_list.rs_urls:
            if rs_url.capability == 'resourcelist':
                rl_count += 1
                rl_url = rs_url.loc
            elif rs_url.capability == 'changelist':
                cl_count += 1
                cl_url = rs_url.loc

        # dataset description is in RDF; fall back to the link on the
        # capability list when the source description did not carry one
        if desc_url is None:
            desc_url = cap_list.describedby_href
            desc_type = cap_list.describedby_type
        desc_len = None
        if desc_url:
            desc_len = len(get_graph(desc_url, desc_type))

        # resourcelist
        rl_at = None
        rl_completed = None
        resource = None
        if rl_url is None:
            # Nothing to fetch; keep the row so all columns stay aligned.
            print('warning, %s has no resourcelist' % c_url.loc)
        else:
            res_list = get_sitemap(rl_url)
            rl_at = res_list.at
            rl_completed = res_list.completed
            # up till now we only encounter 1 resource-url in a resource list.
            n_resources = len(res_list.rs_urls)
            if n_resources != 1:
                print('warning, %s has not expected number of 1 resource, but %d' % (rl_url, n_resources))
            if n_resources:
                resource = res_list.rs_urls[0].loc

        # Counting the statements of each resource (via get_quads) was
        # switched off in the original cell and is intentionally not done here.

        columns['ds_name'].append(ds_name)
        columns['cap_loc'].append(c_url.loc)  # loc of capability list
        columns['desc_href'].append(desc_url)
        columns['desc_type'].append(desc_type)
        columns['desc_len'].append(desc_len)
        columns['n_rl'].append(rl_count)
        columns['n_cl'].append(cl_count)
        columns['rl_loc'].append(rl_url)
        columns['cl_loc'].append(cl_url)
        columns['rl_at'].append(rl_at)
        columns['rl_completed'].append(rl_completed)
        columns['resource'].append(resource)

    return pd.DataFrame(columns)

In [12]:
# Adjust pandas' column-width display option before rendering the table.
# NOTE(review): presumably meant to keep the long URLs from being truncated;
# the meaning of 0 depends on the pandas version — confirm against the options docs.
pd.options.display.max_colwidth = 0
# Build the per-dataset property table for ANANSI_URL. That constant is not
# defined in the visible cells (presumably it comes from func.ipynb).
df = get_ds_props(ANANSI_URL)
# Show the (rows, columns) shape followed by the first rows of the table.
display(df.shape, df.head())

(11, 12)

Unnamed: 0,ds_name,cap_loc,desc_href,desc_type,desc_len,n_rl,n_cl,rl_loc,cl_loc,rl_at,rl_completed,resource
0,wwriters_nl,https://data.anansi.clariah.nl/v5/resourcesync/u38d24500551ccff8d2b0c4f84fc947f45934aa26/wwriters_nl/capabilitylist.xml,https://data.anansi.clariah.nl/v5/resourcesync/u38d24500551ccff8d2b0c4f84fc947f45934aa26/wwriters_nl/description.xml,application/rdf+xml,0,1,1,https://data.anansi.clariah.nl/v5/resourcesync/u38d24500551ccff8d2b0c4f84fc947f45934aa26/wwriters_nl/resourcelist.xml,https://data.anansi.clariah.nl/v5/resourcesync/u38d24500551ccff8d2b0c4f84fc947f45934aa26/wwriters_nl/changelist.xml,2019-01-31 08:04:33.830000+00:00,2019-01-31 12:07:53.827000+00:00,https://data.anansi.clariah.nl/v5/resourcesync/u38d24500551ccff8d2b0c4f84fc947f45934aa26/wwriters_nl/dataset.nq
1,gemeentegeschiedenisnl,https://data.anansi.clariah.nl/v5/resourcesync/u74ccc032adf8422d7ea92df96cd4783f0543db3b/gemeentegeschiedenisnl/capabilitylist.xml,https://data.anansi.clariah.nl/v5/resourcesync/u74ccc032adf8422d7ea92df96cd4783f0543db3b/gemeentegeschiedenisnl/description.xml,application/rdf+xml,0,1,1,https://data.anansi.clariah.nl/v5/resourcesync/u74ccc032adf8422d7ea92df96cd4783f0543db3b/gemeentegeschiedenisnl/resourcelist.xml,https://data.anansi.clariah.nl/v5/resourcesync/u74ccc032adf8422d7ea92df96cd4783f0543db3b/gemeentegeschiedenisnl/changelist.xml,2019-01-31 08:04:34.028000+00:00,2019-01-31 12:07:54.006000+00:00,https://data.anansi.clariah.nl/v5/resourcesync/u74ccc032adf8422d7ea92df96cd4783f0543db3b/gemeentegeschiedenisnl/dataset.nq
2,dwc,https://data.anansi.clariah.nl/v5/resourcesync/u74ccc032adf8422d7ea92df96cd4783f0543db3b/dwc/capabilitylist.xml,https://data.anansi.clariah.nl/v5/resourcesync/u74ccc032adf8422d7ea92df96cd4783f0543db3b/dwc/description.xml,application/rdf+xml,7,1,1,https://data.anansi.clariah.nl/v5/resourcesync/u74ccc032adf8422d7ea92df96cd4783f0543db3b/dwc/resourcelist.xml,https://data.anansi.clariah.nl/v5/resourcesync/u74ccc032adf8422d7ea92df96cd4783f0543db3b/dwc/changelist.xml,2019-01-31 08:04:34.273000+00:00,2019-01-31 12:07:54.139000+00:00,https://data.anansi.clariah.nl/v5/resourcesync/u74ccc032adf8422d7ea92df96cd4783f0543db3b/dwc/dataset.nq
3,nlgis,https://data.anansi.clariah.nl/v5/resourcesync/u74ccc032adf8422d7ea92df96cd4783f0543db3b/nlgis/capabilitylist.xml,https://data.anansi.clariah.nl/v5/resourcesync/u74ccc032adf8422d7ea92df96cd4783f0543db3b/nlgis/description.xml,application/rdf+xml,0,1,1,https://data.anansi.clariah.nl/v5/resourcesync/u74ccc032adf8422d7ea92df96cd4783f0543db3b/nlgis/resourcelist.xml,https://data.anansi.clariah.nl/v5/resourcesync/u74ccc032adf8422d7ea92df96cd4783f0543db3b/nlgis/changelist.xml,2019-01-31 08:04:34.986000+00:00,2019-01-31 12:07:54.270000+00:00,https://data.anansi.clariah.nl/v5/resourcesync/u74ccc032adf8422d7ea92df96cd4783f0543db3b/nlgis/dataset.nq
4,gemeentegeschiedenis,https://data.anansi.clariah.nl/v5/resourcesync/u74ccc032adf8422d7ea92df96cd4783f0543db3b/gemeentegeschiedenis/capabilitylist.xml,https://data.anansi.clariah.nl/v5/resourcesync/u74ccc032adf8422d7ea92df96cd4783f0543db3b/gemeentegeschiedenis/description.xml,application/rdf+xml,0,1,1,https://data.anansi.clariah.nl/v5/resourcesync/u74ccc032adf8422d7ea92df96cd4783f0543db3b/gemeentegeschiedenis/resourcelist.xml,https://data.anansi.clariah.nl/v5/resourcesync/u74ccc032adf8422d7ea92df96cd4783f0543db3b/gemeentegeschiedenis/changelist.xml,2019-01-31 08:04:33.928000+00:00,2019-01-31 12:07:54.392000+00:00,https://data.anansi.clariah.nl/v5/resourcesync/u74ccc032adf8422d7ea92df96cd4783f0543db3b/gemeentegeschiedenis/dataset.nq


In [13]:
# Save the Anansi table, index included; the fingerprint loop further down
# reads this file back with index_col=0.
df.to_csv('data/anansi_resources.csv')

In [14]:
# Same inventory for HUYDAT_URL (not defined in the visible cells, presumably
# from func.ipynb). This rebinds `df`: the Anansi table built earlier is no
# longer reachable under that name after this cell has run.
df = get_ds_props(HUYDAT_URL)
# Column names, non-null counts and dtypes of the result.
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 39 entries, 0 to 38
Data columns (total 12 columns):
ds_name         39 non-null object
cap_loc         39 non-null object
desc_href       39 non-null object
desc_type       39 non-null object
desc_len        39 non-null int64
n_rl            39 non-null int64
n_cl            39 non-null int64
rl_loc          39 non-null object
cl_loc          39 non-null object
rl_at           39 non-null datetime64[ns, UTC]
rl_completed    39 non-null datetime64[ns, UTC]
resource        39 non-null object
dtypes: datetime64[ns, UTC](2), int64(3), object(7)
memory usage: 3.7+ KB


In [15]:
# Save the table currently bound to `df` (index included). In execution order
# that is the HUYDAT_URL result; the file is not read back in the visible cells.
df.to_csv('data/huydat_resources.csv')

## Inspecting datasets

In [16]:
from rdflib import Literal
import requests
from IPython.core.display import display, HTML
import urllib
from urllib.parse import urlparse


def fr(x):
    """Format a number right-aligned in 12 characters with '.' between thousands.

    The value is first rendered with ',' as grouping character and every ','
    is then swapped for '.'. A decimal point in a float is left untouched, so
    the result is only unambiguous for integers.
    """
    with_commas = '{:12,}'.format(x)
    dotted = with_commas.replace(',', '.')
    return dotted

def isresolvable(url, timeout=10):
    """Return True when a GET request on `url` succeeds, False otherwise.

    Parameters
    ----------
    url : str
        Address to try.
    timeout : float, optional
        Seconds passed to `requests.get` as its timeout, so one unresponsive
        host cannot stall the caller indefinitely.

    Returns
    -------
    bool
        `response.ok` of the request, or False when the request could not be
        made at all.
    """
    try:
        response = requests.get(url, timeout=timeout)
        return response.ok
    except Exception:
        # Best-effort probe: any failure counts as "not resolvable".
        # `Exception` (rather than a bare except) lets KeyboardInterrupt and
        # SystemExit propagate so a long run can still be interrupted.
        return False

class GraphProps(object):
    """Counts describing a graph: statements, contexts, literals, netlocs, predicates.

    The graph `g` handed to the constructor must support `len(g)`,
    `g.contexts()` and `g.triples((None, None, None))` yielding
    (subject, predicate, object) triples.

    Attributes set by the constructor:
      statements -- len(g)
      contexts   -- list(g.contexts())
      netlocs_s / netlocs_p / netlocs_o -- dict mapping the network location
          of subject / predicate / non-literal object IRIs to a count
      literals   -- number of triples whose object is a Literal
      predicates -- dict mapping each predicate (as str) to a count
    """

    def __init__(self, g):
        self.statements = len(g)
        self.contexts = list(g.contexts())
        self.netlocs_s = dict()
        self.netlocs_p = dict()
        self.netlocs_o = dict()
        self.literals = 0
        self.predicates = dict()
        for s, p, o in g.triples((None, None, None)):
            self._bump(self.predicates, str(p))
            self._bump(self.netlocs_s, urlparse(str(s)).netloc)
            self._bump(self.netlocs_p, urlparse(str(p)).netloc)

            # Literal objects have no network location; count them separately.
            if isinstance(o, Literal):
                self.literals += 1
            else:
                self._bump(self.netlocs_o, urlparse(str(o)).netloc)

    @staticmethod
    def _bump(counts, key):
        """Increase counts[key] by one, starting from 0 for an unseen key."""
        counts[key] = counts.get(key, 0) + 1

    def literal_ratio(self):
        """Return literals / statements, or 0.0 for a graph without statements."""
        if not self.statements:
            return 0.0
        return self.literals / self.statements

    def print_props(self):
        """Print the collected counts.

        Predicates are listed in sorted order. A predicate outside
        `example.org` that `isresolvable` accepts is rendered as an HTML
        link; every other predicate is printed as plain text. Note that this
        issues one request per such predicate.
        """
        # Local import: the module name `html` is not among the names this
        # cell imports at the top.
        from html import escape

        print('contexts  :', fr(len(self.contexts)))
        print('statements:', fr(self.statements))
        print('literals  :', fr(self.literals))
        nar = self.literal_ratio()
        print('literal ratio:', '{:7.2f}'.format(nar).replace('.', ','))

        print()
        print('subject netlocs:', len(self.netlocs_s), self.netlocs_s)
        print('object netlocs:', len(self.netlocs_o), self.netlocs_o)

        print()
        edo = 'example.org'
        edos = self.netlocs_s.get(edo, 0) + self.netlocs_p.get(edo, 0) + self.netlocs_o.get(edo, 0)
        print('example.org', '| s', self.netlocs_s.get(edo, 0), '| p', self.netlocs_p.get(edo, 0), '| o', self.netlocs_o.get(edo, 0), '| total', edos)

        print()
        print('predicate netlocs:', len(self.netlocs_p), '|', self.netlocs_p)
        print('predicates:', len(self.predicates))
        for key in sorted(self.predicates):
            count = self.predicates[key]
            if urlparse(key).netloc != 'example.org' and isresolvable(key):
                # The IRI comes from downloaded data: escape it before it is
                # placed in an attribute and in element text.
                safe = escape(key)
                display(HTML("""<a target="_blank" href="{}">{}</a>""".format(safe, safe) + ': ' + str(count)))
            else:
                print('%s: %s' % (key, count))

In [17]:
def fingerprint(resource_url):
    """Display a heading and a link for `resource_url`, then print its graph properties.

    The resource is loaded with `get_quads` (defined outside this cell) and
    summarised by `GraphProps.print_props`.
    """
    display(HTML("""<h3>fingerprint</h3>"""))
    anchor = """<a target="_blank" href="{}">{}</a>""".format(resource_url, resource_url)
    display(HTML(anchor))
    props = GraphProps(get_quads(resource_url))
    props.print_props()

In [18]:
# fingerprint('https://repository.huygens.knaw.nl/v5/resourcesync/u519bd710306620fa7c56d541ae7b9f5b7f57a706/test_wwdocument/dataset.nq')

In [19]:
# rurl ='https://data.anansi.clariah.nl/v5/resourcesync/u74ccc032adf8422d7ea92df96cd4783f0543db3b/gemeentegeschiedenisnl/dataset.nq'
# g = get_quads(rurl)

In [20]:
# from rdflib import URIRef

# max = 5
# tel = 0
# for p, o in g.predicate_objects(URIRef('https://gemeentegeschiedenis.nl/def/geometry_type')):
#     tel += 1
#     print(p, o)
#     if tel >= max:
#         break

In [21]:
# Fingerprint every resource listed in the saved Anansi inventory.
dfanansi = pd.read_csv('data/anansi_resources.csv', index_col=0)
# Only the 'resource' column is needed; rows without a resource are skipped.
for resource_url in dfanansi['resource'].dropna():
    try:
        fingerprint(resource_url)
    except Exception as ex:
        # One unreadable dataset must not stop the survey, but report which
        # resource failed and why so the output can be traced back to it.
        print('could not fingerprint %s: %s: %s' % (resource_url, type(ex).__name__, ex))

contexts  :            1
statements:      509.868
literals  :            0
literal ratio:    0,00

subject netlocs: 3 {'resource.huygens.knaw.nl': 71630, '': 364307, 'www.w3.org': 73931}
object netlocs: 3 {'': 364307, 'resource.huygens.knaw.nl': 71630, 'www.w3.org': 73931}

example.org | s 0 | p 0 | o 0 | total 0

predicate netlocs: 2 | {'schema.org': 344100, 'www.w3.org': 165768}
predicates: 101
http://schema.org/commentsOnPerso: 2508
http://schema.org/commentsOnPerson: 2506
http://schema.org/commentsOnWor: 2364
http://schema.org/commentsOnWork: 2342
http://schema.org/containedInAntholog: 8
http://schema.org/containedInAnthology: 8
http://schema.org/created: 17908
http://schema.org/hasAdaptatio: 46
http://schema.org/hasAdaptation: 38
http://schema.org/hasAnnotationsO: 6
http://schema.org/hasAnnotationsOn: 6
http://schema.org/hasDocumentSourc: 11084
http://schema.org/hasDocumentSource: 11084
http://schema.org/hasEditio: 4
http://schema.org/hasEdition: 4
http://schema.org/hasGenr: 10750

contexts  :            1
statements:       45.783
literals  :       19.891
literal ratio:    0,43

subject netlocs: 7 {'gemeentegeschiedenis.nl': 32898, 'triply.cc': 8619, 'cbs.nl': 1388, 'www.opengis.net': 2873, 'www.w3.org': 2, 'rdfs.org': 2, 'www.gemeentegeschiedenis.nl': 1}
object netlocs: 7 {'gemeentegeschiedenis.nl': 15882, 'www.opengis.net': 2873, 'cbs.nl': 1387, 'triply.cc': 5746, 'rdfs.org': 1, 'www.w3.org': 2, 'www.gemeentegeschiedenis.nl': 1}

example.org | s 0 | p 0 | o 0 | total 0

predicate netlocs: 8 | {'gemeentegeschiedenis.nl': 18311, 'www.w3.org': 17399, 'www.opengis.net': 8619, 'cbs.nl': 1440, 'rdfs.org': 4, 'xmlns.com': 2, 'purl.org': 6, 'timbuctoo.huygens.knaw.nl': 2}
predicates: 21


http://timbuctoo.huygens.knaw.nl/static/v5/vocabulary#hasIndexConfig: 2


https://cbs.nl/def/gemeenteCode: 1440
https://gemeentegeschiedenis.nl/def/geometry_type: 2873
https://gemeentegeschiedenis.nl/def/id: 2881
https://gemeentegeschiedenis.nl/def/temporalExtension: 5804
https://gemeentegeschiedenis.nl/def/validSince: 3144
https://gemeentegeschiedenis.nl/def/validUntil: 3609


contexts  :            1
statements:      529.872
literals  :      133.680
literal ratio:    0,25

subject netlocs: 7 {'example.org': 508093, 'sws.geonames.org': 4236, 'www.dwc.knaw.nl': 7832, 'timbuctoo.huygens.knaw.nl': 7555, 'viaf.org': 2137, 'data.anansi.clariah.nl': 18, 'creativecommons.org': 1}
object netlocs: 7 {'example.org': 374422, 'sws.geonames.org': 4236, 'www.dwc.knaw.nl': 7832, 'timbuctoo.huygens.knaw.nl': 7555, 'viaf.org': 2137, 'data.anansi.clariah.nl': 9, 'creativecommons.org': 1}

example.org | s 508093 | p 248695 | o 374422 | total 1131210

predicate netlocs: 7 | {'schema.org': 100766, 'example.org': 248695, 'timbuctoo.huygens.knaw.nl': 10712, 'www.w3.org': 167563, 'www.geonames.org': 2123, 'purl.org': 12, 'xmlns.com': 1}
predicates: 59
http://example.org/datasets/u33707283d426f900d4d33707283d426f900d4d0d/bia/predicate/Country: 2123
http://example.org/datasets/u33707283d426f900d4d33707283d426f900d4d0d/bia/predicate/aboutPerson: 24978
http://example.org/datasets/u3370

http://schema.org/label: 588


http://timbuctoo.huygens.knaw.nl/static/v5/predicate/names: 10694
http://timbuctoo.huygens.knaw.nl/static/v5/vocabulary#hasIndexConfig: 5
http://timbuctoo.huygens.knaw.nl/static/v5/vocabulary#summaryDescriptionPredicate: 10
http://timbuctoo.huygens.knaw.nl/static/v5/vocabulary#summaryTitlePredicate: 3


<class 'rdflib.plugins.parsers.ntriples.ParseError'>


contexts  :            1
statements:       75.949
literals  :       17.707
literal ratio:    0,23

subject netlocs: 6 {'www.gemeentegeschiedenis.nl': 59301, 'iisg.amsterdam': 12876, 'nl.wikipedia.org': 1677, 'nl.dbpedia.org': 1648, 'sws.geonames.org': 446, 'rdfs.org': 1}
object netlocs: 6 {'www.gemeentegeschiedenis.nl': 51251, 'iisg.amsterdam': 3219, 'nl.wikipedia.org': 1677, 'nl.dbpedia.org': 1648, 'sws.geonames.org': 446, 'rdfs.org': 1}

example.org | s 0 | p 0 | o 0 | total 0

predicate netlocs: 6 | {'www.gemeentegeschiedenis.nl': 44768, 'www.w3.org': 21508, 'www.opengis.net': 9657, 'purl.org': 9, 'rdfs.org': 6, 'xmlns.com': 1}
predicates: 30


contexts  :            1
statements:      854.189
literals  :      688.455
literal ratio:    0,81

subject netlocs: 3 {'example.org': 854169, 'data.anansi.clariah.nl': 19, 'creativecommons.org': 1}
object netlocs: 3 {'example.org': 165724, 'data.anansi.clariah.nl': 9, 'creativecommons.org': 1}

example.org | s 854169 | p 350379 | o 165724 | total 1370272

predicate netlocs: 6 | {'timbuctoo.huygens.knaw.nl': 108146, 'schema.org': 229926, 'www.w3.org': 165724, 'example.org': 350379, 'purl.org': 13, 'xmlns.com': 1}
predicates: 27
http://example.org/datasets/u33707283d426f900d4d33707283d426f900d4d0d/bioport/predicate/Bioport_id: 82862
http://example.org/datasets/u33707283d426f900d4d33707283d426f900d4d0d/bioport/predicate/Person_id: 82862
http://example.org/datasets/u33707283d426f900d4d33707283d426f900d4d0d/bioport/predicate/Variant_id: 11604
http://example.org/datasets/u33707283d426f900d4d33707283d426f900d4d0d/bioport/predicate/birthDateRemark: 1729
http://example.org/datasets/u33707283d42

http://timbuctoo.huygens.knaw.nl/static/v5/predicate/names: 108146


contexts  :            1
statements:    1.530.407
literals  :      927.119
literal ratio:    0,61

subject netlocs: 8 {'data.anansi.clariah.nl': 184481, '': 364607, 'schema.org': 45690, 'www.biografischportaal.nl': 734834, 'api.nodegoat.ugent.be': 178814, 'mutations.nodegoat.ugent.be': 18086, 'tic.nodegoat.ugent.be': 3894, 'creativecommons.org': 1}
object netlocs: 8 {'www.biografischportaal.nl': 83280, 'schema.org': 45679, '': 183493, 'mutations.nodegoat.ugent.be': 18086, 'api.nodegoat.ugent.be': 153596, 'data.anansi.clariah.nl': 115265, 'tic.nodegoat.ugent.be': 3888, 'creativecommons.org': 1}

example.org | s 0 | p 0 | o 0 | total 0

predicate netlocs: 9 | {'data.anansi.clariah.nl': 382534, 'www.w3.org': 297070, 'timbuctoo.huygens.knaw.nl': 108189, 'tic.ugent.be': 36172, 'schema.org': 679296, 'tic.nodegoat.ugent.be': 22712, 'purl.org': 4130, 'na.metamatter.nl': 303, 'xmlns.com': 1}
predicates: 117
http://data.anansi.clariah.nl/bioport/predicate/Bioport_id: 98825
http://data.anansi.cla

http://schema.org/Event/congress_series: 2358
http://schema.org/Event/country: 3102
http://schema.org/Event/location_of_the_event: 5652
http://schema.org/Event/location_of_the_event/date_end: 2838
http://schema.org/Event/location_of_the_event/date_start: 2842
http://schema.org/Event/location_of_the_event/geometry: 2806
http://schema.org/Event/location_of_the_event/location_reference: 5612
http://schema.org/Event/location_of_the_event/location_reference_name: 2806
http://schema.org/Event/relation_with_an_organisation: 2956
http://schema.org/Event/relation_with_an_organisation/date_end: 1478
http://schema.org/Event/relation_with_an_organisation/date_start: 1478
http://schema.org/Event/relation_with_an_organisation/geometry: 1431
http://schema.org/Event/relation_with_an_organisation/location_reference: 2862
http://schema.org/Event/relation_with_an_organisation/location_reference_name: 1431
http://schema.org/Event/relation_with_an_organisation/organisation: 2956
http://schema.org/Event/rel

http://schema.org/attendee/geometry: 21501
http://schema.org/attendee/location_reference_name: 21493


http://schema.org/nationality/geometry: 4547
http://schema.org/nationality/location_reference_name: 4547


http://tic.ugent.be/ontology/subset: 36172
http://timbuctoo.huygens.knaw.nl/static/v5/predicate/names: 108174
http://timbuctoo.huygens.knaw.nl/static/v5/vocabulary#hasIndexConfig: 6
http://timbuctoo.huygens.knaw.nl/static/v5/vocabulary#hasViewConfig: 8
http://timbuctoo.huygens.knaw.nl/static/v5/vocabulary#summaryTitlePredicate: 1


https://tic.nodegoat.ugent.be/model/type/organization/acronym: 35
https://tic.nodegoat.ugent.be/model/type/organization/aims_and_activities: 29
https://tic.nodegoat.ugent.be/model/type/organization/alternative_names: 100
https://tic.nodegoat.ugent.be/model/type/organization/country: 268
https://tic.nodegoat.ugent.be/model/type/organization/date_of_establishment: 215
https://tic.nodegoat.ugent.be/model/type/organization/end_date: 128
https://tic.nodegoat.ugent.be/model/type/organization/history: 89
https://tic.nodegoat.ugent.be/model/type/organization/name: 491
https://tic.nodegoat.ugent.be/model/type/organization/place: 252
https://tic.nodegoat.ugent.be/model/type/organization/speeckaert: 13
https://tic.nodegoat.ugent.be/model/type/organization/thematic_field: 1436
https://tic.nodegoat.ugent.be/model/type/organization/thematic_field/date_end: 640
https://tic.nodegoat.ugent.be/model/type/organization/thematic_field/date_start: 640
https://tic.nodegoat.ugent.be/model/type/organization/th

contexts  :            1
statements:    1.408.174
literals  :      604.636
literal ratio:    0,43

subject netlocs: 7 {'': 523266, 'api.nodegoat.ugent.be': 741569, 'schema.org': 68453, 'tic.nodegoat.ugent.be': 52805, 'data.bibliotheken.nl': 22065, 'data.anansi.clariah.nl': 15, 'books.google.be': 1}
object netlocs: 7 {'api.nodegoat.ugent.be': 404871, '': 255374, 'data.bibliotheken.nl': 22065, 'tic.nodegoat.ugent.be': 52777, 'schema.org': 68444, 'data.anansi.clariah.nl': 6, 'books.google.be': 1}

example.org | s 0 | p 0 | o 0 | total 0

predicate netlocs: 5 | {'tic.nodegoat.ugent.be': 590736, 'schema.org': 544669, 'www.w3.org': 242479, 'purl.org': 30289, 'xmlns.com': 1}
predicates: 185


http://schema.org/Event/alternative_names: 464
http://schema.org/Event/checked_ml_: 92
http://schema.org/Event/congress_series: 2450
http://schema.org/Event/country: 3250
http://schema.org/Event/description: 705
http://schema.org/Event/event_category: 2910
http://schema.org/Event/import_status: 605
http://schema.org/Event/location_of_the_event: 3312
http://schema.org/Event/location_of_the_event/comments: 1
http://schema.org/Event/location_of_the_event/date_end: 1655
http://schema.org/Event/location_of_the_event/date_start: 1655
http://schema.org/Event/location_of_the_event/geometry: 1649
http://schema.org/Event/location_of_the_event/location_as_in_source: 10
http://schema.org/Event/location_of_the_event/location_reference: 3298
http://schema.org/Event/location_of_the_event/location_reference_name: 1649
http://schema.org/Event/masterlist_: 1582
http://schema.org/Event/number_in_series: 902
http://schema.org/Event/relation_with_an_event: 118
http://schema.org/Event/relation_with_an_event

http://schema.org/attendee/comments: 665
http://schema.org/attendee/geometry: 33981
http://schema.org/attendee/level: 16630
http://schema.org/attendee/location_reference_name: 33981
http://schema.org/attendee/relation_as_in_source: 536
http://schema.org/attendee/relation_type: 18348
http://schema.org/attendee/role: 16698
http://schema.org/attendee/verified_whole_period_: 1


https://tic.nodegoat.ugent.be/model/type/organization/acronym: 88
https://tic.nodegoat.ugent.be/model/type/organization/aims_and_activities: 31
https://tic.nodegoat.ugent.be/model/type/organization/alternative_names: 1570
https://tic.nodegoat.ugent.be/model/type/organization/checked_ml_: 5
https://tic.nodegoat.ugent.be/model/type/organization/congress_organizer: 151
https://tic.nodegoat.ugent.be/model/type/organization/country: 9566
https://tic.nodegoat.ugent.be/model/type/organization/date_of_establishment: 1300
https://tic.nodegoat.ugent.be/model/type/organization/description: 705
https://tic.nodegoat.ugent.be/model/type/organization/end_date: 304
https://tic.nodegoat.ugent.be/model/type/organization/history: 322
https://tic.nodegoat.ugent.be/model/type/organization/identified_: 890
https://tic.nodegoat.ugent.be/model/type/organization/location: 13276
https://tic.nodegoat.ugent.be/model/type/organization/location/comments: 10
https://tic.nodegoat.ugent.be/model/type/organization/loca

contexts  :            1
statements:    1.361.150
literals  :      799.256
literal ratio:    0,59

subject netlocs: 6 {'example.org': 1339392, 'sws.geonames.org': 4236, 'www.dwc.knaw.nl': 7832, 'timbuctoo.huygens.knaw.nl': 7549, 'viaf.org': 2137, 'data.anansi.clariah.nl': 4}
object netlocs: 5 {'example.org': 540140, 'sws.geonames.org': 4236, 'viaf.org': 2137, 'www.dwc.knaw.nl': 7832, 'timbuctoo.huygens.knaw.nl': 7549}

example.org | s 1339392 | p 576238 | o 540140 | total 2455770

predicate netlocs: 7 | {'example.org': 576238, 'schema.org': 330680, 'www.w3.org': 333262, 'timbuctoo.huygens.knaw.nl': 118843, 'www.geonames.org': 2123, 'xmlns.com': 2, 'purl.org': 2}
predicates: 60
http://example.org/datasets/u33707283d426f900d4d33707283d426f900d4d0d/bia/predicate/Country: 2123
http://example.org/datasets/u33707283d426f900d4d33707283d426f900d4d0d/bia/predicate/aboutPerson: 24978
http://example.org/datasets/u33707283d426f900d4d33707283d426f900d4d0d/bia/predicate/atInstitute: 4394
http://exam

http://schema.org/label: 588


http://timbuctoo.huygens.knaw.nl/static/v5/predicate/names: 118840
http://timbuctoo.huygens.knaw.nl/static/v5/vocabulary#hasIndexConfig: 1
http://timbuctoo.huygens.knaw.nl/static/v5/vocabulary#hasViewConfig: 1
http://timbuctoo.huygens.knaw.nl/static/v5/vocabulary#summaryTitlePredicate: 1


<class 'ConnectionResetError'>


<class 'rdflib.plugins.parsers.ntriples.ParseError'>


### Missing Context

- You might expect that the subjects of this `personen` dataset are **typed** as persons: `<x> <is a> <foaf:Person>`
- Invention of ontologies without tenable IRIs: `example.org` is used as namespace

A lot of **context** is assumed and not explicit.