In [1]:
import json
import yaml
import pandas as pd

from SPARQLWrapper import SPARQLWrapper, JSON

In [2]:
# Read one ontology identifier per line. Surrounding whitespace is removed and
# blank lines are skipped: an empty name would otherwise produce an endpoint
# for the namespace 'obo-', which is not one of the listed ontologies.
with open('../ontos.txt') as fp:
    ontologies = [name for name in (line.strip() for line in fp) if name]

# All ontologies wildcard
ontologies.append('ontos')

# Maps ontology identifier -> configured SPARQLWrapper for its namespace.
endpoints = dict()

for ontology in ontologies:
    e = SPARQLWrapper(f'http://127.0.0.1:9999/blazegraph/namespace/obo-{ontology}/sparql')
    # Queries are sent via POST and results are requested as JSON.
    e.setRequestMethod('postdirectly')
    e.setMethod('POST')
    e.setReturnFormat(JSON)
    endpoints[ontology] = e

In [3]:
with open('props_o_types.rq') as fp:
    otypes_query = fp.read()


def _binding_to_record(r):
    """Flatten one SPARQL JSON result binding into a plain record dict.

    'p' is copied as a string, the four counters are converted to int, and
    the optional 'range1' / 'type1' variables fall back to '' when unbound.
    """
    record = {'prop': r['p']['value']}
    for field in ('count', 'uris', 'blanks', 'literals'):
        record[field] = int(r[field]['value'])
    for field, variable in (('range', 'range1'), ('type', 'type1')):
        record[field] = r[variable]['value'] if variable in r else ''
    return record


# Maps ontology identifier -> list of records, one per result binding.
ont_results = {}

for ontology, e in endpoints.items():
    e.setQuery(otypes_query)
    data = e.query().convert()['results']['bindings']
    ont_results[ontology] = [_binding_to_record(r) for r in data]
    # Progress indicator: one line per finished endpoint.
    print(ontology)

aeo
agro
apollo-sv
bfo
bto
caro
chebi
cl
doid
dron
ehdaa2
envo
fobi
foodon
gaz
go
hp
iao
mp
ncbitaxon
obi
pato
pco
peco
po
ro
symp
uberon
uo
xco
ontos


In [9]:
# Persist the per-ontology query results as indented JSON.
with open('results/otypes.json', mode='w') as fp:
    json.dump(obj=ont_results, fp=fp, indent=4)

In [5]:
# One DataFrame per ontology, built from its records and indexed by 'prop'.
ont_dfs = dict(
    (name, pd.DataFrame.from_records(records).set_index('prop'))
    for name, records in ont_results.items()
)

def calc_errors(r):
    """Return the (smaller, larger) of the two object-kind totals for a row.

    The two totals compared are ``r['uris'] + r['blanks']`` and
    ``r['literals']``.

    NOTE(review): presumably the smaller total is the best-case and the
    larger the worst-case number of mistyped objects - confirm.

    Args:
        r: mapping-like row exposing 'uris', 'blanks' and 'literals'.

    Returns:
        A 2-tuple ``(minimum, maximum)`` of the two totals.
    """
    non_literals = r['uris'] + r['blanks']
    literals = r['literals']
    return min(non_literals, literals), max(non_literals, literals)

for df in ont_dfs.values():
    # calc_errors yields one (min, max) pair per row; zip(*...) transposes the
    # pairs into one sequence of minima and one of maxima.
    row_min, row_max = zip(*df.apply(calc_errors, axis=1))
    df['errors_min'] = row_min
    df['errors_max'] = row_max

In [6]:
# Start from a copy of the combined 'ontos' frame and expose its index as a
# regular 'prop' column as well.
df_all = ont_dfs['ontos'].assign(prop=ont_dfs['ontos'].index)

# Add the three per-ontology counters as '<ontology>_<counter>' columns.
count_columns = ['uris', 'blanks', 'literals']

for ont, df in ont_dfs.items():
    if ont != 'ontos':
        df_all[[f'{ont}_{column}' for column in count_columns]] = df[count_columns]

In [7]:
with open('results/otypes_review.yaml') as fp:
    review = yaml.safe_load(fp)

# Build the review table and attach the statistics from df_all by matching
# the review's 'prop' column against df_all's index; column names present in
# both frames get the suffix '1' on the review side.
df_review = pd.DataFrame.from_records(review).join(df_all, on='prop', lsuffix='1')

In [11]:
# Write the joined review table as indented JSON, keyed by row label.
df_review.to_json(path_or_buf='results/otypes_invalid.json', orient='index', indent=4)