In [1]:
from copy import deepcopy
import pickle
import os
import sys

from SPARQLWrapper import SPARQLWrapper, JSON
from ontotrees import OntologyTrees

from dotenv import load_dotenv

# Load variables from a local .env file into the process environment; the
# SPARQL endpoint address and credentials are read later through os.getenv().
load_dotenv()


# Property URIs for which check_property() returns False immediately, without
# sending a query to the endpoint.
# NOTE(review): the reason these three properties are excluded is not recorded
# in this notebook -- confirm with the author.
ignore_properties = [
    "http://dbpedia.org/ontology/birthName",
    "http://www.movieontology.org/2009/11/09/movieontology.owl#companyName",
    "http://www.movieontology.org/2009/10/01/movieontology.owl#title"
]

### Filtrar nós inválidos
Funções para remover nós de propriedades que não retornem resultados na Ontologia

Para realizar essa operação é necessária a criação de um arquivo .env com as credenciais:

- Credenciais do _endpoint SPARQL_ em um arquivo _.env_, contendo:
    - **DB_ENDPOINT**: Endereço do endpoint SPARQL
    - **LOGIN**: Usuário de acesso ao banco de triplas
    - **PASSWORD**: Senha de acesso ao banco de triplas

In [2]:
def check_property(sparql_wrapper, prop_uri):
    """Tell whether the endpoint holds at least one triple using ``prop_uri``.

    URIs listed in the module-level ``ignore_properties`` are answered with
    ``False`` straight away, without contacting the endpoint.

    Args:
        sparql_wrapper: Client object on which ``setQuery``,
            ``setReturnFormat`` and ``query`` are called.
        prop_uri: Property URI placed between ``<`` and ``>`` as the
            predicate of the ASK pattern. It is interpolated verbatim.

    Returns:
        ``False`` for an ignored URI; otherwise the ``"boolean"`` entry of
        the converted JSON response.
    """
    is_ignored = prop_uri in ignore_properties
    if is_ignored:
        return False

    # ASK only reports whether the pattern matches; no bindings are fetched.
    ask_query = f"""
    PREFIX owl: <http://www.w3.org/2002/07/owl#>
    PREFIX rdfs: <http://www.w3.org/2000/01/rdf-schema#>
    ASK
    WHERE {{
    ?a <{prop_uri}> ?b.
    }}
    """
    sparql_wrapper.setQuery(ask_query)
    sparql_wrapper.setReturnFormat(JSON)
    return sparql_wrapper.query().convert()["boolean"]


def get_valid_node(sparql_wrapper, key, tree, _cache=None):
    """Return the node stored under ``key`` if it survives pruning, else ``None``.

    A leaf survives when ``check_property`` accepts its ``data``; the original
    node object is returned in that case. An inner node survives when at least
    one of its children survives; a deep copy of it is returned whose
    ``children`` were rebuilt from the surviving children only.

    NOTE(review): an inner node's own property is never checked against the
    endpoint; it is kept or dropped purely through its children -- confirm
    that this is intended.

    Args:
        sparql_wrapper: Client forwarded to ``check_property``.
        key: Key of the node inside ``tree``.
        tree: Mapping from key to node. Nodes are read through ``is_leaf``,
            ``data``, ``children`` and ``add_child``.
        _cache: Optional dict memoizing the outcome per key. Sharing one dict
            across calls on the same tree prevents a subtree from being
            resolved (and its leaves queried) more than once.

    Returns:
        The surviving node, or ``None`` when the node must be dropped.
    """
    if _cache is None:
        _cache = {}
    elif key in _cache:
        # Membership test, not .get(): None is a legitimate cached outcome.
        return _cache[key]

    current = tree[key]
    if current.is_leaf:
        outcome = current if check_property(sparql_wrapper, current.data) else None
    else:
        pruned = deepcopy(current)
        pruned.children = []
        for child_key in current.children:
            kept = get_valid_node(sparql_wrapper, child_key, tree, _cache)
            if kept:
                # Children are re-attached by their data value; presumably a
                # node's key in the tree equals its .data -- verify against
                # OntologyTrees.
                pruned.add_child(kept.data)
        outcome = pruned if pruned.children else None

    _cache[key] = outcome
    return outcome


def adjust_properties_tree(sparql_wrapper, properties):
    """Build a copy of ``properties`` holding only the nodes that survive pruning.

    Every key is resolved through ``get_valid_node``. One memo is shared by
    all of those calls so each node is resolved once; without it every leaf
    would be queried again for each of its ancestors.

    Args:
        sparql_wrapper: Client forwarded to ``get_valid_node``.
        properties: Mapping from key to node describing one property tree.

    Returns:
        A new dict mapping each surviving key to its surviving node.
    """
    resolved = {}
    filtered_tree = {}
    for key in properties:
        valid_node = get_valid_node(sparql_wrapper, key, properties, _cache=resolved)
        if valid_node:
            filtered_tree[key] = valid_node
    return filtered_tree


def remove_nodes_without_results(ontology_tree, filename="ontology_trees.pkl"):
    """Prune the property trees against the SPARQL endpoint and pickle the result.

    The ``object_properties`` and ``data_properties`` trees of
    ``ontology_tree`` are filtered with ``adjust_properties_tree`` and written
    back through ``replace_tree``, so ``ontology_tree`` is modified in place.
    The updated object is then pickled to ``filename``.

    The endpoint address is read from the ``DB_ENDPOINT`` environment
    variable and the credentials from ``LOGIN`` and ``PASSWORD``.

    Args:
        ontology_tree: Object exposing ``get_tree`` and ``replace_tree``.
        filename: Path of the pickle file to write.

    Raises:
        ValueError: If ``DB_ENDPOINT`` is unset or empty. Raised before any
            query is sent or any file is written.
    """
    endpoint = os.getenv("DB_ENDPOINT")
    if not endpoint:
        # Fail fast: without an address the first query would only fail
        # later, with an error unrelated to the actual cause.
        raise ValueError(
            "DB_ENDPOINT is not set; define it in the environment or in a .env file"
        )

    sparql_wrapper = SPARQLWrapper(endpoint)
    sparql_wrapper.setCredentials(os.getenv("LOGIN"), os.getenv("PASSWORD"))

    object_prop_tree = adjust_properties_tree(
        sparql_wrapper, ontology_tree.get_tree("object_properties")
    )
    data_prop_tree = adjust_properties_tree(
        sparql_wrapper, ontology_tree.get_tree("data_properties")
    )

    # Both trees are computed before either is replaced, so a failing query
    # leaves ontology_tree untouched.
    ontology_tree.replace_tree("object_properties", object_prop_tree)
    ontology_tree.replace_tree("data_properties", data_prop_tree)

    with open(filename, "wb") as output:
        print(f"Saving as {filename}")
        pickle.dump(ontology_tree, output, pickle.HIGHEST_PROTOCOL)

In [3]:
# Load the ontology file into an OntologyTrees instance
onto_trees = OntologyTrees(lang="pt-br")
onto_trees.load_ontology("movieontology.ttl", format="ttl")

# Names of the generated trees
print(onto_trees.trees_names)

# Maximum depth of each tree, computed starting from that tree's root
for tree_name in onto_trees.trees_names:
    print(onto_trees.get_max_depth(tree_name, onto_trees.get_root(tree_name)))

['classes', 'object_properties', 'data_properties']
6
3
2


In [4]:
# Filter the property trees of onto_trees against the SPARQL endpoint (the
# object is updated in place) and pickle the result to ontology_trees.pkl.
remove_nodes_without_results(onto_trees, "ontology_trees.pkl")

Saving as ontology_trees.pkl
