Skip to content

Commit

Permalink
Improved tree filter
Browse files Browse the repository at this point in the history
  • Loading branch information
LorenzBuehmann committed Aug 28, 2016
1 parent 34fb62f commit 8443717
Show file tree
Hide file tree
Showing 4 changed files with 49 additions and 22 deletions.
Expand Up @@ -20,7 +20,10 @@

import java.util.HashSet;
import java.util.Set;
import java.util.SortedSet;

import com.google.common.collect.Sets;
import org.apache.jena.vocabulary.RDF;
import org.dllearner.algorithms.qtl.QueryTreeUtils;
import org.dllearner.algorithms.qtl.datastructures.impl.RDFResourceTree;
import org.dllearner.algorithms.qtl.util.Entailment;
Expand All @@ -30,6 +33,7 @@
/**
* A query tree filter that removes edges whose existence is supposed to be
* semantically meaningless from a user's perspective.
*
* @author Lorenz Buehmann
*
*/
Expand All @@ -54,6 +58,7 @@ public void setExistentialMeaninglessProperties(Set<Node> existentialMeaningless

/**
* Returns a new tree based on the input tree.
*
* @param tree the input tree
* @return a filtered new tree
*/
Expand All @@ -67,10 +72,18 @@ public RDFResourceTree filter(RDFResourceTree tree) {

for(Node edge : tree.getEdges()) {
if(existentialMeaninglessProperties.contains(edge)) {
// if the edge is meaningless
// 1. process all children
for (RDFResourceTree child : tree.getChildren(edge)) {
if(child.isResourceNode() || child.isLiteralValueNode() || !child.isLeaf()) {
if(child.isResourceNode() || child.isLiteralValueNode()) {
RDFResourceTree newChild = filter(child);
newTree.addChild(newChild, edge);
} else {
RDFResourceTree newChild = filter(child);
SortedSet<Node> childEdges = newChild.getEdges();
if(!childEdges.isEmpty() && !(childEdges.size() == 1 && childEdges.contains(RDF.type.asNode()))) {
newTree.addChild(newChild, edge);
}
}
}
} else {
Expand Down
Expand Up @@ -18,15 +18,6 @@
*/
package org.dllearner.algorithms.qtl.util.filters;

import java.io.File;
import java.io.IOException;
import java.net.URISyntaxException;
import java.util.List;
import java.util.Set;
import java.util.TreeSet;

import org.dllearner.kb.SparqlEndpointKS;

import com.google.common.base.Charsets;
import com.google.common.io.Files;
import org.apache.jena.graph.Node;
Expand All @@ -36,6 +27,14 @@
import org.apache.jena.query.ResultSet;
import org.apache.jena.rdf.model.Resource;
import org.apache.jena.sparql.util.NodeComparator;
import org.dllearner.kb.SparqlEndpointKS;

import java.io.File;
import java.io.IOException;
import java.net.URISyntaxException;
import java.util.List;
import java.util.Set;
import java.util.TreeSet;

/**
* @author Lorenz Buehmann
Expand Down
Expand Up @@ -488,6 +488,14 @@ public void run(int maxNrOfProcessedQueries, int maxTreeDepth, int[] exampleInte
logger.info("got {} empty queries.", emptyQueries.size());
queries.removeAll(emptyQueries);

// Minimum number of positive examples: collect every query whose entry in
// query2Examples has fewer than 2 correct positive example candidates,
// report how many were found and drop them from the queries to process.
Set<String> lowNrOfExamplesQueries = query2Examples.entrySet().stream()
        .filter(e -> e.getValue().correctPosExampleCandidates.size() < 2)
        .map(e -> e.getKey())
        .collect(Collectors.toSet());
// log the size of the set computed above (not the size of emptyQueries)
logger.info("got {} queries with < 2 pos. examples.", lowNrOfExamplesQueries.size());
queries.removeAll(lowNrOfExamplesQueries);


final int totalNrOfQTLRuns = heuristics.length * this.measures.length * nrOfExamplesIntervals.length * noiseIntervals.length * queries.size();
logger.info("#QTL runs: " + totalNrOfQTLRuns);
Expand Down Expand Up @@ -569,7 +577,7 @@ public void run(int maxNrOfProcessedQueries, int maxTreeDepth, int[] exampleInte
// loop over SPARQL queries
for (final String sparqlQuery : queries) {

if(!(sparqlQuery.contains("Queen_Victoria")))continue;
// if(!(sparqlQuery.contains("Queen_Victoria")))continue;

tp.submit(() -> {

Expand Down Expand Up @@ -1439,11 +1447,11 @@ private List<RDFResourceTree> getQueryTrees(List<String> resources){

private RDFResourceTree getQueryTree(String resource){
// get CBD
logger.debug("loading data for {} ...", resource);
logger.info("loading data for {} ...", resource);
MonitorFactory.getTimeMonitor(TimeMonitors.CBD_RETRIEVAL.name()).start();
Model cbd = cbdGen.getConciseBoundedDescription(resource, maxTreeDepth);
MonitorFactory.getTimeMonitor(TimeMonitors.CBD_RETRIEVAL.name()).stop();
logger.debug("got {} triples in {}ms.", cbd.size(), MonitorFactory.getTimeMonitor(TimeMonitors.CBD_RETRIEVAL.name()).getLastValue());
logger.info("got {} triples in {}ms.", cbd.size(), MonitorFactory.getTimeMonitor(TimeMonitors.CBD_RETRIEVAL.name()).getLastValue());

// rewrite NAN to NaN to avoid parse exception
try(ByteArrayOutputStream baos = new ByteArrayOutputStream()){
Expand All @@ -1459,11 +1467,11 @@ private RDFResourceTree getQueryTree(String resource){


// generate tree
logger.debug("generating query tree for {} ...", resource);
logger.info("generating query tree for {} ...", resource);
MonitorFactory.getTimeMonitor(TimeMonitors.TREE_GENERATION.name()).start();
RDFResourceTree tree = queryTreeFactory.getQueryTree(resource, cbd, maxTreeDepth);
MonitorFactory.getTimeMonitor(TimeMonitors.TREE_GENERATION.name()).stop();
logger.debug("generating query tree for {} took {}ms.", resource, MonitorFactory.getTimeMonitor(TimeMonitors.TREE_GENERATION.name()).getLastValue());
logger.info("generating query tree for {} took {}ms.", resource, MonitorFactory.getTimeMonitor(TimeMonitors.TREE_GENERATION.name()).getLastValue());

// System.out.println(tree.getStringRepresentation());

Expand Down Expand Up @@ -1810,7 +1818,14 @@ private Score computeScore(String referenceSparqlQuery, RDFResourceTree tree, do
// apply some filters
QueryTreeUtils.removeVarLeafs(tree);
QueryTreeUtils.prune(tree, null, Entailment.RDF);


// remove redundant rdf:type triples
QueryTreeUtils.keepMostSpecificTypes(tree, dataset.getReasoner());

// apply the predicate existence filter to the tree
PredicateExistenceFilter filter = new PredicateExistenceFilterDBpedia(null);
tree = filter.filter(tree);

String learnedSPARQLQuery = QueryTreeUtils.toSPARQLQueryString(tree, dataset.getBaseIRI(), dataset.getPrefixMapping());
logger.info("learned SPARQL query:{}", learnedSPARQLQuery);

Expand Down
Expand Up @@ -477,9 +477,9 @@ http://dbpedia.org/ontology/dbnlCodeDutch
http://dbpedia.org/ontology/dcc
http://dbpedia.org/ontology/deadInFightDate
http://dbpedia.org/ontology/deadInFightPlace
http://dbpedia.org/ontology/deathAge
http://dbpedia.org/ontology/deathDate
http://dbpedia.org/ontology/deathYear
#http://dbpedia.org/ontology/deathAge
#http://dbpedia.org/ontology/deathDate
#http://dbpedia.org/ontology/deathYear
http://dbpedia.org/ontology/debut
http://dbpedia.org/ontology/dec
http://dbpedia.org/ontology/decay
Expand Down Expand Up @@ -2052,8 +2052,8 @@ http://dbpedia.org/ontology/daylightSavingTimeZone
http://dbpedia.org/ontology/deFactoLanguage
http://dbpedia.org/ontology/dean
http://dbpedia.org/ontology/deanery
http://dbpedia.org/ontology/deathCause
http://dbpedia.org/ontology/deathPlace
#http://dbpedia.org/ontology/deathCause
#http://dbpedia.org/ontology/deathPlace
http://dbpedia.org/ontology/debutTeam
http://dbpedia.org/ontology/debutWork
http://dbpedia.org/ontology/decoration
Expand Down Expand Up @@ -2742,7 +2742,7 @@ http://dbpedia.org/ontology/sportCountry
http://dbpedia.org/ontology/sportDiscipline
http://dbpedia.org/ontology/sportGoverningBody
http://dbpedia.org/ontology/sportSpecialty
http://dbpedia.org/ontology/spouse
#http://dbpedia.org/ontology/spouse
http://dbpedia.org/ontology/spurOf
http://dbpedia.org/ontology/spurType
http://dbpedia.org/ontology/stadium
Expand Down

0 comments on commit 8443717

Please sign in to comment.