Skip to content

Commit

Permalink
LUBM dataset
Browse files Browse the repository at this point in the history
  • Loading branch information
LorenzBuehmann committed Aug 27, 2016
1 parent de7fdb7 commit a64ab52
Show file tree
Hide file tree
Showing 3 changed files with 326 additions and 2 deletions.
Original file line number Diff line number Diff line change
@@ -0,0 +1,142 @@
/**
* Copyright (C) 2007 - 2016, Jens Lehmann
*
* This file is part of DL-Learner.
*
* DL-Learner is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation; either version 3 of the License, or
* (at your option) any later version.
*
* DL-Learner is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program. If not, see <http://www.gnu.org/licenses/>.
*/
package org.dllearner.algorithms.qtl.experiments;

import com.google.common.collect.Lists;
import com.google.common.collect.Sets;
import org.aksw.jena_sparql_api.cache.h2.CacheUtilsH2;
import org.aksw.jena_sparql_api.core.FluentQueryExecutionFactory;
import org.aksw.jena_sparql_api.core.QueryExecutionFactory;
import org.aksw.jena_sparql_api.http.QueryExecutionHttpWrapper;
import org.apache.jena.query.QueryFactory;
import org.apache.jena.rdf.model.Statement;
import org.apache.jena.riot.WebContent;
import org.apache.jena.shared.PrefixMapping;
import org.apache.jena.sparql.engine.http.QueryEngineHTTP;
import org.dllearner.algorithms.qtl.util.StopURIsOWL;
import org.dllearner.algorithms.qtl.util.StopURIsRDFS;
import org.dllearner.algorithms.qtl.util.filters.ObjectDropStatementFilter;
import org.dllearner.algorithms.qtl.util.filters.PredicateDropStatementFilter;
import org.dllearner.core.ComponentInitException;
import org.dllearner.kb.SparqlEndpointKS;
import org.dllearner.kb.sparql.SparqlEndpoint;
import org.dllearner.reasoning.SPARQLReasoner;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

import java.io.File;
import java.io.IOException;
import java.nio.file.Files;
import java.nio.file.Paths;
import java.util.ArrayList;
import java.util.List;
import java.util.concurrent.TimeUnit;
import java.util.function.Predicate;

/**
* @author Lorenz Buehmann
*
*/
/**
 * Evaluation dataset named "LUBM": wires a cached SPARQL knowledge source and reasoner for the
 * given endpoint and loads the benchmark queries from {@link #QUERIES_FILE}.
 *
 * <p>Initialisation is best-effort: failures while initialising the knowledge source, reading the
 * query file or initialising the reasoner are logged and the constructor continues.</p>
 */
public class LUBMEvaluationDataset extends EvaluationDataset {

    private static final Logger log = LoggerFactory.getLogger(LUBMEvaluationDataset.class);

    /** Path of the query file; being relative, it is resolved against the JVM working directory. */
    private static final String QUERIES_FILE = "src/test/resources/org/dllearner/algorithms/qtl/lubm_queries.txt";

    /** Namespace used both as the base IRI and as the target of the {@code ub} prefix. */
    private static final String UB_NAMESPACE = "http://www.lehigh.edu/~zhp2/2004/0401/univ-bench.owl#";

    /**
     * Creates the dataset.
     *
     * @param benchmarkDirectory directory under which the {@code cache-LUBM} cache directory is placed
     * @param endpoint           SPARQL endpoint the knowledge source and the query execution factory talk to
     */
    public LUBMEvaluationDataset(File benchmarkDirectory, SparqlEndpoint endpoint) {
        super("LUBM");

        // set KS
        File cacheDir = new File(benchmarkDirectory, "cache-" + getName());
        // the same cache location is handed to the H2 cache and to the knowledge source
        String cachePath = cacheDir.getAbsolutePath() + "/sparql/qtl-AAAI-cache;mv_store=false";

        QueryExecutionFactory qef = FluentQueryExecutionFactory
                .http(endpoint.getURL().toString(), endpoint.getDefaultGraphURIs())
                .config().withPostProcessor(qe -> ((QueryEngineHTTP) ((QueryExecutionHttpWrapper) qe).getDecoratee())
                        .setModelContentType(WebContent.contentTypeRDFXML))
                .end()
                .create();
        qef = CacheUtilsH2.createQueryExecutionFactory(qef, cachePath, false, TimeUnit.DAYS.toMillis(7));

        try {
            ks = new SparqlEndpointKS(endpoint);
            ks.setCacheDir(cachePath);
            ks.setQueryExecutionFactory(qef);
            ks.init();
        } catch (ComponentInitException e) {
            log.error("Failed to initialize the knowledge source for endpoint {}", endpoint, e);
        }

        // read SPARQL queries; on an I/O failure the list simply stays empty
        sparqlQueries = new ArrayList<>();
        try {
            sparqlQueries.addAll(parseQueries(Files.readAllLines(Paths.get(QUERIES_FILE))));
        } catch (IOException e) {
            log.error("Failed to read SPARQL queries from {}", QUERIES_FILE, e);
        }

        reasoner = new SPARQLReasoner(ks);
        try {
            reasoner.init();
        } catch (ComponentInitException e) {
            log.error("Failed to initialize the SPARQL reasoner", e);
        }

        baseIRI = UB_NAMESPACE;
        prefixMapping = PrefixMapping.Factory.create().withDefaultMappings(PrefixMapping.Standard);
        prefixMapping.setNsPrefix("ub", UB_NAMESPACE);
    }

    /**
     * Splits the lines of the query file into individual query strings.
     *
     * <p>A line starting with {@code #} discards whatever has been collected so far, an empty line
     * terminates the current query, and every other line is appended to the current query followed
     * by a newline character.</p>
     *
     * @param lines the lines of the query file, in order
     * @return the queries in file order; empty if {@code lines} contains no query
     */
    private static List<String> parseQueries(List<String> lines) {
        List<String> queries = new ArrayList<>();
        StringBuilder current = new StringBuilder();
        for (String line : lines) {
            if (line.startsWith("#")) {
                current.setLength(0);
            } else if (line.isEmpty()) {
                if (current.length() > 0) {
                    queries.add(current.toString());
                    current.setLength(0);
                }
            } else {
                current.append(line).append('\n');
            }
        }
        // a file that does not end with a blank line would otherwise lose its last query
        if (current.length() > 0) {
            queries.add(current.toString());
        }
        return queries;
    }

    /**
     * Returns the statement filters of this dataset: predicate filters built from the RDFS and OWL
     * stop URIs, an object filter built from the OWL stop URIs, and a predicate filter for
     * {@code owl:equivalentClass} and {@code owl:disjointWith}.
     *
     * @return a new mutable list of filters
     */
    @Override
    @SuppressWarnings("unchecked")
    public List<Predicate<Statement>> getQueryTreeFilters() {
        return Lists.newArrayList(
                new PredicateDropStatementFilter(StopURIsRDFS.get()),
                new PredicateDropStatementFilter(StopURIsOWL.get()),
                new ObjectDropStatementFilter(StopURIsOWL.get()),
                new PredicateDropStatementFilter(
                        Sets.newHashSet(
                                "http://www.w3.org/2002/07/owl#equivalentClass",
                                "http://www.w3.org/2002/07/owl#disjointWith"))
        );
    }

    /**
     * Manual smoke test: prints the number of loaded queries, each parsed query, and whether each
     * query yields at least one result on the hard-coded endpoint.
     *
     * @param args ignored
     * @throws Exception if endpoint creation, query parsing or query execution fails
     */
    public static void main(String[] args) throws Exception {
        SparqlEndpoint endpoint = SparqlEndpoint.create("http://sake.informatik.uni-leipzig.de:8890/sparql",
                "http://lubm.org");
        LUBMEvaluationDataset ds = new LUBMEvaluationDataset(new File("/tmp/test"), endpoint);
        List<String> queries = ds.getSparqlQueries();
        System.out.println(queries.size());
        queries.forEach(q -> System.out.println(QueryFactory.create(q)));
        // NOTE(review): the query executions created here are never closed
        queries.forEach(q -> System.out.println(ds.getKS().getQueryExecutionFactory().createQueryExecution(q).execSelect().hasNext()));
    }

}
Original file line number Diff line number Diff line change
Expand Up @@ -175,10 +175,14 @@ public List<Predicate<Statement>> getQueryTreeFilters() {
);
}

public static void main(String[] args) {
List<String> queries = new QALDEvaluationDataset(new File("/tmp/test"), SparqlEndpoint.getEndpointDBpedia()).getSparqlQueries();
public static void main(String[] args) throws Exception{
SparqlEndpoint endpoint = SparqlEndpoint.create("http://sake.informatik.uni-leipzig.de:8890/sparql",
"http://dbpedia.org");
QALDEvaluationDataset ds = new QALDEvaluationDataset(new File("/tmp/test"), endpoint);
List<String> queries = ds.getSparqlQueries();
System.out.println(queries.size());
queries.forEach(q -> System.out.println(QueryFactory.create(q)));
queries.forEach(q -> System.out.println(ds.getKS().getQueryExecutionFactory().createQueryExecution(q).execSelect().hasNext()));


}
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,178 @@
# Query1
# This query bears large input and high selectivity. It queries about just one class and
# one property and does not assume any hierarchy information or inference.
PREFIX rdf: <http://www.w3.org/1999/02/22-rdf-syntax-ns#>
PREFIX ub: <http://swat.cse.lehigh.edu/onto/univ-bench.owl#>
SELECT ?X
WHERE
{?X rdf:type ub:GraduateStudent .
?X ub:takesCourse <http://www.Department0.University0.edu/GraduateCourse0>}

# Query2
# This query increases in complexity: 3 classes and 3 properties are involved. Additionally,
# there is a triangular pattern of relationships between the objects involved.
PREFIX rdf: <http://www.w3.org/1999/02/22-rdf-syntax-ns#>
PREFIX ub: <http://swat.cse.lehigh.edu/onto/univ-bench.owl#>
SELECT ?X ?Y ?Z
WHERE
{?X rdf:type ub:GraduateStudent .
?Y rdf:type ub:University .
?Z rdf:type ub:Department .
?X ub:memberOf ?Z .
?Z ub:subOrganizationOf ?Y .
?X ub:undergraduateDegreeFrom ?Y}

# Query3
# This query is similar to Query 1 but class Publication has a wide hierarchy.
PREFIX rdf: <http://www.w3.org/1999/02/22-rdf-syntax-ns#>
PREFIX ub: <http://swat.cse.lehigh.edu/onto/univ-bench.owl#>
SELECT ?X
WHERE
{?X rdf:type ub:Publication .
?X ub:publicationAuthor <http://www.Department0.University0.edu/AssistantProfessor0>}

# Query4
# This query has small input and high selectivity. It assumes subClassOf relationship
# between Professor and its subclasses. Class Professor has a wide hierarchy. Another
# feature is that it queries about multiple properties of a single class.
# The property path rdf:type/rdfs:subClassOf* needs the rdfs prefix declared below.
PREFIX rdf: <http://www.w3.org/1999/02/22-rdf-syntax-ns#>
PREFIX rdfs: <http://www.w3.org/2000/01/rdf-schema#>
PREFIX ub: <http://swat.cse.lehigh.edu/onto/univ-bench.owl#>
SELECT ?X ?Y1 ?Y2 ?Y3
WHERE
{?X rdf:type/rdfs:subClassOf* ub:Professor .
?X ub:worksFor <http://www.Department0.University0.edu> .
?X ub:name ?Y1 .
?X ub:emailAddress ?Y2 .
?X ub:telephone ?Y3}

# Query5
# This query assumes subClassOf relationship between Person and its subclasses
# and subPropertyOf relationship between memberOf and its subproperties.
# Moreover, class Person features a deep and wide hierarchy.
PREFIX rdf: <http://www.w3.org/1999/02/22-rdf-syntax-ns#>
PREFIX ub: <http://swat.cse.lehigh.edu/onto/univ-bench.owl#>
SELECT ?X
WHERE
{?X rdf:type ub:Person .
?X ub:memberOf <http://www.Department0.University0.edu>}


# Query6
# This query queries about only one class. But it assumes both the explicit
# subClassOf relationship between UndergraduateStudent and Student and the
# implicit one between GraduateStudent and Student. In addition, it has large
# input and low selectivity.
PREFIX rdf: <http://www.w3.org/1999/02/22-rdf-syntax-ns#>
PREFIX ub: <http://swat.cse.lehigh.edu/onto/univ-bench.owl#>
SELECT ?X WHERE {?X rdf:type ub:Student}


# Query7
# This query is similar to Query 6 in terms of class Student but it increases in the
# number of classes and properties and its selectivity is high.
PREFIX rdf: <http://www.w3.org/1999/02/22-rdf-syntax-ns#>
PREFIX ub: <http://swat.cse.lehigh.edu/onto/univ-bench.owl#>
SELECT ?X ?Y
WHERE
{?X rdf:type ub:Student .
?Y rdf:type ub:Course .
?X ub:takesCourse ?Y .
<http://www.Department0.University0.edu/AssociateProfessor0> ub:teacherOf ?Y}


# Query8
# This query is further more complex than Query 7 by including one more property.
PREFIX rdf: <http://www.w3.org/1999/02/22-rdf-syntax-ns#>
PREFIX ub: <http://swat.cse.lehigh.edu/onto/univ-bench.owl#>
SELECT ?X ?Y ?Z
WHERE
{?X rdf:type ub:Student .
?Y rdf:type ub:Department .
?X ub:memberOf ?Y .
?Y ub:subOrganizationOf <http://www.University0.edu> .
?X ub:emailAddress ?Z}


# Query9
# Besides the aforementioned features of class Student and the wide hierarchy of
# class Faculty, like Query 2, this query is characterized by the most classes and
# properties in the query set and there is a triangular pattern of relationships.
PREFIX rdf: <http://www.w3.org/1999/02/22-rdf-syntax-ns#>
PREFIX ub: <http://swat.cse.lehigh.edu/onto/univ-bench.owl#>
SELECT ?X ?Y ?Z
WHERE
{?X rdf:type ub:Student .
?Y rdf:type ub:Faculty .
?Z rdf:type ub:Course .
?X ub:advisor ?Y .
?Y ub:teacherOf ?Z .
?X ub:takesCourse ?Z}


# Query10
# This query differs from Query 6, 7, 8 and 9 in that it only requires the
# (implicit) subClassOf relationship between GraduateStudent and Student, i.e.,
# subClassOf relationship between UndergraduateStudent and Student does not add
# to the results.
PREFIX rdf: <http://www.w3.org/1999/02/22-rdf-syntax-ns#>
PREFIX ub: <http://swat.cse.lehigh.edu/onto/univ-bench.owl#>
SELECT ?X
WHERE
{?X rdf:type ub:Student .
?X ub:takesCourse
<http://www.Department0.University0.edu/GraduateCourse0>}


# Query11
# Query 11, 12 and 13 are intended to verify the presence of certain OWL reasoning
# capabilities in the system. In this query, property subOrganizationOf is defined
# as transitive. Since in the benchmark data, instances of ResearchGroup are stated
# as a sub-organization of a Department individual and the latter a sub-organization of
# a University individual, inference about the subOrganizationOf relationship between
# instances of ResearchGroup and University is required to answer this query.
# Additionally, its input is small.
PREFIX rdf: <http://www.w3.org/1999/02/22-rdf-syntax-ns#>
PREFIX ub: <http://swat.cse.lehigh.edu/onto/univ-bench.owl#>
SELECT ?X
WHERE
{?X rdf:type ub:ResearchGroup .
?X ub:subOrganizationOf <http://www.University0.edu>}


# Query12
# The benchmark data do not produce any instances of class Chair. Instead, each
# Department individual is linked to the chair professor of that department by
# property headOf. Hence this query requires realization, i.e., inference that
# that professor is an instance of class Chair because he or she is the head of a
# department. Input of this query is small as well.
PREFIX rdf: <http://www.w3.org/1999/02/22-rdf-syntax-ns#>
PREFIX ub: <http://swat.cse.lehigh.edu/onto/univ-bench.owl#>
SELECT ?X ?Y
WHERE
{?X rdf:type ub:Chair .
?Y rdf:type ub:Department .
?X ub:worksFor ?Y .
?Y ub:subOrganizationOf <http://www.University0.edu>}


# Query13
# Property hasAlumnus is defined in the benchmark ontology as the inverse of
# property degreeFrom, which has three subproperties: undergraduateDegreeFrom,
# mastersDegreeFrom, and doctoralDegreeFrom. The benchmark data state a person as
# an alumnus of a university using one of these three subproperties instead of
# hasAlumnus. Therefore, this query assumes subPropertyOf relationships between
# degreeFrom and its subproperties, and also requires inference about inverseOf.
PREFIX rdf: <http://www.w3.org/1999/02/22-rdf-syntax-ns#>
PREFIX ub: <http://swat.cse.lehigh.edu/onto/univ-bench.owl#>
SELECT ?X
WHERE
{?X rdf:type ub:Person .
<http://www.University0.edu> ub:hasAlumnus ?X}


# Query14
# This query is the simplest in the test set. This query represents those with large input and low selectivity and does not assume any hierarchy information or inference.
PREFIX rdf: <http://www.w3.org/1999/02/22-rdf-syntax-ns#>
PREFIX ub: <http://swat.cse.lehigh.edu/onto/univ-bench.owl#>
SELECT ?X
WHERE {?X rdf:type ub:UndergraduateStudent}

0 comments on commit a64ab52

Please sign in to comment.