Skip to content

Commit

Permalink
Fixed DUMP federation/synchronization bugs and Theme/Subject
Browse files Browse the repository at this point in the history
deserialization.
  • Loading branch information
emastrosimone committed Sep 10, 2018
1 parent 692dc3c commit 3dd00e6
Show file tree
Hide file tree
Showing 8 changed files with 96 additions and 63 deletions.
1 change: 1 addition & 0 deletions Idra/src/main/java/it/eng/idra/beans/dcat/SKOSConcept.java
Original file line number Diff line number Diff line change
Expand Up @@ -94,6 +94,7 @@ public SKOSConcept(String propertyUri, String resourceUri, List<SKOSPrefLabel> p
setPrefLabel(prefLabel != null ? prefLabel : Arrays.asList(new SKOSPrefLabel()));
}


/*
* @Id
*
Expand Down
14 changes: 7 additions & 7 deletions Idra/src/main/java/it/eng/idra/beans/dcat/SKOSConceptStatus.java
Original file line number Diff line number Diff line change
Expand Up @@ -30,7 +30,7 @@
@Entity
@Table(name = "status")
@DiscriminatorValue("3")
public class SKOSConceptStatus extends SKOSConcept{
public class SKOSConceptStatus extends SKOSConcept {

/**
*
Expand All @@ -40,15 +40,15 @@ public class SKOSConceptStatus extends SKOSConcept{
public SKOSConceptStatus() {
super();
}

public SKOSConceptStatus(SKOSConcept concept) {
super(concept.getPropertyUri(),concept.getPropertyUri(),concept.getPrefLabel(),concept.getNodeID());
super(concept.getPropertyUri(), concept.getResourceUri(), concept.getPrefLabel(), concept.getNodeID());
}

public SKOSConceptStatus(String propertyUri, String resourceUri, List<SKOSPrefLabel> prefLabel, String nodeID) {
super(propertyUri,resourceUri,prefLabel,nodeID);
super(propertyUri, resourceUri, prefLabel, nodeID);
}

public static SKOSConceptStatus jsonToSKOSConcept(JSONObject obj, String propertyUri, String nodeID) {

return new SKOSConceptStatus(propertyUri, obj.optString("resourceUri"),
Expand All @@ -60,5 +60,5 @@ public static SKOSConceptStatus docToSKOSConcept(SolrDocument doc, String proper
return new SKOSConceptStatus(propertyUri, (String) doc.getFieldValue("resourceUri"), SKOSPrefLabel
.jsonArrayToPrefLabelList(new JSONArray(doc.getFieldValue("prefLabel").toString()), nodeID), nodeID);
}

}
Original file line number Diff line number Diff line change
Expand Up @@ -30,7 +30,7 @@
@Entity
@Table(name = "subject")
@DiscriminatorValue("2")
public class SKOSConceptSubject extends SKOSConcept{
public class SKOSConceptSubject extends SKOSConcept {

/**
*
Expand All @@ -40,15 +40,15 @@ public class SKOSConceptSubject extends SKOSConcept{
public SKOSConceptSubject() {
super();
}

public SKOSConceptSubject(SKOSConcept concept) {
super(concept.getPropertyUri(),concept.getPropertyUri(),concept.getPrefLabel(),concept.getNodeID());
super(concept.getPropertyUri(), concept.getResourceUri(), concept.getPrefLabel(), concept.getNodeID());
}

public SKOSConceptSubject(String propertyUri, String resourceUri, List<SKOSPrefLabel> prefLabel, String nodeID) {
super(propertyUri,resourceUri,prefLabel,nodeID);
super(propertyUri, resourceUri, prefLabel, nodeID);
}

public static SKOSConceptSubject jsonToSKOSConcept(JSONObject obj, String propertyUri, String nodeID) {

return new SKOSConceptSubject(propertyUri, obj.optString("resourceUri"),
Expand All @@ -60,5 +60,5 @@ public static SKOSConceptSubject docToSKOSConcept(SolrDocument doc, String prope
return new SKOSConceptSubject(propertyUri, (String) doc.getFieldValue("resourceUri"), SKOSPrefLabel
.jsonArrayToPrefLabelList(new JSONArray(doc.getFieldValue("prefLabel").toString()), nodeID), nodeID);
}

}
12 changes: 6 additions & 6 deletions Idra/src/main/java/it/eng/idra/beans/dcat/SKOSConceptTheme.java
Original file line number Diff line number Diff line change
Expand Up @@ -40,15 +40,15 @@ public class SKOSConceptTheme extends SKOSConcept {
public SKOSConceptTheme() {
super();
}

public SKOSConceptTheme(SKOSConcept concept) {
super(concept.getPropertyUri(),concept.getPropertyUri(),concept.getPrefLabel(),concept.getNodeID());
super(concept.getPropertyUri(), concept.getResourceUri(), concept.getPrefLabel(), concept.getNodeID());
}

public SKOSConceptTheme(String propertyUri, String resourceUri, List<SKOSPrefLabel> prefLabel, String nodeID) {
super(propertyUri,resourceUri,prefLabel,nodeID);
super(propertyUri, resourceUri, prefLabel, nodeID);
}

public static SKOSConceptTheme jsonToSKOSConcept(JSONObject obj, String propertyUri, String nodeID) {

return new SKOSConceptTheme(propertyUri, obj.optString("resourceUri"),
Expand All @@ -60,5 +60,5 @@ public static SKOSConceptTheme docToSKOSConcept(SolrDocument doc, String propert
return new SKOSConceptTheme(propertyUri, (String) doc.getFieldValue("resourceUri"), SKOSPrefLabel
.jsonArrayToPrefLabelList(new JSONArray(doc.getFieldValue("prefLabel").toString()), nodeID), nodeID);
}

}
57 changes: 32 additions & 25 deletions Idra/src/main/java/it/eng/idra/connectors/DCATDumpConnector.java
Original file line number Diff line number Diff line change
Expand Up @@ -32,7 +32,11 @@
import org.apache.http.util.EntityUtils;
import org.apache.jena.rdf.model.Model;
import org.apache.jena.rdf.model.Resource;
import org.apache.jena.rdf.model.Statement;
import org.apache.jena.rdf.model.StmtIterator;
import org.apache.jena.riot.RiotException;
import org.apache.jena.vocabulary.DCAT;
import org.apache.jena.vocabulary.RDF;
import org.apache.logging.log4j.LogManager;
import org.apache.logging.log4j.Logger;
import it.eng.idra.beans.ODFProperty;
Expand Down Expand Up @@ -154,41 +158,44 @@ private List<DCATDataset> getDatasetsFromDumpString(String dumpString) throws Ex

// Pass the Node Host as base URI for the model
Model m = deserializer.dumpToModel(dumpString, node);
Matcher matcher = deserializer.getDatasetPattern(node.getDcatFormat()).matcher(dumpString);
StmtIterator sIt = m.listStatements(null, RDF.type, DCAT.Dataset);
// Matcher matcher =
// deserializer.getDatasetPattern(node.getDcatFormat()).matcher(dumpString);
String datasetURI = null;
int hits = 0;
while (matcher.find()) {

while (sIt.hasNext()) {
datasetURI = null;
try {

switch (node.getDcatFormat()) {

case TURTLE:
datasetURI = matcher.group(1);
break;

// RDFXML is the default
default:
datasetURI = matcher.group(1);
break;

}

if (StringUtils.isNotBlank(datasetURI)) {

Resource r = m.getResource(datasetURI);
datasetsList.add(deserializer.resourceToDataset(nodeID, r));
}
//
// switch (node.getDcatFormat()) {
//
// case TURTLE:
// datasetURI = matcher.group(1);
// break;
//
// // RDFXML is the default
// default:
// datasetURI = matcher.group(1);
// break;
//
// }
//
// if (StringUtils.isNotBlank(datasetURI)) {
//
// Resource r = m.getResource(datasetURI);
// System.out.println(r.getLocalName());
// datasetsList.add(deserializer.resourceToDataset(nodeID, r));
//
Statement s = sIt.nextStatement();
Resource datasetResource = m.getResource(s.getSubject().getURI());
datasetsList.add(deserializer.resourceToDataset(nodeID, datasetResource));

} catch (Exception e) {
logger.info("Skipped dataset - There was an error: " + e.getMessage() + " while deserializing dataset: "
+ datasetURI);
System.out.println(hits++);

}
}


if (datasetsList.size() != 0) {
DCATAPSerializer.writeModelToFile(m, DCATAPFormat.RDFXML, odmsDumpFilePath, "dumpFileString_" + nodeID);
node.setDumpFilePath(odmsDumpFilePath + "dumpFileString_" + nodeID);
Expand Down
39 changes: 29 additions & 10 deletions Idra/src/main/java/it/eng/idra/dcat/dump/DCATAPDeserializer.java
Original file line number Diff line number Diff line change
Expand Up @@ -32,12 +32,12 @@
import org.apache.jena.rdf.model.Model;
import org.apache.jena.rdf.model.ModelFactory;
import org.apache.jena.rdf.model.Property;
import org.apache.jena.rdf.model.RDFNode;
import org.apache.jena.rdf.model.Resource;
import org.apache.jena.rdf.model.ResourceFactory;
import org.apache.jena.rdf.model.ResourceRequiredException;
import org.apache.jena.rdf.model.Statement;
import org.apache.jena.rdf.model.StmtIterator;
import org.apache.jena.riot.Lang;
import org.apache.jena.riot.RiotException;
import org.apache.jena.shared.PropertyNotFoundException;
import org.apache.jena.sparql.vocabulary.FOAF;
Expand Down Expand Up @@ -69,11 +69,12 @@

public class DCATAPDeserializer implements IDCATAPDeserialize {

private static final Pattern rdfDatasetPattern = Pattern.compile("\\w*<dcat:Dataset rdf:about=\\\"(.*)\\\"");
private static final Pattern turtleDatasetPattern = Pattern.compile("<(.*)>\\R\\s*a dcat:Dataset");
protected static final Pattern rdfDatasetPattern = Pattern.compile("\\w*<dcat:Dataset rdf:about=\\\"(.*)\\\"");
protected static final Pattern turtleDatasetPattern = Pattern.compile("<(.*)>\\R\\s*a dcat:Dataset");

private static final String GEO_BASE_URI = "http://publications.europa.eu/mdr/authority/place";
private static final String GEO_BASE_URI_ALT = "http://www.geonames.org";
protected static final String THEME_BASE_URI = "http://publications.europa.eu/resource/authority/data-theme/";
protected static final String GEO_BASE_URI = "http://publications.europa.eu/mdr/authority/place";
protected static final String GEO_BASE_URI_ALT = "http://www.geonames.org";

public DCATAPDeserializer() {
}
Expand Down Expand Up @@ -313,20 +314,27 @@ public <T extends SKOSConcept> List<T> deserializeConcept(String nodeID, Resourc
Statement labelS = labelIt.next();
labelList.add(new SKOSPrefLabel(labelS.getLanguage(), labelS.getString(), nodeID));
}

// For theme, the label is the Final label. e.g. http://publications.europa.eu/resource/authority/data-theme/GOVE
} else if (toExtractP.getURI().equals(DCAT.theme.getURI())) {
String extractedLabel = extractThemeFromURI(conceptURI);
labelList = new ArrayList<SKOSPrefLabel>();
labelList.add(new SKOSPrefLabel("ENG", extractedLabel, nodeID));

// For subject, the label is the entire URI. e.g. http://eurovoc.europa.eu/106
} else if (toExtractP.getURI().equals(DCTerms.subject.getURI())) {
String extractedLabel = conceptURI;
labelList = new ArrayList<SKOSPrefLabel>();
labelList.add(new SKOSPrefLabel("ENG", extractedLabel, nodeID));
}

try {
conceptList.add(type.getDeclaredConstructor(SKOSConcept.class)
.newInstance(new SKOSConcept(toExtractP.getURI(), conceptURI, labelList, nodeID)));
} catch (InstantiationException | IllegalAccessException | IllegalArgumentException
| InvocationTargetException | NoSuchMethodException | SecurityException e) {
// TODO Auto-generated catch block
e.printStackTrace();
}
// if (!IRIFactory.iriImplementation().create(conceptURI).hasViolation(false))
// conceptList.add(extractThemeFromURI(conceptURI));
// else
// conceptList.add(conceptURI);
}
}
return conceptList;
Expand Down Expand Up @@ -782,6 +790,17 @@ public String extractThemeFromURI(String uri) {

}

/**
 * Matches a EuroVoc concept URI such as {@code http://eurovoc.europa.eu/106}
 * or {@code http://eurovoc.europa.eu#106}; group 2 captures the word
 * characters that follow the {@code /} or {@code #} separator. Compiled
 * once because {@link Pattern} instances are immutable and thread-safe.
 */
private static final Pattern eurovocSubjectPattern = Pattern
        .compile("http:\\/\\/eurovoc\\.europa\\.eu(\\/|#)(\\w*)");

/**
 * Extracts the concept identifier from a EuroVoc subject URI.
 *
 * <p>
 * The first occurrence of {@code http://eurovoc.europa.eu/} or
 * {@code http://eurovoc.europa.eu#} inside {@code uri} is located and the
 * run of word characters immediately following it is returned, e.g.
 * {@code "http://eurovoc.europa.eu/106"} yields {@code "106"}.
 *
 * @param uri the URI to inspect; may be {@code null}
 * @return the extracted identifier, or an empty string when {@code uri} is
 *         {@code null}, does not contain a EuroVoc URI, or has nothing after
 *         the separator
 */
public String extractSubjectFromURI(String uri) {

    // A missing URI is treated like a non-matching one instead of raising a
    // NullPointerException from Pattern.matcher(null).
    if (uri == null) {
        return "";
    }

    Matcher matcher = eurovocSubjectPattern.matcher(uri);

    // Group 2 is "\w*", so it is never null once find() succeeds; it may be
    // empty, which is also the "no identifier" result.
    return matcher.find() ? matcher.group(2) : "";

}

public String extractLanguageFromURI(String uri) {

Matcher matcher = Pattern
Expand Down
19 changes: 12 additions & 7 deletions Idra/src/main/java/it/eng/idra/dcat/dump/DCATAPITDeserializer.java
Original file line number Diff line number Diff line change
Expand Up @@ -87,7 +87,7 @@ public DCATDataset resourceToDataset(String nodeID, Resource datasetResource)
description = datasetResource.getRequiredProperty(DCTerms.description).getString();

// Handle theme concepts
theme = deserializeConcept(nodeID, datasetResource, DCAT.theme,SKOSConceptTheme.class);
theme = deserializeConcept(nodeID, datasetResource, DCAT.theme, SKOSConceptTheme.class);

publisher = deserializeFOAFAgent(nodeID, datasetResource.getProperty(DCTerms.publisher));

Expand Down Expand Up @@ -168,7 +168,12 @@ public DCATDataset resourceToDataset(String nodeID, Resource datasetResource)
// Iterate over source properties
StmtIterator sourceIt = datasetResource.listProperties(DCTerms.source);
while (sourceIt.hasNext()) {
source.add(sourceIt.next().getString());
Statement sourceStm = sourceIt.next();
try {
source.add(sourceStm.getString());
} catch (LiteralRequiredException e) {
source.add(sourceStm.getResource().getURI());
}
}

// Handle spatial property
Expand All @@ -191,7 +196,7 @@ public DCATDataset resourceToDataset(String nodeID, Resource datasetResource)
}

// Handle subject concepts
subject = deserializeConcept(nodeID, datasetResource, DCTerms.subject,SKOSConceptSubject.class);
subject = deserializeConcept(nodeID, datasetResource, DCTerms.subject, SKOSConceptSubject.class);

// Handle RightsHolder
if (datasetResource.hasProperty(DCTerms.rightsHolder))
Expand Down Expand Up @@ -263,9 +268,9 @@ public DCTPeriodOfTime deserializeTemporalCoverage(String nodeID, Resource datas
return null;
}

// @Override
// public Pattern getDatasetPattern() {
// return datasetPattern;
// }
// @Override
// public Pattern getDatasetPattern() {
// return datasetPattern;
// }

}
Original file line number Diff line number Diff line change
Expand Up @@ -212,7 +212,8 @@ protected static boolean synchODMSNode(ODMSCatalogue node) throws SQLException,
} else if (node.getNodeType().equals(ODMSCatalogueType.WEB)) {
synchWebODMSNode(node);
} else if (node.getNodeType().equals(ODMSCatalogueType.DCATDUMP)) {
synchDUMPODMSNode(node);
// Do nothing for node type DUMP
// synchDUMPODMSNode(node);
}

} catch (Exception e) {
Expand Down

0 comments on commit 3dd00e6

Please sign in to comment.