Skip to content

Commit

Permalink
Fixed Bug in CKAN synch
Browse files Browse the repository at this point in the history
  • Loading branch information
robcalla committed Sep 24, 2018
1 parent f67bd30 commit 11c1798
Show file tree
Hide file tree
Showing 3 changed files with 76 additions and 36 deletions.
7 changes: 4 additions & 3 deletions Idra/src/main/java/it/eng/idra/beans/dcat/DCATDataset.java
Original file line number Diff line number Diff line change
Expand Up @@ -144,6 +144,9 @@ public DCATDataset(String nodeID,String identifier, String title, String descrip
super();
setId(CommonUtil.extractSeoIdentifier(title, UUID.randomUUID().toString(),nodeID));
setNodeID(nodeID);

setIdentifier(new DCATProperty(DCTerms.identifier, RDFS.Literal.getURI(), identifier));

setDistributions(distributions);
setTitle(new DCATProperty(DCTerms.title, RDFS.Literal.getURI(), title));
setDescription(new DCATProperty(DCTerms.description, RDFS.Literal.getURI(), description));
Expand Down Expand Up @@ -206,9 +209,7 @@ public DCATDataset(String nodeID,String identifier, String title, String descrip
StringUtils.isNotBlank(releaseDate) ? releaseDate : "1970-01-01T00:00:00Z"));
setUpdateDate(new DCATProperty(DCTerms.modified, RDFS.Literal.getURI(),
StringUtils.isNotBlank(updateDate) ? updateDate : "1970-01-01T00:00:00Z"));

setIdentifier(new DCATProperty(DCTerms.identifier, RDFS.Literal.getURI(), identifier));


setOtherIdentifier(otherIdentifier != null
? otherIdentifier.stream()
.map(item -> new DCATProperty(
Expand Down
16 changes: 6 additions & 10 deletions Idra/src/main/java/it/eng/idra/cache/MetadataCacheManager.java
Original file line number Diff line number Diff line change
Expand Up @@ -219,32 +219,28 @@ public static List<String> getAllDatasetsID() throws SolrServerException, IOExce
* @throws DatasetNotFoundException
* @return a map whose keys are the "identifier" field of each dataset of the node and whose values are the corresponding "otherIdentifier" lists
*/
// NOTE(review): this hunk is rendered without +/- markers, so pre-commit and post-commit lines are
// interleaved (the file header above reports 6 additions and 10 deletions for this file).
// The first signature below appears to be the removed one; the second appears to be its replacement.
public static List<String> getAllDatasetsIDByODMSCatalogue(int nodeId, boolean nativeID)
public static HashMap<String,ArrayList<String>> getCKANDatasetNamesIdentifiers(int nodeId)
throws DatasetNotFoundException, IOException, SolrServerException {
SolrQuery query = new SolrQuery();
QueryResponse rsp;
// Accumulator of the pre-commit version (flat list of ids) followed by the accumulator of the
// post-commit version (identifier -> otherIdentifier list).
List<String> idList = new ArrayList<String>();
HashMap<String,ArrayList<String>> idMap = new HashMap<String,ArrayList<String>>();

// Main query: match documents whose nodeID field equals the given node id.
query.setQuery("nodeID:" + nodeId);

// parent_filter is a request parameter referenced below as $parent_filter; it selects
// documents with content_type:dataset.
query.set("parent_filter", "content_type:" + "dataset");
query.set("defType", "edismax");
// Filter query using the {!parent} local-params parser with parent_filter as the "which" clause.
// NOTE(review): presumably this restricts results to parent (dataset) documents - confirm against the index layout.
query.addFilterQuery("{!parent which=$parent_filter}");
// Field list. The first setParam reads the nativeID flag, which only exists in the first signature
// above; the last setParam always requests both otherIdentifier and identifier. Both variants also
// request child documents through the [child ...] entry, limited to 1000 per result.
query.setParam("fl", (nativeID ? "otherIdentifier" : "id") + ",[child parentFilter=$parent_filter limit=1000]");
//query.setParam("fl", (nativeID ? "identifier" : "id") + ",[child parentFilter=$parent_filter limit=1000]");
query.setParam("fl", "otherIdentifier,identifier"+ ",[child parentFilter=$parent_filter limit=1000]");
// Request up to one million rows in a single response.
query.set("rows", "1000000");
// query.set("fl", nativeID ? "otherIdentifier" : "id");

rsp = server.query(query);

for (SolrDocument doc : rsp.getResults()) {

// Variant driven by nativeID: take the first otherIdentifier value, or the id field otherwise.
if (nativeID)
idList.add(((ArrayList<String>) doc.getFieldValue("otherIdentifier")).get(0));
else
idList.add((String) doc.getFieldValue("id"));

// Variant without nativeID: key = identifier field, value = otherIdentifier field.
// NOTE(review): both casts are unchecked and assume identifier is stored as a String and
// otherIdentifier as an ArrayList<String> - confirm against the Solr schema.
// NOTE(review): if a document has no otherIdentifier and getFieldValue yields null, the map will
// hold a null list; callers that iterate the values should be checked for that case.
idMap.put((String) doc.getFieldValue("identifier"),(ArrayList<String>) doc.getFieldValue("otherIdentifier"));
}
// Return of the list-based variant, followed by the return of the map-based variant.
return idList;
return idMap;
}

/**
Expand Down
89 changes: 66 additions & 23 deletions Idra/src/main/java/it/eng/idra/connectors/CKanConnector.java
Original file line number Diff line number Diff line change
Expand Up @@ -19,7 +19,6 @@

import it.eng.idra.beans.dcat.DCATDataset;
import it.eng.idra.beans.dcat.DCATDistribution;
import it.eng.idra.beans.dcat.DCATProperty;
import it.eng.idra.beans.dcat.DCTLicenseDocument;
import it.eng.idra.beans.dcat.DCTLocation;
import it.eng.idra.beans.dcat.DCTPeriodOfTime;
Expand Down Expand Up @@ -58,9 +57,6 @@
import org.apache.logging.log4j.*;
import org.apache.solr.client.solrj.SolrServerException;
import org.ckan.*;
import com.google.common.collect.ImmutableSet;
import com.google.common.collect.Sets;
import com.google.common.collect.Sets.SetView;
import com.google.gson.JsonElement;
import com.google.gson.JsonObject;
import com.google.gson.JsonParser;
Expand Down Expand Up @@ -539,9 +535,11 @@ private List<String> extractValueList(String value) {

// if(StringUtils.isBlank(value)) return null;


//TODO: regex & groups
List<String> result = new ArrayList<String>();

if(StringUtils.isBlank(value)) return result;

if (value.startsWith("["))
try {
result.addAll(GsonUtil.json2Obj(value, GsonUtil.stringListType));
Expand Down Expand Up @@ -587,6 +585,9 @@ private List<DCTStandard> extractConformsTo(String value) {
// if(StringUtils.isBlank(value)) return null;

List<DCTStandard> result=new ArrayList<DCTStandard>();

if(StringUtils.isBlank(value)) return result;

if (value.startsWith("["))
try {
result.addAll(GsonUtil.json2Obj(value, GsonUtil.stringListType));
Expand Down Expand Up @@ -799,18 +800,21 @@ public ODMSSynchronizationResult getChangedDatasets(List<DCATDataset> oldDataset
*
*/

List<String> oldDatasetsID = null;
String[] newDatasetsID = null;
HashMap<String,ArrayList<String>> idMap=null;
List<String> oldDatasetsID = new ArrayList<String>();
List<String> newDatasetsNames = new ArrayList<String>();
try {
logger.info("Starting to retrieve present datasets of the node from cache");
oldDatasetsID = MetadataCacheManager.getAllDatasetsIDByODMSCatalogue(node.getId(), true);

//oldDatasetsID = MetadataCacheManager.getAllDatasetsIDByODMSCatalogue(node.getId(), true);
idMap = MetadataCacheManager.getCKANDatasetNamesIdentifiers(node.getId());
oldDatasetsID=new ArrayList<String>(idMap.keySet());
} catch (DatasetNotFoundException | IOException | SolrServerException e) {
logger.info(e.getMessage());
}

try {
newDatasetsID = c.getAllDatasetsID();
try {
newDatasetsNames = Arrays.asList(c.getAllDatasetsID());

} catch (CKANException | MalformedURLException e) {
e.printStackTrace();
logger.info(e.getMessage());
Expand All @@ -820,32 +824,71 @@ public ODMSSynchronizationResult getChangedDatasets(List<DCATDataset> oldDataset
throw e;
}

// System.out.println("New dataset size: "+newDatasetsID.length);

ImmutableSet<String> newSets = ImmutableSet.copyOf(newDatasetsID);
ImmutableSet<String> oldSets = ImmutableSet.copyOf(oldDatasetsID);

//k -> identifier
//elements ->arrayList of name and other identifiers
int deleted = 0;
for(String k : idMap.keySet()) {
List<String> names = idMap.get(k);
boolean isPresent=false;
for(String n:names) {
if(newDatasetsNames.contains(n)) {
isPresent=true;
break;
}
}
if(!isPresent) {
//No match of identifiers in the new Array -> deleted
/* -> No: done this way the deletion of the RDFs does not work, because the dataset has no distributions ->
* we have to fetch the ones from the cache instead
DCATDataset deletedDataset = new DCATDataset();
deletedDataset.setNodeID(new Integer(node.getId()).toString());
deletedDataset.setIdentifier(new DCATProperty(DCTerms.identifier, RDFS.Literal.getURI(), k));
syncrhoResult.addToDeletedList(deletedDataset);
deleted++;*/

try {
//This way, when the deletion has to happen, the dataset carries all of its information
syncrhoResult.addToDeletedList(MetadataCacheManager.getDatasetByIdentifier(Integer.parseInt(nodeID), k));
deleted++;
} catch (NumberFormatException | DatasetNotFoundException | IOException | SolrServerException e) {
// TODO Auto-generated catch block
e.printStackTrace();
}

}

}
logger.info("Deleted Packages: " + deleted);
/*
* OLD -> NO
*
* ImmutableSet<String> newSets = ImmutableSet.copyOf(newDatasetsIdentifiers);
ImmutableSet<String> oldSets = ImmutableSet.copyOf(oldDatasetsIdentifiers);
logger.info(" Start to compare the new and old dataset IDs lists: " + " New size: " + newSets.size()
+ " - Old size: " + oldSets.size());
SetView<String> diff = Sets.difference(oldSets, newSets);
int deleted = 0;
logger.info("Deleted Packages: " + diff.size());
for (String id : diff) {

for (String identifier : diff) {
DCATDataset deletedDataset = new DCATDataset();
deletedDataset.setId(id);
//deletedDataset.setId(id);
deletedDataset.setNodeID(new Integer(node.getId()).toString());
//This line is the cause of the nullpointerexception
deletedDataset.setOtherIdentifier(Arrays.asList(new DCATProperty("dcat:altIdentifier", id)));
deleted++;
syncrhoResult.addToDeletedList(deletedDataset);
}
}*/



/**********************************************************************************************/
/*
* FOR ADDED AND UPDATED DATASETS, RETRIEVES ALL DATASETS WITH METADATA CREATED
* AND MODIFIED AFTER THE LAST UPDATE DATE
*/

int changed = 0, added = 0, offset = 0;
Dataset.SearchResults result = null;

Expand Down Expand Up @@ -886,16 +929,16 @@ public ODMSSynchronizationResult getChangedDatasets(List<DCATDataset> oldDataset

for (Dataset d : result.results) {

if (oldDatasetsID.contains(d.getName())) {
if (oldDatasetsID.contains(d.getId())) {
syncrhoResult.addToChangedList(datasetToDCAT(d, node));
changed++;
} else if (newSets.contains(d.getName())) {
} else if (newDatasetsNames.contains(d.getName())) {
syncrhoResult.addToAddedList(datasetToDCAT(d, node));
added++;
}

}

logger.info("NodeID: " + nodeID + " Changed " + syncrhoResult.getChangedDatasets().size());
logger.info("NodeID: " + nodeID + " Added " + syncrhoResult.getAddedDatasets().size());
logger.info("NodeID: " + nodeID + " Deleted " + syncrhoResult.getDeletedDatasets().size());
Expand Down

0 comments on commit 11c1798

Please sign in to comment.