Skip to content

Commit

Permalink
CSW harvester / performance improvements
Browse files Browse the repository at this point in the history
- Increase GetRecords max records parameter to 100
- Use GetRecords with ElementSetName FULL to retrieve the full xml and avoid individual GetRecordById requests

Includes Sonarlint improvements.

Fixes geonetwork#7995
  • Loading branch information
josegar74 committed May 19, 2024
1 parent ca92682 commit 979e2ac
Show file tree
Hide file tree
Showing 3 changed files with 52 additions and 48 deletions.
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
//=============================================================================
//=== Copyright (C) 2001-2007 Food and Agriculture Organization of the
//=== Copyright (C) 2001-2024 Food and Agriculture Organization of the
//=== United Nations (FAO-UN), United Nations World Food Programme (WFP)
//=== and United Nations Environment Programme (UNEP)
//===
Expand Down Expand Up @@ -57,12 +57,20 @@ public class RecordInfo {
//---------------------------------------------------------------------------
public String isTemplate;

//---------------------------------------------------------------------------

public String data;

//-----------------------------------------------------------------------------
private boolean dateWasNull;
//---------------------------------------------------------------------------

public RecordInfo(String uuid, String changeDate) {
this(uuid, changeDate, null, null);
this(uuid, changeDate, null, null, null);
}

public RecordInfo(String uuid, String changeDate, String data) {
this(uuid, changeDate, null, null, data);
}

//---------------------------------------------------------------------------
Expand All @@ -72,6 +80,10 @@ public RecordInfo(String uuid, String changeDate) {
//---------------------------------------------------------------------------

public RecordInfo(String uuid, String changeDate, String schema, String source) {
this(uuid, changeDate, schema, source, null);
}

public RecordInfo(String uuid, String changeDate, String schema, String source, String data) {
if (changeDate == null) {
dateWasNull = true;
changeDate = new ISODate().toString();
Expand All @@ -81,6 +93,7 @@ public RecordInfo(String uuid, String changeDate, String schema, String source)
this.changeDate = changeDate;
this.schema = schema;
this.source = source;
this.data = data;
}
public RecordInfo(Element record) {
id = record.getChildText("id");
Expand Down
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
//=============================================================================
//=== Copyright (C) 2001-2007 Food and Agriculture Organization of the
//=== Copyright (C) 2001-2024 Food and Agriculture Organization of the
//=== United Nations (FAO-UN), United Nations World Food Programme (WFP)
//=== and United Nations Environment Programme (UNEP)
//===
Expand Down Expand Up @@ -282,7 +282,7 @@ private void addMetadata(RecordInfo ri, String uuidToAssign) throws Exception {
return;
}

Element md = retrieveMetadata(ri.uuid);
Element md = retrieveMetadata(ri);

if (md == null) {
result.unretrievable++;
Expand Down Expand Up @@ -439,7 +439,7 @@ private void updateMetadata(RecordInfo ri, String id, Boolean force) throws Exce
}
@Transactional(value = TxType.REQUIRES_NEW)
boolean updatingLocalMetadata(RecordInfo ri, String id, Boolean force) throws Exception {
Element md = retrieveMetadata(ri.uuid);
Element md = retrieveMetadata(ri);

if (md == null) {
result.unchangedMetadata++;
Expand Down Expand Up @@ -495,58 +495,47 @@ boolean updatingLocalMetadata(RecordInfo ri, String id, Boolean force) throws Ex
}

/**
* Does CSW GetRecordById request. If validation is requested and the metadata does not
* Returns the metadata xml. If validation is requested and the metadata does not
* validate, null is returned.
*
* @param uuid uuid of metadata to request
* @return metadata the metadata
* @param recordInfo metadata information
* @return the metadata JDOM Element
*/
private Element retrieveMetadata(String uuid) {
private Element retrieveMetadata(RecordInfo recordInfo) {
String uuid = recordInfo.uuid;

request.clearIds();
request.addId(uuid);

try {
Element xml = Xml.loadString(recordInfo.data, false);
log.debug("Getting record from : " + request.getHost() + " (uuid:" + uuid + ")");

Element response = request.execute();
if (log.isDebugEnabled()) {
log.debug("Record got: " + Xml.getString(response) + "\n");
}

@SuppressWarnings("unchecked")
List<Element> list = response.getChildren();

//--- maybe the metadata has been removed

if (list.isEmpty()) {
return null;
log.debug(String.format("Record uuid %s: %s"), uuid, Xml.getString(xml));
}

response = list.get(0);
response = (Element) response.detach();


try {
Integer groupIdVal = null;
if (StringUtils.isNotEmpty(params.getOwnerIdGroup())) {
groupIdVal = getGroupOwner();
}

params.getValidate().validate(dataMan, context, response, groupIdVal);
params.getValidate().validate(dataMan, context, xml, groupIdVal);
} catch (Exception e) {
log.debug("Ignoring invalid metadata with uuid " + uuid);
result.doesNotValidate++;
return null;
}

if (params.rejectDuplicateResource) {
if (foundDuplicateForResource(uuid, response)) {
if (foundDuplicateForResource(uuid, xml)) {
result.unchangedMetadata++;
return null;
}
}

return response;
return xml;
} catch (Exception e) {
log.error("Raised exception while getting record : " + e);
log.error(e);
Expand Down
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
//=============================================================================
//=== Copyright (C) 2001-2007 Food and Agriculture Organization of the
//=== Copyright (C) 2001-2024 Food and Agriculture Organization of the
//=== United Nations (FAO-UN), United Nations World Food Programme (WFP)
//=== and United Nations Environment Programme (UNEP)
//===
Expand Down Expand Up @@ -66,18 +66,18 @@ class Harvester implements IHarvester<HarvestResult> {
// FIXME : Currently switch from POST to GET for testing mainly.
public static final String PREFERRED_HTTP_METHOD = AbstractHttpRequest.Method.POST.toString();

private final static String ATTRIB_SEARCHRESULT_MATCHED = "numberOfRecordsMatched";
private static final String ATTRIB_SEARCHRESULT_MATCHED = "numberOfRecordsMatched";

private final static String ATTRIB_SEARCHRESULT_RETURNED = "numberOfRecordsReturned";
private static final String ATTRIB_SEARCHRESULT_RETURNED = "numberOfRecordsReturned";

private final static String ATTRIB_SEARCHRESULT_NEXT = "nextRecord";
private static final String ATTRIB_SEARCHRESULT_NEXT = "nextRecord";

private static int GETRECORDS_REQUEST_MAXRECORDS = 20;
private static final int GETRECORDS_REQUEST_MAXRECORDS = 100;

private static String CONSTRAINT_LANGUAGE_VERSION = "1.1.0";
private static final String CONSTRAINT_LANGUAGE_VERSION = "1.1.0";

//FIXME version should be parametrized
private static String GETCAPABILITIES_PARAMETERS = "SERVICE=CSW&REQUEST=GetCapabilities&VERSION=2.0.2";
private static final String GETCAPABILITIES_PARAMETERS = "SERVICE=CSW&REQUEST=GetCapabilities&VERSION=2.0.2";
private final AtomicBoolean cancelMonitor;

private Logger log;
Expand All @@ -87,7 +87,7 @@ class Harvester implements IHarvester<HarvestResult> {
/**
* Contains a list of accumulated errors during the executing of this harvest.
*/
private List<HarvestError> errors = new LinkedList<HarvestError>();
private List<HarvestError> errors = new LinkedList<>();


public Harvester(AtomicBoolean cancelMonitor, Logger log, ServiceContext context, CswParams params) {
Expand All @@ -110,7 +110,7 @@ public HarvestResult harvest(Logger log) throws Exception {

boolean error = false;
HarvestResult result = new HarvestResult();
Set<String> uuids = new HashSet<String>();
Set<String> uuids = new HashSet<>();
try {
Aligner aligner = new Aligner(cancelMonitor, context, server, params, log);
searchAndAlign(server, uuids, aligner, errors);
Expand Down Expand Up @@ -219,7 +219,7 @@ private void searchAndAlign(CswServer server, Set<String> uuids,
if (StringUtils.isNotEmpty(params.sortBy)) {
request.addSortBy(params.sortBy);
}
request.setElementSetName(ElementSetName.SUMMARY);
request.setElementSetName(ElementSetName.FULL);
request.setMaxRecords(GETRECORDS_REQUEST_MAXRECORDS);
request.setDistribSearch(params.queryScope.equalsIgnoreCase("distributed"));
request.setHopCount(params.hopCount);
Expand Down Expand Up @@ -283,10 +283,10 @@ private void searchAndAlign(CswServer server, Set<String> uuids,
int foundCnt = 0;

log.debug("Extracting all elements in the csw harvesting response");
Set<RecordInfo> records = new LinkedHashSet<RecordInfo>();
for (Element record : list) {
Set<RecordInfo> records = new LinkedHashSet<>();
for (Element recordElement : list) {
try {
RecordInfo recInfo = getRecordInfo((Element) record.clone());
RecordInfo recInfo = getRecordInfo((Element) recordElement.clone());

if (recInfo != null) {
records.add(recInfo);
Expand All @@ -297,7 +297,7 @@ private void searchAndAlign(CswServer server, Set<String> uuids,
errors.add(new HarvestError(context, ex));
log.error("Unable to process record from csw (" + this.params.getName() + ")");
log.error(" Record failed: " + foundCnt);
log.debug(" Record: " + ((Element) record).getName());
log.debug(" Record: " + recordElement.getName());
}

}
Expand Down Expand Up @@ -442,7 +442,7 @@ private void configRequest(final GetRecordsRequest request, final CswOperation o
}
}

public static ImmutableSet<String> bboxParameters;
public static final ImmutableSet<String> bboxParameters;
static {
bboxParameters = ImmutableSet.<String>builder()
.add("bbox-xmin")
Expand All @@ -453,8 +453,8 @@ private void configRequest(final GetRecordsRequest request, final CswOperation o
}
private String getFilterConstraint(final Search s) {
//--- collect queriables
ArrayList<Element> queriables = new ArrayList<Element>();
Map<String, Double> bboxCoordinates = new HashMap<String, Double>();
ArrayList<Element> queriables = new ArrayList<>();
Map<String, Double> bboxCoordinates = new HashMap<>();

if (!s.attributesMap.isEmpty()) {
for (Map.Entry<String, String> entry : s.attributesMap.entrySet()) {
Expand Down Expand Up @@ -594,7 +594,7 @@ private String getFilterConstraint(List<Element> filters, Element bboxFilter) th
bboxCoordinates.put("bbox-xmax", Double.parseDouble(bboxFilter.getChildText("bbox-xmax")));
bboxCoordinates.put("bbox-ymax", Double.parseDouble(bboxFilter.getChildText("bbox-ymax")));

if (cswFilter.getChildren().size() == 0) {
if (cswFilter.getChildren().isEmpty()) {
cswFilter.addContent(buildBboxFilter(bboxCoordinates));
} else {
Element filterContent = ((Element) cswFilter.getChildren().get(0));
Expand All @@ -607,7 +607,7 @@ private String getFilterConstraint(List<Element> filters, Element bboxFilter) th
}
}

if (cswFilter.getChildren().size() == 0) {
if (cswFilter.getChildren().isEmpty()) {
return StringUtils.EMPTY;
} else {
return Xml.getString(cswFilter);
Expand All @@ -617,7 +617,7 @@ private String getFilterConstraint(List<Element> filters, Element bboxFilter) th
private String getCqlConstraint(List<Element> filters, Element bboxFilter) throws Exception {
String cqlFilter = "";

if (filters.size() > 0) {
if (!filters.isEmpty()) {
Path file = context.getAppPath().resolve("xml").resolve("csw").resolve("harvester-csw-cql.xsl");

Element eltFilter = new Element("filters");
Expand Down Expand Up @@ -724,7 +724,9 @@ private RecordInfo getRecordInfo(Element record) {
if (modified.length() == 0) modified = null;
if (log.isDebugEnabled())
log.debug("getRecordInfo: adding " + identif + " with modification date " + modified);
return new RecordInfo(identif, modified);


return new RecordInfo(identif, modified, Xml.getString(record));
} catch (Exception e) {
log.warning("Skipped record not in supported format : " + name);
}
Expand Down

0 comments on commit 979e2ac

Please sign in to comment.