Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Oai harvesting setup #2491

Merged
merged 24 commits into from Oct 9, 2019
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
24 commits
Select commit Hold shift + click to select a range
ad59842
Add endpoint to update collection harvest settings
jpelgrims-atmire Aug 5, 2019
73865a9
Refactor harvest settings endpoint after manual testing
jpelgrims-atmire Aug 5, 2019
0bb2135
Add Integration tests for harvest settings endpoint
jpelgrims-atmire Aug 5, 2019
0c6f537
Fix checkstyle errors
jpelgrims-atmire Aug 6, 2019
251d264
Implemented changes
jpelgrims-atmire Aug 6, 2019
361f48b
Add settings verification to harvest settings endpoint
jpelgrims-atmire Aug 7, 2019
07a8e33
Add integration test for harvest settings verification
jpelgrims-atmire Aug 7, 2019
99bc1b7
Add endpoint for harvesting configurations
jpelgrims-atmire Aug 7, 2019
234ef08
Add integration test for harvester metadata format endpoint
jpelgrims-atmire Aug 8, 2019
38679be
Implement fixes/tweaks and add javadocs
jpelgrims-atmire Aug 9, 2019
d401af5
Update javadoc author tag email
jpelgrims-atmire Aug 9, 2019
d247028
Add endpoint to retrieve collection harvest settings
jpelgrims-atmire Aug 13, 2019
1831c4f
Fix small issues
jpelgrims-atmire Aug 13, 2019
0f4ea91
Clean up code added in previous commits
jpelgrims-atmire Aug 21, 2019
f5561a2
Add small changes
jpelgrims-atmire Aug 21, 2019
8c60fbb
Fix CollectionharvestSettingsController permissions
jpelgrims-atmire Aug 21, 2019
2c02568
Added the harvester link to the CollectionResource
Raf-atmire Sep 3, 2019
e26d5fe
Merge remote-tracking branch 'dspace/master' into oai-harvesting-setup
Raf-atmire Sep 4, 2019
821e917
[Task 64789] fixed empty response from update on harvest collections …
Raf-atmire Sep 4, 2019
53a4043
[Task 64789] added nullcheck for metadata configs
Raf-atmire Sep 4, 2019
9a02695
Applied feedback
Raf-atmire Sep 17, 2019
3df6f84
Applied feedback
Raf-atmire Sep 20, 2019
56fc8d5
Applied feedback
Raf-atmire Sep 27, 2019
2202195
Altered wrongly defined URL in CollectionHarvesterSettingsControllerIT
Raf-atmire Oct 7, 2019
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Jump to
Jump to file
Failed to load files.
Diff view
Diff view
4 changes: 2 additions & 2 deletions dspace-api/src/main/java/org/dspace/app/harvest/Harvest.java
Expand Up @@ -460,7 +460,7 @@ private static void pingResponder(String server, String set, String metadataForm
List<String> errors;

System.out.print("Testing basic PMH access: ");
errors = OAIHarvester.verifyOAIharvester(server, set,
errors = harvestedCollectionService.verifyOAIharvester(server, set,
(null != metadataFormat) ? metadataFormat : "dc", false);
if (errors.isEmpty()) {
System.out.println("OK");
Expand All @@ -471,7 +471,7 @@ private static void pingResponder(String server, String set, String metadataForm
}

System.out.print("Testing ORE support: ");
errors = OAIHarvester.verifyOAIharvester(server, set,
errors = harvestedCollectionService.verifyOAIharvester(server, set,
(null != metadataFormat) ? metadataFormat : "dc", true);
if (errors.isEmpty()) {
System.out.println("OK");
Expand Down
Expand Up @@ -7,16 +7,28 @@
*/
package org.dspace.harvest;

import static org.dspace.harvest.OAIHarvester.OAI_ADDRESS_ERROR;
import static org.dspace.harvest.OAIHarvester.OAI_DMD_ERROR;
import static org.dspace.harvest.OAIHarvester.OAI_ORE_ERROR;
import static org.dspace.harvest.OAIHarvester.OAI_SET_ERROR;

import java.sql.SQLException;
import java.util.ArrayList;
import java.util.Calendar;
import java.util.Date;
import java.util.List;

import ORG.oclc.oai.harvester2.verb.Identify;
import ORG.oclc.oai.harvester2.verb.ListIdentifiers;
import org.dspace.content.Collection;
import org.dspace.core.ConfigurationManager;
import org.dspace.core.Context;
import org.dspace.harvest.dao.HarvestedCollectionDAO;
import org.dspace.harvest.service.HarvestedCollectionService;
import org.jdom.Document;
import org.jdom.Element;
import org.jdom.Namespace;
import org.jdom.input.DOMBuilder;
import org.springframework.beans.factory.annotation.Autowired;

/**
Expand All @@ -27,6 +39,10 @@
* @author kevinvandevelde at atmire.com
*/
public class HarvestedCollectionServiceImpl implements HarvestedCollectionService {

// ORE terms namespace, obtained from its URI alone (no explicit prefix is supplied).
// Within verifyOAIharvester it is only read via getPrefix() when composing an error message.
private static final Namespace ORE_NS = Namespace.getNamespace("http://www.openarchives.org/ore/terms/");
// OAI-PMH 2.0 namespace, used by verifyOAIharvester to look up the ListIdentifiers
// element in the server reply.
private static final Namespace OAI_NS = Namespace.getNamespace("http://www.openarchives.org/OAI/2.0/");

@Autowired(required = true)
protected HarvestedCollectionDAO harvestedCollectionDAO;

Expand Down Expand Up @@ -156,5 +172,94 @@ public boolean exists(Context context) throws SQLException {
return 0 < harvestedCollectionDAO.count(context);
}

/**
 * Verify the existence of an OAI server with the specified set and
 * supporting the provided metadata formats.
 * <p>
 * The checks run in order: (1) the server answers an Identify request,
 * (2) the metadata prefix maps to a locally known namespace, (3) the server
 * answers the metadata-format lookup for the ORE and descriptive-metadata
 * namespaces, and (4) unless {@code oaiSetId} is {@code "all"}, a
 * ListIdentifiers request for the set yields at least one entry.
 * Failures in steps 1-3 (unreachable server, unknown prefix, failed lookup)
 * end the verification immediately; missing ORE/format support and set
 * problems are accumulated in the returned list.
 * <p>
 * Every message starts with one of the {@code OAI_ADDRESS_ERROR},
 * {@code OAI_DMD_ERROR}, {@code OAI_ORE_ERROR} or {@code OAI_SET_ERROR}
 * constants followed by {@code ": "} and a description.
 *
 * @param oaiSource  the address of the OAI-PMH provider
 * @param oaiSetId   OAI set identifier; the literal {@code "all"} skips the set check
 * @param metaPrefix OAI metadataPrefix
 * @param testORE    whether the method should also check the PMH provider for ORE support
 * @return list of errors encountered during verification. Empty list indicates a "success" condition.
 * @throws RuntimeException any unchecked exception raised while querying or parsing the
 *                          ListIdentifiers response is propagated unchanged
 */
public List<String> verifyOAIharvester(String oaiSource,
                                       String oaiSetId, String metaPrefix, boolean testORE) {
    List<String> errorSet = new ArrayList<String>();

    // First, see if we can contact the target server at all.
    try {
        new Identify(oaiSource);
    } catch (Exception ex) {
        errorSet.add(OAI_ADDRESS_ERROR + ": OAI server could not be reached.");
        return errorSet;
    }

    // Next, make sure the metadata we need is supported by the target server
    Namespace DMD_NS = OAIHarvester.getDMDNamespace(metaPrefix);
    if (null == DMD_NS) {
        errorSet.add(OAI_DMD_ERROR + ": " + metaPrefix);
        return errorSet;
    }

    String OREOAIPrefix = null;
    String DMDOAIPrefix = null;

    try {
        OREOAIPrefix = OAIHarvester.oaiResolveNamespaceToPrefix(oaiSource, OAIHarvester.getORENamespace().getURI());
        DMDOAIPrefix = OAIHarvester.oaiResolveNamespaceToPrefix(oaiSource, DMD_NS.getURI());
    } catch (Exception ex) {
        errorSet.add(OAI_ADDRESS_ERROR
                         + ": OAI did not respond to ListMetadataFormats query ("
                         + ORE_NS.getPrefix() + ":" + OREOAIPrefix + " ; "
                         + DMD_NS.getPrefix() + ":" + DMDOAIPrefix + "): "
                         + ex.getMessage());
        return errorSet;
    }

    if (testORE && OREOAIPrefix == null) {
        errorSet.add(OAI_ORE_ERROR + ": The OAI server does not support ORE dissemination");
    }
    if (DMDOAIPrefix == null) {
        // NOTE(review): verification continues with a null metadata prefix, so the set check
        // below may report an additional, set-related error for this same root cause.
        errorSet.add(OAI_DMD_ERROR + ": The OAI server does not support dissemination in this format");
    }

    // Now scan the sets and make sure the one supplied is in the list
    try {
        //If we do not want to harvest from one set, then skip this.
        if (!"all".equals(oaiSetId)) {
            ListIdentifiers ls = new ListIdentifiers(oaiSource, null, null, oaiSetId, DMDOAIPrefix);

            if (ls.getErrors() != null && ls.getErrors().getLength() > 0) {
                // Report every error code the server returned for the ListIdentifiers request.
                for (int i = 0; i < ls.getErrors().getLength(); i++) {
                    String errorCode = ls.getErrors().item(i).getAttributes().getNamedItem("code").getTextContent();
                    errorSet.add(
                        OAI_SET_ERROR + ": The OAI server does not have a set with the specified setSpec (" +
                            errorCode + ")");
                }
            } else {
                // Drilling down to /OAI-PMH/ListIdentifiers
                DOMBuilder db = new DOMBuilder();
                Document reply = db.build(ls.getDocument());
                Element root = reply.getRootElement();

                // A reply with neither <error> nor <ListIdentifiers> elements would make
                // getChild() return null; treat that as "set not found" rather than letting a
                // NullPointerException escape through the RuntimeException rethrow below.
                Element listIdentifiers = root.getChild("ListIdentifiers", OAI_NS);

                //Check if we can find items, if so this indicates that we have children and our sets exist
                boolean foundSet = listIdentifiers != null && !listIdentifiers.getChildren().isEmpty();

                if (!foundSet) {
                    errorSet.add(OAI_SET_ERROR + ": The OAI server does not have a set with the specified setSpec");
                }
            }
        }
    } catch (RuntimeException re) {
        // Unchecked exceptions are deliberately not converted into verification errors.
        throw re;
    } catch (Exception e) {
        errorSet.add(OAI_ADDRESS_ERROR + ": OAI server could not be reached");
        return errorSet;
    }

    return errorSet;
}


}