From 5c8ff2c74312c5c97a14c01e2efa4d8521c1c1cb Mon Sep 17 00:00:00 2001 From: Andrea Bollini Date: Sat, 20 Jun 2020 18:49:56 +0200 Subject: [PATCH 01/34] DS-4530 enforce the use of the latest override of the findOne when checking permission for embedding --- .../app/rest/converter/ConverterService.java | 17 ++++++++++++++++- 1 file changed, 16 insertions(+), 1 deletion(-) diff --git a/dspace-server-webapp/src/main/java/org/dspace/app/rest/converter/ConverterService.java b/dspace-server-webapp/src/main/java/org/dspace/app/rest/converter/ConverterService.java index fc786bfc851a..563f2045ca2a 100644 --- a/dspace-server-webapp/src/main/java/org/dspace/app/rest/converter/ConverterService.java +++ b/dspace-server-webapp/src/main/java/org/dspace/app/rest/converter/ConverterService.java @@ -149,14 +149,29 @@ private Annotation getAnnotationForRestObject(BaseObjectRest restObject) { DSpaceRestRepository repositoryToUse = utils .getResourceRepositoryByCategoryAndModel(baseObjectRest.getCategory(), baseObjectRest.getType()); Annotation preAuthorize = null; + int maxDepth = 0; for (Method m : repositoryToUse.getClass().getMethods()) { if (StringUtils.equalsIgnoreCase(m.getName(), "findOne")) { - preAuthorize = AnnotationUtils.findAnnotation(m, PreAuthorize.class); + int depth = howManySuperclass(m.getDeclaringClass()); + if (depth > maxDepth) { + preAuthorize = AnnotationUtils.findAnnotation(m, PreAuthorize.class); + maxDepth = depth; + } } } return preAuthorize; } + private int howManySuperclass(Class declaringClass) { + Class curr = declaringClass; + int count = 0; + while (curr != Object.class) { + curr = curr.getSuperclass(); + count++; + } + return count; + } + private Annotation getDefaultFindOnePreAuthorize() { for (Method m : DSpaceRestRepository.class.getMethods()) { if (StringUtils.equalsIgnoreCase(m.getName(), "findOne")) { From f50d0d664832142525d68ed22ae4e5ec935d9770 Mon Sep 17 00:00:00 2001 From: Andrea Bollini Date: Sat, 20 Jun 2020 18:50:36 +0200 Subject: [PATCH 
02/34] Add dependency needed by the pubmed live import provider --- dspace-api/pom.xml | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/dspace-api/pom.xml b/dspace-api/pom.xml index a0714c04eecb..7de2b782af58 100644 --- a/dspace-api/pom.xml +++ b/dspace-api/pom.xml @@ -622,6 +622,12 @@ jersey-client ${jersey.version} + + + org.glassfish.jersey.inject + jersey-hk2 + ${jersey.version} + com.amazonaws From 38c566723384729f1e97ab0b7687d6696bb79180 Mon Sep 17 00:00:00 2001 From: Andrea Bollini Date: Sat, 20 Jun 2020 18:52:23 +0200 Subject: [PATCH 03/34] DS-4515 enable live import provider as external authority --- .../provider/impl/LiveImportDataProvider.java | 142 ++++++++++++++++++ .../config/spring/api/external-services.xml | 6 + 2 files changed, 148 insertions(+) create mode 100644 dspace-api/src/main/java/org/dspace/external/provider/impl/LiveImportDataProvider.java diff --git a/dspace-api/src/main/java/org/dspace/external/provider/impl/LiveImportDataProvider.java b/dspace-api/src/main/java/org/dspace/external/provider/impl/LiveImportDataProvider.java new file mode 100644 index 000000000000..776984ddb769 --- /dev/null +++ b/dspace-api/src/main/java/org/dspace/external/provider/impl/LiveImportDataProvider.java @@ -0,0 +1,142 @@ +/** + * The contents of this file are subject to the license and copyright + * detailed in the LICENSE and NOTICE files at the root of the source + * tree and available online at + * + * http://www.dspace.org/license/ + */ +package org.dspace.external.provider.impl; + +import java.util.Collection; +import java.util.List; +import java.util.Optional; +import java.util.stream.Collectors; + +import org.apache.commons.lang3.StringUtils; +import org.dspace.content.dto.MetadataValueDTO; +import org.dspace.external.model.ExternalDataObject; +import org.dspace.external.provider.ExternalDataProvider; +import org.dspace.importer.external.datamodel.ImportRecord; +import org.dspace.importer.external.exception.MetadataSourceException; +import 
org.dspace.importer.external.metadatamapping.MetadatumDTO; +import org.dspace.importer.external.service.components.MetadataSource; + +/** + * This class allows configuring a Live Import Provider as an External Data Provider + * + * @author Andrea Bollini (andrea.bollini at 4science.it) + * + */ +public class LiveImportDataProvider implements ExternalDataProvider { + /** + * The {@link MetadataSource} live import provider + */ + private MetadataSource metadataSource; + + /** + * A unique human-readable identifier for this provider + */ + private String sourceIdentifier; + + private String recordIdMetadata; + + private String displayMetadata = "dc.title"; + + @Override + public String getSourceIdentifier() { + return sourceIdentifier; + } + + public void setSourceIdentifier(String sourceIdentifier) { + this.sourceIdentifier = sourceIdentifier; + } + + public void setMetadataSource(MetadataSource metadataSource) { + this.metadataSource = metadataSource; + } + + public void setRecordIdMetadata(String recordIdMetadata) { + this.recordIdMetadata = recordIdMetadata; + } + + public void setDisplayMetadata(String displayMetadata) { + this.displayMetadata = displayMetadata; + } + + @Override + public Optional getExternalDataObject(String id) { + try { + ExternalDataObject externalDataObject = getExternalDataObject(metadataSource.getRecord(id)); + return Optional.of(externalDataObject); + } catch (MetadataSourceException e) { + throw new RuntimeException( + "The live import provider " + metadataSource.getImportSource() + " throws an exception", e); + } + } + + @Override + public List searchExternalDataObjects(String query, int start, int limit) { + Collection records; + try { + records = metadataSource.getRecords(query, start, limit); + return records.stream().map(r -> getExternalDataObject(r)).collect(Collectors.toList()); + } catch (MetadataSourceException e) { + throw new RuntimeException( + "The live import provider " + metadataSource.getImportSource() + " throws an
exception", e); + } + } + + @Override + public boolean supports(String source) { + return StringUtils.equalsIgnoreCase(sourceIdentifier, source); + } + + @Override + public int getNumberOfResults(String query) { + try { + return metadataSource.getNbRecords(query); + } catch (MetadataSourceException e) { + throw new RuntimeException( + "The live import provider " + metadataSource.getImportSource() + " throws an exception", e); + } + } + + /** + * Internal method to convert an ImportRecord to an ExternalDataObject + * + * FIXME it would be useful to remove ImportRecord at all in favor of the + * ExternalDataObject + * + * @param record + * @return + */ + private ExternalDataObject getExternalDataObject(ImportRecord record) { + ExternalDataObject externalDataObject = new ExternalDataObject(sourceIdentifier); + String id = getFirstValue(record, recordIdMetadata); + String display = getFirstValue(record, displayMetadata); + externalDataObject.setId(id); + externalDataObject.setDisplayValue(display); + externalDataObject.setValue(display); + for (MetadatumDTO dto : record.getValueList()) { + // FIXME it would be useful to remove MetadatumDTO in favor of MetadataValueDTO + MetadataValueDTO mvDTO = new MetadataValueDTO(); + mvDTO.setSchema(dto.getSchema()); + mvDTO.setElement(dto.getElement()); + mvDTO.setQualifier(dto.getQualifier()); + mvDTO.setValue(dto.getValue()); + externalDataObject.addMetadata(mvDTO); + } + return externalDataObject; + } + + private String getFirstValue(ImportRecord record, String metadata) { + String id = null; + String[] split = StringUtils.split(metadata, ".", 3); + Collection values = record.getValue(split[0], split[1], split.length == 3 ? 
split[2] : null); + if (!values.isEmpty()) { + id = (values.iterator().next().getValue()); + } + return id; + } + +} diff --git a/dspace/config/spring/api/external-services.xml b/dspace/config/spring/api/external-services.xml index 520c21a9638f..098b53c2ca19 100644 --- a/dspace/config/spring/api/external-services.xml +++ b/dspace/config/spring/api/external-services.xml @@ -30,5 +30,11 @@ + + + + + + From bd2cf94376e5560559d838c04d74cd0817c654a4 Mon Sep 17 00:00:00 2001 From: Andrea Bollini Date: Sat, 20 Jun 2020 18:52:45 +0200 Subject: [PATCH 04/34] DS-4529 External authorities endpoint doesn't support the pagination --- .../app/rest/repository/ExternalSourceRestRepository.java | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/dspace-server-webapp/src/main/java/org/dspace/app/rest/repository/ExternalSourceRestRepository.java b/dspace-server-webapp/src/main/java/org/dspace/app/rest/repository/ExternalSourceRestRepository.java index 49a128cd85c2..948e25e364c9 100644 --- a/dspace-server-webapp/src/main/java/org/dspace/app/rest/repository/ExternalSourceRestRepository.java +++ b/dspace-server-webapp/src/main/java/org/dspace/app/rest/repository/ExternalSourceRestRepository.java @@ -89,10 +89,10 @@ public ExternalSourceRest findOne(Context context, String externalSourceName) { } @Override + @PreAuthorize("permitAll()") public Page findAll(Context context, Pageable pageable) { List externalSources = externalDataService.getExternalDataProviders(); - return converter.toRestPage(externalSources, pageable, externalSources.size(), - utils.obtainProjection()); + return converter.toRestPage(externalSources, pageable, utils.obtainProjection()); } public Class getDomainClass() { From 808c4633f159246543b79c723f3617e06c70c0db Mon Sep 17 00:00:00 2001 From: Pasquale Cavallo Date: Wed, 1 Jul 2020 10:36:43 +0200 Subject: [PATCH 05/34] submit external suorce partial implementation --- .../metadatamapping/ArXivFieldMapping.java | 23 ++ 
.../transform/GenerateArXivQueryService.java | 53 +++ .../ArXivImportMetadataSourceServiceImpl.java | 310 ++++++++++++++++++ .../AbstractImportMetadataSourceService.java | 2 - .../spring-dspace-addon-import-services.xml | 21 +- .../config/spring/api/arxiv-integration.xml | 136 ++++++++ .../config/spring/api/external-services.xml | 9 +- 7 files changed, 547 insertions(+), 7 deletions(-) create mode 100644 dspace-api/src/main/java/org/dspace/importer/external/arxiv/metadatamapping/ArXivFieldMapping.java create mode 100644 dspace-api/src/main/java/org/dspace/importer/external/arxiv/metadatamapping/transform/GenerateArXivQueryService.java create mode 100644 dspace-api/src/main/java/org/dspace/importer/external/arxiv/service/ArXivImportMetadataSourceServiceImpl.java create mode 100644 dspace/config/spring/api/arxiv-integration.xml diff --git a/dspace-api/src/main/java/org/dspace/importer/external/arxiv/metadatamapping/ArXivFieldMapping.java b/dspace-api/src/main/java/org/dspace/importer/external/arxiv/metadatamapping/ArXivFieldMapping.java new file mode 100644 index 000000000000..c4f6996a276a --- /dev/null +++ b/dspace-api/src/main/java/org/dspace/importer/external/arxiv/metadatamapping/ArXivFieldMapping.java @@ -0,0 +1,23 @@ +/** + * The contents of this file are subject to the license and copyright + * detailed in the LICENSE and NOTICE files at the root of the source + * tree and available online at + * + * http://www.dspace.org/license/ + */ +package org.dspace.importer.external.arxiv.metadatamapping; + +import java.util.Map; +import javax.annotation.Resource; + +import org.dspace.importer.external.metadatamapping.AbstractMetadataFieldMapping; + +public class ArXivFieldMapping extends AbstractMetadataFieldMapping { + + @Override + @Resource(name = "arxivMetadataFieldMap") + public void setMetadataFieldMap(Map metadataFieldMap) { + super.setMetadataFieldMap(metadataFieldMap); + } + +} diff --git 
a/dspace-api/src/main/java/org/dspace/importer/external/arxiv/metadatamapping/transform/GenerateArXivQueryService.java b/dspace-api/src/main/java/org/dspace/importer/external/arxiv/metadatamapping/transform/GenerateArXivQueryService.java new file mode 100644 index 000000000000..7f5e08cb5af3 --- /dev/null +++ b/dspace-api/src/main/java/org/dspace/importer/external/arxiv/metadatamapping/transform/GenerateArXivQueryService.java @@ -0,0 +1,53 @@ +/** + * The contents of this file are subject to the license and copyright + * detailed in the LICENSE and NOTICE files at the root of the source + * tree and available online at + * + * http://www.dspace.org/license/ + */ +package org.dspace.importer.external.arxiv.metadatamapping.transform; + +import java.util.List; + +import org.dspace.content.Item; +import org.dspace.content.MetadataValue; +import org.dspace.content.factory.ContentServiceFactory; +import org.dspace.content.service.ItemService; +import org.dspace.importer.external.datamodel.Query; +import org.dspace.importer.external.exception.MetadataSourceException; +import org.dspace.importer.external.metadatamapping.transform.GenerateQueryService; + +public class GenerateArXivQueryService implements GenerateQueryService { + + /** + * Create a Query object based on a given item. + * If the item has at least 1 value for dc.identifier.doi, the first one will be used. + * If no DOI is found, the title will be used. + * When no DOI or title is found, a null object is returned instead. + * + * @param item the Item to create a Query from + */ + @Override + public Query generateQueryForItem(Item item) throws MetadataSourceException { + Query query = new Query(); + + // Retrieve an instance of the ItemService to access business calls on an item.
+ ItemService itemService = ContentServiceFactory.getInstance().getItemService(); + List doi = itemService.getMetadata(item, "dc", "identifier", "doi", Item.ANY); + + if (doi.size() > 0) { + query.addParameter("term", doi.get(0).getValue()); + query.addParameter("field", "ELocationID"); + return query; + } + + List title = itemService.getMetadata(item, "dc", "title", null, Item.ANY); + + if (title.size() > 0) { + query.addParameter("term", title.get(0).getValue()); + query.addParameter("field", "title"); + return query; + } + return null; + } +} diff --git a/dspace-api/src/main/java/org/dspace/importer/external/arxiv/service/ArXivImportMetadataSourceServiceImpl.java b/dspace-api/src/main/java/org/dspace/importer/external/arxiv/service/ArXivImportMetadataSourceServiceImpl.java new file mode 100644 index 000000000000..567cce1b9a4d --- /dev/null +++ b/dspace-api/src/main/java/org/dspace/importer/external/arxiv/service/ArXivImportMetadataSourceServiceImpl.java @@ -0,0 +1,310 @@ +/** + * The contents of this file are subject to the license and copyright + * detailed in the LICENSE and NOTICE files at the root of the source + * tree and available online at + * + * http://www.dspace.org/license/ + */ +package org.dspace.importer.external.arxiv.service; + +import java.io.BufferedReader; +import java.io.InputStreamReader; +import java.io.StringReader; +import java.net.URISyntaxException; +import java.util.ArrayList; +import java.util.Collection; +import java.util.List; +import java.util.concurrent.Callable; + +import org.apache.axiom.om.OMElement; +import org.apache.axiom.om.OMXMLBuilderFactory; +import org.apache.axiom.om.OMXMLParserWrapper; +import org.apache.axiom.om.xpath.AXIOMXPath; +import org.apache.commons.lang3.StringUtils; +import org.apache.http.HttpException; +import org.apache.http.HttpResponse; +import org.apache.http.HttpStatus; +import org.apache.http.StatusLine; +import org.apache.http.client.HttpClient; +import org.apache.http.client.methods.HttpGet; 
+import org.apache.http.client.utils.URIBuilder; +import org.apache.http.impl.client.DefaultHttpClient; +import org.apache.http.params.CoreConnectionPNames; +import org.apache.http.params.HttpParams; +import org.dspace.content.Item; +import org.dspace.importer.external.datamodel.ImportRecord; +import org.dspace.importer.external.datamodel.Query; +import org.dspace.importer.external.exception.MetadataSourceException; +import org.dspace.importer.external.service.AbstractImportMetadataSourceService; +import org.jaxen.JaxenException; + +public class ArXivImportMetadataSourceServiceImpl extends AbstractImportMetadataSourceService { + private int timeout = 1000; + + /** + * How long to wait for a connection to be established. + * + * @param timeout milliseconds + */ + public void setTimeout(int timeout) { + this.timeout = timeout; + } + + + @Override + public Collection getRecords(String query, int start, int count) throws MetadataSourceException { + return retry(new SearchByQueryCallable(query, count, start)); + } + + @Override + public Collection getRecords(Query query) throws MetadataSourceException { + return retry(new SearchByQueryCallable(query)); + } + + @Override + public int getNbRecords(String query) throws MetadataSourceException { + List records = retry(new SearchByQueryCallable(query, null, null)); + return records != null ? records.size() : 0; + } + + @Override + public int getNbRecords(Query query) throws MetadataSourceException { + List records = retry(new SearchByQueryCallable(query)); + return records != null ? records.size() : 0; + } + + + @Override + public ImportRecord getRecord(String id) throws MetadataSourceException { + List records = retry(new SearchByIdCallable(id)); + if (records != null && records.size() > 1) { + throw new MetadataSourceException("More than one result found"); + } + return records == null ? 
null : records.get(0); + } + + @Override + public ImportRecord getRecord(Query query) throws MetadataSourceException { + List records = retry(new SearchByIdCallable(query)); + if (records != null && records.size() > 1) { + throw new MetadataSourceException("More than one result found"); + } + return records == null ? null : records.get(0); + } + + + @Override + public void init() throws Exception { + + } + + + + + + + @Override + public String getImportSource() { + return "arxiv"; + } + + @Override + public Collection findMatchingRecords(Item item) throws MetadataSourceException { + throw new RuntimeException(); + } + + @Override + public Collection findMatchingRecords(Query query) throws MetadataSourceException { + return null; + } + + private class SearchByQueryCallable implements Callable> { + private Query query; + + + private SearchByQueryCallable(String queryString, Integer maxResult, Integer start) { + query = new Query(); + query.addParameter("query", queryString); + query.addParameter("start", start); + query.addParameter("count", maxResult); + } + + private SearchByQueryCallable(Query query) { + this.query = query; + } + + + @Override + public List call() throws Exception { + List results = new ArrayList(); + String queryString = query.getParameterAsClass("query", String.class); + Integer start = query.getParameterAsClass("start", Integer.class); + Integer maxResult = query.getParameterAsClass("count", Integer.class); + + HttpGet method = null; + try { + HttpClient client = new DefaultHttpClient(); + HttpParams params = client.getParams(); + params.setIntParameter(CoreConnectionPNames.CONNECTION_TIMEOUT, timeout); + + try { + URIBuilder uriBuilder = new URIBuilder("http://export.arxiv.org/api/query"); + uriBuilder.addParameter("search_query", queryString); + if (maxResult != null) { + uriBuilder.addParameter("max_results", String.valueOf(maxResult)); + } + if (start != null) { + uriBuilder.addParameter("start", String.valueOf(start)); + } + method = new 
HttpGet(uriBuilder.build()); + } catch (URISyntaxException ex) { + throw new HttpException(ex.getMessage()); + } + + // Execute the method. + HttpResponse response = client.execute(method); + StatusLine responseStatus = response.getStatusLine(); + int statusCode = responseStatus.getStatusCode(); + + if (statusCode != HttpStatus.SC_OK) { + if (statusCode == HttpStatus.SC_BAD_REQUEST) { + throw new RuntimeException("arXiv query is not valid"); + } else { + throw new RuntimeException("Http call failed: " + + responseStatus); + } + } + + try { + InputStreamReader isReader = new InputStreamReader(response.getEntity().getContent()); + BufferedReader reader = new BufferedReader(isReader); + StringBuilder sb = new StringBuilder(); + String str; + while ((str = reader.readLine()) != null) { + sb.append(str); + } + System.out.println("XML: " + sb.toString()); + List omElements = splitToRecords(sb.toString()); + for (OMElement record : omElements) { + results.add(transformSourceRecords(record)); + } + } catch (Exception e) { + throw new RuntimeException( + "ArXiv identifier is not valid or not exist"); + } + } finally { + if (method != null) { + method.releaseConnection(); + } + } + return results; + } + } + + private class SearchByIdCallable implements Callable> { + private Query query; + + private SearchByIdCallable(Query query) { + this.query = query; + } + + private SearchByIdCallable(String id) { + this.query = new Query(); + query.addParameter("id", id); + } + + @Override + public List call() throws Exception { + List results = new ArrayList(); + String arxivid = query.getParameterAsClass("id", String.class); + HttpGet method = null; + try { + HttpClient client = new DefaultHttpClient(); + HttpParams params = client.getParams(); + params.setIntParameter(CoreConnectionPNames.CONNECTION_TIMEOUT, timeout); + try { + URIBuilder uriBuilder = new URIBuilder("http://export.arxiv.org/api/query"); + if (StringUtils.isNotBlank(arxivid)) { + arxivid = arxivid.trim(); + if 
(arxivid.startsWith("http://arxiv.org/abs/")) { + arxivid = arxivid.substring("http://arxiv.org/abs/".length()); + } else if (arxivid.toLowerCase().startsWith("arxiv:")) { + arxivid = arxivid.substring("arxiv:".length()); + } + uriBuilder.addParameter("id_list", arxivid); + method = new HttpGet(uriBuilder.build()); + } + } catch (URISyntaxException ex) { + throw new HttpException(ex.getMessage()); + } + + // Execute the method. + HttpResponse response = client.execute(method); + StatusLine responseStatus = response.getStatusLine(); + int statusCode = responseStatus.getStatusCode(); + if (statusCode != HttpStatus.SC_OK) { + if (statusCode == HttpStatus.SC_BAD_REQUEST) { + throw new RuntimeException("arXiv query is not valid"); + } else { + throw new RuntimeException("Http call failed: " + + responseStatus); + } + } + try { + InputStreamReader isReader = new InputStreamReader(response.getEntity().getContent()); + BufferedReader reader = new BufferedReader(isReader); + StringBuffer sb = new StringBuffer(); + String str; + while ((str = reader.readLine()) != null) { + sb.append(str); + } + List omElements = splitToRecords(sb.toString()); + for (OMElement record : omElements) { + results.add(transformSourceRecords(record)); + } + } catch (Exception e) { + throw new RuntimeException( + "ArXiv identifier is not valid or not exist"); + } + } finally { + if (method != null) { + method.releaseConnection(); + } + } + return results; + } + } + + private class FindMatchingRecordCallable implements Callable> { + private Query query; + + private FindMatchingRecordCallable(Item item) throws MetadataSourceException { + query = getGenerateQueryForItem().generateQueryForItem(item); + } + + public FindMatchingRecordCallable(Query q) { + query = q; + } + + @Override + public List call() throws Exception { + return null; + } + } + + private static List splitToRecords(String recordsSrc) { + OMXMLParserWrapper records = OMXMLBuilderFactory.createOMBuilder(new StringReader(recordsSrc)); + 
OMElement element = records.getDocumentElement(); + AXIOMXPath xpath = null; + try { + xpath = new AXIOMXPath("ns:entry"); + xpath.addNamespace("ns", "http://www.w3.org/2005/Atom"); + List recordsList = xpath.selectNodes(element); + return recordsList; + } catch (JaxenException e) { + return null; + } + } + + +} diff --git a/dspace-api/src/main/java/org/dspace/importer/external/service/AbstractImportMetadataSourceService.java b/dspace-api/src/main/java/org/dspace/importer/external/service/AbstractImportMetadataSourceService.java index a803958a9dfa..3bf76438cdab 100644 --- a/dspace-api/src/main/java/org/dspace/importer/external/service/AbstractImportMetadataSourceService.java +++ b/dspace-api/src/main/java/org/dspace/importer/external/service/AbstractImportMetadataSourceService.java @@ -16,7 +16,6 @@ import org.dspace.importer.external.metadatamapping.transform.GenerateQueryService; import org.dspace.importer.external.service.components.AbstractRemoteMetadataSource; import org.dspace.importer.external.service.components.MetadataSource; -import org.springframework.beans.factory.annotation.Autowired; import org.springframework.beans.factory.annotation.Required; /** @@ -49,7 +48,6 @@ public GenerateQueryService getGenerateQueryForItem() { * * @param generateQueryForItem the query generator to be used. */ - @Autowired public void setGenerateQueryForItem(GenerateQueryService generateQueryForItem) { this.generateQueryForItem = generateQueryForItem; } diff --git a/dspace-api/src/main/resources/spring/spring-dspace-addon-import-services.xml b/dspace-api/src/main/resources/spring/spring-dspace-addon-import-services.xml index bbdf08561994..a351280b98b2 100644 --- a/dspace-api/src/main/resources/spring/spring-dspace-addon-import-services.xml +++ b/dspace-api/src/main/resources/spring/spring-dspace-addon-import-services.xml @@ -19,10 +19,6 @@ - - - + + + + + + + + + + + + + + + + + + + + Defines which metadatum is mapped on which metadatum. 
Note that while the key must be unique it + only matters here for postprocessing of the value. The mapped MetadatumContributor has full control over + what metadatafield is generated. + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + Defines how an org.dspace.content.Item is mapped to a query in scopus. Please note that exactly one of + these must be present. If multiple are present the result is undefined. + + + + \ No newline at end of file diff --git a/dspace/config/spring/api/external-services.xml b/dspace/config/spring/api/external-services.xml index 098b53c2ca19..af24e41980e6 100644 --- a/dspace/config/spring/api/external-services.xml +++ b/dspace/config/spring/api/external-services.xml @@ -31,10 +31,17 @@ - + + + + + + + + From 238317bc8df250130af162de845623702b43cd81 Mon Sep 17 00:00:00 2001 From: Pasquale Cavallo Date: Wed, 1 Jul 2020 16:50:45 +0200 Subject: [PATCH 06/34] Arxiv Live import integration --- .../ArXivImportMetadataSourceServiceImpl.java | 207 ++++++------------ .../importer/external/datamodel/Query.java | 2 +- .../config/spring/api/arxiv-integration.xml | 49 +++-- 3 files changed, 96 insertions(+), 162 deletions(-) diff --git a/dspace-api/src/main/java/org/dspace/importer/external/arxiv/service/ArXivImportMetadataSourceServiceImpl.java b/dspace-api/src/main/java/org/dspace/importer/external/arxiv/service/ArXivImportMetadataSourceServiceImpl.java index 567cce1b9a4d..3ef9a2999f61 100644 --- a/dspace-api/src/main/java/org/dspace/importer/external/arxiv/service/ArXivImportMetadataSourceServiceImpl.java +++ b/dspace-api/src/main/java/org/dspace/importer/external/arxiv/service/ArXivImportMetadataSourceServiceImpl.java @@ -7,30 +7,23 @@ */ package org.dspace.importer.external.arxiv.service; -import java.io.BufferedReader; -import java.io.InputStreamReader; import 
java.io.StringReader; -import java.net.URISyntaxException; import java.util.ArrayList; import java.util.Collection; import java.util.List; import java.util.concurrent.Callable; +import javax.ws.rs.client.Client; +import javax.ws.rs.client.ClientBuilder; +import javax.ws.rs.client.Invocation; +import javax.ws.rs.client.WebTarget; +import javax.ws.rs.core.MediaType; +import javax.ws.rs.core.Response; import org.apache.axiom.om.OMElement; import org.apache.axiom.om.OMXMLBuilderFactory; import org.apache.axiom.om.OMXMLParserWrapper; import org.apache.axiom.om.xpath.AXIOMXPath; import org.apache.commons.lang3.StringUtils; -import org.apache.http.HttpException; -import org.apache.http.HttpResponse; -import org.apache.http.HttpStatus; -import org.apache.http.StatusLine; -import org.apache.http.client.HttpClient; -import org.apache.http.client.methods.HttpGet; -import org.apache.http.client.utils.URIBuilder; -import org.apache.http.impl.client.DefaultHttpClient; -import org.apache.http.params.CoreConnectionPNames; -import org.apache.http.params.HttpParams; import org.dspace.content.Item; import org.dspace.importer.external.datamodel.ImportRecord; import org.dspace.importer.external.datamodel.Query; @@ -39,17 +32,8 @@ import org.jaxen.JaxenException; public class ArXivImportMetadataSourceServiceImpl extends AbstractImportMetadataSourceService { - private int timeout = 1000; - - /** - * How long to wait for a connection to be established. 
- * - * @param timeout milliseconds - */ - public void setTimeout(int timeout) { - this.timeout = timeout; - } + private WebTarget webTarget; @Override public Collection getRecords(String query, int start, int count) throws MetadataSourceException { @@ -95,7 +79,8 @@ public ImportRecord getRecord(Query query) throws MetadataSourceException { @Override public void init() throws Exception { - + Client client = ClientBuilder.newClient(); + webTarget = client.target("http://export.arxiv.org/api/query"); } @@ -115,7 +100,7 @@ public Collection findMatchingRecords(Item item) throws MetadataSo @Override public Collection findMatchingRecords(Query query) throws MetadataSourceException { - return null; + return retry(new FindMatchingRecordCallable(query)); } private class SearchByQueryCallable implements Callable> { @@ -140,62 +125,19 @@ public List call() throws Exception { String queryString = query.getParameterAsClass("query", String.class); Integer start = query.getParameterAsClass("start", Integer.class); Integer maxResult = query.getParameterAsClass("count", Integer.class); - - HttpGet method = null; - try { - HttpClient client = new DefaultHttpClient(); - HttpParams params = client.getParams(); - params.setIntParameter(CoreConnectionPNames.CONNECTION_TIMEOUT, timeout); - - try { - URIBuilder uriBuilder = new URIBuilder("http://export.arxiv.org/api/query"); - uriBuilder.addParameter("search_query", queryString); - if (maxResult != null) { - uriBuilder.addParameter("max_results", String.valueOf(maxResult)); - } - if (start != null) { - uriBuilder.addParameter("start", String.valueOf(start)); - } - method = new HttpGet(uriBuilder.build()); - } catch (URISyntaxException ex) { - throw new HttpException(ex.getMessage()); - } - - // Execute the method. 
- HttpResponse response = client.execute(method); - StatusLine responseStatus = response.getStatusLine(); - int statusCode = responseStatus.getStatusCode(); - - if (statusCode != HttpStatus.SC_OK) { - if (statusCode == HttpStatus.SC_BAD_REQUEST) { - throw new RuntimeException("arXiv query is not valid"); - } else { - throw new RuntimeException("Http call failed: " - + responseStatus); - } - } - - try { - InputStreamReader isReader = new InputStreamReader(response.getEntity().getContent()); - BufferedReader reader = new BufferedReader(isReader); - StringBuilder sb = new StringBuilder(); - String str; - while ((str = reader.readLine()) != null) { - sb.append(str); - } - System.out.println("XML: " + sb.toString()); - List omElements = splitToRecords(sb.toString()); - for (OMElement record : omElements) { - results.add(transformSourceRecords(record)); - } - } catch (Exception e) { - throw new RuntimeException( - "ArXiv identifier is not valid or not exist"); - } - } finally { - if (method != null) { - method.releaseConnection(); - } + WebTarget local = webTarget.queryParam("search_query", queryString); + if (maxResult != null) { + local = local.queryParam("max_results", String.valueOf(maxResult)); + } + if (start != null) { + local = local.queryParam("start", String.valueOf(start)); + } + Invocation.Builder invocationBuilder = local.request(MediaType.TEXT_PLAIN_TYPE); + Response response = invocationBuilder.get(); + String responseString = response.readEntity(String.class); + List omElements = splitToRecords(responseString); + for (OMElement record : omElements) { + results.add(transformSourceRecords(record)); } return results; } @@ -217,78 +159,64 @@ private SearchByIdCallable(String id) { public List call() throws Exception { List results = new ArrayList(); String arxivid = query.getParameterAsClass("id", String.class); - HttpGet method = null; - try { - HttpClient client = new DefaultHttpClient(); - HttpParams params = client.getParams(); - 
params.setIntParameter(CoreConnectionPNames.CONNECTION_TIMEOUT, timeout); - try { - URIBuilder uriBuilder = new URIBuilder("http://export.arxiv.org/api/query"); - if (StringUtils.isNotBlank(arxivid)) { - arxivid = arxivid.trim(); - if (arxivid.startsWith("http://arxiv.org/abs/")) { - arxivid = arxivid.substring("http://arxiv.org/abs/".length()); - } else if (arxivid.toLowerCase().startsWith("arxiv:")) { - arxivid = arxivid.substring("arxiv:".length()); - } - uriBuilder.addParameter("id_list", arxivid); - method = new HttpGet(uriBuilder.build()); - } - } catch (URISyntaxException ex) { - throw new HttpException(ex.getMessage()); - } - - // Execute the method. - HttpResponse response = client.execute(method); - StatusLine responseStatus = response.getStatusLine(); - int statusCode = responseStatus.getStatusCode(); - if (statusCode != HttpStatus.SC_OK) { - if (statusCode == HttpStatus.SC_BAD_REQUEST) { - throw new RuntimeException("arXiv query is not valid"); - } else { - throw new RuntimeException("Http call failed: " - + responseStatus); - } - } - try { - InputStreamReader isReader = new InputStreamReader(response.getEntity().getContent()); - BufferedReader reader = new BufferedReader(isReader); - StringBuffer sb = new StringBuffer(); - String str; - while ((str = reader.readLine()) != null) { - sb.append(str); - } - List omElements = splitToRecords(sb.toString()); - for (OMElement record : omElements) { - results.add(transformSourceRecords(record)); - } - } catch (Exception e) { - throw new RuntimeException( - "ArXiv identifier is not valid or not exist"); - } - } finally { - if (method != null) { - method.releaseConnection(); + if (StringUtils.isNotBlank(arxivid)) { + arxivid = arxivid.trim(); + if (arxivid.startsWith("http://arxiv.org/abs/")) { + arxivid = arxivid.substring("http://arxiv.org/abs/".length()); + } else if (arxivid.toLowerCase().startsWith("arxiv:")) { + arxivid = arxivid.substring("arxiv:".length()); } } + WebTarget local = 
webTarget.queryParam("id_list", arxivid); + Invocation.Builder invocationBuilder = local.request(MediaType.TEXT_PLAIN_TYPE); + Response response = invocationBuilder.get(); + String responseString = response.readEntity(String.class); + List omElements = splitToRecords(responseString); + for (OMElement record : omElements) { + results.add(transformSourceRecords(record)); + } return results; } } private class FindMatchingRecordCallable implements Callable> { - private Query query; - private FindMatchingRecordCallable(Item item) throws MetadataSourceException { - query = getGenerateQueryForItem().generateQueryForItem(item); - } + private Query query; - public FindMatchingRecordCallable(Query q) { + private FindMatchingRecordCallable(Query q) { query = q; } @Override public List call() throws Exception { - return null; + String queryString = getQuery(this.query); + List results = new ArrayList(); + WebTarget local = webTarget.queryParam("search_query", queryString); + Invocation.Builder invocationBuilder = local.request(MediaType.TEXT_PLAIN_TYPE); + Response response = invocationBuilder.get(); + String responseString = response.readEntity(String.class); + List omElements = splitToRecords(responseString); + for (OMElement record : omElements) { + results.add(transformSourceRecords(record)); + } + return results; + } + + private String getQuery(Query query) { + String title = query.getParameterAsClass("title", String.class); + String author = query.getParameterAsClass("author", String.class); + StringBuffer queryString = new StringBuffer(); + if (StringUtils.isNotBlank(title)) { + queryString.append("ti:\"").append(title).append("\""); + } + if (StringUtils.isNotBlank(author)) { + // [FAU] + if (queryString.length() > 0) { + queryString.append(" AND "); + } + queryString.append("au:\"").append(author).append("\""); + } + return queryString.toString(); } } @@ -306,5 +234,4 @@ private static List splitToRecords(String recordsSrc) { } } - } diff --git 
a/dspace-api/src/main/java/org/dspace/importer/external/datamodel/Query.java b/dspace-api/src/main/java/org/dspace/importer/external/datamodel/Query.java index 8c5e1b394a01..8f392bdb5253 100644 --- a/dspace-api/src/main/java/org/dspace/importer/external/datamodel/Query.java +++ b/dspace-api/src/main/java/org/dspace/importer/external/datamodel/Query.java @@ -71,7 +71,7 @@ public T getParameterAsClass(String key, Class clazz) { return null; } else { Object o = c.iterator().next(); - if (clazz.isAssignableFrom(o.getClass())) { + if (o != null && clazz.isAssignableFrom(o.getClass())) { return (T) o; } else { return null; diff --git a/dspace/config/spring/api/arxiv-integration.xml b/dspace/config/spring/api/arxiv-integration.xml index d16a1ae52a40..a22bfe9eeb86 100644 --- a/dspace/config/spring/api/arxiv-integration.xml +++ b/dspace/config/spring/api/arxiv-integration.xml @@ -30,55 +30,55 @@ - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + @@ -118,8 +118,15 @@ - - + + + + + + + @@ -133,4 +140,4 @@ - \ No newline at end of file + From b15fa7c5b0c381dd05d6493d0b5dd3e08a402c02 Mon Sep 17 00:00:00 2001 From: Pasquale Cavallo Date: Thu, 2 Jul 2020 02:32:11 +0200 Subject: [PATCH 07/34] show ids and links in response --- .../ArXivIdMetadataContributor.java | 180 ++++++++++++++++++ .../ArXivImportMetadataSourceServiceImpl.java | 167 ++++++++++++++-- .../spring-dspace-addon-import-services.xml | 2 +- .../config/spring/api/arxiv-integration.xml | 115 +++++------ 4 files changed, 377 insertions(+), 87 deletions(-) create mode 100644 dspace-api/src/main/java/org/dspace/importer/external/arxiv/metadatamapping/contributor/ArXivIdMetadataContributor.java diff --git a/dspace-api/src/main/java/org/dspace/importer/external/arxiv/metadatamapping/contributor/ArXivIdMetadataContributor.java b/dspace-api/src/main/java/org/dspace/importer/external/arxiv/metadatamapping/contributor/ArXivIdMetadataContributor.java new file mode 100644 index 000000000000..018cde8ac79b --- 
/dev/null +++ b/dspace-api/src/main/java/org/dspace/importer/external/arxiv/metadatamapping/contributor/ArXivIdMetadataContributor.java @@ -0,0 +1,180 @@ +/** + * The contents of this file are subject to the license and copyright + * detailed in the LICENSE and NOTICE files at the root of the source + * tree and available online at + * + * http://www.dspace.org/license/ + */ +package org.dspace.importer.external.arxiv.metadatamapping.contributor; + +import java.util.Collection; +import java.util.LinkedList; +import java.util.List; +import java.util.Map; +import javax.annotation.Resource; + +import org.apache.axiom.om.OMAttribute; +import org.apache.axiom.om.OMElement; +import org.apache.axiom.om.OMText; +import org.apache.axiom.om.xpath.AXIOMXPath; +import org.dspace.importer.external.metadatamapping.MetadataFieldConfig; +import org.dspace.importer.external.metadatamapping.MetadataFieldMapping; +import org.dspace.importer.external.metadatamapping.MetadatumDTO; +import org.dspace.importer.external.metadatamapping.contributor.MetadataContributor; +import org.jaxen.JaxenException; +import org.springframework.beans.factory.annotation.Required; + +public class ArXivIdMetadataContributor implements MetadataContributor { + private MetadataFieldConfig field; + + /** + * Return prefixToNamespaceMapping + * + * @return a prefixToNamespaceMapping map + */ + public Map getPrefixToNamespaceMapping() { + return prefixToNamespaceMapping; + } + + private MetadataFieldMapping> metadataFieldMapping; + + /** + * Return metadataFieldMapping + * + * @return MetadataFieldMapping + */ + public MetadataFieldMapping> getMetadataFieldMapping() { + return metadataFieldMapping; + } + + /** + * Set the metadataFieldMapping of this ArXivIdMetadataContributor + * + * @param metadataFieldMapping the new mapping. 
+ */ + public void setMetadataFieldMapping( + MetadataFieldMapping> metadataFieldMapping) { + this.metadataFieldMapping = metadataFieldMapping; + } + + /** + * Set the prefixToNamespaceMapping for this object, + * + * @param prefixToNamespaceMapping the new mapping. + */ + @Resource(name = "isiFullprefixMapping") + public void setPrefixToNamespaceMapping(Map prefixToNamespaceMapping) { + this.prefixToNamespaceMapping = prefixToNamespaceMapping; + } + + private Map prefixToNamespaceMapping; + + /** + * Initialize ArXivIdMetadataContributor with a query, prefixToNamespaceMapping and MetadataFieldConfig + * + * @param query query string + * @param prefixToNamespaceMapping metadata prefix to namespace mapping + * @param field + * MetadataFieldConfig + */ + public ArXivIdMetadataContributor(String query, Map prefixToNamespaceMapping, + MetadataFieldConfig field) { + this.query = query; + this.prefixToNamespaceMapping = prefixToNamespaceMapping; + this.field = field; + } + + /** + * Empty constructor for ArXivIdMetadataContributor + */ + public ArXivIdMetadataContributor() { + + } + + private String query; + + /** + * Return the MetadataFieldConfig used while retrieving MetadatumDTO + * + * @return MetadataFieldConfig + */ + public MetadataFieldConfig getField() { + return field; + } + + /** + * Setting the MetadataFieldConfig + * + * @param field MetadataFieldConfig used while retrieving MetadatumDTO + */ + @Required + public void setField(MetadataFieldConfig field) { + this.field = field; + } + + /** + * Return query used to create an xpathExpression on, this query is used to + * + * @return the query this instance is based on + */ + public String getQuery() { + return query; + } + + @Required + public void setQuery(String query) { + this.query = query; + } + + /** + * Retrieve the metadata associated with the given object. 
+ * Depending on the retrieved node (using the query), different types of values will be added to the MetadatumDTO + * list + * + * @param t A class to retrieve metadata from. + * @return a collection of import records. Only the identifier of the found records may be put in the record. + */ + @Override + public Collection contributeMetadata(OMElement t) { + List values = new LinkedList<>(); + try { + AXIOMXPath xpath = new AXIOMXPath(query); + for (String ns : prefixToNamespaceMapping.keySet()) { + xpath.addNamespace(prefixToNamespaceMapping.get(ns), ns); + } + List nodes = xpath.selectNodes(t); + for (Object el : nodes) { + if (el instanceof OMElement) { + values.add(metadataFieldMapping.toDCValue(field, ((OMElement) el).getText())); + } else if (el instanceof OMAttribute) { + values.add(metadataFieldMapping.toDCValue(field, ((OMAttribute) el).getAttributeValue())); + } else if (el instanceof String) { + values.add(metadataFieldMapping.toDCValue(field, (String) el)); + } else if (el instanceof OMText) { + values.add(metadataFieldMapping.toDCValue(field, ((OMText) el).getText())); + } else { + System.err.println("node of type: " + el.getClass()); + } + } + parseValue(values); + return values; + } catch (JaxenException e) { + System.err.println(query); + throw new RuntimeException(e); + } + } + + private void parseValue(List dtos) { + if (dtos != null) { + for (MetadatumDTO dto : dtos) { + if (dto != null && dto.getValue() != null) { + int startIndex = dto.getValue().lastIndexOf('/'); + int endIndex = dto.getValue().length(); + String id = dto.getValue().substring(startIndex + 1, endIndex); + dto.setValue(id); + } + } + } + } + +} diff --git a/dspace-api/src/main/java/org/dspace/importer/external/arxiv/service/ArXivImportMetadataSourceServiceImpl.java b/dspace-api/src/main/java/org/dspace/importer/external/arxiv/service/ArXivImportMetadataSourceServiceImpl.java index 3ef9a2999f61..429a54a01370 100644 --- 
a/dspace-api/src/main/java/org/dspace/importer/external/arxiv/service/ArXivImportMetadataSourceServiceImpl.java +++ b/dspace-api/src/main/java/org/dspace/importer/external/arxiv/service/ArXivImportMetadataSourceServiceImpl.java @@ -31,78 +31,185 @@ import org.dspace.importer.external.service.AbstractImportMetadataSourceService; import org.jaxen.JaxenException; +/** + * Implements a data source for querying ArXiv + * + * @author Pasquale Cavallo (pasquale.cavallo at 4Science dot it) + * + */ public class ArXivImportMetadataSourceServiceImpl extends AbstractImportMetadataSourceService { private WebTarget webTarget; - + private String baseAddress; + + /** + * Find the number of records matching a string query. Supports pagination + * + * @param query a query string to base the search on. + * @param start offset to start at + * @param count number of records to retrieve. + * @return a set of records. Fully transformed. + * @throws MetadataSourceException if the underlying methods throw any exception. + */ @Override public Collection getRecords(String query, int start, int count) throws MetadataSourceException { return retry(new SearchByQueryCallable(query, count, start)); } + /** + * Find records based on a object query. + * + * @param query a query object to base the search on. + * @return a set of records. Fully transformed. + * @throws MetadataSourceException if the underlying methods throw any exception. + */ @Override public Collection getRecords(Query query) throws MetadataSourceException { return retry(new SearchByQueryCallable(query)); } + /** + * Find the number of records matching a query; + * + * @param query a query object to base the search on. + * @return the sum of the matching records over this import source + * @throws MetadataSourceException if the underlying methods throw any exception. 
+ */ @Override public int getNbRecords(String query) throws MetadataSourceException { - List records = retry(new SearchByQueryCallable(query, null, null)); - return records != null ? records.size() : 0; + return retry(new CountByQueryCallable(query)); } + + /** + * Find the number of records matching a query; + * + * @param query a query string to base the search on. + * @return the sum of the matching records over this import source + * @throws MetadataSourceException if the underlying methods throw any exception. + */ @Override public int getNbRecords(Query query) throws MetadataSourceException { - List records = retry(new SearchByQueryCallable(query)); - return records != null ? records.size() : 0; + return retry(new CountByQueryCallable(query)); } + /** + * Get a single record from the source by id + * + * @param id id of the record in ArXiv + * @return the first matching record + * @throws MetadataSourceException if the underlying methods throw any exception. + */ @Override public ImportRecord getRecord(String id) throws MetadataSourceException { List records = retry(new SearchByIdCallable(id)); - if (records != null && records.size() > 1) { - throw new MetadataSourceException("More than one result found"); - } - return records == null ? null : records.get(0); + return records == null || records.isEmpty() ? null : records.get(0); } + /** + * Get a single record from the source. + * + * @param query a query matching a single record + * @return the first matching record + * @throws MetadataSourceException if the underlying methods throw any exception. + */ @Override public ImportRecord getRecord(Query query) throws MetadataSourceException { List records = retry(new SearchByIdCallable(query)); - if (records != null && records.size() > 1) { - throw new MetadataSourceException("More than one result found"); - } - return records == null ? null : records.get(0); + return records == null || records.isEmpty() ? 
null : records.get(0); } - + /** + * Initialize the class + * + * @throws Exception on generic exception + */ @Override public void init() throws Exception { Client client = ClientBuilder.newClient(); - webTarget = client.target("http://export.arxiv.org/api/query"); + webTarget = client.target(baseAddress); } - - - - - + /** + * The string that identifies this import implementation. Preferable a URI + * + * @return the identifying uri + */ @Override public String getImportSource() { return "arxiv"; } + /** + * NOT IMPLEMENTED: Finds records based on an item + * + * @param item an item to base the search on + * @return a collection of import records. Only the identifier of the found records may be put in the record. + * @throws MetadataSourceException if the underlying methods throw any exception. + */ @Override public Collection findMatchingRecords(Item item) throws MetadataSourceException { throw new RuntimeException(); } + /** + * Finds records based on query object. + * Supports search by title and/or author + * + * @param query a query object to base the search on. + * @return a collection of import records. + * @throws MetadataSourceException if the underlying methods throw any exception. 
+ */ @Override public Collection findMatchingRecords(Query query) throws MetadataSourceException { return retry(new FindMatchingRecordCallable(query)); } + private class CountByQueryCallable implements Callable { + private Query query; + + + private CountByQueryCallable(String queryString) { + query = new Query(); + query.addParameter("query", queryString); + } + + private CountByQueryCallable(Query query) { + this.query = query; + } + + + @Override + public Integer call() throws Exception { + String queryString = query.getParameterAsClass("query", String.class); + Integer start = query.getParameterAsClass("start", Integer.class); + Integer maxResult = query.getParameterAsClass("count", Integer.class); + WebTarget local = webTarget.queryParam("search_query", queryString); + if (maxResult != null) { + local = local.queryParam("max_results", String.valueOf(maxResult)); + } + if (start != null) { + local = local.queryParam("start", String.valueOf(start)); + } + Invocation.Builder invocationBuilder = local.request(MediaType.TEXT_PLAIN_TYPE); + Response response = invocationBuilder.get(); + String responseString = response.readEntity(String.class); + OMXMLParserWrapper records = OMXMLBuilderFactory.createOMBuilder(new StringReader(responseString)); + OMElement element = records.getDocumentElement(); + AXIOMXPath xpath = null; + try { + xpath = new AXIOMXPath("opensearch:totalResults"); + xpath.addNamespace("opensearch", "http://a9.com/-/spec/opensearch/1.1/"); + OMElement count = (OMElement) xpath.selectSingleNode(element); + return Integer.parseInt(count.getText()); + } catch (JaxenException e) { + return null; + } + } + } + + private class SearchByQueryCallable implements Callable> { private Query query; @@ -220,7 +327,7 @@ private String getQuery(Query query) { } } - private static List splitToRecords(String recordsSrc) { + private List splitToRecords(String recordsSrc) { OMXMLParserWrapper records = OMXMLBuilderFactory.createOMBuilder(new StringReader(recordsSrc)); 
OMElement element = records.getDocumentElement(); AXIOMXPath xpath = null; @@ -234,4 +341,22 @@ private static List splitToRecords(String recordsSrc) { } } + /** + * Return the baseAddress set to this object + * + * @return The String object that represents the baseAddress of this object + */ + public String getBaseAddress() { + return baseAddress; + } + + /** + * Set the baseAddress to this object + * + * @param baseAddress The String object that represents the baseAddress of this object + */ + public void setBaseAddress(String baseAddress) { + this.baseAddress = baseAddress; + } + } diff --git a/dspace-api/src/main/resources/spring/spring-dspace-addon-import-services.xml b/dspace-api/src/main/resources/spring/spring-dspace-addon-import-services.xml index a351280b98b2..d2d6dd10c1df 100644 --- a/dspace-api/src/main/resources/spring/spring-dspace-addon-import-services.xml +++ b/dspace-api/src/main/resources/spring/spring-dspace-addon-import-services.xml @@ -37,7 +37,7 @@ class="org.dspace.importer.external.arxiv.service.ArXivImportMetadataSourceServiceImpl" scope="singleton"> - + diff --git a/dspace/config/spring/api/arxiv-integration.xml b/dspace/config/spring/api/arxiv-integration.xml index a22bfe9eeb86..9b453e3fc9e0 100644 --- a/dspace/config/spring/api/arxiv-integration.xml +++ b/dspace/config/spring/api/arxiv-integration.xml @@ -17,107 +17,93 @@ only matters here for postprocessing of the value. The mapped MetadatumContributor has full control over what metadatafield is generated. 
- - - - - - + - - + + + + + + - - - - - - - - - - - - + + - - - - - + + - - - - - - - + + + + + - - - - - + + - - - - + + + + - - - + + - - + + + + - - + - - + + + + - - - + + - - - + + + + + - - - + + - - + + + + - - - + + + + @@ -129,7 +115,6 @@ - From f71866b5e300453aec669e24ea087204d93ff90c Mon Sep 17 00:00:00 2001 From: Pasquale Cavallo Date: Thu, 2 Jul 2020 10:30:49 +0200 Subject: [PATCH 08/34] Add comment --- .../contributor/ArXivIdMetadataContributor.java | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/dspace-api/src/main/java/org/dspace/importer/external/arxiv/metadatamapping/contributor/ArXivIdMetadataContributor.java b/dspace-api/src/main/java/org/dspace/importer/external/arxiv/metadatamapping/contributor/ArXivIdMetadataContributor.java index 018cde8ac79b..ed38f955c8ea 100644 --- a/dspace-api/src/main/java/org/dspace/importer/external/arxiv/metadatamapping/contributor/ArXivIdMetadataContributor.java +++ b/dspace-api/src/main/java/org/dspace/importer/external/arxiv/metadatamapping/contributor/ArXivIdMetadataContributor.java @@ -24,6 +24,13 @@ import org.jaxen.JaxenException; import org.springframework.beans.factory.annotation.Required; +/** + * Arxiv specific implementation of {@link MetadataContributor} + * Responsible for generating the ArXiv Id from the retrieved item. 
+ * + * @author Pasquale Cavallo (pasquale.cavallo at 4science dot it) + * + */ public class ArXivIdMetadataContributor implements MetadataContributor { private MetadataFieldConfig field; From 03ac31dc109d733917c94f1bf1cb8d26a6f14bcf Mon Sep 17 00:00:00 2001 From: Pasquale Cavallo Date: Thu, 2 Jul 2020 12:05:50 +0200 Subject: [PATCH 09/34] Exception handling --- .../transform/GenerateArXivQueryService.java | 53 ------------------- .../ArXivImportMetadataSourceServiceImpl.java | 3 +- .../DSpaceApiExceptionControllerAdvice.java | 9 ++++ .../config/spring/api/arxiv-integration.xml | 9 ---- 4 files changed, 11 insertions(+), 63 deletions(-) delete mode 100644 dspace-api/src/main/java/org/dspace/importer/external/arxiv/metadatamapping/transform/GenerateArXivQueryService.java diff --git a/dspace-api/src/main/java/org/dspace/importer/external/arxiv/metadatamapping/transform/GenerateArXivQueryService.java b/dspace-api/src/main/java/org/dspace/importer/external/arxiv/metadatamapping/transform/GenerateArXivQueryService.java deleted file mode 100644 index 7f5e08cb5af3..000000000000 --- a/dspace-api/src/main/java/org/dspace/importer/external/arxiv/metadatamapping/transform/GenerateArXivQueryService.java +++ /dev/null @@ -1,53 +0,0 @@ -/** - * The contents of this file are subject to the license and copyright - * detailed in the LICENSE and NOTICE files at the root of the source - * tree and available online at - * - * http://www.dspace.org/license/ - */ -package org.dspace.importer.external.arxiv.metadatamapping.transform; - -import java.util.List; - -import org.dspace.content.Item; -import org.dspace.content.MetadataValue; -import org.dspace.content.factory.ContentServiceFactory; -import org.dspace.content.service.ItemService; -import org.dspace.importer.external.datamodel.Query; -import org.dspace.importer.external.exception.MetadataSourceException; -import org.dspace.importer.external.metadatamapping.transform.GenerateQueryService; - -public class GenerateArXivQueryService 
implements GenerateQueryService { - - /** - * Create a Query object based on a given item. - * If the item has at least 1 value for dc.identifier.doi, the first one will be used. - * If no DOI is found, the title will be used. - * When no DOI or title is found, an null object is returned instead. - * - * @param item the Item to create a Query from - */ - @Override - public Query generateQueryForItem(Item item) throws MetadataSourceException { - Query query = new Query(); - - // Retrieve an instance of the ItemService to access business calls on an item. - ItemService itemService = ContentServiceFactory.getInstance().getItemService(); - List doi = itemService.getMetadata(item, "dc", "identifier", "doi", Item.ANY); - - if (doi.size() > 0) { - query.addParameter("term", doi.get(0).getValue()); - query.addParameter("field", "ELocationID"); - return query; - } - - List title = itemService.getMetadata(item, "dc", "title", null, Item.ANY); - - if (title.size() > 0) { - query.addParameter("term", title.get(0).getValue()); - query.addParameter("field", "title"); - return query; - } - return null; - } -} diff --git a/dspace-api/src/main/java/org/dspace/importer/external/arxiv/service/ArXivImportMetadataSourceServiceImpl.java b/dspace-api/src/main/java/org/dspace/importer/external/arxiv/service/ArXivImportMetadataSourceServiceImpl.java index 429a54a01370..2995466c3c4a 100644 --- a/dspace-api/src/main/java/org/dspace/importer/external/arxiv/service/ArXivImportMetadataSourceServiceImpl.java +++ b/dspace-api/src/main/java/org/dspace/importer/external/arxiv/service/ArXivImportMetadataSourceServiceImpl.java @@ -27,6 +27,7 @@ import org.dspace.content.Item; import org.dspace.importer.external.datamodel.ImportRecord; import org.dspace.importer.external.datamodel.Query; +import org.dspace.importer.external.exception.ExternalProviderMethodNotImplementedException; import org.dspace.importer.external.exception.MetadataSourceException; import 
org.dspace.importer.external.service.AbstractImportMetadataSourceService; import org.jaxen.JaxenException; @@ -150,7 +151,7 @@ public String getImportSource() { */ @Override public Collection findMatchingRecords(Item item) throws MetadataSourceException { - throw new RuntimeException(); + throw new ExternalProviderMethodNotImplementedException("This method is not implemented for ArXiv"); } /** diff --git a/dspace-server-webapp/src/main/java/org/dspace/app/rest/exception/DSpaceApiExceptionControllerAdvice.java b/dspace-server-webapp/src/main/java/org/dspace/app/rest/exception/DSpaceApiExceptionControllerAdvice.java index d255b6fe2724..07cc31420e9a 100644 --- a/dspace-server-webapp/src/main/java/org/dspace/app/rest/exception/DSpaceApiExceptionControllerAdvice.java +++ b/dspace-server-webapp/src/main/java/org/dspace/app/rest/exception/DSpaceApiExceptionControllerAdvice.java @@ -16,6 +16,7 @@ import org.dspace.app.rest.security.RestAuthenticationService; import org.dspace.authorize.AuthorizeException; +import org.dspace.importer.external.exception.ExternalProviderMethodNotImplementedException; import org.springframework.beans.TypeMismatchException; import org.springframework.beans.factory.annotation.Autowired; import org.springframework.core.annotation.AnnotationUtils; @@ -38,6 +39,8 @@ * @author Tom Desair (tom dot desair at atmire dot com) * @author Frederic Van Reet (frederic dot vanreet at atmire dot com) * @author Andrea Bollini (andrea.bollini at 4science.it) + * @author Pasquale Cavallo (pasquale.cavallo at 4science dot it) + * */ @ControllerAdvice public class DSpaceApiExceptionControllerAdvice extends ResponseEntityExceptionHandler { @@ -45,6 +48,12 @@ public class DSpaceApiExceptionControllerAdvice extends ResponseEntityExceptionH @Autowired private RestAuthenticationService restAuthenticationService; + @ExceptionHandler(ExternalProviderMethodNotImplementedException.class) + protected void externalResourceMethodNotImplementedException(HttpServletRequest 
request, + HttpServletResponse response, ExternalProviderMethodNotImplementedException ex) throws IOException { + sendErrorResponse(request, response, ex, ex.getMessage(), HttpServletResponse.SC_NOT_IMPLEMENTED); + } + @ExceptionHandler({AuthorizeException.class, RESTAuthorizationException.class, AccessDeniedException.class}) protected void handleAuthorizeException(HttpServletRequest request, HttpServletResponse response, Exception ex) throws IOException { diff --git a/dspace/config/spring/api/arxiv-integration.xml b/dspace/config/spring/api/arxiv-integration.xml index 9b453e3fc9e0..e963e73a2055 100644 --- a/dspace/config/spring/api/arxiv-integration.xml +++ b/dspace/config/spring/api/arxiv-integration.xml @@ -101,9 +101,6 @@ - - - @@ -119,10 +116,4 @@ - - Defines how an org.dspace.content.Item is mapped to a query in scopus. Please note that exactly one of - these must be present. If multiple are present the result is undefined. - - - From 02953eb087c931dba77fd84d3e6ba788464b4458 Mon Sep 17 00:00:00 2001 From: Pasquale Cavallo Date: Thu, 2 Jul 2020 14:45:04 +0200 Subject: [PATCH 10/34] Update comment and minor fix --- .../metadatamapping/ArXivFieldMapping.java | 14 +++++++ .../ArXivIdMetadataContributor.java | 6 +-- ...ProviderMethodNotImplementedException.java | 40 +++++++++++++++++++ 3 files changed, 57 insertions(+), 3 deletions(-) create mode 100644 dspace-api/src/main/java/org/dspace/importer/external/exception/ExternalProviderMethodNotImplementedException.java diff --git a/dspace-api/src/main/java/org/dspace/importer/external/arxiv/metadatamapping/ArXivFieldMapping.java b/dspace-api/src/main/java/org/dspace/importer/external/arxiv/metadatamapping/ArXivFieldMapping.java index c4f6996a276a..272b14901514 100644 --- a/dspace-api/src/main/java/org/dspace/importer/external/arxiv/metadatamapping/ArXivFieldMapping.java +++ b/dspace-api/src/main/java/org/dspace/importer/external/arxiv/metadatamapping/ArXivFieldMapping.java @@ -12,8 +12,22 @@ import 
org.dspace.importer.external.metadatamapping.AbstractMetadataFieldMapping; +/** + * An implementation of {@link AbstractMetadataFieldMapping} + * Responsible for defining the mapping of the ArXiv metadatum fields on the DSpace metadatum fields + * + * @author Pasquale Cavallo (pasquale.cavallo at 4science dot it) + */ public class ArXivFieldMapping extends AbstractMetadataFieldMapping { + /** + * Defines which metadatum is mapped on which metadatum. Note that while the key must be unique it + * only matters here for postprocessing of the value. The mapped MetadatumContributor has full control over + * what metadatafield is generated. + * + * @param metadataFieldMap The map containing the link between retrieve metadata and metadata that will be set to + * the item. + */ @Override @Resource(name = "arxivMetadataFieldMap") public void setMetadataFieldMap(Map metadataFieldMap) { diff --git a/dspace-api/src/main/java/org/dspace/importer/external/arxiv/metadatamapping/contributor/ArXivIdMetadataContributor.java b/dspace-api/src/main/java/org/dspace/importer/external/arxiv/metadatamapping/contributor/ArXivIdMetadataContributor.java index ed38f955c8ea..727a3fcf4e37 100644 --- a/dspace-api/src/main/java/org/dspace/importer/external/arxiv/metadatamapping/contributor/ArXivIdMetadataContributor.java +++ b/dspace-api/src/main/java/org/dspace/importer/external/arxiv/metadatamapping/contributor/ArXivIdMetadataContributor.java @@ -174,10 +174,10 @@ public Collection contributeMetadata(OMElement t) { private void parseValue(List dtos) { if (dtos != null) { for (MetadatumDTO dto : dtos) { - if (dto != null && dto.getValue() != null) { - int startIndex = dto.getValue().lastIndexOf('/'); + if (dto != null && dto.getValue() != null && dto.getValue().contains("/")) { + int startIndex = dto.getValue().lastIndexOf('/') + 1; int endIndex = dto.getValue().length(); - String id = dto.getValue().substring(startIndex + 1, endIndex); + String id = dto.getValue().substring(startIndex, endIndex); 
dto.setValue(id); } } diff --git a/dspace-api/src/main/java/org/dspace/importer/external/exception/ExternalProviderMethodNotImplementedException.java b/dspace-api/src/main/java/org/dspace/importer/external/exception/ExternalProviderMethodNotImplementedException.java new file mode 100644 index 000000000000..e8df7a38615a --- /dev/null +++ b/dspace-api/src/main/java/org/dspace/importer/external/exception/ExternalProviderMethodNotImplementedException.java @@ -0,0 +1,40 @@ +/** + * The contents of this file are subject to the license and copyright + * detailed in the LICENSE and NOTICE files at the root of the source + * tree and available online at + * + * http://www.dspace.org/license/ + */ +package org.dspace.importer.external.exception; + +/** + * Exception used when an External Provider don't implements the invoked method + * + * @author Pasquale Cavallo (pasquale.cavallo at 4science dot it) + * + */ +public class ExternalProviderMethodNotImplementedException extends RuntimeException { + + private static final long serialVersionUID = 5268699485635863003L; + + public ExternalProviderMethodNotImplementedException() { + super(); + } + + public ExternalProviderMethodNotImplementedException(String message, Throwable cause, boolean enableSuppression, + boolean writableStackTrace) { + super(message, cause, enableSuppression, writableStackTrace); + } + + public ExternalProviderMethodNotImplementedException(String message, Throwable cause) { + super(message, cause); + } + + public ExternalProviderMethodNotImplementedException(String message) { + super(message); + } + + public ExternalProviderMethodNotImplementedException(Throwable cause) { + super(cause); + } +} From dbde82478b052371bfc1b3780a622797b3ebf333 Mon Sep 17 00:00:00 2001 From: Pasquale Cavallo Date: Thu, 2 Jul 2020 16:22:36 +0200 Subject: [PATCH 11/34] remove unused code --- .../ArXivImportMetadataSourceServiceImpl.java | 11 +- ...ProviderMethodNotImplementedException.java | 40 --- 
.../submit/lookup/ArXivFileDataLoader.java | 146 -------- .../submit/lookup/ArXivOnlineDataLoader.java | 84 ----- .../dspace/submit/lookup/ArXivService.java | 159 --------- .../org/dspace/submit/lookup/ArxivUtils.java | 151 --------- .../submit/lookup/PubmedFileDataLoader.java | 148 -------- .../submit/lookup/PubmedOnlineDataLoader.java | 116 ------- .../dspace/submit/lookup/PubmedService.java | 265 --------------- .../org/dspace/submit/lookup/PubmedUtils.java | 316 ------------------ .../spring-dspace-addon-import-services.xml | 1 - .../DSpaceApiExceptionControllerAdvice.java | 7 - dspace/config/spring/api/bte.xml | 107 ------ .../spring/api/step-processing-listener.xml | 2 - 14 files changed, 4 insertions(+), 1549 deletions(-) delete mode 100644 dspace-api/src/main/java/org/dspace/importer/external/exception/ExternalProviderMethodNotImplementedException.java delete mode 100644 dspace-api/src/main/java/org/dspace/submit/lookup/ArXivFileDataLoader.java delete mode 100644 dspace-api/src/main/java/org/dspace/submit/lookup/ArXivOnlineDataLoader.java delete mode 100644 dspace-api/src/main/java/org/dspace/submit/lookup/ArXivService.java delete mode 100644 dspace-api/src/main/java/org/dspace/submit/lookup/ArxivUtils.java delete mode 100644 dspace-api/src/main/java/org/dspace/submit/lookup/PubmedFileDataLoader.java delete mode 100644 dspace-api/src/main/java/org/dspace/submit/lookup/PubmedOnlineDataLoader.java delete mode 100644 dspace-api/src/main/java/org/dspace/submit/lookup/PubmedService.java delete mode 100644 dspace-api/src/main/java/org/dspace/submit/lookup/PubmedUtils.java diff --git a/dspace-api/src/main/java/org/dspace/importer/external/arxiv/service/ArXivImportMetadataSourceServiceImpl.java b/dspace-api/src/main/java/org/dspace/importer/external/arxiv/service/ArXivImportMetadataSourceServiceImpl.java index 2995466c3c4a..863a8144b4d3 100644 --- a/dspace-api/src/main/java/org/dspace/importer/external/arxiv/service/ArXivImportMetadataSourceServiceImpl.java +++ 
b/dspace-api/src/main/java/org/dspace/importer/external/arxiv/service/ArXivImportMetadataSourceServiceImpl.java @@ -12,6 +12,7 @@ import java.util.Collection; import java.util.List; import java.util.concurrent.Callable; +import javax.el.MethodNotFoundException; import javax.ws.rs.client.Client; import javax.ws.rs.client.ClientBuilder; import javax.ws.rs.client.Invocation; @@ -27,7 +28,6 @@ import org.dspace.content.Item; import org.dspace.importer.external.datamodel.ImportRecord; import org.dspace.importer.external.datamodel.Query; -import org.dspace.importer.external.exception.ExternalProviderMethodNotImplementedException; import org.dspace.importer.external.exception.MetadataSourceException; import org.dspace.importer.external.service.AbstractImportMetadataSourceService; import org.jaxen.JaxenException; @@ -143,15 +143,12 @@ public String getImportSource() { } /** - * NOT IMPLEMENTED: Finds records based on an item - * - * @param item an item to base the search on - * @return a collection of import records. Only the identifier of the found records may be put in the record. - * @throws MetadataSourceException if the underlying methods throw any exception. + * Expect this method will be not used and erased from the interface soon */ @Override public Collection findMatchingRecords(Item item) throws MetadataSourceException { - throw new ExternalProviderMethodNotImplementedException("This method is not implemented for ArXiv"); + // FIXME: we need this method? 
+ throw new MethodNotFoundException("This method is not implemented for ArXiv"); } /** diff --git a/dspace-api/src/main/java/org/dspace/importer/external/exception/ExternalProviderMethodNotImplementedException.java b/dspace-api/src/main/java/org/dspace/importer/external/exception/ExternalProviderMethodNotImplementedException.java deleted file mode 100644 index e8df7a38615a..000000000000 --- a/dspace-api/src/main/java/org/dspace/importer/external/exception/ExternalProviderMethodNotImplementedException.java +++ /dev/null @@ -1,40 +0,0 @@ -/** - * The contents of this file are subject to the license and copyright - * detailed in the LICENSE and NOTICE files at the root of the source - * tree and available online at - * - * http://www.dspace.org/license/ - */ -package org.dspace.importer.external.exception; - -/** - * Exception used when an External Provider don't implements the invoked method - * - * @author Pasquale Cavallo (pasquale.cavallo at 4science dot it) - * - */ -public class ExternalProviderMethodNotImplementedException extends RuntimeException { - - private static final long serialVersionUID = 5268699485635863003L; - - public ExternalProviderMethodNotImplementedException() { - super(); - } - - public ExternalProviderMethodNotImplementedException(String message, Throwable cause, boolean enableSuppression, - boolean writableStackTrace) { - super(message, cause, enableSuppression, writableStackTrace); - } - - public ExternalProviderMethodNotImplementedException(String message, Throwable cause) { - super(message, cause); - } - - public ExternalProviderMethodNotImplementedException(String message) { - super(message); - } - - public ExternalProviderMethodNotImplementedException(Throwable cause) { - super(cause); - } -} diff --git a/dspace-api/src/main/java/org/dspace/submit/lookup/ArXivFileDataLoader.java b/dspace-api/src/main/java/org/dspace/submit/lookup/ArXivFileDataLoader.java deleted file mode 100644 index ebc898e4cfb0..000000000000 --- 
a/dspace-api/src/main/java/org/dspace/submit/lookup/ArXivFileDataLoader.java +++ /dev/null @@ -1,146 +0,0 @@ -/** - * The contents of this file are subject to the license and copyright - * detailed in the LICENSE and NOTICE files at the root of the source - * tree and available online at - * - * http://www.dspace.org/license/ - */ - -package org.dspace.submit.lookup; - -import java.io.File; -import java.io.FileInputStream; -import java.io.FileNotFoundException; -import java.io.IOException; -import java.io.InputStream; -import java.util.List; -import java.util.Map; -import javax.xml.parsers.DocumentBuilder; -import javax.xml.parsers.DocumentBuilderFactory; -import javax.xml.parsers.ParserConfigurationException; - -import gr.ekt.bte.core.DataLoadingSpec; -import gr.ekt.bte.core.Record; -import gr.ekt.bte.core.RecordSet; -import gr.ekt.bte.core.Value; -import gr.ekt.bte.dataloader.FileDataLoader; -import gr.ekt.bte.exceptions.MalformedSourceException; -import org.apache.commons.lang3.StringUtils; -import org.apache.logging.log4j.Logger; -import org.dspace.app.util.XMLUtils; -import org.w3c.dom.Document; -import org.w3c.dom.Element; -import org.xml.sax.SAXException; - -/** - * @author Andrea Bollini - * @author Kostas Stamatis - * @author Luigi Andrea Pascarelli - * @author Panagiotis Koutsourakis - */ -public class ArXivFileDataLoader extends FileDataLoader { - - private static Logger log = org.apache.logging.log4j.LogManager.getLogger(ArXivFileDataLoader.class); - - Map fieldMap; // mapping between service fields and local - // intermediate fields - - /** - * Empty constructor - */ - public ArXivFileDataLoader() { - } - - /** - * @param filename Name of file to load ArXiv data from. 
- */ - public ArXivFileDataLoader(String filename) { - super(filename); - } - - /* - * {@see gr.ekt.bte.core.DataLoader#getRecords()} - * - * @throws MalformedSourceException - */ - @Override - public RecordSet getRecords() throws MalformedSourceException { - - RecordSet recordSet = new RecordSet(); - - try { - InputStream inputStream = new FileInputStream(new File(filename)); - - DocumentBuilderFactory factory = DocumentBuilderFactory - .newInstance(); - factory.setValidating(false); - factory.setIgnoringComments(true); - factory.setIgnoringElementContentWhitespace(true); - - DocumentBuilder db = factory.newDocumentBuilder(); - Document inDoc = db.parse(inputStream); - - Element xmlRoot = inDoc.getDocumentElement(); - List dataRoots = XMLUtils.getElementList(xmlRoot, "entry"); - - for (Element dataRoot : dataRoots) { - Record record = ArxivUtils.convertArxixDomToRecord(dataRoot); - if (record != null) { - recordSet.addRecord(convertFields(record)); - } - } - } catch (FileNotFoundException e) { - log.error(e.getMessage(), e); - } catch (ParserConfigurationException e) { - log.error(e.getMessage(), e); - } catch (SAXException e) { - log.error(e.getMessage(), e); - } catch (IOException e) { - log.error(e.getMessage(), e); - } - - return recordSet; - } - - /* - * (non-Javadoc) - * - * @see - * gr.ekt.bte.core.DataLoader#getRecords(gr.ekt.bte.core.DataLoadingSpec) - */ - @Override - public RecordSet getRecords(DataLoadingSpec spec) - throws MalformedSourceException { - if (spec.getOffset() > 0) { - return new RecordSet(); - } - return getRecords(); - } - - public Record convertFields(Record publication) { - for (String fieldName : fieldMap.keySet()) { - String md = null; - if (fieldMap != null) { - md = this.fieldMap.get(fieldName); - } - - if (StringUtils.isBlank(md)) { - continue; - } else { - md = md.trim(); - } - - if (publication.isMutable()) { - List values = publication.getValues(fieldName); - publication.makeMutable().removeField(fieldName); - 
publication.makeMutable().addField(md, values); - } - } - - return publication; - } - - public void setFieldMap(Map fieldMap) { - this.fieldMap = fieldMap; - } -} diff --git a/dspace-api/src/main/java/org/dspace/submit/lookup/ArXivOnlineDataLoader.java b/dspace-api/src/main/java/org/dspace/submit/lookup/ArXivOnlineDataLoader.java deleted file mode 100644 index e47741262136..000000000000 --- a/dspace-api/src/main/java/org/dspace/submit/lookup/ArXivOnlineDataLoader.java +++ /dev/null @@ -1,84 +0,0 @@ -/** - * The contents of this file are subject to the license and copyright - * detailed in the LICENSE and NOTICE files at the root of the source - * tree and available online at - * - * http://www.dspace.org/license/ - */ -package org.dspace.submit.lookup; - -import java.io.IOException; -import java.util.ArrayList; -import java.util.Arrays; -import java.util.List; -import java.util.Map; -import java.util.Set; - -import gr.ekt.bte.core.Record; -import org.apache.http.HttpException; -import org.dspace.core.Context; - -/** - * @author Andrea Bollini - * @author Kostas Stamatis - * @author Luigi Andrea Pascarelli - * @author Panagiotis Koutsourakis - */ -public class ArXivOnlineDataLoader extends NetworkSubmissionLookupDataLoader { - protected ArXivService arXivService = new ArXivService(); - - protected boolean searchProvider = true; - - public void setArXivService(ArXivService arXivService) { - this.arXivService = arXivService; - } - - @Override - public List getSupportedIdentifiers() { - return Arrays.asList(new String[] {ARXIV, DOI}); - } - - public void setSearchProvider(boolean searchProvider) { - this.searchProvider = searchProvider; - } - - @Override - public boolean isSearchProvider() { - return searchProvider; - } - - @Override - public List getByIdentifier(Context context, - Map> keys) throws HttpException, IOException { - List results = new ArrayList(); - if (keys != null) { - Set dois = keys.get(DOI); - Set arxivids = keys.get(ARXIV); - List items = new 
ArrayList(); - if (dois != null && dois.size() > 0) { - items.addAll(arXivService.getByDOIs(dois)); - } - if (arxivids != null && arxivids.size() > 0) { - for (String arxivid : arxivids) { - items.add(arXivService.getByArXivIDs(arxivid)); - } - } - - for (Record item : items) { - results.add(convertFields(item)); - } - } - return results; - } - - @Override - public List search(Context context, String title, String author, - int year) throws HttpException, IOException { - List results = new ArrayList(); - List items = arXivService.searchByTerm(title, author, year); - for (Record item : items) { - results.add(convertFields(item)); - } - return results; - } -} diff --git a/dspace-api/src/main/java/org/dspace/submit/lookup/ArXivService.java b/dspace-api/src/main/java/org/dspace/submit/lookup/ArXivService.java deleted file mode 100644 index 0a32871758a9..000000000000 --- a/dspace-api/src/main/java/org/dspace/submit/lookup/ArXivService.java +++ /dev/null @@ -1,159 +0,0 @@ -/** - * The contents of this file are subject to the license and copyright - * detailed in the LICENSE and NOTICE files at the root of the source - * tree and available online at - * - * http://www.dspace.org/license/ - */ -package org.dspace.submit.lookup; - -import java.io.IOException; -import java.net.URISyntaxException; -import java.util.ArrayList; -import java.util.List; -import java.util.Set; -import javax.xml.parsers.DocumentBuilder; -import javax.xml.parsers.DocumentBuilderFactory; - -import gr.ekt.bte.core.Record; -import org.apache.commons.lang3.StringUtils; -import org.apache.http.HttpException; -import org.apache.http.HttpResponse; -import org.apache.http.HttpStatus; -import org.apache.http.StatusLine; -import org.apache.http.client.HttpClient; -import org.apache.http.client.methods.HttpGet; -import org.apache.http.client.utils.URIBuilder; -import org.apache.http.impl.client.DefaultHttpClient; -import org.apache.http.params.CoreConnectionPNames; -import org.apache.http.params.HttpParams; 
-import org.dspace.app.util.XMLUtils; -import org.w3c.dom.Document; -import org.w3c.dom.Element; - -/** - * @author Andrea Bollini - * @author Kostas Stamatis - * @author Luigi Andrea Pascarelli - * @author Panagiotis Koutsourakis - */ -public class ArXivService { - private int timeout = 1000; - - /** - * How long to wait for a connection to be established. - * - * @param timeout milliseconds - */ - public void setTimeout(int timeout) { - this.timeout = timeout; - } - - public List getByDOIs(Set dois) throws HttpException, - IOException { - if (dois != null && dois.size() > 0) { - String doisQuery = StringUtils.join(dois.iterator(), " OR "); - return search(doisQuery, null, 100); - } - return null; - } - - public List searchByTerm(String title, String author, int year) - throws HttpException, IOException { - StringBuffer query = new StringBuffer(); - if (StringUtils.isNotBlank(title)) { - query.append("ti:\"").append(title).append("\""); - } - if (StringUtils.isNotBlank(author)) { - // [FAU] - if (query.length() > 0) { - query.append(" AND "); - } - query.append("au:\"").append(author).append("\""); - } - return search(query.toString(), "", 10); - } - - protected List search(String query, String arxivid, int max_result) - throws IOException, HttpException { - List results = new ArrayList(); - HttpGet method = null; - try { - HttpClient client = new DefaultHttpClient(); - HttpParams params = client.getParams(); - params.setIntParameter(CoreConnectionPNames.CONNECTION_TIMEOUT, timeout); - - try { - URIBuilder uriBuilder = new URIBuilder("http://export.arxiv.org/api/query"); - uriBuilder.addParameter("id_list", arxivid); - uriBuilder.addParameter("search_query", query); - uriBuilder.addParameter("max_results", String.valueOf(max_result)); - method = new HttpGet(uriBuilder.build()); - } catch (URISyntaxException ex) { - throw new HttpException(ex.getMessage()); - } - - // Execute the method. 
- HttpResponse response = client.execute(method); - StatusLine responseStatus = response.getStatusLine(); - int statusCode = responseStatus.getStatusCode(); - - if (statusCode != HttpStatus.SC_OK) { - if (statusCode == HttpStatus.SC_BAD_REQUEST) { - throw new RuntimeException("arXiv query is not valid"); - } else { - throw new RuntimeException("Http call failed: " - + responseStatus); - } - } - - try { - DocumentBuilderFactory factory = DocumentBuilderFactory - .newInstance(); - factory.setValidating(false); - factory.setIgnoringComments(true); - factory.setIgnoringElementContentWhitespace(true); - - DocumentBuilder db = factory.newDocumentBuilder(); - Document inDoc = db.parse(response.getEntity().getContent()); - - Element xmlRoot = inDoc.getDocumentElement(); - List dataRoots = XMLUtils.getElementList(xmlRoot, - "entry"); - - for (Element dataRoot : dataRoots) { - Record crossitem = ArxivUtils - .convertArxixDomToRecord(dataRoot); - if (crossitem != null) { - results.add(crossitem); - } - } - } catch (Exception e) { - throw new RuntimeException( - "ArXiv identifier is not valid or not exist"); - } - } finally { - if (method != null) { - method.releaseConnection(); - } - } - - return results; - } - - public Record getByArXivIDs(String raw) throws HttpException, IOException { - if (StringUtils.isNotBlank(raw)) { - raw = raw.trim(); - if (raw.startsWith("http://arxiv.org/abs/")) { - raw = raw.substring("http://arxiv.org/abs/".length()); - } else if (raw.toLowerCase().startsWith("arxiv:")) { - raw = raw.substring("arxiv:".length()); - } - List result = search("", raw, 1); - if (result != null && result.size() > 0) { - return result.get(0); - } - } - return null; - } -} diff --git a/dspace-api/src/main/java/org/dspace/submit/lookup/ArxivUtils.java b/dspace-api/src/main/java/org/dspace/submit/lookup/ArxivUtils.java deleted file mode 100644 index 4caa0a957b2a..000000000000 --- a/dspace-api/src/main/java/org/dspace/submit/lookup/ArxivUtils.java +++ /dev/null @@ -1,151 
+0,0 @@ -/** - * The contents of this file are subject to the license and copyright - * detailed in the LICENSE and NOTICE files at the root of the source - * tree and available online at - * - * http://www.dspace.org/license/ - */ -/** - * - */ -package org.dspace.submit.lookup; - -import java.util.LinkedList; -import java.util.List; - -import gr.ekt.bte.core.MutableRecord; -import gr.ekt.bte.core.Record; -import gr.ekt.bte.core.StringValue; -import gr.ekt.bte.core.Value; -import org.dspace.app.util.XMLUtils; -import org.dspace.submit.util.SubmissionLookupPublication; -import org.w3c.dom.Element; - -/** - * @author Andrea Bollini - * @author Kostas Stamatis - * @author Luigi Andrea Pascarelli - * @author Panagiotis Koutsourakis - */ -public class ArxivUtils { - - /** - * Default constructor - */ - private ArxivUtils() { } - - public static Record convertArxixDomToRecord(Element dataRoot) { - MutableRecord record = new SubmissionLookupPublication(""); - - String articleTitle = XMLUtils.getElementValue(dataRoot, "title"); - if (articleTitle != null) { - record.addValue("title", new StringValue(articleTitle)); - } - String summary = XMLUtils.getElementValue(dataRoot, "summary"); - if (summary != null) { - record.addValue("summary", new StringValue(summary)); - } - String year = XMLUtils.getElementValue(dataRoot, "published"); - if (year != null) { - record.addValue("published", new StringValue(year)); - } - String splashPageUrl = XMLUtils.getElementValue(dataRoot, "id"); - if (splashPageUrl != null) { - record.addValue("id", new StringValue(splashPageUrl)); - } - String comment = XMLUtils.getElementValue(dataRoot, "arxiv:comment"); - if (comment != null) { - record.addValue("comment", new StringValue(comment)); - } - - List links = XMLUtils.getElementList(dataRoot, "link"); - if (links != null) { - for (Element link : links) { - if ("related".equals(link.getAttribute("rel")) - && "pdf".equals(link.getAttribute("title"))) { - String pdfUrl = link.getAttribute("href"); 
- if (pdfUrl != null) { - record.addValue("pdfUrl", new StringValue(pdfUrl)); - } - } - } - } - - String doi = XMLUtils.getElementValue(dataRoot, "arxiv:doi"); - if (doi != null) { - record.addValue("doi", new StringValue(doi)); - } - String journalRef = XMLUtils.getElementValue(dataRoot, - "arxiv:journal_ref"); - if (journalRef != null) { - record.addValue("journalRef", new StringValue(journalRef)); - } - - List primaryCategory = new LinkedList(); - List primaryCategoryList = XMLUtils.getElementList(dataRoot, - "arxiv:primary_category"); - if (primaryCategoryList != null) { - for (Element primaryCategoryElement : primaryCategoryList) { - primaryCategory - .add(primaryCategoryElement.getAttribute("term")); - } - } - - if (primaryCategory.size() > 0) { - List values = new LinkedList(); - for (String s : primaryCategory) { - values.add(new StringValue(s)); - } - record.addField("primaryCategory", values); - } - - List category = new LinkedList(); - List categoryList = XMLUtils.getElementList(dataRoot, - "category"); - if (categoryList != null) { - for (Element categoryElement : categoryList) { - category.add(categoryElement.getAttribute("term")); - } - } - - if (category.size() > 0) { - List values = new LinkedList(); - for (String s : category) { - values.add(new StringValue(s)); - } - record.addField("category", values); - } - - List authors = new LinkedList(); - List authorsWithAffiliations = new LinkedList(); - List authorList = XMLUtils.getElementList(dataRoot, "author"); - if (authorList != null) { - for (Element authorElement : authorList) { - String authorName = XMLUtils.getElementValue(authorElement, "name"); - String authorAffiliation = XMLUtils.getElementValue(authorElement, "arxiv:affiliation"); - - authors.add(authorName); - authorsWithAffiliations.add(authorName + ": " + authorAffiliation); - } - } - - if (authors.size() > 0) { - List values = new LinkedList(); - for (String sArray : authors) { - values.add(new StringValue(sArray)); - } - 
record.addField("author", values); - } - - if (authorsWithAffiliations.size() > 0) { - List values = new LinkedList(); - for (String sArray : authorsWithAffiliations) { - values.add(new StringValue(sArray)); - } - record.addField("authorWithAffiliation", values); - } - - return record; - } - -} diff --git a/dspace-api/src/main/java/org/dspace/submit/lookup/PubmedFileDataLoader.java b/dspace-api/src/main/java/org/dspace/submit/lookup/PubmedFileDataLoader.java deleted file mode 100644 index 05a37e64d665..000000000000 --- a/dspace-api/src/main/java/org/dspace/submit/lookup/PubmedFileDataLoader.java +++ /dev/null @@ -1,148 +0,0 @@ -/** - * The contents of this file are subject to the license and copyright - * detailed in the LICENSE and NOTICE files at the root of the source - * tree and available online at - * - * http://www.dspace.org/license/ - */ - -package org.dspace.submit.lookup; - -import java.io.File; -import java.io.FileInputStream; -import java.io.FileNotFoundException; -import java.io.IOException; -import java.io.InputStream; -import java.util.List; -import java.util.Map; -import javax.xml.parsers.DocumentBuilder; -import javax.xml.parsers.DocumentBuilderFactory; -import javax.xml.parsers.ParserConfigurationException; - -import gr.ekt.bte.core.DataLoadingSpec; -import gr.ekt.bte.core.Record; -import gr.ekt.bte.core.RecordSet; -import gr.ekt.bte.core.Value; -import gr.ekt.bte.dataloader.FileDataLoader; -import gr.ekt.bte.exceptions.MalformedSourceException; -import org.apache.commons.lang3.StringUtils; -import org.dspace.app.util.XMLUtils; -import org.w3c.dom.Document; -import org.w3c.dom.Element; -import org.xml.sax.SAXException; - -/** - * @author Andrea Bollini - * @author Kostas Stamatis - * @author Luigi Andrea Pascarelli - * @author Panagiotis Koutsourakis - */ -public class PubmedFileDataLoader extends FileDataLoader { - - Map fieldMap; // mapping between service fields and local - // intermediate fields - - /** - * - */ - public 
PubmedFileDataLoader() { - } - - /** - * @param filename Name of file to load CiNii data from. - */ - public PubmedFileDataLoader(String filename) { - super(filename); - } - - /* - * {@see gr.ekt.bte.core.DataLoader#getRecords()} - * - * @throws MalformedSourceException - */ - @Override - public RecordSet getRecords() throws MalformedSourceException { - - RecordSet recordSet = new RecordSet(); - - try { - InputStream inputStream = new FileInputStream(new File(filename)); - - DocumentBuilderFactory factory = DocumentBuilderFactory - .newInstance(); - factory.setValidating(false); - factory.setIgnoringComments(true); - factory.setIgnoringElementContentWhitespace(true); - - DocumentBuilder builder = factory.newDocumentBuilder(); - Document inDoc = builder.parse(inputStream); - - Element xmlRoot = inDoc.getDocumentElement(); - List pubArticles = XMLUtils.getElementList(xmlRoot, - "PubmedArticle"); - - for (Element xmlArticle : pubArticles) { - Record record = null; - try { - record = PubmedUtils.convertPubmedDomToRecord(xmlArticle); - recordSet.addRecord(convertFields(record)); - } catch (Exception e) { - throw new RuntimeException(e.getMessage(), e); - } - } - } catch (FileNotFoundException e) { - e.printStackTrace(); - } catch (ParserConfigurationException e) { - e.printStackTrace(); - } catch (SAXException e) { - e.printStackTrace(); - } catch (IOException e) { - e.printStackTrace(); - } - - return recordSet; - - } - - /* - * (non-Javadoc) - * - * @see - * gr.ekt.bte.core.DataLoader#getRecords(gr.ekt.bte.core.DataLoadingSpec) - */ - @Override - public RecordSet getRecords(DataLoadingSpec spec) - throws MalformedSourceException { - if (spec.getOffset() > 0) { - return new RecordSet(); - } - return getRecords(); - } - - public Record convertFields(Record publication) { - for (String fieldName : fieldMap.keySet()) { - String md = null; - if (fieldMap != null) { - md = this.fieldMap.get(fieldName); - } - - if (StringUtils.isBlank(md)) { - continue; - } else { - md = 
md.trim(); - } - - if (publication.isMutable()) { - List values = publication.getValues(fieldName); - publication.makeMutable().removeField(fieldName); - publication.makeMutable().addField(md, values); - } - } - - return publication; - } - - public void setFieldMap(Map fieldMap) { - this.fieldMap = fieldMap; - } -} diff --git a/dspace-api/src/main/java/org/dspace/submit/lookup/PubmedOnlineDataLoader.java b/dspace-api/src/main/java/org/dspace/submit/lookup/PubmedOnlineDataLoader.java deleted file mode 100644 index 094ce4e21dc1..000000000000 --- a/dspace-api/src/main/java/org/dspace/submit/lookup/PubmedOnlineDataLoader.java +++ /dev/null @@ -1,116 +0,0 @@ -/** - * The contents of this file are subject to the license and copyright - * detailed in the LICENSE and NOTICE files at the root of the source - * tree and available online at - * - * http://www.dspace.org/license/ - */ -package org.dspace.submit.lookup; - -import java.io.IOException; -import java.util.ArrayList; -import java.util.Arrays; -import java.util.List; -import java.util.Map; -import java.util.Set; - -import gr.ekt.bte.core.Record; -import org.apache.http.HttpException; -import org.apache.logging.log4j.Logger; -import org.dspace.core.Context; -import org.dspace.core.LogManager; - -/** - * @author Andrea Bollini - * @author Kostas Stamatis - * @author Luigi Andrea Pascarelli - * @author Panagiotis Koutsourakis - */ -public class PubmedOnlineDataLoader extends NetworkSubmissionLookupDataLoader { - protected boolean searchProvider = true; - - private static final Logger log = org.apache.logging.log4j.LogManager.getLogger(PubmedOnlineDataLoader.class); - - protected PubmedService pubmedService = new PubmedService(); - - public void setPubmedService(PubmedService pubmedService) { - this.pubmedService = pubmedService; - } - - @Override - public List getSupportedIdentifiers() { - return Arrays.asList(new String[] {PUBMED, DOI}); - } - - public void setSearchProvider(boolean searchProvider) { - 
this.searchProvider = searchProvider; - } - - @Override - public boolean isSearchProvider() { - return searchProvider; - } - - @Override - public List getByIdentifier(Context context, - Map> keys) throws HttpException, IOException { - Set pmids = keys != null ? keys.get(PUBMED) : null; - Set dois = keys != null ? keys.get(DOI) : null; - List results = new ArrayList(); - if (pmids != null && pmids.size() > 0 - && (dois == null || dois.size() == 0)) { - for (String pmid : pmids) { - Record p = null; - try { - p = pubmedService.getByPubmedID(pmid); - } catch (Exception e) { - log.error(LogManager.getHeader(context, "getByIdentifier", - "pmid=" + pmid), e); - } - if (p != null) { - results.add(convertFields(p)); - } - } - } else if (dois != null && dois.size() > 0 - && (pmids == null || pmids.size() == 0)) { - StringBuffer query = new StringBuffer(); - for (String d : dois) { - if (query.length() > 0) { - query.append(" OR "); - } - query.append(d).append("[AI]"); - } - - List pubmedResults = pubmedService.search(query.toString()); - for (Record p : pubmedResults) { - results.add(convertFields(p)); - } - } else if (dois != null && dois.size() > 0 && pmids != null - && pmids.size() > 0) { - // EKT:ToDo: support list of dois and pmids in the search method of - // pubmedService - List pubmedResults = pubmedService.search(dois.iterator() - .next(), pmids.iterator().next()); - if (pubmedResults != null) { - for (Record p : pubmedResults) { - results.add(convertFields(p)); - } - } - } - - return results; - } - - @Override - public List search(Context context, String title, String author, - int year) throws HttpException, IOException { - List pubmedResults = pubmedService.search(title, author, year); - List results = new ArrayList(); - if (pubmedResults != null) { - for (Record p : pubmedResults) { - results.add(convertFields(p)); - } - } - return results; - } -} diff --git a/dspace-api/src/main/java/org/dspace/submit/lookup/PubmedService.java 
b/dspace-api/src/main/java/org/dspace/submit/lookup/PubmedService.java deleted file mode 100644 index fa30ee8ea5e9..000000000000 --- a/dspace-api/src/main/java/org/dspace/submit/lookup/PubmedService.java +++ /dev/null @@ -1,265 +0,0 @@ -/** - * The contents of this file are subject to the license and copyright - * detailed in the LICENSE and NOTICE files at the root of the source - * tree and available online at - * - * http://www.dspace.org/license/ - */ -package org.dspace.submit.lookup; - -import java.io.File; -import java.io.FileInputStream; -import java.io.IOException; -import java.io.InputStream; -import java.net.URISyntaxException; -import java.util.ArrayList; -import java.util.List; -import javax.xml.parsers.DocumentBuilder; -import javax.xml.parsers.DocumentBuilderFactory; -import javax.xml.parsers.ParserConfigurationException; - -import gr.ekt.bte.core.Record; -import org.apache.commons.lang3.StringUtils; -import org.apache.http.HttpException; -import org.apache.http.HttpResponse; -import org.apache.http.HttpStatus; -import org.apache.http.StatusLine; -import org.apache.http.client.HttpClient; -import org.apache.http.client.methods.HttpGet; -import org.apache.http.client.utils.URIBuilder; -import org.apache.http.impl.client.DefaultHttpClient; -import org.apache.http.params.CoreConnectionPNames; -import org.apache.logging.log4j.Logger; -import org.dspace.app.util.XMLUtils; -import org.dspace.core.ConfigurationManager; -import org.w3c.dom.Document; -import org.w3c.dom.Element; -import org.xml.sax.SAXException; - -/** - * @author Andrea Bollini - * @author Kostas Stamatis - * @author Luigi Andrea Pascarelli - * @author Panagiotis Koutsourakis - */ -public class PubmedService { - - private static final Logger log = org.apache.logging.log4j.LogManager.getLogger(PubmedService.class); - - protected int timeout = 1000; - - public void setTimeout(int timeout) { - this.timeout = timeout; - } - - public Record getByPubmedID(String pubmedid) throws HttpException, - 
IOException, ParserConfigurationException, SAXException { - List ids = new ArrayList(); - ids.add(pubmedid.trim()); - List items = getByPubmedIDs(ids); - if (items != null && items.size() > 0) { - return items.get(0); - } - return null; - } - - public List search(String title, String author, int year) - throws HttpException, IOException { - StringBuffer query = new StringBuffer(); - if (StringUtils.isNotBlank(title)) { - query.append("((").append(title).append("[TI]) OR ("); - // [TI] does not always work, book chapter title - query.append("(").append(title).append("[book]))"); - } - if (StringUtils.isNotBlank(author)) { - // [FAU] - if (query.length() > 0) { - query.append(" AND "); - } - query.append("(").append(author).append("[AU])"); - } - if (year != -1) { - // [DP] - if (query.length() > 0) { - query.append(" AND "); - } - query.append(year).append("[DP]"); - } - return search(query.toString()); - } - - public List search(String query) throws IOException, HttpException { - List results = new ArrayList<>(); - if (!ConfigurationManager.getBooleanProperty(SubmissionLookupService.CFG_MODULE, "remoteservice.demo")) { - HttpGet method = null; - try { - HttpClient client = new DefaultHttpClient(); - client.getParams().setIntParameter(CoreConnectionPNames.CONNECTION_TIMEOUT, timeout); - - URIBuilder uriBuilder = new URIBuilder( - "https://eutils.ncbi.nlm.nih.gov/entrez/eutils/esearch.fcgi"); - uriBuilder.addParameter("db", "pubmed"); - uriBuilder.addParameter("datetype", "edat"); - uriBuilder.addParameter("retmax", "10"); - uriBuilder.addParameter("term", query); - method = new HttpGet(uriBuilder.build()); - - // Execute the method. 
- HttpResponse response = client.execute(method); - StatusLine statusLine = response.getStatusLine(); - int statusCode = statusLine.getStatusCode(); - - if (statusCode != HttpStatus.SC_OK) { - throw new RuntimeException("WS call failed: " - + statusLine); - } - - DocumentBuilderFactory factory = DocumentBuilderFactory - .newInstance(); - factory.setValidating(false); - factory.setIgnoringComments(true); - factory.setIgnoringElementContentWhitespace(true); - - DocumentBuilder builder; - try { - builder = factory.newDocumentBuilder(); - - Document inDoc = builder.parse(response.getEntity().getContent()); - - Element xmlRoot = inDoc.getDocumentElement(); - Element idList = XMLUtils.getSingleElement(xmlRoot, - "IdList"); - List pubmedIDs = XMLUtils.getElementValueList( - idList, "Id"); - results = getByPubmedIDs(pubmedIDs); - } catch (ParserConfigurationException e1) { - log.error(e1.getMessage(), e1); - } catch (SAXException e1) { - log.error(e1.getMessage(), e1); - } - } catch (Exception e1) { - log.error(e1.getMessage(), e1); - } finally { - if (method != null) { - method.releaseConnection(); - } - } - } else { - InputStream stream = null; - try { - File file = new File( - ConfigurationManager.getProperty("dspace.dir") - + "/config/crosswalks/demo/pubmed-search.xml"); - stream = new FileInputStream(file); - DocumentBuilderFactory factory = DocumentBuilderFactory - .newInstance(); - factory.setValidating(false); - factory.setIgnoringComments(true); - factory.setIgnoringElementContentWhitespace(true); - - DocumentBuilder builder = factory.newDocumentBuilder(); - Document inDoc = builder.parse(stream); - - Element xmlRoot = inDoc.getDocumentElement(); - Element idList = XMLUtils.getSingleElement(xmlRoot, "IdList"); - List pubmedIDs = XMLUtils.getElementValueList(idList, - "Id"); - results = getByPubmedIDs(pubmedIDs); - } catch (Exception e) { - throw new RuntimeException(e.getMessage(), e); - } finally { - if (stream != null) { - try { - stream.close(); - } catch 
(IOException e) { - e.printStackTrace(); - } - } - } - } - return results; - } - - public List getByPubmedIDs(List pubmedIDs) - throws HttpException, IOException, ParserConfigurationException, - SAXException { - List results = new ArrayList(); - HttpGet method = null; - try { - HttpClient client = new DefaultHttpClient(); - client.getParams().setIntParameter(CoreConnectionPNames.CONNECTION_TIMEOUT, 5 * timeout); - - try { - URIBuilder uriBuilder = new URIBuilder( - "https://eutils.ncbi.nlm.nih.gov/entrez/eutils/efetch.fcgi"); - uriBuilder.addParameter("db", "pubmed"); - uriBuilder.addParameter("retmode", "xml"); - uriBuilder.addParameter("rettype", "full"); - uriBuilder.addParameter("id", StringUtils.join( - pubmedIDs.iterator(), ",")); - method = new HttpGet(uriBuilder.build()); - } catch (URISyntaxException ex) { - throw new RuntimeException("Request not sent", ex); - } - - // Execute the method. - HttpResponse response = client.execute(method); - StatusLine statusLine = response.getStatusLine(); - int statusCode = statusLine.getStatusCode(); - - if (statusCode != HttpStatus.SC_OK) { - throw new RuntimeException("WS call failed: " + statusLine); - } - - DocumentBuilderFactory factory = DocumentBuilderFactory - .newInstance(); - factory.setValidating(false); - factory.setIgnoringComments(true); - factory.setIgnoringElementContentWhitespace(true); - - DocumentBuilder builder = factory.newDocumentBuilder(); - Document inDoc = builder - .parse(response.getEntity().getContent()); - - Element xmlRoot = inDoc.getDocumentElement(); - List pubArticles = XMLUtils.getElementList(xmlRoot, - "PubmedArticle"); - - for (Element xmlArticle : pubArticles) { - Record pubmedItem = null; - try { - pubmedItem = PubmedUtils - .convertPubmedDomToRecord(xmlArticle); - results.add(pubmedItem); - } catch (Exception e) { - throw new RuntimeException( - "PubmedID is not valid or not exist: " - + e.getMessage(), e); - } - } - - return results; - } finally { - if (method != null) { - 
method.releaseConnection(); - } - } - } - - public List search(String doi, String pmid) throws HttpException, - IOException { - StringBuffer query = new StringBuffer(); - if (StringUtils.isNotBlank(doi)) { - query.append(doi); - query.append("[AID]"); - } - if (StringUtils.isNotBlank(pmid)) { - // [FAU] - if (query.length() > 0) { - query.append(" OR "); - } - query.append(pmid).append("[PMID]"); - } - return search(query.toString()); - } -} diff --git a/dspace-api/src/main/java/org/dspace/submit/lookup/PubmedUtils.java b/dspace-api/src/main/java/org/dspace/submit/lookup/PubmedUtils.java deleted file mode 100644 index bca34de295b0..000000000000 --- a/dspace-api/src/main/java/org/dspace/submit/lookup/PubmedUtils.java +++ /dev/null @@ -1,316 +0,0 @@ -/** - * The contents of this file are subject to the license and copyright - * detailed in the LICENSE and NOTICE files at the root of the source - * tree and available online at - * - * http://www.dspace.org/license/ - */ -/** - * - */ -package org.dspace.submit.lookup; - -import java.util.HashMap; -import java.util.LinkedList; -import java.util.List; -import java.util.Map; - -import gr.ekt.bte.core.MutableRecord; -import gr.ekt.bte.core.Record; -import gr.ekt.bte.core.StringValue; -import gr.ekt.bte.core.Value; -import org.apache.commons.lang3.StringUtils; -import org.dspace.app.util.XMLUtils; -import org.dspace.submit.util.SubmissionLookupPublication; -import org.w3c.dom.Element; - -/** - * @author Andrea Bollini - * @author Kostas Stamatis - * @author Luigi Andrea Pascarelli - * @author Panagiotis Koutsourakis - */ -public class PubmedUtils { - - /** - * Default constructor - */ - private PubmedUtils() { } - - public static Record convertPubmedDomToRecord(Element pubArticle) { - MutableRecord record = new SubmissionLookupPublication(""); - - Map monthToNum = new HashMap(); - monthToNum.put("Jan", "01"); - monthToNum.put("Feb", "02"); - monthToNum.put("Mar", "03"); - monthToNum.put("Apr", "04"); - 
monthToNum.put("May", "05"); - monthToNum.put("Jun", "06"); - monthToNum.put("Jul", "07"); - monthToNum.put("Aug", "08"); - monthToNum.put("Sep", "09"); - monthToNum.put("Oct", "10"); - monthToNum.put("Nov", "11"); - monthToNum.put("Dec", "12"); - - Element medline = XMLUtils.getSingleElement(pubArticle, - "MedlineCitation"); - - Element article = XMLUtils.getSingleElement(medline, "Article"); - Element pubmed = XMLUtils.getSingleElement(pubArticle, "PubmedData"); - - Element identifierList = XMLUtils.getSingleElement(pubmed, - "ArticleIdList"); - if (identifierList != null) { - List identifiers = XMLUtils.getElementList(identifierList, - "ArticleId"); - if (identifiers != null) { - for (Element id : identifiers) { - if ("pubmed".equals(id.getAttribute("IdType"))) { - String pubmedID = id.getTextContent().trim(); - if (pubmedID != null) { - record.addValue("pubmedID", new StringValue( - pubmedID)); - } - } else if ("doi".equals(id.getAttribute("IdType"))) { - String doi = id.getTextContent().trim(); - if (doi != null) { - record.addValue("doi", new StringValue(doi)); - } - } - } - } - } - - String status = XMLUtils.getElementValue(pubmed, "PublicationStatus"); - if (status != null) { - record.addValue("publicationStatus", new StringValue(status)); - } - - String pubblicationModel = XMLUtils.getElementAttribute(medline, - "Article", "PubModel"); - if (pubblicationModel != null) { - record.addValue("pubModel", new StringValue( - pubblicationModel)); - } - - String title = XMLUtils.getElementValue(article, "ArticleTitle"); - if (title != null) { - record.addValue("articleTitle", new StringValue(title)); - } - - Element abstractElement = XMLUtils - .getSingleElement(article, "Abstract"); - if (abstractElement == null) { - abstractElement = XMLUtils.getSingleElement(medline, - "OtherAbstract"); - } - if (abstractElement != null) { - String summary = XMLUtils.getElementValue(abstractElement, - "AbstractText"); - if (summary != null) { - record.addValue("abstractText", 
new StringValue(summary)); - } - } - - List authors = new LinkedList(); - Element authorList = XMLUtils.getSingleElement(article, "AuthorList"); - if (authorList != null) { - List authorsElement = XMLUtils.getElementList(authorList, - "Author"); - if (authorsElement != null) { - for (Element author : authorsElement) { - if (StringUtils.isBlank(XMLUtils.getElementValue(author, - "CollectiveName"))) { - authors.add(new String[] { - XMLUtils.getElementValue(author, "ForeName"), - XMLUtils.getElementValue(author, "LastName")}); - } - } - } - } - if (authors.size() > 0) { - List values = new LinkedList(); - for (String[] sArray : authors) { - values.add(new StringValue(sArray[1] + ", " + sArray[0])); - } - record.addField("author", values); - } - - Element journal = XMLUtils.getSingleElement(article, "Journal"); - if (journal != null) { - List jnumbers = XMLUtils.getElementList(journal, "ISSN"); - if (jnumbers != null) { - for (Element jnumber : jnumbers) { - if ("Print".equals(jnumber.getAttribute("IssnType"))) { - String issn = jnumber.getTextContent().trim(); - if (issn != null) { - record.addValue("printISSN", new StringValue(issn)); - } - } else { - String eissn = jnumber.getTextContent().trim(); - if (eissn != null) { - record.addValue("electronicISSN", new StringValue(eissn)); - } - } - } - } - - String journalTitle = XMLUtils.getElementValue(journal, "Title"); - if (journalTitle != null) { - record.addValue("journalTitle", new StringValue(journalTitle)); - } - - Element journalIssueElement = XMLUtils.getSingleElement(journal, - "JournalIssue"); - if (journalIssueElement != null) { - String volume = XMLUtils.getElementValue(journalIssueElement, - "Volume"); - if (volume != null) { - record.addValue("journalVolume", new StringValue(volume)); - } - - String issue = XMLUtils.getElementValue(journalIssueElement, - "Issue"); - if (issue != null) { - record.addValue("journalIssue", new StringValue(issue)); - } - - Element pubDateElement = XMLUtils.getSingleElement( - 
journalIssueElement, "PubDate"); - - String pubDate = null; - if (pubDateElement != null) { - pubDate = XMLUtils.getElementValue(pubDateElement, "Year"); - - String mounth = XMLUtils.getElementValue(pubDateElement, - "Month"); - String day = XMLUtils - .getElementValue(pubDateElement, "Day"); - if (StringUtils.isNotBlank(mounth) - && monthToNum.containsKey(mounth)) { - pubDate += "-" + monthToNum.get(mounth); - if (StringUtils.isNotBlank(day)) { - pubDate += "-" + (day.length() == 1 ? "0" + day : day); - } - } - } - if (pubDate == null) { - pubDate = XMLUtils.getElementValue(pubDateElement, "MedlineDate"); - } - if (pubDate != null) { - record.addValue("pubDate", new StringValue(pubDate)); - } - } - - String language = XMLUtils.getElementValue(article, "Language"); - if (language != null) { - record.addValue("language", new StringValue(language)); - } - - List type = new LinkedList(); - Element publicationTypeList = XMLUtils.getSingleElement(article, - "PublicationTypeList"); - if (publicationTypeList != null) { - List publicationTypes = XMLUtils.getElementList( - publicationTypeList, "PublicationType"); - for (Element publicationType : publicationTypes) { - type.add(publicationType.getTextContent().trim()); - } - } - if (type.size() > 0) { - List values = new LinkedList(); - for (String s : type) { - values.add(new StringValue(s)); - } - record.addField("publicationType", values); - } - - List primaryKeywords = new LinkedList(); - List secondaryKeywords = new LinkedList(); - Element keywordsList = XMLUtils.getSingleElement(medline, - "KeywordList"); - if (keywordsList != null) { - List keywords = XMLUtils.getElementList(keywordsList, - "Keyword"); - for (Element keyword : keywords) { - if ("Y".equals(keyword.getAttribute("MajorTopicYN"))) { - primaryKeywords.add(keyword.getTextContent().trim()); - } else { - secondaryKeywords.add(keyword.getTextContent().trim()); - } - } - } - if (primaryKeywords.size() > 0) { - List values = new LinkedList(); - for (String s : 
primaryKeywords) { - values.add(new StringValue(s)); - } - record.addField("primaryKeyword", values); - } - if (secondaryKeywords.size() > 0) { - List values = new LinkedList(); - for (String s : secondaryKeywords) { - values.add(new StringValue(s)); - } - record.addField("secondaryKeyword", values); - } - - List primaryMeshHeadings = new LinkedList(); - List secondaryMeshHeadings = new LinkedList(); - Element meshHeadingsList = XMLUtils.getSingleElement(medline, - "MeshHeadingList"); - if (meshHeadingsList != null) { - List meshHeadings = XMLUtils.getElementList( - meshHeadingsList, "MeshHeading"); - for (Element meshHeading : meshHeadings) { - if ("Y".equals(XMLUtils.getElementAttribute(meshHeading, - "DescriptorName", "MajorTopicYN"))) { - primaryMeshHeadings.add(XMLUtils.getElementValue( - meshHeading, "DescriptorName")); - } else { - secondaryMeshHeadings.add(XMLUtils.getElementValue( - meshHeading, "DescriptorName")); - } - } - } - if (primaryMeshHeadings.size() > 0) { - List values = new LinkedList(); - for (String s : primaryMeshHeadings) { - values.add(new StringValue(s)); - } - record.addField("primaryMeshHeading", values); - } - if (secondaryMeshHeadings.size() > 0) { - List values = new LinkedList(); - for (String s : secondaryMeshHeadings) { - values.add(new StringValue(s)); - } - record.addField("secondaryMeshHeading", values); - } - - Element paginationElement = XMLUtils.getSingleElement(article, - "Pagination"); - if (paginationElement != null) { - String startPage = XMLUtils.getElementValue(paginationElement, - "StartPage"); - String endPage = XMLUtils.getElementValue(paginationElement, - "EndPage"); - if (StringUtils.isBlank(startPage)) { - startPage = XMLUtils.getElementValue(paginationElement, - "MedlinePgn"); - } - - if (startPage != null) { - record.addValue("startPage", new StringValue(startPage)); - } - if (endPage != null) { - record.addValue("endPage", new StringValue(endPage)); - } - } - } - - return record; - } -} diff --git 
a/dspace-api/src/main/resources/spring/spring-dspace-addon-import-services.xml b/dspace-api/src/main/resources/spring/spring-dspace-addon-import-services.xml index d2d6dd10c1df..750e5eb309b8 100644 --- a/dspace-api/src/main/resources/spring/spring-dspace-addon-import-services.xml +++ b/dspace-api/src/main/resources/spring/spring-dspace-addon-import-services.xml @@ -36,7 +36,6 @@ - - - @@ -79,7 +77,6 @@ jeissn pisbn eisbn - arxivCategory keywords mesh language @@ -106,13 +103,9 @@ - - - - @@ -129,40 +122,11 @@ - - - - - - - - - - - arxivCategory - - - - - - - - - - - - - - publicationStatus - - - - @@ -357,75 +321,6 @@ value="http://ebooks.serrelib.gr/serrelib-oai/request" /> - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - @@ -544,7 +439,6 @@ - @@ -553,7 +447,6 @@ - diff --git a/dspace/config/spring/api/step-processing-listener.xml b/dspace/config/spring/api/step-processing-listener.xml index eb016c513355..986b850875db 100644 --- a/dspace/config/spring/api/step-processing-listener.xml +++ b/dspace/config/spring/api/step-processing-listener.xml @@ -13,9 +13,7 @@ - - From af38c36bdb094d91f934565d79bab66e29679363 Mon Sep 17 00:00:00 2001 From: Pasquale Cavallo Date: Mon, 6 Jul 2020 11:33:52 +0200 Subject: [PATCH 12/34] add JavaDoc to LiveImportDataProvider public method --- .../provider/impl/LiveImportDataProvider.java | 16 ++++++++++++++++ 1 file changed, 16 insertions(+) diff --git a/dspace-api/src/main/java/org/dspace/external/provider/impl/LiveImportDataProvider.java b/dspace-api/src/main/java/org/dspace/external/provider/impl/LiveImportDataProvider.java index 776984ddb769..7bc9766a19e1 100644 --- a/dspace-api/src/main/java/org/dspace/external/provider/impl/LiveImportDataProvider.java +++ b/dspace-api/src/main/java/org/dspace/external/provider/impl/LiveImportDataProvider.java @@ -47,18 +47,34 @@ public String getSourceIdentifier() { return sourceIdentifier; } + /** + * This method 
sets the SourceIdentifier for the ExternalDataProvider + * @param sourceIdentifier The UNIQUE sourceIdentifier to be set on any LiveImport data provider + */ public void setSourceIdentifier(String sourceIdentifier) { this.sourceIdentifier = sourceIdentifier; } + /** + * This method sets the MetadataSource for the ExternalDataProvider + * @param metadataSource {@link org.dspace.importer.external.service.components.MetadataSource} implementation used to process the input data + */ public void setMetadataSource(MetadataSource metadataSource) { this.metadataSource = metadataSource; } + /** + * This method sets the Dublin Core identifier to use as metadata id + * @param recordIdMetadata Dublin Core identifier to use as metadata id + */ public void setRecordIdMetadata(String recordIdMetadata) { this.recordIdMetadata = recordIdMetadata; } + /** + * This method sets the Dublin Core identifier to display as the title + * @param displayMetadata metadata to use as title + */ public void setDisplayMetadata(String displayMetadata) { this.displayMetadata = displayMetadata; } From 76d2e067108a3b7099e5e901cf69d47bb6d71396 Mon Sep 17 00:00:00 2001 From: Mykhaylo Date: Mon, 20 Jul 2020 17:53:07 +0200 Subject: [PATCH 13/34] added Logger --- .../contributor/ArXivIdMetadataContributor.java | 8 ++++++-- .../contributor/SimpleXpathMetadatumContributor.java | 8 ++++++-- 2 files changed, 12 insertions(+), 4 deletions(-) diff --git a/dspace-api/src/main/java/org/dspace/importer/external/arxiv/metadatamapping/contributor/ArXivIdMetadataContributor.java b/dspace-api/src/main/java/org/dspace/importer/external/arxiv/metadatamapping/contributor/ArXivIdMetadataContributor.java index 727a3fcf4e37..a1f0c34f6ec9 100644 --- a/dspace-api/src/main/java/org/dspace/importer/external/arxiv/metadatamapping/contributor/ArXivIdMetadataContributor.java +++ b/dspace-api/src/main/java/org/dspace/importer/external/arxiv/metadatamapping/contributor/ArXivIdMetadataContributor.java @@ -22,6 +22,8 @@ import
org.dspace.importer.external.metadatamapping.MetadatumDTO; import org.dspace.importer.external.metadatamapping.contributor.MetadataContributor; import org.jaxen.JaxenException; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; import org.springframework.beans.factory.annotation.Required; /** @@ -34,6 +36,8 @@ public class ArXivIdMetadataContributor implements MetadataContributor { private MetadataFieldConfig field; + private static final Logger log = LoggerFactory.getLogger(ArXivIdMetadataContributor.class); + /** * Return prefixToNamespaceMapping * @@ -160,13 +164,13 @@ public Collection contributeMetadata(OMElement t) { } else if (el instanceof OMText) { values.add(metadataFieldMapping.toDCValue(field, ((OMText) el).getText())); } else { - System.err.println("node of type: " + el.getClass()); + log.error("node of type: " + el.getClass()); } } parseValue(values); return values; } catch (JaxenException e) { - System.err.println(query); + log.error(query); throw new RuntimeException(e); } } diff --git a/dspace-api/src/main/java/org/dspace/importer/external/metadatamapping/contributor/SimpleXpathMetadatumContributor.java b/dspace-api/src/main/java/org/dspace/importer/external/metadatamapping/contributor/SimpleXpathMetadatumContributor.java index ba5afceb5f97..b612bb84bc4c 100644 --- a/dspace-api/src/main/java/org/dspace/importer/external/metadatamapping/contributor/SimpleXpathMetadatumContributor.java +++ b/dspace-api/src/main/java/org/dspace/importer/external/metadatamapping/contributor/SimpleXpathMetadatumContributor.java @@ -21,6 +21,8 @@ import org.dspace.importer.external.metadatamapping.MetadataFieldMapping; import org.dspace.importer.external.metadatamapping.MetadatumDTO; import org.jaxen.JaxenException; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; import org.springframework.beans.factory.annotation.Required; /** @@ -31,6 +33,8 @@ public class SimpleXpathMetadatumContributor implements MetadataContributor { private MetadataFieldConfig 
field; + private static final Logger log = LoggerFactory.getLogger(SimpleXpathMetadatumContributor.class); + /** * Return prefixToNamespaceMapping * @@ -157,12 +161,12 @@ public Collection contributeMetadata(OMElement t) { } else if (el instanceof OMText) { values.add(metadataFieldMapping.toDCValue(field, ((OMText) el).getText())); } else { - System.err.println("node of type: " + el.getClass()); + log.error("node of type: " + el.getClass()); } } return values; } catch (JaxenException e) { - System.err.println(query); + log.error(query); throw new RuntimeException(e); } From 25bed852d39d0a95fdd90af77017346495ff521e Mon Sep 17 00:00:00 2001 From: Pasquale Cavallo Date: Wed, 29 Jul 2020 01:19:20 +0200 Subject: [PATCH 14/34] Add comment, rewrite ArxivIdMetadataContributor --- .../ArXivIdMetadataContributor.java | 155 ++---------------- .../ArXivImportMetadataSourceServiceImpl.java | 39 ++++- 2 files changed, 50 insertions(+), 144 deletions(-) diff --git a/dspace-api/src/main/java/org/dspace/importer/external/arxiv/metadatamapping/contributor/ArXivIdMetadataContributor.java b/dspace-api/src/main/java/org/dspace/importer/external/arxiv/metadatamapping/contributor/ArXivIdMetadataContributor.java index a1f0c34f6ec9..077fe85675d3 100644 --- a/dspace-api/src/main/java/org/dspace/importer/external/arxiv/metadatamapping/contributor/ArXivIdMetadataContributor.java +++ b/dspace-api/src/main/java/org/dspace/importer/external/arxiv/metadatamapping/contributor/ArXivIdMetadataContributor.java @@ -8,23 +8,11 @@ package org.dspace.importer.external.arxiv.metadatamapping.contributor; import java.util.Collection; -import java.util.LinkedList; -import java.util.List; -import java.util.Map; -import javax.annotation.Resource; -import org.apache.axiom.om.OMAttribute; import org.apache.axiom.om.OMElement; -import org.apache.axiom.om.OMText; -import org.apache.axiom.om.xpath.AXIOMXPath; -import org.dspace.importer.external.metadatamapping.MetadataFieldConfig; -import 
org.dspace.importer.external.metadatamapping.MetadataFieldMapping; import org.dspace.importer.external.metadatamapping.MetadatumDTO; import org.dspace.importer.external.metadatamapping.contributor.MetadataContributor; -import org.jaxen.JaxenException; -import org.slf4j.Logger; -import org.slf4j.LoggerFactory; -import org.springframework.beans.factory.annotation.Required; +import org.dspace.importer.external.metadatamapping.contributor.SimpleXpathMetadatumContributor; /** * Arxiv specific implementation of {@link MetadataContributor} @@ -33,109 +21,7 @@ * @author Pasquale Cavallo (pasquale.cavallo at 4science dot it) * */ -public class ArXivIdMetadataContributor implements MetadataContributor { - private MetadataFieldConfig field; - - private static final Logger log = LoggerFactory.getLogger(ArXivIdMetadataContributor.class); - - /** - * Return prefixToNamespaceMapping - * - * @return a prefixToNamespaceMapping map - */ - public Map getPrefixToNamespaceMapping() { - return prefixToNamespaceMapping; - } - - private MetadataFieldMapping> metadataFieldMapping; - - /** - * Return metadataFieldMapping - * - * @return MetadataFieldMapping - */ - public MetadataFieldMapping> getMetadataFieldMapping() { - return metadataFieldMapping; - } - - /** - * Set the metadataFieldMapping of this ArXivIdMetadataContributor - * - * @param metadataFieldMapping the new mapping. - */ - public void setMetadataFieldMapping( - MetadataFieldMapping> metadataFieldMapping) { - this.metadataFieldMapping = metadataFieldMapping; - } - - /** - * Set the prefixToNamespaceMapping for this object, - * - * @param prefixToNamespaceMapping the new mapping. 
- */ - @Resource(name = "isiFullprefixMapping") - public void setPrefixToNamespaceMapping(Map prefixToNamespaceMapping) { - this.prefixToNamespaceMapping = prefixToNamespaceMapping; - } - - private Map prefixToNamespaceMapping; - - /** - * Initialize ArXivIdMetadataContributor with a query, prefixToNamespaceMapping and MetadataFieldConfig - * - * @param query query string - * @param prefixToNamespaceMapping metadata prefix to namespace mapping - * @param field - * MetadataFieldConfig - */ - public ArXivIdMetadataContributor(String query, Map prefixToNamespaceMapping, - MetadataFieldConfig field) { - this.query = query; - this.prefixToNamespaceMapping = prefixToNamespaceMapping; - this.field = field; - } - - /** - * Empty constructor for ArXivIdMetadataContributor - */ - public ArXivIdMetadataContributor() { - - } - - private String query; - - /** - * Return the MetadataFieldConfig used while retrieving MetadatumDTO - * - * @return MetadataFieldConfig - */ - public MetadataFieldConfig getField() { - return field; - } - - /** - * Setting the MetadataFieldConfig - * - * @param field MetadataFieldConfig used while retrieving MetadatumDTO - */ - @Required - public void setField(MetadataFieldConfig field) { - this.field = field; - } - - /** - * Return query used to create an xpathExpression on, this query is used to - * - * @return the query this instance is based on - */ - public String getQuery() { - return query; - } - - @Required - public void setQuery(String query) { - this.query = query; - } +public class ArXivIdMetadataContributor extends SimpleXpathMetadatumContributor { /** * Retrieve the metadata associated with the given object. 
@@ -147,35 +33,18 @@ public void setQuery(String query) { */ @Override public Collection contributeMetadata(OMElement t) { - List values = new LinkedList<>(); - try { - AXIOMXPath xpath = new AXIOMXPath(query); - for (String ns : prefixToNamespaceMapping.keySet()) { - xpath.addNamespace(prefixToNamespaceMapping.get(ns), ns); - } - List nodes = xpath.selectNodes(t); - for (Object el : nodes) { - if (el instanceof OMElement) { - values.add(metadataFieldMapping.toDCValue(field, ((OMElement) el).getText())); - } else if (el instanceof OMAttribute) { - values.add(metadataFieldMapping.toDCValue(field, ((OMAttribute) el).getAttributeValue())); - } else if (el instanceof String) { - values.add(metadataFieldMapping.toDCValue(field, (String) el)); - } else if (el instanceof OMText) { - values.add(metadataFieldMapping.toDCValue(field, ((OMText) el).getText())); - } else { - log.error("node of type: " + el.getClass()); - } - } - parseValue(values); - return values; - } catch (JaxenException e) { - log.error(query); - throw new RuntimeException(e); - } + Collection values = super.contributeMetadata(t); + parseValue(values); + return values; } - private void parseValue(List dtos) { + /** + * ArXiv doesn't return the item id. 
We have to get this from the path parameter + * + * @param dtos Metadata which contains the items uri + * @return the items ids + */ + private void parseValue(Collection dtos) { if (dtos != null) { for (MetadatumDTO dto : dtos) { if (dto != null && dto.getValue() != null && dto.getValue().contains("/")) { diff --git a/dspace-api/src/main/java/org/dspace/importer/external/arxiv/service/ArXivImportMetadataSourceServiceImpl.java b/dspace-api/src/main/java/org/dspace/importer/external/arxiv/service/ArXivImportMetadataSourceServiceImpl.java index 863a8144b4d3..4320856842e6 100644 --- a/dspace-api/src/main/java/org/dspace/importer/external/arxiv/service/ArXivImportMetadataSourceServiceImpl.java +++ b/dspace-api/src/main/java/org/dspace/importer/external/arxiv/service/ArXivImportMetadataSourceServiceImpl.java @@ -164,6 +164,13 @@ public Collection findMatchingRecords(Query query) throws Metadata return retry(new FindMatchingRecordCallable(query)); } + /** + * This class is a Callable implementation to count the number of entries for an ArXiv + * query. + * + * @author Pasquale Cavallo (pasquale.cavallo at 4science dot it) + * + */ private class CountByQueryCallable implements Callable { private Query query; @@ -207,7 +214,14 @@ public Integer call() throws Exception { } } - + /** + * This class is a Callable implementation to get ArXiv entries based on + * query object. 
+ * + * @see org.dspace.importer.external.datamodel.Query + * @author Pasquale Cavallo (pasquale.cavallo at 4science dot it) + * + */ private class SearchByQueryCallable implements Callable> { private Query query; @@ -239,6 +253,9 @@ public List call() throws Exception { } Invocation.Builder invocationBuilder = local.request(MediaType.TEXT_PLAIN_TYPE); Response response = invocationBuilder.get(); + if (response.getStatus() == 400) { + throw new IllegalArgumentException("Invalid ArXiv ID"); + } String responseString = response.readEntity(String.class); List omElements = splitToRecords(responseString); for (OMElement record : omElements) { @@ -248,6 +265,12 @@ public List call() throws Exception { } } + /** + * This class is a Callable implementation to get ArXiv entry using ArXiv ID + * + * @author Pasquale Cavallo (pasquale.cavallo at 4science dot it) + * + */ private class SearchByIdCallable implements Callable> { private Query query; @@ -275,6 +298,9 @@ public List call() throws Exception { WebTarget local = webTarget.queryParam("id_list", arxivid); Invocation.Builder invocationBuilder = local.request(MediaType.TEXT_PLAIN_TYPE); Response response = invocationBuilder.get(); + if (response.getStatus() == 400) { + throw new IllegalArgumentException("Invalid ArXiv ID"); + } String responseString = response.readEntity(String.class); List omElements = splitToRecords(responseString); for (OMElement record : omElements) { @@ -284,6 +310,14 @@ public List call() throws Exception { } } + /** + * This class is a Callable implementation to search ArXiv entries + * using author and title. 
+ * + * @see org.dspace.importer.external.datamodel.Query + * @author Pasquale Cavallo (pasquale.cavallo at 4science dot it) + * + */ private class FindMatchingRecordCallable implements Callable> { private Query query; @@ -299,6 +333,9 @@ public List call() throws Exception { WebTarget local = webTarget.queryParam("search_query", queryString); Invocation.Builder invocationBuilder = local.request(MediaType.TEXT_PLAIN_TYPE); Response response = invocationBuilder.get(); + if (response.getStatus() == 400) { + throw new IllegalArgumentException("Invalid ArXiv ID"); + } String responseString = response.readEntity(String.class); List omElements = splitToRecords(responseString); for (OMElement record : omElements) { From afd6436c5c3b65697692c741244b258f9a94546b Mon Sep 17 00:00:00 2001 From: Pasquale Cavallo Date: Wed, 29 Jul 2020 01:44:25 +0200 Subject: [PATCH 15/34] Clean code --- .../service/ArXivImportMetadataSourceServiceImpl.java | 9 --------- 1 file changed, 9 deletions(-) diff --git a/dspace-api/src/main/java/org/dspace/importer/external/arxiv/service/ArXivImportMetadataSourceServiceImpl.java b/dspace-api/src/main/java/org/dspace/importer/external/arxiv/service/ArXivImportMetadataSourceServiceImpl.java index 4320856842e6..869aaecd36f6 100644 --- a/dspace-api/src/main/java/org/dspace/importer/external/arxiv/service/ArXivImportMetadataSourceServiceImpl.java +++ b/dspace-api/src/main/java/org/dspace/importer/external/arxiv/service/ArXivImportMetadataSourceServiceImpl.java @@ -253,9 +253,6 @@ public List call() throws Exception { } Invocation.Builder invocationBuilder = local.request(MediaType.TEXT_PLAIN_TYPE); Response response = invocationBuilder.get(); - if (response.getStatus() == 400) { - throw new IllegalArgumentException("Invalid ArXiv ID"); - } String responseString = response.readEntity(String.class); List omElements = splitToRecords(responseString); for (OMElement record : omElements) { @@ -298,9 +295,6 @@ public List call() throws Exception { WebTarget local 
= webTarget.queryParam("id_list", arxivid); Invocation.Builder invocationBuilder = local.request(MediaType.TEXT_PLAIN_TYPE); Response response = invocationBuilder.get(); - if (response.getStatus() == 400) { - throw new IllegalArgumentException("Invalid ArXiv ID"); - } String responseString = response.readEntity(String.class); List omElements = splitToRecords(responseString); for (OMElement record : omElements) { @@ -333,9 +327,6 @@ public List call() throws Exception { WebTarget local = webTarget.queryParam("search_query", queryString); Invocation.Builder invocationBuilder = local.request(MediaType.TEXT_PLAIN_TYPE); Response response = invocationBuilder.get(); - if (response.getStatus() == 400) { - throw new IllegalArgumentException("Invalid ArXiv ID"); - } String responseString = response.readEntity(String.class); List omElements = splitToRecords(responseString); for (OMElement record : omElements) { From 1406a4c2e669b0dea1f9499eacfac033dbc7dddf Mon Sep 17 00:00:00 2001 From: Pasquale Cavallo Date: Fri, 31 Jul 2020 17:08:23 +0200 Subject: [PATCH 16/34] Update pom.xml, ArXivService.java, and PubmedService.java --- dspace-api/pom.xml | 6 - .../dspace/submit/lookup/ArXivService.java | 162 ----------- .../dspace/submit/lookup/PubmedService.java | 274 ------------------ 3 files changed, 442 deletions(-) delete mode 100644 dspace-api/src/main/java/org/dspace/submit/lookup/ArXivService.java delete mode 100644 dspace-api/src/main/java/org/dspace/submit/lookup/PubmedService.java diff --git a/dspace-api/pom.xml b/dspace-api/pom.xml index e07e1818581a..7a8662bcffcc 100644 --- a/dspace-api/pom.xml +++ b/dspace-api/pom.xml @@ -681,12 +681,6 @@ jersey-client ${jersey.version} - - - org.glassfish.jersey.inject - jersey-hk2 - ${jersey.version} - com.amazonaws diff --git a/dspace-api/src/main/java/org/dspace/submit/lookup/ArXivService.java b/dspace-api/src/main/java/org/dspace/submit/lookup/ArXivService.java deleted file mode 100644 index 337fb4175a80..000000000000 --- 
a/dspace-api/src/main/java/org/dspace/submit/lookup/ArXivService.java +++ /dev/null @@ -1,162 +0,0 @@ -/** - * The contents of this file are subject to the license and copyright - * detailed in the LICENSE and NOTICE files at the root of the source - * tree and available online at - * - * http://www.dspace.org/license/ - */ -package org.dspace.submit.lookup; - -import java.io.IOException; -import java.net.URISyntaxException; -import java.util.ArrayList; -import java.util.List; -import java.util.Set; -import javax.xml.parsers.DocumentBuilder; -import javax.xml.parsers.DocumentBuilderFactory; - -import gr.ekt.bte.core.Record; -import org.apache.commons.lang3.StringUtils; -import org.apache.http.HttpException; -import org.apache.http.HttpResponse; -import org.apache.http.HttpStatus; -import org.apache.http.StatusLine; -import org.apache.http.client.HttpClient; -import org.apache.http.client.methods.HttpGet; -import org.apache.http.client.utils.URIBuilder; -import org.apache.http.impl.client.DefaultHttpClient; -import org.apache.http.params.CoreConnectionPNames; -import org.apache.http.params.HttpParams; -import org.dspace.app.util.XMLUtils; -import org.w3c.dom.Document; -import org.w3c.dom.Element; - -/** - * @author Andrea Bollini - * @author Kostas Stamatis - * @author Luigi Andrea Pascarelli - * @author Panagiotis Koutsourakis - */ -public class ArXivService { - private int timeout = 1000; - - /** - * How long to wait for a connection to be established. 
- * - * @param timeout milliseconds - */ - public void setTimeout(int timeout) { - this.timeout = timeout; - } - - public List getByDOIs(Set dois) throws HttpException, - IOException { - if (dois != null && dois.size() > 0) { - String doisQuery = StringUtils.join(dois.iterator(), " OR "); - return search(doisQuery, null, 100); - } - return null; - } - - public List searchByTerm(String title, String author, int year) - throws HttpException, IOException { - StringBuffer query = new StringBuffer(); - if (StringUtils.isNotBlank(title)) { - query.append("ti:\"").append(title).append("\""); - } - if (StringUtils.isNotBlank(author)) { - // [FAU] - if (query.length() > 0) { - query.append(" AND "); - } - query.append("au:\"").append(author).append("\""); - } - return search(query.toString(), "", 10); - } - - protected List search(String query, String arxivid, int max_result) - throws IOException, HttpException { - List results = new ArrayList(); - HttpGet method = null; - try { - HttpClient client = new DefaultHttpClient(); - HttpParams params = client.getParams(); - params.setIntParameter(CoreConnectionPNames.CONNECTION_TIMEOUT, timeout); - - try { - URIBuilder uriBuilder = new URIBuilder("http://export.arxiv.org/api/query"); - uriBuilder.addParameter("id_list", arxivid); - uriBuilder.addParameter("search_query", query); - uriBuilder.addParameter("max_results", String.valueOf(max_result)); - method = new HttpGet(uriBuilder.build()); - } catch (URISyntaxException ex) { - throw new HttpException(ex.getMessage()); - } - - // Execute the method. 
- HttpResponse response = client.execute(method); - StatusLine responseStatus = response.getStatusLine(); - int statusCode = responseStatus.getStatusCode(); - - if (statusCode != HttpStatus.SC_OK) { - if (statusCode == HttpStatus.SC_BAD_REQUEST) { - throw new RuntimeException("arXiv query is not valid"); - } else { - throw new RuntimeException("Http call failed: " - + responseStatus); - } - } - - try { - DocumentBuilderFactory factory = DocumentBuilderFactory - .newInstance(); - factory.setValidating(false); - factory.setIgnoringComments(true); - factory.setIgnoringElementContentWhitespace(true); - // disallow DTD parsing to ensure no XXE attacks can occur. - // See https://cheatsheetseries.owasp.org/cheatsheets/XML_External_Entity_Prevention_Cheat_Sheet.html - factory.setFeature("http://apache.org/xml/features/disallow-doctype-decl", true); - - DocumentBuilder db = factory.newDocumentBuilder(); - Document inDoc = db.parse(response.getEntity().getContent()); - - Element xmlRoot = inDoc.getDocumentElement(); - List dataRoots = XMLUtils.getElementList(xmlRoot, - "entry"); - - for (Element dataRoot : dataRoots) { - Record crossitem = ArxivUtils - .convertArxixDomToRecord(dataRoot); - if (crossitem != null) { - results.add(crossitem); - } - } - } catch (Exception e) { - throw new RuntimeException( - "ArXiv identifier is not valid or not exist"); - } - } finally { - if (method != null) { - method.releaseConnection(); - } - } - - return results; - } - - public Record getByArXivIDs(String raw) throws HttpException, IOException { - if (StringUtils.isNotBlank(raw)) { - raw = raw.trim(); - if (raw.startsWith("http://arxiv.org/abs/")) { - raw = raw.substring("http://arxiv.org/abs/".length()); - } else if (raw.toLowerCase().startsWith("arxiv:")) { - raw = raw.substring("arxiv:".length()); - } - List result = search("", raw, 1); - if (result != null && result.size() > 0) { - return result.get(0); - } - } - return null; - } -} diff --git 
a/dspace-api/src/main/java/org/dspace/submit/lookup/PubmedService.java b/dspace-api/src/main/java/org/dspace/submit/lookup/PubmedService.java deleted file mode 100644 index a5e74322f58b..000000000000 --- a/dspace-api/src/main/java/org/dspace/submit/lookup/PubmedService.java +++ /dev/null @@ -1,274 +0,0 @@ -/** - * The contents of this file are subject to the license and copyright - * detailed in the LICENSE and NOTICE files at the root of the source - * tree and available online at - * - * http://www.dspace.org/license/ - */ -package org.dspace.submit.lookup; - -import java.io.File; -import java.io.FileInputStream; -import java.io.IOException; -import java.io.InputStream; -import java.net.URISyntaxException; -import java.util.ArrayList; -import java.util.List; -import javax.xml.parsers.DocumentBuilder; -import javax.xml.parsers.DocumentBuilderFactory; -import javax.xml.parsers.ParserConfigurationException; - -import gr.ekt.bte.core.Record; -import org.apache.commons.lang3.StringUtils; -import org.apache.http.HttpException; -import org.apache.http.HttpResponse; -import org.apache.http.HttpStatus; -import org.apache.http.StatusLine; -import org.apache.http.client.HttpClient; -import org.apache.http.client.methods.HttpGet; -import org.apache.http.client.utils.URIBuilder; -import org.apache.http.impl.client.DefaultHttpClient; -import org.apache.http.params.CoreConnectionPNames; -import org.apache.logging.log4j.Logger; -import org.dspace.app.util.XMLUtils; -import org.dspace.core.ConfigurationManager; -import org.w3c.dom.Document; -import org.w3c.dom.Element; -import org.xml.sax.SAXException; - -/** - * @author Andrea Bollini - * @author Kostas Stamatis - * @author Luigi Andrea Pascarelli - * @author Panagiotis Koutsourakis - */ -public class PubmedService { - - private static final Logger log = org.apache.logging.log4j.LogManager.getLogger(PubmedService.class); - - protected int timeout = 1000; - - public void setTimeout(int timeout) { - this.timeout = timeout; - } - - 
public Record getByPubmedID(String pubmedid) throws HttpException, - IOException, ParserConfigurationException, SAXException { - List ids = new ArrayList(); - ids.add(pubmedid.trim()); - List items = getByPubmedIDs(ids); - if (items != null && items.size() > 0) { - return items.get(0); - } - return null; - } - - public List search(String title, String author, int year) - throws HttpException, IOException { - StringBuffer query = new StringBuffer(); - if (StringUtils.isNotBlank(title)) { - query.append("((").append(title).append("[TI]) OR ("); - // [TI] does not always work, book chapter title - query.append("(").append(title).append("[book]))"); - } - if (StringUtils.isNotBlank(author)) { - // [FAU] - if (query.length() > 0) { - query.append(" AND "); - } - query.append("(").append(author).append("[AU])"); - } - if (year != -1) { - // [DP] - if (query.length() > 0) { - query.append(" AND "); - } - query.append(year).append("[DP]"); - } - return search(query.toString()); - } - - public List search(String query) throws IOException, HttpException { - List results = new ArrayList<>(); - if (!ConfigurationManager.getBooleanProperty(SubmissionLookupService.CFG_MODULE, "remoteservice.demo")) { - HttpGet method = null; - try { - HttpClient client = new DefaultHttpClient(); - client.getParams().setIntParameter(CoreConnectionPNames.CONNECTION_TIMEOUT, timeout); - - URIBuilder uriBuilder = new URIBuilder( - "https://eutils.ncbi.nlm.nih.gov/entrez/eutils/esearch.fcgi"); - uriBuilder.addParameter("db", "pubmed"); - uriBuilder.addParameter("datetype", "edat"); - uriBuilder.addParameter("retmax", "10"); - uriBuilder.addParameter("term", query); - method = new HttpGet(uriBuilder.build()); - - // Execute the method. 
- HttpResponse response = client.execute(method); - StatusLine statusLine = response.getStatusLine(); - int statusCode = statusLine.getStatusCode(); - - if (statusCode != HttpStatus.SC_OK) { - throw new RuntimeException("WS call failed: " - + statusLine); - } - - DocumentBuilderFactory factory = DocumentBuilderFactory - .newInstance(); - factory.setValidating(false); - factory.setIgnoringComments(true); - factory.setIgnoringElementContentWhitespace(true); - // disallow DTD parsing to ensure no XXE attacks can occur. - // See https://cheatsheetseries.owasp.org/cheatsheets/XML_External_Entity_Prevention_Cheat_Sheet.html - factory.setFeature("http://apache.org/xml/features/disallow-doctype-decl", true); - - DocumentBuilder builder; - try { - builder = factory.newDocumentBuilder(); - - Document inDoc = builder.parse(response.getEntity().getContent()); - - Element xmlRoot = inDoc.getDocumentElement(); - Element idList = XMLUtils.getSingleElement(xmlRoot, - "IdList"); - List pubmedIDs = XMLUtils.getElementValueList( - idList, "Id"); - results = getByPubmedIDs(pubmedIDs); - } catch (ParserConfigurationException e1) { - log.error(e1.getMessage(), e1); - } catch (SAXException e1) { - log.error(e1.getMessage(), e1); - } - } catch (Exception e1) { - log.error(e1.getMessage(), e1); - } finally { - if (method != null) { - method.releaseConnection(); - } - } - } else { - InputStream stream = null; - try { - File file = new File( - ConfigurationManager.getProperty("dspace.dir") - + "/config/crosswalks/demo/pubmed-search.xml"); - stream = new FileInputStream(file); - DocumentBuilderFactory factory = DocumentBuilderFactory - .newInstance(); - factory.setValidating(false); - factory.setIgnoringComments(true); - factory.setIgnoringElementContentWhitespace(true); - // disallow DTD parsing to ensure no XXE attacks can occur. 
- // See https://cheatsheetseries.owasp.org/cheatsheets/XML_External_Entity_Prevention_Cheat_Sheet.html - factory.setFeature("http://apache.org/xml/features/disallow-doctype-decl", true); - - DocumentBuilder builder = factory.newDocumentBuilder(); - Document inDoc = builder.parse(stream); - - Element xmlRoot = inDoc.getDocumentElement(); - Element idList = XMLUtils.getSingleElement(xmlRoot, "IdList"); - List pubmedIDs = XMLUtils.getElementValueList(idList, - "Id"); - results = getByPubmedIDs(pubmedIDs); - } catch (Exception e) { - throw new RuntimeException(e.getMessage(), e); - } finally { - if (stream != null) { - try { - stream.close(); - } catch (IOException e) { - e.printStackTrace(); - } - } - } - } - return results; - } - - public List getByPubmedIDs(List pubmedIDs) - throws HttpException, IOException, ParserConfigurationException, - SAXException { - List results = new ArrayList(); - HttpGet method = null; - try { - HttpClient client = new DefaultHttpClient(); - client.getParams().setIntParameter(CoreConnectionPNames.CONNECTION_TIMEOUT, 5 * timeout); - - try { - URIBuilder uriBuilder = new URIBuilder( - "https://eutils.ncbi.nlm.nih.gov/entrez/eutils/efetch.fcgi"); - uriBuilder.addParameter("db", "pubmed"); - uriBuilder.addParameter("retmode", "xml"); - uriBuilder.addParameter("rettype", "full"); - uriBuilder.addParameter("id", StringUtils.join( - pubmedIDs.iterator(), ",")); - method = new HttpGet(uriBuilder.build()); - } catch (URISyntaxException ex) { - throw new RuntimeException("Request not sent", ex); - } - - // Execute the method. 
- HttpResponse response = client.execute(method); - StatusLine statusLine = response.getStatusLine(); - int statusCode = statusLine.getStatusCode(); - - if (statusCode != HttpStatus.SC_OK) { - throw new RuntimeException("WS call failed: " + statusLine); - } - - DocumentBuilderFactory factory = DocumentBuilderFactory - .newInstance(); - factory.setValidating(false); - factory.setIgnoringComments(true); - factory.setIgnoringElementContentWhitespace(true); - // disallow DTD parsing to ensure no XXE attacks can occur. - // See https://cheatsheetseries.owasp.org/cheatsheets/XML_External_Entity_Prevention_Cheat_Sheet.html - factory.setFeature("http://apache.org/xml/features/disallow-doctype-decl", true); - - DocumentBuilder builder = factory.newDocumentBuilder(); - Document inDoc = builder - .parse(response.getEntity().getContent()); - - Element xmlRoot = inDoc.getDocumentElement(); - List pubArticles = XMLUtils.getElementList(xmlRoot, - "PubmedArticle"); - - for (Element xmlArticle : pubArticles) { - Record pubmedItem = null; - try { - pubmedItem = PubmedUtils - .convertPubmedDomToRecord(xmlArticle); - results.add(pubmedItem); - } catch (Exception e) { - throw new RuntimeException( - "PubmedID is not valid or not exist: " - + e.getMessage(), e); - } - } - - return results; - } finally { - if (method != null) { - method.releaseConnection(); - } - } - } - - public List search(String doi, String pmid) throws HttpException, - IOException { - StringBuffer query = new StringBuffer(); - if (StringUtils.isNotBlank(doi)) { - query.append(doi); - query.append("[AID]"); - } - if (StringUtils.isNotBlank(pmid)) { - // [FAU] - if (query.length() > 0) { - query.append(" OR "); - } - query.append(pmid).append("[PMID]"); - } - return search(query.toString()); - } -} From e35084cf239067f1118fcca5e3793aeda264d220 Mon Sep 17 00:00:00 2001 From: Pasquale Cavallo Date: Fri, 31 Jul 2020 17:13:18 +0200 Subject: [PATCH 17/34] rollback dspace.cfg changes --- dspace/config/dspace.cfg | 6 +++--- 1 
file changed, 3 insertions(+), 3 deletions(-) diff --git a/dspace/config/dspace.cfg b/dspace/config/dspace.cfg index efe61c3a5826..1bbd8951315c 100644 --- a/dspace/config/dspace.cfg +++ b/dspace/config/dspace.cfg @@ -20,7 +20,7 @@ # DSpace installation directory # Windows note: Please remember to use forward slashes for all paths (e.g. C:/dspace) -dspace.dir = /home/pasquale/Contenuti/4Science/InstallDSpace7Orig/ +dspace.dir = /dspace # URL of DSpace backend ('server' webapp). Include port number etc. # This is where REST API and all enabled server modules (OAI-PMH, SWORD, SWORDv2, RDF, etc) will respond @@ -50,7 +50,7 @@ solr.server = http://localhost:8983/solr # URL for connecting to database # * Postgres template: jdbc:postgresql://localhost:5432/dspace # * Oracle template: jdbc:oracle:thin:@//localhost:1521/xe -db.url = jdbc:postgresql://localhost:5432/dspace7 +db.url = jdbc:postgresql://localhost:5432/dspace # JDBC Driver # * For Postgres: org.postgresql.Driver @@ -63,7 +63,7 @@ db.driver = org.postgresql.Driver db.dialect = org.dspace.storage.rdbms.hibernate.postgres.DSpacePostgreSQL82Dialect # Database username and password -db.username = dspace7 +db.username = dspace db.password = dspace # Database Schema name From 774e3893f9806b0da4f6c9606cd0e21f6d016932 Mon Sep 17 00:00:00 2001 From: Pasquale Cavallo Date: Mon, 3 Aug 2020 14:22:48 +0200 Subject: [PATCH 18/34] Add fix from DS-4530 --- dspace-api/pom.xml | 8 ++++++++ .../org/dspace/app/rest/converter/ConverterService.java | 6 +++++- 2 files changed, 13 insertions(+), 1 deletion(-) diff --git a/dspace-api/pom.xml b/dspace-api/pom.xml index 7a8662bcffcc..c6da39b26863 100644 --- a/dspace-api/pom.xml +++ b/dspace-api/pom.xml @@ -323,6 +323,14 @@ apache-jena-libs pom + + + + org.glassfish.jersey.inject + jersey-hk2 + ${jersey.version} + + commons-codec commons-codec diff --git a/dspace-server-webapp/src/main/java/org/dspace/app/rest/converter/ConverterService.java 
b/dspace-server-webapp/src/main/java/org/dspace/app/rest/converter/ConverterService.java index 563f2045ca2a..84ce1a0032e3 100644 --- a/dspace-server-webapp/src/main/java/org/dspace/app/rest/converter/ConverterService.java +++ b/dspace-server-webapp/src/main/java/org/dspace/app/rest/converter/ConverterService.java @@ -34,6 +34,7 @@ import org.dspace.app.rest.security.WebSecurityExpressionEvaluator; import org.dspace.app.rest.utils.Utils; import org.dspace.services.RequestService; +import org.springframework.aop.support.AopUtils; import org.springframework.beans.factory.annotation.Autowired; import org.springframework.beans.factory.config.BeanDefinition; import org.springframework.context.annotation.ClassPathScanningCandidateComponentProvider; @@ -51,6 +52,8 @@ /** * Converts domain objects from the DSpace service layer to rest objects, and from rest objects to resource * objects, applying {@link Projection}s where applicable. + * + * @author Luca Giamminonni (luca.giamminonni at 4science dot it) */ @Service public class ConverterService { @@ -150,7 +153,8 @@ private Annotation getAnnotationForRestObject(BaseObjectRest restObject) { .getResourceRepositoryByCategoryAndModel(baseObjectRest.getCategory(), baseObjectRest.getType()); Annotation preAuthorize = null; int maxDepth = 0; - for (Method m : repositoryToUse.getClass().getMethods()) { + // DS-4530 exclude the AOP Proxy from determining the annotations + for (Method m : AopUtils.getTargetClass(repositoryToUse).getMethods()) { if (StringUtils.equalsIgnoreCase(m.getName(), "findOne")) { int depth = howManySuperclass(m.getDeclaringClass()); if (depth > maxDepth) { From 9ee5762c3492ee4bd9edfb3f650b88d70619fb6c Mon Sep 17 00:00:00 2001 From: Pasquale Cavallo Date: Mon, 3 Aug 2020 15:20:50 +0200 Subject: [PATCH 19/34] return 400 in external source if no record were found --- .../dspace/external/provider/impl/LiveImportDataProvider.java | 4 ++++ 1 file changed, 4 insertions(+) diff --git 
a/dspace-api/src/main/java/org/dspace/external/provider/impl/LiveImportDataProvider.java b/dspace-api/src/main/java/org/dspace/external/provider/impl/LiveImportDataProvider.java index 7bc9766a19e1..613c8a4fa7e2 100644 --- a/dspace-api/src/main/java/org/dspace/external/provider/impl/LiveImportDataProvider.java +++ b/dspace-api/src/main/java/org/dspace/external/provider/impl/LiveImportDataProvider.java @@ -127,6 +127,10 @@ public int getNumberOfResults(String query) { * @return */ private ExternalDataObject getExternalDataObject(ImportRecord record) { + //return 400 if no record were found + if (record == null) { + throw new IllegalArgumentException("No record found for query or id"); + } ExternalDataObject externalDataObject = new ExternalDataObject(sourceIdentifier); String id = getFirstValue(record, recordIdMetadata); String display = getFirstValue(record, displayMetadata); From bdcf064660b471b37061b33eb90b5927362c5e14 Mon Sep 17 00:00:00 2001 From: Pasquale Cavallo Date: Mon, 3 Aug 2020 15:21:30 +0200 Subject: [PATCH 20/34] return 400 on arxiv in no record were found --- .../ArXivImportMetadataSourceServiceImpl.java | 66 ++++++++++++------- 1 file changed, 41 insertions(+), 25 deletions(-) diff --git a/dspace-api/src/main/java/org/dspace/importer/external/arxiv/service/ArXivImportMetadataSourceServiceImpl.java b/dspace-api/src/main/java/org/dspace/importer/external/arxiv/service/ArXivImportMetadataSourceServiceImpl.java index 869aaecd36f6..94b31fde965e 100644 --- a/dspace-api/src/main/java/org/dspace/importer/external/arxiv/service/ArXivImportMetadataSourceServiceImpl.java +++ b/dspace-api/src/main/java/org/dspace/importer/external/arxiv/service/ArXivImportMetadataSourceServiceImpl.java @@ -199,16 +199,20 @@ public Integer call() throws Exception { } Invocation.Builder invocationBuilder = local.request(MediaType.TEXT_PLAIN_TYPE); Response response = invocationBuilder.get(); - String responseString = response.readEntity(String.class); - OMXMLParserWrapper records 
= OMXMLBuilderFactory.createOMBuilder(new StringReader(responseString)); - OMElement element = records.getDocumentElement(); - AXIOMXPath xpath = null; - try { - xpath = new AXIOMXPath("opensearch:totalResults"); - xpath.addNamespace("opensearch", "http://a9.com/-/spec/opensearch/1.1/"); - OMElement count = (OMElement) xpath.selectSingleNode(element); - return Integer.parseInt(count.getText()); - } catch (JaxenException e) { + if (response.getStatus() == 200) { + String responseString = response.readEntity(String.class); + OMXMLParserWrapper records = OMXMLBuilderFactory.createOMBuilder(new StringReader(responseString)); + OMElement element = records.getDocumentElement(); + AXIOMXPath xpath = null; + try { + xpath = new AXIOMXPath("opensearch:totalResults"); + xpath.addNamespace("opensearch", "http://a9.com/-/spec/opensearch/1.1/"); + OMElement count = (OMElement) xpath.selectSingleNode(element); + return Integer.parseInt(count.getText()); + } catch (JaxenException e) { + return null; + } + } else { return null; } } @@ -253,12 +257,16 @@ public List call() throws Exception { } Invocation.Builder invocationBuilder = local.request(MediaType.TEXT_PLAIN_TYPE); Response response = invocationBuilder.get(); - String responseString = response.readEntity(String.class); - List omElements = splitToRecords(responseString); - for (OMElement record : omElements) { - results.add(transformSourceRecords(record)); + if (response.getStatus() == 200) { + String responseString = response.readEntity(String.class); + List omElements = splitToRecords(responseString); + for (OMElement record : omElements) { + results.add(transformSourceRecords(record)); + } + return results; + } else { + return null; } - return results; } } @@ -295,12 +303,16 @@ public List call() throws Exception { WebTarget local = webTarget.queryParam("id_list", arxivid); Invocation.Builder invocationBuilder = local.request(MediaType.TEXT_PLAIN_TYPE); Response response = invocationBuilder.get(); - String responseString 
= response.readEntity(String.class); - List omElements = splitToRecords(responseString); - for (OMElement record : omElements) { - results.add(transformSourceRecords(record)); + if (response.getStatus() == 200) { + String responseString = response.readEntity(String.class); + List omElements = splitToRecords(responseString); + for (OMElement record : omElements) { + results.add(transformSourceRecords(record)); + } + return results; + } else { + return null; } - return results; } } @@ -327,12 +339,16 @@ public List call() throws Exception { WebTarget local = webTarget.queryParam("search_query", queryString); Invocation.Builder invocationBuilder = local.request(MediaType.TEXT_PLAIN_TYPE); Response response = invocationBuilder.get(); - String responseString = response.readEntity(String.class); - List omElements = splitToRecords(responseString); - for (OMElement record : omElements) { - results.add(transformSourceRecords(record)); + if (response.getStatus() == 200) { + String responseString = response.readEntity(String.class); + List omElements = splitToRecords(responseString); + for (OMElement record : omElements) { + results.add(transformSourceRecords(record)); + } + return results; + } else { + return null; } - return results; } private String getQuery(Query query) { From 4e48ff8de12d53cbc17c7513a75d651b851223d5 Mon Sep 17 00:00:00 2001 From: Marie Verdonck Date: Thu, 6 Aug 2020 10:12:28 +0200 Subject: [PATCH 21/34] 72387: Workflow minor fixes --- .../org/dspace/content/CollectionServiceImpl.java | 3 +-- .../org/dspace/content/WorkspaceItemServiceImpl.java | 7 ++++++- .../dspace/xmlworkflow/XmlWorkflowFactoryImpl.java | 4 ++-- .../xmlworkflow/factory/XmlWorkflowFactory.java | 2 +- .../main/java/org/dspace/xmlworkflow/state/Step.java | 2 +- .../dspace/xmlworkflow/XmlWorkflowFactoryTest.java | 4 ++-- .../WorkflowDefinitionCollectionsLinkRepository.java | 12 ++---------- .../app/rest/WorkflowDefinitionRestRepositoryIT.java | 2 +- 8 files changed, 16 insertions(+), 20 
deletions(-) diff --git a/dspace-api/src/main/java/org/dspace/content/CollectionServiceImpl.java b/dspace-api/src/main/java/org/dspace/content/CollectionServiceImpl.java index 34bf4f5fc15a..559b95edb835 100644 --- a/dspace-api/src/main/java/org/dspace/content/CollectionServiceImpl.java +++ b/dspace-api/src/main/java/org/dspace/content/CollectionServiceImpl.java @@ -57,7 +57,6 @@ import org.dspace.harvest.service.HarvestedCollectionService; import org.dspace.workflow.factory.WorkflowServiceFactory; import org.dspace.xmlworkflow.WorkflowConfigurationException; -import org.dspace.xmlworkflow.XmlWorkflowFactoryImpl; import org.dspace.xmlworkflow.factory.XmlWorkflowFactory; import org.dspace.xmlworkflow.state.Workflow; import org.dspace.xmlworkflow.storedcomponents.CollectionRole; @@ -387,7 +386,7 @@ public void setWorkflowGroup(Context context, Collection collection, int step, G log.error(LogManager.getHeader(context, "setWorkflowGroup", "collection_id=" + collection.getID() + " " + e.getMessage()), e); } - if (!StringUtils.equals(XmlWorkflowFactoryImpl.LEGACY_WORKFLOW_NAME, workflow.getID())) { + if (!StringUtils.equals(workflowFactory.getDefaultWorkflow().getID(), workflow.getID())) { throw new IllegalArgumentException( "setWorkflowGroup can be used only on collection with the default basic dspace workflow. 
" + "Instead, the collection: " diff --git a/dspace-api/src/main/java/org/dspace/content/WorkspaceItemServiceImpl.java b/dspace-api/src/main/java/org/dspace/content/WorkspaceItemServiceImpl.java index c45f6c737c46..0d5ba537945c 100644 --- a/dspace-api/src/main/java/org/dspace/content/WorkspaceItemServiceImpl.java +++ b/dspace-api/src/main/java/org/dspace/content/WorkspaceItemServiceImpl.java @@ -265,7 +265,12 @@ public void deleteWrapper(Context context, WorkspaceItem workspaceItem) throws S // Need to delete the workspaceitem row first since it refers // to item ID - workspaceItem.getSupervisorGroups().clear(); + try { + workspaceItem.getSupervisorGroups().clear(); + } catch (Exception e) { + log.error("failed to clear supervisor group", e); + } + workspaceItemDAO.delete(context, workspaceItem); } diff --git a/dspace-api/src/main/java/org/dspace/xmlworkflow/XmlWorkflowFactoryImpl.java b/dspace-api/src/main/java/org/dspace/xmlworkflow/XmlWorkflowFactoryImpl.java index ffc62dcddb2c..4150d84d0495 100644 --- a/dspace-api/src/main/java/org/dspace/xmlworkflow/XmlWorkflowFactoryImpl.java +++ b/dspace-api/src/main/java/org/dspace/xmlworkflow/XmlWorkflowFactoryImpl.java @@ -97,7 +97,7 @@ public List getAllConfiguredWorkflows() { } @Override - public List getCollectionHandlesMappedToWorklow(Context context, String workflowName) { + public List getCollectionHandlesMappedToWorkflow(Context context, String workflowName) { List collectionsMapped = new ArrayList<>(); for (String handle : this.workflowMapping.keySet()) { if (this.workflowMapping.get(handle).getID().equals(workflowName)) { @@ -107,7 +107,7 @@ public List getCollectionHandlesMappedToWorklow(Context context, Str collectionsMapped.add(collection); } } catch (SQLException e) { - log.error("SQLException in XmlWorkflowFactoryImpl.getCollectionHandlesMappedToWorklow trying to " + + log.error("SQLException in XmlWorkflowFactoryImpl.getCollectionHandlesMappedToWorkflow trying to " + "retrieve collection with handle: " + 
handle, e); } } diff --git a/dspace-api/src/main/java/org/dspace/xmlworkflow/factory/XmlWorkflowFactory.java b/dspace-api/src/main/java/org/dspace/xmlworkflow/factory/XmlWorkflowFactory.java index 5d338437474a..db856bb57b10 100644 --- a/dspace-api/src/main/java/org/dspace/xmlworkflow/factory/XmlWorkflowFactory.java +++ b/dspace-api/src/main/java/org/dspace/xmlworkflow/factory/XmlWorkflowFactory.java @@ -86,7 +86,7 @@ public interface XmlWorkflowFactory { * @param workflowName Name of workflow we want the collections of that are mapped to is * @return List of collections mapped to the requested workflow */ - public List getCollectionHandlesMappedToWorklow(Context context, String workflowName); + public List getCollectionHandlesMappedToWorkflow(Context context, String workflowName); /** * Returns list of collections that are not mapped to any configured workflow, and thus use the default workflow diff --git a/dspace-api/src/main/java/org/dspace/xmlworkflow/state/Step.java b/dspace-api/src/main/java/org/dspace/xmlworkflow/state/Step.java index a982107d7849..16befc262637 100644 --- a/dspace-api/src/main/java/org/dspace/xmlworkflow/state/Step.java +++ b/dspace-api/src/main/java/org/dspace/xmlworkflow/state/Step.java @@ -81,7 +81,7 @@ public boolean hasUI() { /** * Get the next step based on out the outcome * @param outcome the outcome of the previous step - * @return the next stepp or NULL if there is no step configured for this outcome + * @return the next step or NULL if there is no step configured for this outcome */ public Step getNextStep(int outcome) { return outcomes.get(outcome); diff --git a/dspace-api/src/test/java/org/dspace/xmlworkflow/XmlWorkflowFactoryTest.java b/dspace-api/src/test/java/org/dspace/xmlworkflow/XmlWorkflowFactoryTest.java index c7239f1f5ae4..03a6a0e949b0 100644 --- a/dspace-api/src/test/java/org/dspace/xmlworkflow/XmlWorkflowFactoryTest.java +++ b/dspace-api/src/test/java/org/dspace/xmlworkflow/XmlWorkflowFactoryTest.java @@ -116,12 +116,12 
@@ public void destroy() { @Test public void workflowMapping_NonMappedCollection() throws WorkflowConfigurationException { Workflow workflow = xmlWorkflowFactory.getWorkflow(this.nonMappedCollection); - assertEquals("defaultWorkflow", workflow.getID()); + assertEquals(XmlWorkflowFactoryImpl.LEGACY_WORKFLOW_NAME, workflow.getID()); } @Test public void workflowMapping_MappedCollection() throws WorkflowConfigurationException { Workflow workflow = xmlWorkflowFactory.getWorkflow(this.mappedCollection); - assertEquals("selectSingleReviewer", workflow.getID()); + assertEquals( "selectSingleReviewer", workflow.getID()); } } diff --git a/dspace-server-webapp/src/main/java/org/dspace/app/rest/WorkflowDefinitionCollectionsLinkRepository.java b/dspace-server-webapp/src/main/java/org/dspace/app/rest/WorkflowDefinitionCollectionsLinkRepository.java index 7ae5f5ecc00a..fd1192e0bb51 100644 --- a/dspace-server-webapp/src/main/java/org/dspace/app/rest/WorkflowDefinitionCollectionsLinkRepository.java +++ b/dspace-server-webapp/src/main/java/org/dspace/app/rest/WorkflowDefinitionCollectionsLinkRepository.java @@ -12,13 +12,11 @@ import javax.annotation.Nullable; import javax.servlet.http.HttpServletRequest; -import org.dspace.app.rest.converter.ConverterService; import org.dspace.app.rest.model.CollectionRest; import org.dspace.app.rest.model.WorkflowDefinitionRest; import org.dspace.app.rest.projection.Projection; import org.dspace.app.rest.repository.AbstractDSpaceRestRepository; import org.dspace.app.rest.repository.LinkRestRepository; -import org.dspace.app.rest.utils.Utils; import org.dspace.content.Collection; import org.dspace.core.Context; import org.dspace.xmlworkflow.factory.XmlWorkflowFactory; @@ -43,12 +41,6 @@ public class WorkflowDefinitionCollectionsLinkRepository extends AbstractDSpaceR @Autowired protected XmlWorkflowFactory xmlWorkflowFactory; - @Autowired - protected ConverterService converter; - - @Autowired - protected Utils utils; - /** * GET endpoint that 
returns the list of collections that make an explicit use of the workflow-definition. * If a collection doesn't specify the workflow-definition to be used, the default mapping applies, @@ -69,10 +61,10 @@ public Page getCollections(@Nullable HttpServletRequest request, if (xmlWorkflowFactory.isDefaultWorkflow(workflowName)) { collectionsMappedToWorkflow.addAll(xmlWorkflowFactory.getAllNonMappedCollectionsHandles(context)); } - collectionsMappedToWorkflow.addAll(xmlWorkflowFactory.getCollectionHandlesMappedToWorklow(context, + collectionsMappedToWorkflow.addAll(xmlWorkflowFactory.getCollectionHandlesMappedToWorkflow(context, workflowName)); Pageable pageable = optionalPageable != null ? optionalPageable : PageRequest.of(0, 20); - return converter.toRestPage(collectionsMappedToWorkflow, pageable, + return super.converter.toRestPage(collectionsMappedToWorkflow, pageable, projection); } else { throw new ResourceNotFoundException("No workflow with name " + workflowName + " is configured"); diff --git a/dspace-server-webapp/src/test/java/org/dspace/app/rest/WorkflowDefinitionRestRepositoryIT.java b/dspace-server-webapp/src/test/java/org/dspace/app/rest/WorkflowDefinitionRestRepositoryIT.java index ab99e1d4c283..3f7ae7400065 100644 --- a/dspace-server-webapp/src/test/java/org/dspace/app/rest/WorkflowDefinitionRestRepositoryIT.java +++ b/dspace-server-webapp/src/test/java/org/dspace/app/rest/WorkflowDefinitionRestRepositoryIT.java @@ -349,7 +349,7 @@ public void getCollectionsOfWorkflowByName_NonDefaultWorkflow() throws Exception if (StringUtils.isNotBlank(firstNonDefaultWorkflowName)) { List mappedCollections - = xmlWorkflowFactory.getCollectionHandlesMappedToWorklow(context, firstNonDefaultWorkflowName); + = xmlWorkflowFactory.getCollectionHandlesMappedToWorkflow(context, firstNonDefaultWorkflowName); //When we call this facets endpoint if (mappedCollections.size() > 0) { //returns array of collection jsons that are mapped to given workflow From 
c733576f5010a48e089231a7560fa0832feac6f0 Mon Sep 17 00:00:00 2001 From: Pasquale Cavallo Date: Wed, 12 Aug 2020 15:37:51 +0200 Subject: [PATCH 22/34] Fix conflict --- .../service/ArXivImportMetadataSourceServiceImpl.java | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) diff --git a/dspace-api/src/main/java/org/dspace/importer/external/arxiv/service/ArXivImportMetadataSourceServiceImpl.java b/dspace-api/src/main/java/org/dspace/importer/external/arxiv/service/ArXivImportMetadataSourceServiceImpl.java index 94b31fde965e..94e2426fe649 100644 --- a/dspace-api/src/main/java/org/dspace/importer/external/arxiv/service/ArXivImportMetadataSourceServiceImpl.java +++ b/dspace-api/src/main/java/org/dspace/importer/external/arxiv/service/ArXivImportMetadataSourceServiceImpl.java @@ -30,6 +30,7 @@ import org.dspace.importer.external.datamodel.Query; import org.dspace.importer.external.exception.MetadataSourceException; import org.dspace.importer.external.service.AbstractImportMetadataSourceService; +import org.dspace.importer.external.service.components.QuerySource; import org.jaxen.JaxenException; /** @@ -38,7 +39,8 @@ * @author Pasquale Cavallo (pasquale.cavallo at 4Science dot it) * */ -public class ArXivImportMetadataSourceServiceImpl extends AbstractImportMetadataSourceService { +public class ArXivImportMetadataSourceServiceImpl extends AbstractImportMetadataSourceService + implements QuerySource { private WebTarget webTarget; private String baseAddress; @@ -77,7 +79,7 @@ public Collection getRecords(Query query) throws MetadataSourceExc * @throws MetadataSourceException if the underlying methods throw any exception. 
*/ @Override - public int getNbRecords(String query) throws MetadataSourceException { + public int getRecordsCount(String query) throws MetadataSourceException { return retry(new CountByQueryCallable(query)); } @@ -90,7 +92,7 @@ public int getNbRecords(String query) throws MetadataSourceException { * @throws MetadataSourceException if the underlying methods throw any exception. */ @Override - public int getNbRecords(Query query) throws MetadataSourceException { + public int getRecordsCount(Query query) throws MetadataSourceException { return retry(new CountByQueryCallable(query)); } @@ -400,5 +402,4 @@ public String getBaseAddress() { public void setBaseAddress(String baseAddress) { this.baseAddress = baseAddress; } - } From 052e775bc532cd81d196b298b11fa855d7051183 Mon Sep 17 00:00:00 2001 From: Pasquale Cavallo Date: Wed, 12 Aug 2020 16:48:38 +0200 Subject: [PATCH 23/34] Fix conflict on live import --- .../provider/impl/LiveImportDataProvider.java | 22 +++++++++---------- 1 file changed, 11 insertions(+), 11 deletions(-) diff --git a/dspace-api/src/main/java/org/dspace/external/provider/impl/LiveImportDataProvider.java b/dspace-api/src/main/java/org/dspace/external/provider/impl/LiveImportDataProvider.java index 613c8a4fa7e2..45855a74ad48 100644 --- a/dspace-api/src/main/java/org/dspace/external/provider/impl/LiveImportDataProvider.java +++ b/dspace-api/src/main/java/org/dspace/external/provider/impl/LiveImportDataProvider.java @@ -19,7 +19,7 @@ import org.dspace.importer.external.datamodel.ImportRecord; import org.dspace.importer.external.exception.MetadataSourceException; import org.dspace.importer.external.metadatamapping.MetadatumDTO; -import org.dspace.importer.external.service.components.MetadataSource; +import org.dspace.importer.external.service.components.QuerySource; /** * This class allows to configure a Live Import Provider as an External Data Provider @@ -29,9 +29,9 @@ */ public class LiveImportDataProvider implements ExternalDataProvider { /** - * 
The {@link MetadataSource} live import provider + * The {@link QuerySource} live import provider */ - private MetadataSource metadataSource; + private QuerySource querySource; /** * An unique human readable identifier for this provider @@ -59,8 +59,8 @@ public void setSourceIdentifier(String sourceIdentifier) { * This method set the MetadataSource for the ExternalDataProvider * @param metadataSource {@link org.dspace.importer.external.service.components.MetadataSource} implementation used to process the input data */ - public void setMetadataSource(MetadataSource metadataSource) { - this.metadataSource = metadataSource; + public void setMetadataSource(QuerySource querySource) { + this.querySource = querySource; } /** @@ -82,11 +82,11 @@ public void setDisplayMetadata(String displayMetadata) { @Override public Optional getExternalDataObject(String id) { try { - ExternalDataObject externalDataObject = getExternalDataObject(metadataSource.getRecord(id)); + ExternalDataObject externalDataObject = getExternalDataObject(querySource.getRecord(id)); return Optional.of(externalDataObject); } catch (MetadataSourceException e) { throw new RuntimeException( - "The live import provider " + metadataSource.getImportSource() + " throws an exception", e); + "The live import provider " + querySource.getImportSource() + " throws an exception", e); } } @@ -94,11 +94,11 @@ public Optional getExternalDataObject(String id) { public List searchExternalDataObjects(String query, int start, int limit) { Collection records; try { - records = metadataSource.getRecords(query, start, limit); + records = querySource.getRecords(query, start, limit); return records.stream().map(r -> getExternalDataObject(r)).collect(Collectors.toList()); } catch (MetadataSourceException e) { throw new RuntimeException( - "The live import provider " + metadataSource.getImportSource() + " throws an exception", e); + "The live import provider " + querySource.getImportSource() + " throws an exception", e); } } @@ 
-110,10 +110,10 @@ public boolean supports(String source) { @Override public int getNumberOfResults(String query) { try { - return metadataSource.getNbRecords(query); + return querySource.getRecordsCount(query); } catch (MetadataSourceException e) { throw new RuntimeException( - "The live import provider " + metadataSource.getImportSource() + " throws an exception", e); + "The live import provider " + querySource.getImportSource() + " throws an exception", e); } } From 2ef44b15876c7ca9c5e36e96941167a0a3f61db8 Mon Sep 17 00:00:00 2001 From: Pasquale Cavallo Date: Thu, 13 Aug 2020 16:35:21 +0200 Subject: [PATCH 24/34] lof exception in SimpleXPathMetadataContributor --- .../contributor/SimpleXpathMetadatumContributor.java | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/dspace-api/src/main/java/org/dspace/importer/external/metadatamapping/contributor/SimpleXpathMetadatumContributor.java b/dspace-api/src/main/java/org/dspace/importer/external/metadatamapping/contributor/SimpleXpathMetadatumContributor.java index 91a5fc3fe29b..c8d2467d5f64 100644 --- a/dspace-api/src/main/java/org/dspace/importer/external/metadatamapping/contributor/SimpleXpathMetadatumContributor.java +++ b/dspace-api/src/main/java/org/dspace/importer/external/metadatamapping/contributor/SimpleXpathMetadatumContributor.java @@ -166,7 +166,7 @@ public Collection contributeMetadata(OMElement t) { } return values; } catch (JaxenException e) { - log.error(query); + log.error(query, e); throw new RuntimeException(e); } From fe6f51d246fbd83482c4c29573d4d6dae4ebf858 Mon Sep 17 00:00:00 2001 From: Pasquale Cavallo Date: Thu, 13 Aug 2020 17:01:25 +0200 Subject: [PATCH 25/34] Improve Java on ArXivImportMetadataSourceServiceImpl --- .../ArXivImportMetadataSourceServiceImpl.java | 36 +++++++++++++------ 1 file changed, 26 insertions(+), 10 deletions(-) diff --git a/dspace-api/src/main/java/org/dspace/importer/external/arxiv/service/ArXivImportMetadataSourceServiceImpl.java 
b/dspace-api/src/main/java/org/dspace/importer/external/arxiv/service/ArXivImportMetadataSourceServiceImpl.java index 94e2426fe649..6b418423fac6 100644 --- a/dspace-api/src/main/java/org/dspace/importer/external/arxiv/service/ArXivImportMetadataSourceServiceImpl.java +++ b/dspace-api/src/main/java/org/dspace/importer/external/arxiv/service/ArXivImportMetadataSourceServiceImpl.java @@ -46,7 +46,7 @@ public class ArXivImportMetadataSourceServiceImpl extends AbstractImportMetadata private String baseAddress; /** - * Find the number of records matching a string query. Supports pagination + * Find the records matching the query string in ArXiv. Supports pagination. * * @param query a query string to base the search on. * @param start offset to start at @@ -60,8 +60,11 @@ public Collection getRecords(String query, int start, int count) t } /** - * Find records based on a object query. - * + * Find records based on a query object and convert them to a list of metadata mapped in ImportRecord. + * The entry with the key "query" of the Query's map will be used as the query string value. + * + * @see org.dspace.importer.external.datamodel.Query + * @see org.dspace.importer.external.datamodel.ImportRecord * @param query a query object to base the search on. * @return a set of records. Fully transformed. * @throws MetadataSourceException if the underlying methods throw any exception. @@ -72,7 +75,7 @@ public Collection getRecords(Query query) throws MetadataSourceExc } /** - * Find the number of records matching a query; + * Find the number of records matching the query string in ArXiv. * * @param query a query object to base the search on. * @return the sum of the matching records over this import source @@ -86,7 +89,9 @@ public int getRecordsCount(String query) throws MetadataSourceException { /** * Find the number of records matching a query; - * + * The entry with the key "query" of the Query's map will be used to get the query string.
+ * + * @see org.dspace.importer.external.datamodel.Query * @param query a query string to base the search on. * @return the sum of the matching records over this import source * @throws MetadataSourceException if the underlying methods throw any exception. @@ -97,7 +102,7 @@ public int getRecordsCount(Query query) throws MetadataSourceException { } /** - * Get a single record from the source by id + * Get a single metadata record from ArXiv by ArXiv ID. * * @param id id of the record in ArXiv * @return the first matching record @@ -111,8 +116,10 @@ public ImportRecord getRecord(String id) throws MetadataSourceException { } /** - * Get a single record from the source. - * + * Get a single record from ArXiv matching the query. + * The "query" field will be used to retrieve the data. + * + * @see org.dspace.importer.external.datamodel.Query * @param query a query matching a single record * @return the first matching record * @throws MetadataSourceException if the underlying methods throw any exception. @@ -169,6 +176,9 @@ public Collection findMatchingRecords(Query query) throws Metadata /** * This class is a Callable implementation to count the number of entries for an ArXiv * query. + * This Callable uses the string queryString passed to the constructor as the query value sent to ArXiv. + * If the object is constructed from a Query instance, the value of the Query's + * map with the key "query" will be used. * * @author Pasquale Cavallo (pasquale.cavallo at 4science dot it) * @@ -223,6 +233,9 @@ public Integer call() throws Exception { /** * This class is a Callable implementation to get ArXiv entries based on * query object. + * This Callable uses the string queryString passed to the constructor as the query value. + * If the object is constructed from a Query instance, the Query's map entry with key "query" will be used. + * Pagination is supported too, using the values of the Query's map with keys "start" and "count".
* * @see org.dspace.importer.external.datamodel.Query * @author Pasquale Cavallo (pasquale.cavallo at 4science dot it) @@ -273,8 +286,9 @@ public List call() throws Exception { } /** - * This class is a Callable implementation to get ArXiv entry using ArXiv ID - * + * This class is a Callable implementation to get an ArXiv entry using ArXiv ID. + * The ID to use can be passed through the constructor as a String or as a Query's map entry, with the key "id". + * * @author Pasquale Cavallo (pasquale.cavallo at 4science dot it) * */ @@ -321,6 +335,8 @@ public List call() throws Exception { /** * This class is a Callable implementation to search ArXiv entries * using author and title. + * There are two fields in the Query map to pass, with keys "title" and "author" + * (at least one must be used). * * @see org.dspace.importer.external.datamodel.Query * @author Pasquale Cavallo (pasquale.cavallo at 4science dot it) From e1fb87773c4aac18072fb8eef37cf866945d95e4 Mon Sep 17 00:00:00 2001 From: Samuel Date: Thu, 13 Aug 2020 20:11:32 +0200 Subject: [PATCH 26/34] taskid 72455 Cookie Preferences per account --- .../java/org/dspace/eperson/EPersonTest.java | 16 ++++++++++++++++ dspace/config/registries/dspace-types.xml | 14 ++++++++++++++ 2 files changed, 30 insertions(+) diff --git a/dspace-api/src/test/java/org/dspace/eperson/EPersonTest.java b/dspace-api/src/test/java/org/dspace/eperson/EPersonTest.java index 8950bfa40956..cd04fbb3404c 100644 --- a/dspace-api/src/test/java/org/dspace/eperson/EPersonTest.java +++ b/dspace-api/src/test/java/org/dspace/eperson/EPersonTest.java @@ -78,6 +78,22 @@ public void destroy() { super.destroy(); } + @Test + public void testPreferences() throws Exception { + + ePersonService.addMetadata(context, eperson, "dspace", "cookies", "functional", null, "true"); + ePersonService.addMetadata(context, eperson, "dspace", "cookies", "statistics", null, "false"); + context.commit(); + + assertEquals( + "true", + ePersonService.getMetadataFirstValue(eperson,
"dspace", "cookies", "functional", null) + ); + assertEquals( + "false", + ePersonService.getMetadataFirstValue(eperson, "dspace", "cookies", "statistics", null) + ); + } /** * Test of equals method, of class EPerson. diff --git a/dspace/config/registries/dspace-types.xml b/dspace/config/registries/dspace-types.xml index 56985373f08e..3c3f460bb8a8 100644 --- a/dspace/config/registries/dspace-types.xml +++ b/dspace/config/registries/dspace-types.xml @@ -16,5 +16,19 @@ + + dspace + cookies + functional + + + + + dspace + cookies + statistics + + + From 5eaee53c6b8f598e6c1392dc12660b7505646fcf Mon Sep 17 00:00:00 2001 From: Samuel Date: Wed, 19 Aug 2020 15:16:59 +0200 Subject: [PATCH 27/34] taskid 72455 Cookie Preferences per account - feedback --- .../test/java/org/dspace/eperson/EPersonTest.java | 11 +++-------- dspace/config/registries/dspace-types.xml | 12 ++---------- 2 files changed, 5 insertions(+), 18 deletions(-) diff --git a/dspace-api/src/test/java/org/dspace/eperson/EPersonTest.java b/dspace-api/src/test/java/org/dspace/eperson/EPersonTest.java index cd04fbb3404c..36d7d031ffa0 100644 --- a/dspace-api/src/test/java/org/dspace/eperson/EPersonTest.java +++ b/dspace-api/src/test/java/org/dspace/eperson/EPersonTest.java @@ -81,17 +81,12 @@ public void destroy() { @Test public void testPreferences() throws Exception { - ePersonService.addMetadata(context, eperson, "dspace", "cookies", "functional", null, "true"); - ePersonService.addMetadata(context, eperson, "dspace", "cookies", "statistics", null, "false"); + ePersonService.addMetadata(context, eperson, "dspace", "agreements", "end-user", null, "test"); context.commit(); assertEquals( - "true", - ePersonService.getMetadataFirstValue(eperson, "dspace", "cookies", "functional", null) - ); - assertEquals( - "false", - ePersonService.getMetadataFirstValue(eperson, "dspace", "cookies", "statistics", null) + "test", + ePersonService.getMetadataFirstValue(eperson, "dspace", "agreements", "end-user", null) ); } diff 
--git a/dspace/config/registries/dspace-types.xml b/dspace/config/registries/dspace-types.xml index 3c3f460bb8a8..813998edd5b4 100644 --- a/dspace/config/registries/dspace-types.xml +++ b/dspace/config/registries/dspace-types.xml @@ -18,17 +18,9 @@ dspace - cookies - functional + agreements + end-user - - dspace - cookies - statistics - - - - From 899f04c5ed1651ce04c3a51e1442c9d9cd4bac81 Mon Sep 17 00:00:00 2001 From: jonas-atmire Date: Thu, 20 Aug 2020 13:06:02 +0200 Subject: [PATCH 28/34] Additional metadatafield related to the cookie preferences --- .../src/test/java/org/dspace/eperson/EPersonTest.java | 5 +++++ dspace/config/registries/dspace-types.xml | 7 +++++++ 2 files changed, 12 insertions(+) diff --git a/dspace-api/src/test/java/org/dspace/eperson/EPersonTest.java b/dspace-api/src/test/java/org/dspace/eperson/EPersonTest.java index 36d7d031ffa0..a47974a5e480 100644 --- a/dspace-api/src/test/java/org/dspace/eperson/EPersonTest.java +++ b/dspace-api/src/test/java/org/dspace/eperson/EPersonTest.java @@ -82,12 +82,17 @@ public void destroy() { public void testPreferences() throws Exception { ePersonService.addMetadata(context, eperson, "dspace", "agreements", "end-user", null, "test"); + ePersonService.addMetadata(context, eperson, "dspace", "agreements", "cookies", null, "Dspace cookies agreement metadata"); context.commit(); assertEquals( "test", ePersonService.getMetadataFirstValue(eperson, "dspace", "agreements", "end-user", null) ); + assertEquals( + "Dspace cookies agreement metadata", + ePersonService.getMetadataFirstValue(eperson, "dspace", "agreements", "cookies", null) + ); } /** diff --git a/dspace/config/registries/dspace-types.xml b/dspace/config/registries/dspace-types.xml index 813998edd5b4..75c4ff84cd69 100644 --- a/dspace/config/registries/dspace-types.xml +++ b/dspace/config/registries/dspace-types.xml @@ -23,4 +23,11 @@ + + dspace + agreements + cookies + + + From 1773b40acb9fe65488abc59e8c101c66767650c8 Mon Sep 17 00:00:00 2001 From: 
jonas-atmire Date: Thu, 20 Aug 2020 14:53:31 +0200 Subject: [PATCH 29/34] Checkstyle fix --- dspace-api/src/test/java/org/dspace/eperson/EPersonTest.java | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/dspace-api/src/test/java/org/dspace/eperson/EPersonTest.java b/dspace-api/src/test/java/org/dspace/eperson/EPersonTest.java index a47974a5e480..99d673d4fa5c 100644 --- a/dspace-api/src/test/java/org/dspace/eperson/EPersonTest.java +++ b/dspace-api/src/test/java/org/dspace/eperson/EPersonTest.java @@ -82,7 +82,8 @@ public void destroy() { public void testPreferences() throws Exception { ePersonService.addMetadata(context, eperson, "dspace", "agreements", "end-user", null, "test"); - ePersonService.addMetadata(context, eperson, "dspace", "agreements", "cookies", null, "Dspace cookies agreement metadata"); + ePersonService.addMetadata(context, eperson, "dspace", "agreements", "cookies", null, + "Dspace cookies agreement metadata"); context.commit(); assertEquals( From fdc0cda30b065141ec6722d90bae4763bfe98624 Mon Sep 17 00:00:00 2001 From: Pasquale Cavallo <37987333+pasqualecvl@users.noreply.github.com> Date: Wed, 26 Aug 2020 14:27:56 +0200 Subject: [PATCH 30/34] Update pom.xml Add comment --- dspace-api/pom.xml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/dspace-api/pom.xml b/dspace-api/pom.xml index 68c822423cc0..41ad956d82e8 100644 --- a/dspace-api/pom.xml +++ b/dspace-api/pom.xml @@ -325,7 +325,7 @@ apache-jena-libs pom - + org.glassfish.jersey.inject From 7422dafef5d511281b808a333785e5598aec5bc4 Mon Sep 17 00:00:00 2001 From: Pasquale Cavallo Date: Thu, 27 Aug 2020 16:22:29 +0200 Subject: [PATCH 31/34] Change javadoc on ArXivIdMetadataContributor.parseValue --- .../contributor/ArXivIdMetadataContributor.java | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/dspace-api/src/main/java/org/dspace/importer/external/arxiv/metadatamapping/contributor/ArXivIdMetadataContributor.java 
b/dspace-api/src/main/java/org/dspace/importer/external/arxiv/metadatamapping/contributor/ArXivIdMetadataContributor.java index 077fe85675d3..aaea53ecfcef 100644 --- a/dspace-api/src/main/java/org/dspace/importer/external/arxiv/metadatamapping/contributor/ArXivIdMetadataContributor.java +++ b/dspace-api/src/main/java/org/dspace/importer/external/arxiv/metadatamapping/contributor/ArXivIdMetadataContributor.java @@ -39,7 +39,8 @@ public Collection contributeMetadata(OMElement t) { } /** - * ArXiv doesn't return the item id. We have to get this from the path parameter + * ArXiv returns a full URL as in the value, e.g. http://arxiv.org/abs/1911.11405v1. + * This method parses out the identifier from the end of the URL, e.g. 1911.11405v1. * * @param dtos Metadata which contains the items uri * @return the items ids From 8adc7e5c318bd09dcf38133319da61d67b261328 Mon Sep 17 00:00:00 2001 From: Samuel Date: Fri, 28 Aug 2020 15:26:22 +0200 Subject: [PATCH 32/34] taskid 72455 Cookie Preferences per account - add scope notes and more realistic test value --- .../java/org/dspace/eperson/EPersonTest.java | 23 +++++++++++++------ dspace/config/registries/dspace-types.xml | 4 ++-- 2 files changed, 18 insertions(+), 9 deletions(-) diff --git a/dspace-api/src/test/java/org/dspace/eperson/EPersonTest.java b/dspace-api/src/test/java/org/dspace/eperson/EPersonTest.java index 99d673d4fa5c..7603500e8edb 100644 --- a/dspace-api/src/test/java/org/dspace/eperson/EPersonTest.java +++ b/dspace-api/src/test/java/org/dspace/eperson/EPersonTest.java @@ -81,18 +81,27 @@ public void destroy() { @Test public void testPreferences() throws Exception { - ePersonService.addMetadata(context, eperson, "dspace", "agreements", "end-user", null, "test"); - ePersonService.addMetadata(context, eperson, "dspace", "agreements", "cookies", null, - "Dspace cookies agreement metadata"); + String cookies = + "{" + + "\"token_item\":true," + + "\"impersonation\":true," + + "\"redirect\":true," + + 
"\"language\":true," + + "\"klaro\":true," + + "\"google-analytics\":false" + + "}"; + + ePersonService.addMetadata(context, eperson, "dspace", "agreements", "cookies", null, cookies); + ePersonService.addMetadata(context, eperson, "dspace", "agreements", "end-user", null, "true"); context.commit(); assertEquals( - "test", - ePersonService.getMetadataFirstValue(eperson, "dspace", "agreements", "end-user", null) + cookies, + ePersonService.getMetadataFirstValue(eperson, "dspace", "agreements", "cookies", null) ); assertEquals( - "Dspace cookies agreement metadata", - ePersonService.getMetadataFirstValue(eperson, "dspace", "agreements", "cookies", null) + "true", + ePersonService.getMetadataFirstValue(eperson, "dspace", "agreements", "end-user", null) ); } diff --git a/dspace/config/registries/dspace-types.xml b/dspace/config/registries/dspace-types.xml index 75c4ff84cd69..f88def2453b8 100644 --- a/dspace/config/registries/dspace-types.xml +++ b/dspace/config/registries/dspace-types.xml @@ -20,14 +20,14 @@ dspace agreements end-user - + Stores whether the End User Agreement has been accepted by an EPerson. Valid values; true, false dspace agreements cookies - + Stores the cookie preferences of an EPerson, as selected in last session. Value will be an array of cookieName/boolean pairs, specifying which cookies are allowed or not allowed. From daeb55fa0c52eb61fe73cecc44d5d8f20d334d97 Mon Sep 17 00:00:00 2001 From: "Mark H. Wood" Date: Wed, 2 Sep 2020 16:19:37 -0400 Subject: [PATCH 33/34] Use getters instead of direct reference that may throw NPE. 
#2951 --- .../java/org/dspace/identifier/DOIIdentifierProvider.java | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/dspace-api/src/main/java/org/dspace/identifier/DOIIdentifierProvider.java b/dspace-api/src/main/java/org/dspace/identifier/DOIIdentifierProvider.java index 46bc317d13ea..9db44020079b 100644 --- a/dspace-api/src/main/java/org/dspace/identifier/DOIIdentifierProvider.java +++ b/dspace-api/src/main/java/org/dspace/identifier/DOIIdentifierProvider.java @@ -761,9 +761,9 @@ public String getDOIOutOfObject(DSpaceObject dso) Item item = (Item) dso; List metadata = itemService.getMetadata(item, MD_SCHEMA, DOI_ELEMENT, DOI_QUALIFIER, null); + String leftPart = DOI.RESOLVER + SLASH + getPrefix() + SLASH + getNamespaceSeparator(); for (MetadataValue id : metadata) { - if (id.getValue().startsWith( - DOI.RESOLVER + String.valueOf(SLASH) + PREFIX + String.valueOf(SLASH) + NAMESPACE_SEPARATOR)) { + if (id.getValue().startsWith(leftPart)) { return doiService.DOIFromExternalFormat(id.getValue()); } } From 1b4988d9d6df0b0c7dc87ed56e142be9d7bb736e Mon Sep 17 00:00:00 2001 From: Pasquale Cavallo Date: Thu, 3 Sep 2020 17:25:02 +0200 Subject: [PATCH 34/34] remove wrong comment --- .../metadatamapping/contributor/ArXivIdMetadataContributor.java | 1 - 1 file changed, 1 deletion(-) diff --git a/dspace-api/src/main/java/org/dspace/importer/external/arxiv/metadatamapping/contributor/ArXivIdMetadataContributor.java b/dspace-api/src/main/java/org/dspace/importer/external/arxiv/metadatamapping/contributor/ArXivIdMetadataContributor.java index aaea53ecfcef..ed5ac5960b8b 100644 --- a/dspace-api/src/main/java/org/dspace/importer/external/arxiv/metadatamapping/contributor/ArXivIdMetadataContributor.java +++ b/dspace-api/src/main/java/org/dspace/importer/external/arxiv/metadatamapping/contributor/ArXivIdMetadataContributor.java @@ -43,7 +43,6 @@ public Collection contributeMetadata(OMElement t) { * This method parses out the identifier from the end of the URL, e.g. 
1911.11405v1. * * @param dtos Metadata which contains the items uri - * @return the items ids */ private void parseValue(Collection dtos) { if (dtos != null) {