Skip to content

Commit

Permalink
Merge pull request #1939 from atmire/DS-3817
Browse files Browse the repository at this point in the history
DS-3817: Support the DSpace Cover page functionality in the bitstream endpoint
  • Loading branch information
abollini committed Feb 16, 2018
2 parents dcc0840 + 79208c7 commit 724e4cb
Show file tree
Hide file tree
Showing 5 changed files with 178 additions and 27 deletions.
14 changes: 7 additions & 7 deletions dspace-api/src/main/java/org/dspace/curate/CitationPage.java
Expand Up @@ -7,6 +7,7 @@
*/
package org.dspace.curate;

import org.apache.commons.lang3.tuple.Pair;
import org.apache.log4j.Logger;
import org.dspace.authorize.AuthorizeException;
import org.dspace.content.*;
Expand Down Expand Up @@ -150,9 +151,9 @@ protected void performItem(Item item) throws SQLException {
+ bitstream.getName() + " is citable.");
try {
//Create the cited document
File citedDocument = citationDocument.makeCitedDocument(Curator.curationContext(), bitstream);
Pair<InputStream, Long> citedDocument = citationDocument.makeCitedDocument(Curator.curationContext(), bitstream);
//Add the cited document to the appropriate bundle
this.addCitedPageToItem(citedDocument, bundle, pBundle,
this.addCitedPageToItem(citedDocument.getLeft(), bundle, pBundle,
dBundle, displayMap, item, bitstream);
} catch (Exception e) {
//Could be many things, but nothing that should be
Expand Down Expand Up @@ -187,7 +188,7 @@ protected void performItem(Item item) throws SQLException {
* A helper function for {@link CitationPage#performItem(Item)}. This function takes in the
* cited document as an InputStream and adds it to DSpace properly.
*
* @param citedTemp The temporary File that is the cited document.
* @param citedDoc The inputstream that is the cited document.
* @param bundle The bundle the cited file is from.
* @param pBundle The preservation bundle. The original document should be
* put in here if it is not already.
Expand All @@ -200,7 +201,7 @@ protected void performItem(Item item) throws SQLException {
* @throws AuthorizeException if authorization error
* @throws IOException if IO error
*/
protected void addCitedPageToItem(File citedTemp, Bundle bundle, Bundle pBundle,
protected void addCitedPageToItem(InputStream citedDoc, Bundle bundle, Bundle pBundle,
Bundle dBundle, Map<String,Bitstream> displayMap, Item item,
Bitstream bitstream) throws SQLException, AuthorizeException, IOException {
//If we are modifying a file that is not in the
Expand All @@ -218,12 +219,11 @@ protected void addCitedPageToItem(File citedTemp, Bundle bundle, Bundle pBundle,
//Create an input stream from the temporary file
//that is the cited document and create a
//bitstream from it.
InputStream inp = new FileInputStream(citedTemp);
if (displayMap.containsKey(bitstream.getName())) {
bundleService.removeBitstream(context, dBundle, displayMap.get(bitstream.getName()));
}
Bitstream citedBitstream = bitstreamService.create(context, dBundle, inp);
inp.close(); //Close up the temporary InputStream
Bitstream citedBitstream = bitstreamService.create(context, dBundle, citedDoc);
citedDoc.close(); //Close up the temporary InputStream

//Setup a good name for our bitstream and make
//it the same format as the source document.
Expand Down
Expand Up @@ -8,6 +8,7 @@
package org.dspace.disseminate;

import org.apache.commons.lang.StringUtils;
import org.apache.commons.lang3.tuple.Pair;
import org.apache.log4j.Logger;
import org.apache.pdfbox.pdmodel.PDDocument;
import org.apache.pdfbox.pdmodel.PDPage;
Expand Down Expand Up @@ -118,6 +119,8 @@ public void afterPropertiesSet() throws Exception {
//Populate VALID_TYPES
VALID_TYPES.addAll(PDF_MIMES);

// Global enabled?
citationEnabledGlobally = configurationService.getBooleanProperty("citation-page.enable_globally", false);

//Load enabled collections
String[] citationEnabledCollections = configurationService.getArrayProperty("citation-page.enabled_collections");
Expand Down Expand Up @@ -206,10 +209,6 @@ protected CitationDocumentServiceImpl() {}
protected Boolean citationEnabledGlobally = null;

protected boolean isCitationEnabledGlobally() {
if(citationEnabledGlobally == null) {
citationEnabledGlobally = configurationService.getBooleanProperty("citation-page.enable_globally", false);
}

return citationEnabledGlobally;
}

Expand Down Expand Up @@ -273,7 +272,7 @@ public boolean canGenerateCitationVersion(Context context, Bitstream bitstream)
}

@Override
public File makeCitedDocument(Context context, Bitstream bitstream)
public Pair<InputStream, Long> makeCitedDocument(Context context, Bitstream bitstream)
throws IOException, SQLException, AuthorizeException {
PDDocument document = new PDDocument();
PDDocument sourceDocument = new PDDocument();
Expand All @@ -284,8 +283,14 @@ public File makeCitedDocument(Context context, Bitstream bitstream)
generateCoverPage(context, document, coverPage, item);
addCoverPageToDocument(document, sourceDocument, coverPage);

document.save(tempDir.getAbsolutePath() + "/bitstream.cover.pdf");
return new File(tempDir.getAbsolutePath() + "/bitstream.cover.pdf");
//We already have the full PDF in memory, so keep it there
try(ByteArrayOutputStream out = new ByteArrayOutputStream()) {
document.save(out);

byte[] data = out.toByteArray();
return Pair.of((InputStream) new ByteArrayInputStream(data), new Long(data.length));
}

} finally {
sourceDocument.close();
document.close();
Expand Down
Expand Up @@ -7,6 +7,7 @@
*/
package org.dspace.disseminate.service;

import org.apache.commons.lang3.tuple.Pair;
import org.apache.pdfbox.pdmodel.PDPage;
import org.apache.pdfbox.pdmodel.PDPageContentStream;
import org.apache.pdfbox.pdmodel.font.PDFont;
Expand All @@ -15,8 +16,8 @@
import org.dspace.content.Item;
import org.dspace.core.Context;

import java.io.File;
import java.io.IOException;
import java.io.InputStream;
import java.sql.SQLException;

/**
Expand Down Expand Up @@ -81,7 +82,7 @@ public interface CitationDocumentService {
* @throws SQLException if database error
* @throws AuthorizeException if authorization error
*/
public File makeCitedDocument(Context context, Bitstream bitstream)
public Pair<InputStream, Long> makeCitedDocument(Context context, Bitstream bitstream)
throws IOException, SQLException, AuthorizeException;

/**
Expand Down
Expand Up @@ -17,6 +17,7 @@
import javax.ws.rs.core.Response;

import org.apache.catalina.connector.ClientAbortException;
import org.apache.commons.lang3.tuple.Pair;
import org.apache.log4j.Logger;
import org.dspace.app.rest.model.BitstreamRest;
import org.dspace.app.rest.utils.ContextUtil;
Expand All @@ -28,6 +29,7 @@
import org.dspace.content.service.BitstreamService;
import org.dspace.core.Constants;
import org.dspace.core.Context;
import org.dspace.disseminate.service.CitationDocumentService;
import org.dspace.services.EventService;
import org.dspace.usage.UsageEvent;
import org.springframework.beans.factory.annotation.Autowired;
Expand Down Expand Up @@ -71,6 +73,9 @@ public class BitstreamContentRestController {
@Autowired
private AuthorizeService authorizeService;

@Autowired
private CitationDocumentService citationDocumentService;

@RequestMapping(method = {RequestMethod.GET, RequestMethod.HEAD})
public void retrieve(@PathVariable UUID uuid, HttpServletResponse response,
HttpServletRequest request) throws IOException, SQLException, AuthorizeException {
Expand All @@ -86,23 +91,18 @@ public void retrieve(@PathVariable UUID uuid, HttpServletResponse response,
Long lastModified = bitstreamService.getLastModified(bit);
BitstreamFormat format = bit.getFormat(context);
String mimetype = format.getMIMEType();
String name = getBitstreamName(bit, format);

Pair<InputStream, Long> bitstreamTuple = getBitstreamInputStreamAndSize(context, bit);

// Pipe the bits
try(InputStream is = bitstreamService.retrieve(context, bit)) {

String name = bit.getName();
if (name == null) {
// give a default name to the file based on the UUID and the primary extension of the format
name = bit.getID().toString();
if (format != null && format.getExtensions() != null && format.getExtensions().size() > 0) {
name += "." + format.getExtensions().get(0);
}
}
try (InputStream is = bitstreamTuple.getLeft())
{
MultipartFileSender sender = MultipartFileSender
.fromInputStream(is)
.withBufferSize(BUFFER_SIZE)
.withFileName(name)
.withLength(bit.getSize())
.withLength(bitstreamTuple.getRight())
.withChecksum(bit.getChecksum())
.withMimetype(mimetype)
.withLastModified(lastModified)
Expand Down Expand Up @@ -134,6 +134,40 @@ public void retrieve(@PathVariable UUID uuid, HttpServletResponse response,
}
}

/**
 * Resolve the content stream to send for a bitstream, paired with the length reported for it.
 * When the citation service reports that the cover page is enabled for this bitstream, the
 * generated cited document is returned; otherwise the bitstream content itself is returned
 * together with the size recorded on the bitstream.
 *
 * @param context the DSpace context used for the lookups
 * @param bit     the bitstream being downloaded
 * @return the stream to send (left) and its length (right)
 * @throws SQLException       if database error
 * @throws IOException        if IO error
 * @throws AuthorizeException if authorization error
 */
private Pair<InputStream, Long> getBitstreamInputStreamAndSize(Context context, Bitstream bit) throws SQLException, IOException, AuthorizeException {
    if (!citationDocumentService.isCitationEnabledForBitstream(bit, context)) {
        // No cover page requested: hand out the bitstream content unchanged
        InputStream storedContent = bitstreamService.retrieve(context, bit);
        return Pair.of(storedContent, bit.getSize());
    }

    return generateBitstreamWithCitation(context, bit);
}

/**
 * Ask the citation service for the cited version of a bitstream and log the outcome.
 * The pair produced by the service is returned as-is, including when its stream is null
 * (that case is only logged as an error here).
 *
 * @param context   the DSpace context
 * @param bitstream the bitstream for which the cited document is generated
 * @return the pair returned by {@code makeCitedDocument}: stream (left) and size (right)
 * @throws SQLException       if database error
 * @throws IOException        if IO error
 * @throws AuthorizeException if authorization error
 */
private Pair<InputStream, Long> generateBitstreamWithCitation(Context context, Bitstream bitstream) throws SQLException, IOException, AuthorizeException {
    Pair<InputStream, Long> citedDocument = citationDocumentService.makeCitedDocument(context, bitstream);

    boolean streamMissing = citedDocument.getLeft() == null;
    if (streamMissing) {
        log.error("CitedDocument was null");
    } else if (log.isDebugEnabled()) {
        log.debug("CitedDocument was ok, has size " + citedDocument.getRight());
    }

    return citedDocument;
}

/**
 * Determine the file name to advertise for a bitstream.
 * The bitstream's own name is used when it has one. Otherwise a default is built from the
 * bitstream ID, followed by "." and the first extension of the format when the format
 * lists at least one extension.
 *
 * @param bit    the bitstream being downloaded
 * @param format the format of the bitstream; may be null
 * @return the bitstream name, or the ID-based default name
 */
private String getBitstreamName(Bitstream bit, BitstreamFormat format) {
    String storedName = bit.getName();
    if (storedName != null) {
        return storedName;
    }

    // No name stored: fall back to the ID, with the primary extension of the format if known
    String fallbackName = bit.getID().toString();
    if (format == null || format.getExtensions() == null || format.getExtensions().size() <= 0) {
        return fallbackName;
    }
    return fallbackName + "." + format.getExtensions().get(0);
}

private Bitstream getBitstream(Context context, @PathVariable UUID uuid, HttpServletResponse response) throws SQLException, IOException, AuthorizeException {
Bitstream bit = bitstreamService.find(context, uuid);
if (bit == null) {
Expand Down
Expand Up @@ -7,17 +7,28 @@
*/
package org.dspace.app.rest;

import static org.hamcrest.CoreMatchers.not;
import static org.hamcrest.CoreMatchers.nullValue;
import static org.junit.Assert.assertEquals;
import static org.junit.Assert.assertTrue;
import static org.springframework.test.web.servlet.request.MockMvcRequestBuilders.get;
import static org.springframework.test.web.servlet.request.MockMvcRequestBuilders.head;
import static org.springframework.test.web.servlet.result.MockMvcResultMatchers.*;

import java.io.ByteArrayInputStream;
import java.io.File;
import java.io.FileInputStream;
import java.io.IOException;
import java.io.InputStream;
import java.io.StringWriter;
import java.io.Writer;
import java.util.UUID;

import org.apache.commons.io.IOUtils;
import org.apache.commons.lang3.CharEncoding;
import org.apache.commons.lang3.StringUtils;
import org.apache.pdfbox.pdmodel.PDDocument;
import org.apache.pdfbox.text.PDFTextStripper;
import org.apache.solr.client.solrj.SolrQuery;
import org.apache.solr.client.solrj.SolrServerException;
import org.apache.solr.client.solrj.response.QueryResponse;
Expand All @@ -30,11 +41,14 @@
import org.dspace.content.Bitstream;
import org.dspace.content.Collection;
import org.dspace.content.Item;
import org.dspace.disseminate.CitationDocumentServiceImpl;
import org.dspace.eperson.Group;
import org.dspace.services.ConfigurationService;
import org.dspace.solr.MockSolrServer;
import org.junit.After;
import org.junit.Before;
import org.junit.Test;
import org.springframework.beans.factory.annotation.Autowired;

/**
* Integration test to test the /api/core/bitstreams/[id]/content endpoint
Expand All @@ -46,12 +60,19 @@ public class BitstreamContentRestControllerIT extends AbstractControllerIntegrat

private MockSolrServer mockSolrServer;

@Autowired
private ConfigurationService configurationService;

@Autowired
private CitationDocumentServiceImpl citationDocumentService;

/**
 * Prepares each test: runs the parent set-up, creates a mock Solr server for the
 * "statistics" core and removes all of its records, and sets the global citation
 * page property to false.
 *
 * @throws Exception if the parent set-up or the Solr clean-up fails
 */
@Before
public void setup() throws Exception {
super.setUp();
mockSolrServer = new MockSolrServer("statistics");
// Start from an empty statistics core so record counts asserted by the tests are exact
mockSolrServer.getSolrServer().deleteByQuery("*:*");
mockSolrServer.getSolrServer().commit();
// NOTE(review): CitationDocumentServiceImpl reads this property in afterPropertiesSet();
// setting it here alone may not reset a value the service has already loaded - confirm.
configurationService.setProperty("citation-page.enable_globally", false);
}

@After
Expand Down Expand Up @@ -301,4 +322,94 @@ private void checkNumberOfStatsRecords(Bitstream bitstream, int expectedNumberOf
assertEquals(expectedNumberOfStatsRecords, queryResponse.getResults().getNumFound());
}

/**
 * Verifies that, with the citation cover page enabled globally, downloading a PDF bitstream
 * through the content endpoint returns the document with a generated cover page in front of it.
 * The global setting is switched back off when the test ends so it cannot influence other tests.
 *
 * @throws Exception if building the test data, performing the requests or parsing the PDF fails
 */
@Test
public void retrieveCitationCoverpageOfBitstream() throws Exception {
    configurationService.setProperty("citation-page.enable_globally", true);
    // The service loads the flag in afterPropertiesSet(), so re-run it to pick up the new value
    citationDocumentService.afterPropertiesSet();

    try {
        context.turnOffAuthorisationSystem();

        //** GIVEN **
        //1. A community-collection structure with one parent community and one collection.
        parentCommunity = CommunityBuilder.createCommunity(context)
                                          .withName("Parent Community")
                                          .build();

        Collection col1 = CollectionBuilder.createCollection(context, parentCommunity)
                                           .withName("Collection 1")
                                           .build();

        //2. A public item with a bitstream
        File originalPdf = new File(testProps.getProperty("test.bitstream"));

        try (InputStream is = new FileInputStream(originalPdf)) {

            Item publicItem1 = ItemBuilder.createItem(context, col1)
                                          .withTitle("Public item citation cover page test 1")
                                          .withIssueDate("2017-10-17")
                                          .withAuthor("Smith, Donald").withAuthor("Doe, John")
                                          .build();

            Bitstream bitstream = BitstreamBuilder
                    .createBitstream(context, publicItem1, is)
                    .withName("Test bitstream")
                    .withDescription("This is a bitstream to test the citation cover page.")
                    .withMimeType("application/pdf")
                    .build();

            //** WHEN **
            //We download the bitstream
            byte[] content = getClient().perform(get("/api/core/bitstreams/" + bitstream.getID() + "/content"))

                    //** THEN **
                    .andExpect(status().isOk())

                    //A Content-Length must be reported (the generated document is not the stored file,
                    //so only its presence is checked here)
                    .andExpect(header().string("Content-Length", not(nullValue())))
                    //The server should indicate we support Range requests
                    .andExpect(header().string("Accept-Ranges", "bytes"))
                    //The ETag has to be based on the checksum
                    .andExpect(header().string("ETag", bitstream.getChecksum()))
                    //We expect the content type to match the bitstream mime type
                    .andExpect(content().contentType("application/pdf"))
                    //Keep the returned bytes so the generated PDF can be inspected below
                    .andReturn().getResponse().getContentAsByteArray();

            // The citation cover page contains the item title. Verify that the pdf text contains this title.
            String pdfText = extractPDFText(content);
            assertTrue(StringUtils.contains(pdfText, "Public item citation cover page test 1"));

            // The dspace-api/src/test/data/dspaceFolder/assetstore/ConstitutionofIreland.pdf file contains
            // 64 pages, manually counted + 1 citation cover page
            assertEquals(65, getNumberOfPdfPages(content));

            //An If-None-Match HEAD request on the ETag must tell us the bitstream is not modified
            getClient().perform(head("/api/core/bitstreams/" + bitstream.getID() + "/content")
                    .header("If-None-Match", bitstream.getChecksum()))
                    .andExpect(status().isNotModified());

            //The download and head request should also be logged as a statistics record
            checkNumberOfStatsRecords(bitstream, 2);
        }
    } finally {
        // Do not leak the enabled cover page into other tests: reset the property and
        // make the service reload it, since the service keeps its own copy of the flag
        configurationService.setProperty("citation-page.enable_globally", false);
        citationDocumentService.afterPropertiesSet();
    }
}

/**
 * Extract the text of a PDF held in memory, with the text stripper set to sort by position.
 *
 * @param content the bytes of the PDF document
 * @return the text written by the PDF text stripper
 * @throws IOException if the document cannot be loaded or its text cannot be written
 */
private String extractPDFText(byte[] content) throws IOException {
    PDFTextStripper stripper = new PDFTextStripper();
    stripper.setSortByPosition(true);

    try (ByteArrayInputStream pdfBytes = new ByteArrayInputStream(content);
         Writer textSink = new StringWriter();
         PDDocument parsedPdf = PDDocument.load(pdfBytes)) {

        stripper.writeText(parsedPdf, textSink);
        String extractedText = textSink.toString();
        return extractedText;
    }
}

/**
 * Count the pages of a PDF held in memory.
 *
 * @param content the bytes of the PDF document
 * @return the number of pages reported by the loaded document
 * @throws IOException if the document cannot be loaded
 */
private int getNumberOfPdfPages(byte[] content) throws IOException {
    try (ByteArrayInputStream pdfBytes = new ByteArrayInputStream(content);
         PDDocument parsedPdf = PDDocument.load(pdfBytes)) {
        int pageCount = parsedPdf.getNumberOfPages();
        return pageCount;
    }
}


}

0 comments on commit 724e4cb

Please sign in to comment.