Skip to content

Commit

Permalink
Merge remote-tracking branch 'blessed/master' into DD-1338-continued
Browse files Browse the repository at this point in the history
  • Loading branch information
jo-pol committed Jun 29, 2023
2 parents 50f7fee + 9e7be7f commit 356d77f
Show file tree
Hide file tree
Showing 11 changed files with 216 additions and 197 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -18,7 +18,7 @@
import nl.knaw.dans.ingest.core.domain.Deposit;
import nl.knaw.dans.ingest.core.domain.DepositFile;
import nl.knaw.dans.ingest.core.domain.OriginalFilePathMapping;
import nl.knaw.dans.ingest.core.service.ManifestHelper;
import nl.knaw.dans.ingest.core.service.ManifestHelperImpl;
import nl.knaw.dans.ingest.core.service.XPathEvaluator;
import org.w3c.dom.Node;

Expand All @@ -36,7 +36,7 @@ public class DepositFileListerImpl implements DepositFileLister {
public List<DepositFile> getDepositFiles(Deposit deposit) throws IOException {
var bag = deposit.getBag();
var bagDir = bag.getRootDir();
var filePathToSha1 = ManifestHelper.getFilePathToSha1(bag);
var filePathToSha1 = ManifestHelperImpl.getFilePathToSha1(bag);
var originalFilePathMappings = getOriginalFilePathMapping(bagDir);

return XPathEvaluator.nodes(deposit.getFilesXml(), "/files:files/files:file")
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -21,6 +21,8 @@
import nl.knaw.dans.ingest.core.exception.InvalidDepositException;
import nl.knaw.dans.ingest.core.io.BagDataManager;
import nl.knaw.dans.ingest.core.io.FileService;
import nl.knaw.dans.ingest.core.service.ManifestHelper;
import nl.knaw.dans.ingest.core.service.ManifestHelperImpl;
import nl.knaw.dans.ingest.core.service.XmlReader;
import org.apache.commons.configuration2.Configuration;
import org.apache.commons.lang3.StringUtils;
Expand All @@ -41,12 +43,15 @@ public class DepositReaderImpl implements DepositReader {
private final BagDataManager bagDataManager;
private final DepositFileLister depositFileLister;

public DepositReaderImpl(XmlReader xmlReader, BagDirResolver bagDirResolver, FileService fileService, BagDataManager bagDataManager, DepositFileLister depositFileLister) {
private final ManifestHelper manifestHelper;

/**
 * Creates a deposit reader from its collaborators; every argument is stored as-is in the field of the same name.
 *
 * @param xmlReader         XML reader collaborator
 * @param bagDirResolver    used by readDeposit to resolve the bag directory inside a deposit directory
 * @param fileService       file service collaborator
 * @param bagDataManager    used by readDeposit to read the deposit properties and the bag
 * @param depositFileLister deposit file lister collaborator
 * @param manifestHelper    used by readDeposit to ensure the bag has a SHA-1 payload manifest
 */
public DepositReaderImpl(XmlReader xmlReader, BagDirResolver bagDirResolver, FileService fileService, BagDataManager bagDataManager, DepositFileLister depositFileLister, ManifestHelper manifestHelper) {
this.xmlReader = xmlReader;
this.bagDirResolver = bagDirResolver;
this.fileService = fileService;
this.bagDataManager = bagDataManager;
this.depositFileLister = depositFileLister;
this.manifestHelper = manifestHelper;
}

@Override
Expand All @@ -60,11 +65,12 @@ public Deposit readDeposit(Path depositDir) throws InvalidDepositException {
var bagDir = bagDirResolver.getBagDir(depositDir);

var config = bagDataManager.readDepositProperties(depositDir);
var bagInfo = bagDataManager.readBag(bagDir);
var bag = bagDataManager.readBag(bagDir);
manifestHelper.ensureSha1ManifestPresent(bag);

var deposit = mapToDeposit(depositDir, bagDir, config, bagInfo);
var deposit = mapToDeposit(depositDir, bagDir, config, bag);

deposit.setBag(bagInfo);
deposit.setBag(bag);
deposit.setDdm(readOptionalXmlFile(deposit.getDdmPath()));
deposit.setFilesXml(readOptionalXmlFile(deposit.getFilesXmlPath()));
deposit.setAmd(readOptionalXmlFile(deposit.getAmdPath()));
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -119,6 +119,7 @@ public void run() {
}
catch (InvalidDepositException e) {
try {
updateDepositFromResult(DepositState.FAILED, e.getMessage());
moveDepositToOutbox(depositLocation.getDir(), OutboxSubDir.FAILED);
}
catch (IOException ex) {
Expand Down Expand Up @@ -307,16 +308,7 @@ void validateDeposit() {
var result = dansBagValidator.validateBag(
deposit.getBagDir(), ValidateCommand.PackageTypeEnum.DEPOSIT, 1);

if (result.getIsCompliant()) {
try {
ManifestHelper.ensureSha1ManifestPresent(deposit.getBag());
}
catch (Exception e) {
log.error("could not add SHA1 manifest", e);
throw new FailedDepositException(deposit, e.getMessage());
}
}
else {
if (!result.getIsCompliant()) {
var violations = result.getRuleViolations().stream()
.map(r -> String.format("- [%s] %s", r.getRule(), r.getViolation()))
.collect(Collectors.joining("\n"));
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -52,7 +52,7 @@ public DepositIngestTaskFactoryBuilder(DdIngestFlowConfiguration configuration,
final var bagReader = new BagReader();
final var bagDataManager = new BagDataManagerImpl(bagReader);
final var bagDirResolver = new BagDirResolverImpl(fileService);
final var depositReader = new DepositReaderImpl(xmlReader, bagDirResolver, fileService, bagDataManager, depositFileLister);
final var depositReader = new DepositReaderImpl(xmlReader, bagDirResolver, fileService, bagDataManager, depositFileLister, new ManifestHelperImpl());
final var depositLocationReader = new DepositLocationReaderImpl(bagDirResolver, bagDataManager);
final var depositWriter = new DepositWriterImpl(bagDataManager);

Expand Down
88 changes: 2 additions & 86 deletions src/main/java/nl/knaw/dans/ingest/core/service/ManifestHelper.java
Original file line number Diff line number Diff line change
Expand Up @@ -15,96 +15,12 @@
*/
package nl.knaw.dans.ingest.core.service;

import gov.loc.repository.bagit.creator.CreatePayloadManifestsVistor;
import gov.loc.repository.bagit.creator.CreateTagManifestsVistor;
import gov.loc.repository.bagit.domain.Bag;
import gov.loc.repository.bagit.domain.Manifest;
import gov.loc.repository.bagit.hash.Hasher;
import gov.loc.repository.bagit.hash.StandardSupportedAlgorithms;
import gov.loc.repository.bagit.util.PathUtils;
import gov.loc.repository.bagit.writer.ManifestWriter;

import java.io.IOException;
import java.nio.file.FileVisitResult;
import java.nio.file.Files;
import java.nio.file.Path;
import java.nio.file.attribute.BasicFileAttributes;
import java.security.NoSuchAlgorithmException;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
import java.util.stream.Collectors;

import static gov.loc.repository.bagit.hash.StandardSupportedAlgorithms.SHA1;
public interface ManifestHelper {
void ensureSha1ManifestPresent(Bag bag) throws NoSuchAlgorithmException, IOException;

public class ManifestHelper {

static public void ensureSha1ManifestPresent(Bag bag) throws NoSuchAlgorithmException, IOException {
var manifests = bag.getPayLoadManifests();
var algorithms = manifests.stream().map(Manifest::getAlgorithm);

if (algorithms.anyMatch(SHA1::equals)) {
return;
}

var payloadFilesMap = Hasher.createManifestToMessageDigestMap(List.of(SHA1));
var payloadVisitor = new CreatePayloadManifestsVistor(payloadFilesMap, true);
Files.walkFileTree(PathUtils.getDataDir(bag), payloadVisitor);
manifests.addAll(payloadFilesMap.keySet());
ManifestWriter.writePayloadManifests(manifests, PathUtils.getBagitDir(bag), bag.getRootDir(), bag.getFileEncoding());

updateTagManifests(bag);
}

private static void updateTagManifests(Bag bag) throws NoSuchAlgorithmException, IOException {
var algorithms = bag.getTagManifests().stream()
.map(Manifest::getAlgorithm)
.collect(Collectors.toList());
var tagFilesMap = Hasher.createManifestToMessageDigestMap(algorithms);
var bagRootDir = bag.getRootDir();
var tagVisitor = new CreateTagManifestsVistor(tagFilesMap, true) {

@Override
public FileVisitResult visitFile(Path path, BasicFileAttributes attrs) throws IOException {
/*
* Fix for EASY-1306: a tag manifest must not contain an entry for itself, as this is practically
* impossible to calculate. It could in theory contain entries for other tag manifests. However,
* the CreateTagManifestsVistor, once it finds an entry for a tag file in ONE of the tag manifests,
* will add an entry in ALL tag manifests.
*
* Therefore, we adopt the strategy NOT to calculate any checksums for the tag manifests themselves.
*
* Update: this is actually required in V1.0: https://tools.ietf.org/html/rfc8493#section-2.2.1
*/
var isTagManifest = bagRootDir.relativize(path).getNameCount() == 1 &&
path.getFileName().toString().startsWith("tagmanifest-");

if (isTagManifest) {
return FileVisitResult.CONTINUE;
}
else {
return super.visitFile(path, attrs);
}
}
};

Files.walkFileTree(bagRootDir, tagVisitor);
bag.getTagManifests().clear();
bag.getTagManifests().addAll(tagFilesMap.keySet());
ManifestWriter.writeTagManifests(bag.getTagManifests(), PathUtils.getBagitDir(bag), bagRootDir, bag.getFileEncoding());
}

static public Map<Path, String> getFilePathToSha1(Bag bag) {
var result = new HashMap<Path, String>();
var manifest = bag.getPayLoadManifests().stream()
.filter(item -> item.getAlgorithm().equals(StandardSupportedAlgorithms.SHA1))
.findFirst()
.orElseThrow(() -> new IllegalArgumentException("Deposit bag does not have SHA-1 payload manifest"));

for (var entry : manifest.getFileToChecksumMap().entrySet()) {
result.put(bag.getRootDir().relativize(entry.getKey()), entry.getValue());
}

return result;
}
}
111 changes: 111 additions & 0 deletions src/main/java/nl/knaw/dans/ingest/core/service/ManifestHelperImpl.java
Original file line number Diff line number Diff line change
@@ -0,0 +1,111 @@
/*
* Copyright (C) 2022 DANS - Data Archiving and Networked Services (info@dans.knaw.nl)
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package nl.knaw.dans.ingest.core.service;

import gov.loc.repository.bagit.creator.CreatePayloadManifestsVistor;
import gov.loc.repository.bagit.creator.CreateTagManifestsVistor;
import gov.loc.repository.bagit.domain.Bag;
import gov.loc.repository.bagit.domain.Manifest;
import gov.loc.repository.bagit.hash.Hasher;
import gov.loc.repository.bagit.hash.StandardSupportedAlgorithms;
import gov.loc.repository.bagit.util.PathUtils;
import gov.loc.repository.bagit.writer.ManifestWriter;

import java.io.IOException;
import java.nio.file.FileVisitResult;
import java.nio.file.Files;
import java.nio.file.Path;
import java.nio.file.attribute.BasicFileAttributes;
import java.security.NoSuchAlgorithmException;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
import java.util.stream.Collectors;

import static gov.loc.repository.bagit.hash.StandardSupportedAlgorithms.SHA1;

/**
 * {@link ManifestHelper} implementation that works directly on a bagit {@link Bag}: it can add a
 * missing SHA-1 payload manifest and expose the SHA-1 checksums keyed by bag-relative path.
 */
public class ManifestHelperImpl implements ManifestHelper {

    /**
     * Ensures that the bag has a SHA-1 payload manifest.
     *
     * <p>When one of the bag's payload manifests already uses SHA-1 nothing happens. Otherwise the
     * data directory is walked to compute SHA-1 checksums, the resulting manifest is added to the
     * bag's payload manifests, all payload manifests are written out, and the tag manifests are
     * recalculated and rewritten.
     *
     * @param bag the bag to inspect and, if needed, extend
     * @throws NoSuchAlgorithmException propagated from the bagit hashing utilities
     * @throws IOException              propagated from walking the file tree or writing manifests
     */
    @Override
    public void ensureSha1ManifestPresent(Bag bag) throws NoSuchAlgorithmException, IOException {
        var payloadManifests = bag.getPayLoadManifests();

        for (var existing : payloadManifests) {
            if (SHA1.equals(existing.getAlgorithm())) {
                // A SHA-1 payload manifest is already there; leave the bag untouched.
                return;
            }
        }

        var sha1Digests = Hasher.createManifestToMessageDigestMap(List.of(SHA1));
        var checksumCollector = new CreatePayloadManifestsVistor(sha1Digests, true);
        Files.walkFileTree(PathUtils.getDataDir(bag), checksumCollector);

        payloadManifests.addAll(sha1Digests.keySet());
        ManifestWriter.writePayloadManifests(payloadManifests, PathUtils.getBagitDir(bag), bag.getRootDir(), bag.getFileEncoding());

        // The payload manifest files changed on disk, so the tag manifests must be regenerated.
        updateTagManifests(bag);
    }

    /**
     * Recomputes the bag's tag manifests for the algorithms they currently use, replaces the
     * bag's tag manifest collection with the result and writes the manifests out.
     */
    private static void updateTagManifests(Bag bag) throws NoSuchAlgorithmException, IOException {
        var tagAlgorithms = bag.getTagManifests().stream()
            .map(Manifest::getAlgorithm)
            .collect(Collectors.toList());
        var tagDigests = Hasher.createManifestToMessageDigestMap(tagAlgorithms);
        var root = bag.getRootDir();

        var checksumCollector = new CreateTagManifestsVistor(tagDigests, true) {

            @Override
            public FileVisitResult visitFile(Path file, BasicFileAttributes attributes) throws IOException {
                /*
                 * Fix for EASY-1306: a tag manifest cannot sensibly list its own checksum. Entries
                 * for other tag manifests would be possible in theory, but CreateTagManifestsVistor
                 * adds an entry to ALL tag manifests as soon as a file is listed in ONE of them.
                 * So no checksums are calculated for tag manifests at all.
                 *
                 * Update: this is actually required in V1.0: https://tools.ietf.org/html/rfc8493#section-2.2.1
                 */
                if (isTagManifest(root, file)) {
                    return FileVisitResult.CONTINUE;
                }
                return super.visitFile(file, attributes);
            }
        };

        Files.walkFileTree(root, checksumCollector);
        bag.getTagManifests().clear();
        bag.getTagManifests().addAll(tagDigests.keySet());
        ManifestWriter.writeTagManifests(bag.getTagManifests(), PathUtils.getBagitDir(bag), root, bag.getFileEncoding());
    }

    /**
     * Tells whether {@code file} sits directly in {@code bagRoot} and has a file name starting
     * with {@code tagmanifest-}.
     */
    private static boolean isTagManifest(Path bagRoot, Path file) {
        return bagRoot.relativize(file).getNameCount() == 1
            && file.getFileName().toString().startsWith("tagmanifest-");
    }

    /**
     * Returns the checksums of the bag's first SHA-1 payload manifest, keyed by the file path
     * relative to the bag's root directory.
     *
     * @param bag the bag whose SHA-1 payload manifest is read
     * @return a new mutable map from bag-relative path to checksum
     * @throws IllegalArgumentException if the bag has no SHA-1 payload manifest
     */
    public static Map<Path, String> getFilePathToSha1(Bag bag) {
        var sha1Manifest = bag.getPayLoadManifests().stream()
            .filter(candidate -> candidate.getAlgorithm().equals(SHA1))
            .findFirst()
            .orElseThrow(() -> new IllegalArgumentException("Deposit bag does not have SHA-1 payload manifest"));

        var checksumByRelativePath = new HashMap<Path, String>();
        sha1Manifest.getFileToChecksumMap().forEach(
            (file, checksum) -> checksumByRelativePath.put(bag.getRootDir().relativize(file), checksum));

        return checksumByRelativePath;
    }
}
Original file line number Diff line number Diff line change
Expand Up @@ -30,6 +30,7 @@
import nl.knaw.dans.ingest.core.service.DepositIngestTask;
import nl.knaw.dans.ingest.core.service.DepositMigrationTask;
import nl.knaw.dans.ingest.core.service.EventWriter;
import nl.knaw.dans.ingest.core.service.ManifestHelper;
import nl.knaw.dans.ingest.core.service.XmlReader;
import nl.knaw.dans.ingest.core.service.XmlReaderImpl;
import nl.knaw.dans.ingest.core.service.ZipFileHandler;
Expand Down Expand Up @@ -87,7 +88,8 @@ private DepositIngestTask createTaskWrapper(String depositName, String created)
var bagDataManager = Mockito.mock(BagDataManager.class);
var bagDirResolver = new BagDirResolverImpl(fileService);
var depositLocationReader = new DepositLocationReaderImpl(bagDirResolver, bagDataManager);
var depositReader = new DepositReaderImpl(xmlReader, bagDirResolver, fileService, bagDataManager, depositFileLister);
var manifestHelper = Mockito.mock(ManifestHelper.class);
var depositReader = new DepositReaderImpl(xmlReader, bagDirResolver, fileService, bagDataManager, depositFileLister, manifestHelper);
var depositWriter = new DepositWriterImpl(bagDataManager);
var depositManager = new DepositManagerImpl(depositReader, depositLocationReader, depositWriter);
// TODO dont actually read the data from disk, just keep it in this class
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -19,6 +19,7 @@
import nl.knaw.dans.ingest.core.exception.InvalidDepositException;
import nl.knaw.dans.ingest.core.io.BagDataManagerImpl;
import nl.knaw.dans.ingest.core.io.FileServiceImpl;
import nl.knaw.dans.ingest.core.service.ManifestHelperImpl;
import nl.knaw.dans.ingest.core.service.XmlReaderImpl;
import org.junit.jupiter.api.Test;

Expand All @@ -34,11 +35,12 @@ void getDepositFiles_should_list_files_with_original_filepaths() throws Exceptio
var lister = new DepositFileListerImpl();
var fileService = new FileServiceImpl();
var depositReader = new DepositReaderImpl(
new XmlReaderImpl(),
new BagDirResolverImpl(fileService),
fileService,
new BagDataManagerImpl(new BagReader()),
lister
new XmlReaderImpl(),
new BagDirResolverImpl(fileService),
fileService,
new BagDataManagerImpl(new BagReader()),
lister,
new ManifestHelperImpl()
);

var path = Path.of(getClass().getResource("/examples/valid-with-original-filepaths").toURI().getPath());
Expand All @@ -47,19 +49,19 @@ void getDepositFiles_should_list_files_with_original_filepaths() throws Exceptio
var files = deposit.getFiles();

assertThat(files).extracting("path")
.containsOnly(
Path.of("data/random images/image01.png"),
Path.of("data/random images/image02.jpeg"),
Path.of("data/random images/image03.jpeg"),
Path.of("data/a/deeper/path/With some file.txt")
);
.containsOnly(
Path.of("data/random images/image01.png"),
Path.of("data/random images/image02.jpeg"),
Path.of("data/random images/image03.jpeg"),
Path.of("data/a/deeper/path/With some file.txt")
);

assertThat(files).extracting("physicalPath")
.containsOnly(
Path.of("data/aa2345ab-bff5-49c9-b224-f8d3df0fd37a"),
Path.of("data/57f6f2f8-8d87-43ec-ac0e-68bdac21223e"),
Path.of("data/79c713b0-b232-4aaa-80cc-9bc34111acf7"),
Path.of("data/26e30e9b-64a8-4a2f-8c70-a4653219c984")
);
.containsOnly(
Path.of("data/aa2345ab-bff5-49c9-b224-f8d3df0fd37a"),
Path.of("data/57f6f2f8-8d87-43ec-ac0e-68bdac21223e"),
Path.of("data/79c713b0-b232-4aaa-80cc-9bc34111acf7"),
Path.of("data/26e30e9b-64a8-4a2f-8c70-a4653219c984")
);
}
}
Loading

0 comments on commit 356d77f

Please sign in to comment.