Skip to content

Commit

Permalink
changed .bagit to be version 2.0 and implemented 1.0 spec that all ma…
Browse files Browse the repository at this point in the history
…nifest must list all files
  • Loading branch information
johnscancella committed Dec 23, 2016
1 parent 1e67c03 commit f1a6807
Show file tree
Hide file tree
Showing 28 changed files with 132 additions and 42 deletions.
Expand Up @@ -91,9 +91,9 @@ public void testReaderWriterVersion97() throws Exception{
}

@Test
public void testReaderWriterVersion98() throws Exception{
public void testReaderWriterVersion2_0() throws Exception{
BagReader reader = new BagReader();
Path rootDir = Paths.get(this.getClass().getClassLoader().getResource("bags/v0_98/bag").toURI());
Path rootDir = Paths.get(this.getClass().getClassLoader().getResource("bags/v2_0/bag").toURI());
Bag bag = reader.read(rootDir);
Path outputDir = Paths.get(folder.newFolder().toURI());

Expand Down
Expand Up @@ -81,7 +81,7 @@ public static Bag bagInPlace(final Path root, final Collection<SupportedAlgorith
*/
@Incubating
public static Bag createDotBagit(final Path root, final Collection<SupportedAlgorithm> algorithms, final boolean includeHidden) throws NoSuchAlgorithmException, IOException{
final Bag bag = new Bag(new Version(0, 98));
final Bag bag = new Bag(new Version(2, 0));
bag.setRootDir(root);
logger.info("Creating a bag with version: [{}] in directory: [{}]", bag.getVersion(), root);

Expand Down
@@ -0,0 +1,34 @@
package gov.loc.repository.bagit.verify;

import java.io.IOException;
import java.nio.file.FileVisitResult;
import java.nio.file.Files;
import java.nio.file.Path;
import java.nio.file.Paths;
import java.nio.file.SimpleFileVisitor;
import java.nio.file.attribute.BasicFileAttributes;

import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

/**
 * Base {@link SimpleFileVisitor} shared by the payload manifest checks.
 * It only decides which directories get walked; the per-file manifest check is left to subclasses.
 */
public abstract class AbstractPayloadFileExistsInManifestsVistor extends SimpleFileVisitor<Path> {
  protected static final Logger logger = LoggerFactory.getLogger(AbstractPayloadFileExistsInManifestsVistor.class);
  protected final transient boolean ignoreHiddenFiles;

  /**
   * @param ignoreHiddenFiles when true, directories reported as hidden are not descended into
   */
  public AbstractPayloadFileExistsInManifestsVistor(final boolean ignoreHiddenFiles) {
    this.ignoreHiddenFiles = ignoreHiddenFiles;
  }

  /**
   * Skips the whole subtree when the directory is hidden (and hidden files are being ignored)
   * or when the directory path ends with ".bagit"; otherwise the walk continues into it.
   *
   * @param dir the directory about to be visited
   * @param attrs the directory's basic attributes (unused)
   * @return {@link FileVisitResult#SKIP_SUBTREE} for skipped directories, else {@link FileVisitResult#CONTINUE}
   * @throws IOException if the hidden check on the directory fails
   */
  @Override
  public FileVisitResult preVisitDirectory(final Path dir, final BasicFileAttributes attrs) throws IOException {
    // the hidden check is only performed when hidden files are being ignored
    final boolean hiddenAndIgnored = ignoreHiddenFiles && Files.isHidden(dir);
    final boolean dotBagitDir = dir.endsWith(Paths.get(".bagit"));

    if(!hiddenAndIgnored && !dotBagitDir){
      return FileVisitResult.CONTINUE;
    }

    logger.debug("Skipping [{}] cause it is a hidden folder", dir);
    return FileVisitResult.SKIP_SUBTREE;
  }
}
31 changes: 24 additions & 7 deletions src/main/java/gov/loc/repository/bagit/verify/BagVerifier.java
Expand Up @@ -43,6 +43,7 @@
/**
* Responsible for verifying if a bag is valid, complete
*/
@SuppressWarnings({"PMD.GodClass"}) //TODO refactor
public final class BagVerifier {
private static final Logger logger = LoggerFactory.getLogger(BagVerifier.class);

Expand Down Expand Up @@ -237,14 +238,20 @@ public void isComplete(final Bag bag, final boolean ignoreHiddenFiles) throws

final Set<Path> allFilesListedInManifests = getAllFilesListedInManifests(bag);
checkAllFilesListedInManifestExist(allFilesListedInManifests);
checkAllFilesInPayloadDirAreListedInAManifest(allFilesListedInManifests, dataDir, ignoreHiddenFiles);

if(new Version(1,0).compareTo(bag.getVersion()) < 0){
checkAllFilesInPayloadDirAreListedInAtLeastOneAManifest(allFilesListedInManifests, dataDir, ignoreHiddenFiles);
}
else{
CheckAllFilesInPayloadDirAreListedInAllManifests(bag.getPayLoadManifests(), dataDir, ignoreHiddenFiles);
}
}

/*
* Get the directory that contains the payload files.
*/
private Path getDataDir(final Bag bag){
if(bag.getVersion().compareTo(new Version(0, 98)) >= 0){ //is it a .bagit version?
if(bag.getVersion().compareTo(new Version(2, 0)) >= 0){ //is it a .bagit version?
return bag.getRootDir();
}

Expand All @@ -271,7 +278,7 @@ private void checkBagitFileExists(final Path rootDir, final Version version) thr
logger.info("Checking if bagit.txt file exists");
Path bagitFile = rootDir.resolve("bagit.txt");
//@Incubating
if(version.compareTo(new Version(0, 98)) >= 0){ //is it a .bagit version?
if(version.compareTo(new Version(2, 0)) >= 0){ //is it a .bagit version?
bagitFile = rootDir.resolve(DOT_BAGIT_DIR_NAME + File.separator + "bagit.txt");
}

Expand Down Expand Up @@ -301,7 +308,7 @@ private void checkIfAtLeastOnePayloadManifestsExist(final Path rootDir, final Ve

DirectoryStream<Path> directoryStream = Files.newDirectoryStream(rootDir);
//@Incubating
if(version.compareTo(new Version(0, 98)) >= 0){ //is it a .bagit version?
if(version.compareTo(new Version(2, 00)) >= 0){ //is it a .bagit version?
directoryStream = Files.newDirectoryStream(rootDir.resolve(DOT_BAGIT_DIR_NAME));
}

Expand All @@ -327,7 +334,7 @@ private Set<Path> getAllFilesListedInManifests(final Bag bag) throws IOException

DirectoryStream<Path> directoryStream = Files.newDirectoryStream(bag.getRootDir());
//@Incubating
if(bag.getVersion().compareTo(new Version(0, 98)) >= 0){ //is it a .bagit version?
if(bag.getVersion().compareTo(new Version(2, 00)) >= 0){ //is it a .bagit version?
directoryStream = Files.newDirectoryStream(bag.getRootDir().resolve(DOT_BAGIT_DIR_NAME));
}

Expand Down Expand Up @@ -370,10 +377,20 @@ private void checkAllFilesListedInManifestExist(final Set<Path> files) throws Fi
/*
* Make sure all files in the directory are in at least 1 manifest
*/
private void checkAllFilesInPayloadDirAreListedInAManifest(final Set<Path> filesListedInManifests, final Path payloadDir, final boolean ignoreHiddenFiles) throws IOException{
private void checkAllFilesInPayloadDirAreListedInAtLeastOneAManifest(final Set<Path> filesListedInManifests, final Path payloadDir, final boolean ignoreHiddenFiles) throws IOException{
logger.debug("Checking if all payload files (files in {} dir) are listed in at least one manifest", payloadDir);
if(Files.exists(payloadDir)){
Files.walkFileTree(payloadDir, new PayloadFileExistsInManifestVistor(filesListedInManifests, ignoreHiddenFiles));
Files.walkFileTree(payloadDir, new PayloadFileExistsInAtLeastOneManifestVistor(filesListedInManifests, ignoreHiddenFiles));
}
}

/*
 * As per the bagit-spec 1.0+ every payload file has to be listed in all manifests.
 * Walks the payload directory with a PayloadFileExistsInAllManifestsVistor;
 * nothing is walked when the payload directory does not exist.
 */
private void CheckAllFilesInPayloadDirAreListedInAllManifests(final Set<Manifest> payLoadManifests, final Path payloadDir, final boolean ignoreHiddenFiles) throws IOException{
  logger.debug("Checking if all payload files (files in {} dir) are listed in all manifests", payloadDir);
  if(!Files.exists(payloadDir)){
    return;
  }

  final PayloadFileExistsInAllManifestsVistor allManifestsVisitor = new PayloadFileExistsInAllManifestsVistor(payLoadManifests, ignoreHiddenFiles);
  Files.walkFileTree(payloadDir, allManifestsVisitor);
}

Expand Down
@@ -0,0 +1,36 @@
package gov.loc.repository.bagit.verify;

import java.nio.file.FileVisitResult;
import java.nio.file.Files;
import java.nio.file.Path;
import java.nio.file.SimpleFileVisitor;
import java.nio.file.attribute.BasicFileAttributes;
import java.util.Set;

import gov.loc.repository.bagit.domain.Manifest;
import gov.loc.repository.bagit.exceptions.FileNotInManifestException;

/**
 * Implements {@link SimpleFileVisitor} to ensure that every encountered regular file
 * is listed in ALL of the supplied manifests (not merely in one of them).
 */
public class PayloadFileExistsInAllManifestsVistor extends AbstractPayloadFileExistsInManifestsVistor {
  private transient final Set<Manifest> manifests;

  /**
   * @param manifests the manifests that must each list every visited regular file
   * @param ignoreHiddenFiles passed to the parent visitor, which uses it when deciding which directories to skip
   */
  public PayloadFileExistsInAllManifestsVistor(final Set<Manifest> manifests, final boolean ignoreHiddenFiles) {
    super(ignoreHiddenFiles);
    this.manifests = manifests;
  }

  /**
   * Checks that the visited file, when it is a regular file, is a key in every manifest's file-to-checksum map.
   *
   * @param path the file being visited
   * @param attrs the file's basic attributes (unused)
   * @return always {@link FileVisitResult#CONTINUE} when no exception is thrown
   * @throws FileNotInManifestException if a regular file is missing from at least one manifest
   */
  @Override
  public FileVisitResult visitFile(final Path path, final BasicFileAttributes attrs)throws FileNotInManifestException{
    if(Files.isRegularFile(path)){
      // the manifests are looked up by normalized path; compute it once instead of once per manifest
      final Path normalizedPath = path.normalize();
      for(final Manifest manifest : manifests){
        if(!manifest.getFileToChecksumMap().containsKey(normalizedPath)){
          throw new FileNotInManifestException("File " + path + " is in the payload directory but isn't listed in manifest manifest-" + manifest.getAlgorithm().getBagitName() + ".txt");
        }
      }
      // only claim success for files that were actually checked
      logger.debug("[{}] is in all manifests", path);
    }
    return FileVisitResult.CONTINUE;
  }
}
@@ -1,40 +1,23 @@
package gov.loc.repository.bagit.verify;

import java.io.IOException;
import java.nio.file.FileVisitResult;
import java.nio.file.Files;
import java.nio.file.Path;
import java.nio.file.Paths;
import java.nio.file.SimpleFileVisitor;
import java.nio.file.attribute.BasicFileAttributes;
import java.util.Set;

import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

import gov.loc.repository.bagit.exceptions.FileNotInManifestException;

/**
* Implements {@link SimpleFileVisitor} to ensure that the encountered file is in one of the manifests.
*/
public class PayloadFileExistsInManifestVistor extends SimpleFileVisitor<Path> {
private static final Logger logger = LoggerFactory.getLogger(PayloadFileExistsInManifestVistor.class);
public class PayloadFileExistsInAtLeastOneManifestVistor extends AbstractPayloadFileExistsInManifestsVistor {
private transient final Set<Path> filesListedInManifests;
private transient final boolean ignoreHiddenFiles;

public PayloadFileExistsInManifestVistor(final Set<Path> filesListedInManifests, final boolean ignoreHiddenFiles) {
public PayloadFileExistsInAtLeastOneManifestVistor(final Set<Path> filesListedInManifests, final boolean ignoreHiddenFiles) {
super(ignoreHiddenFiles);
this.filesListedInManifests = filesListedInManifests;
this.ignoreHiddenFiles = ignoreHiddenFiles;
}

@Override
public FileVisitResult preVisitDirectory(final Path dir, final BasicFileAttributes attrs) throws IOException {
if(ignoreHiddenFiles && Files.isHidden(dir) || dir.endsWith(Paths.get(".bagit"))){
logger.debug("Skipping [{}] cause it is a hidden folder", dir);
return FileVisitResult.SKIP_SUBTREE;
}

return FileVisitResult.CONTINUE;
}

@Override
Expand Down
4 changes: 2 additions & 2 deletions src/main/java/gov/loc/repository/bagit/writer/BagWriter.java
Expand Up @@ -31,7 +31,7 @@
@SuppressWarnings("PMD.TooManyMethods")
public final class BagWriter {
private static final Logger logger = LoggerFactory.getLogger(BagWriter.class);
private static final Version VERSION_0_98 = new Version(0, 98);
private static final Version VERSION_2_0 = new Version(2, 0);
private static final Version VERSION_0_95 = new Version(0, 95);

private BagWriter(){
Expand Down Expand Up @@ -76,7 +76,7 @@ public static void write(final Bag bag, final Path outputDir) throws IOException
private static Path writeVersionDependentPayloadFiles(final Bag bag, final Path outputDir) throws IOException{
Path bagitDir = outputDir;
//@Incubating
if(VERSION_0_98.compareTo(bag.getVersion()) <= 0){
if(VERSION_2_0.compareTo(bag.getVersion()) <= 0){
bagitDir = outputDir.resolve(".bagit");
Files.createDirectories(bagitDir);
writePayloadFiles(bag.getPayLoadManifests(), outputDir, bag.getRootDir());
Expand Down
Expand Up @@ -81,7 +81,7 @@ public void testCreateDotBagit() throws IOException, NoSuchAlgorithmException{
Path expectedBagitFile = dotbagitDir.resolve("bagit.txt");

Bag bag = BagCreator.createDotBagit(rootFolderPath, Arrays.asList(StandardSupportedAlgorithms.MD5), false);
assertEquals(new Version(0, 98), bag.getVersion());
assertEquals(new Version(2, 0), bag.getVersion());

assertTrue(Files.exists(expectedBagitFile));
assertTrue(Files.exists(expectedManifestFile));
Expand Down
Expand Up @@ -265,15 +265,15 @@ public void testReadVersion0_97Bag() throws Exception{
}

@Test
public void testReadVersion0_98Bag() throws Exception{
Path rootBag = Paths.get(getClass().getClassLoader().getResource("bags/v0_98/bag").toURI());
public void testReadVersion2_0Bag() throws Exception{
Path rootBag = Paths.get(getClass().getClassLoader().getResource("bags/v2_0/bag").toURI());
Path[] payloadFiles = new Path[]{rootBag.resolve("dir1/test3.txt"), rootBag.resolve("dir2/dir3/test5.txt"),
rootBag.resolve("dir2/test4.txt"), rootBag.resolve("test1.txt"), rootBag.resolve("test2.txt")};

Bag returnedBag = sut.read(rootBag);

assertNotNull(returnedBag);
assertEquals(new Version(0, 98), returnedBag.getVersion());
assertEquals(new Version(2, 0), returnedBag.getVersion());
Manifest payloadManifest = (Manifest) returnedBag.getPayLoadManifests().toArray()[0];
for(Path payloadFile : payloadFiles){
assertTrue("payload manifest should contain " + payloadFile, payloadManifest.getFileToChecksumMap().containsKey(payloadFile));
Expand Down
Expand Up @@ -118,8 +118,8 @@ public void testVersion0_97IsValid() throws Exception{
}

@Test
public void testVersion0_98IsValid() throws Exception{
rootDir = Paths.get(new File("src/test/resources/bags/v0_98/bag").toURI());
public void testVersion2_0IsValid() throws Exception{
rootDir = Paths.get(new File("src/test/resources/bags/v2_0/bag").toURI());
Bag bag = reader.read(rootDir);

sut.isValid(bag, true);
Expand Down Expand Up @@ -240,6 +240,13 @@ public void testAddSHA3SupportViaExtension() throws Exception{
extendedSut.isValid(bag, true);
}

@Test(expected=FileNotInManifestException.class)
public void testNotALlFilesListedInAllManifestsThrowsException() throws Exception{
Path bagDir = Paths.get(new File("src/test/resources/notAllFilesListedInAllManifestsBag").toURI());
Bag bag = reader.read(bagDir);
sut.isValid(bag, true);
}

/*
* Technically valid but highly discouraged
*/
Expand Down
Expand Up @@ -111,8 +111,8 @@ public void testWriteVersion97() throws Exception{
}

@Test
public void testWriteVersion98() throws Exception{
Path rootDir = Paths.get(getClass().getClassLoader().getResource("bags/v0_98/bag").toURI());
public void testWriteVersion2_0() throws Exception{
Path rootDir = Paths.get(getClass().getClassLoader().getResource("bags/v2_0/bag").toURI());
Bag bag = reader.read(rootDir);

File bagitDir = folder.newFolder();
Expand Down
@@ -1,2 +1,2 @@
BagIt-Version: 0.98
BagIt-Version: 2.0
Tag-File-Character-Encoding: UTF-8
@@ -1,3 +1,3 @@
32f027866d7141a0eaeee6e0cc5123f0 .bagit/bagit.txt
2cdf8709d5921dd1a6422c7e0a9dc017 .bagit/bagit.txt
9ee76b9f81aa504f2af0ef8f3e832d81 .bagit/manifest-md5.txt
68b1dabaea8770a0e9411dc5d99341f9 .bagit/bag-info.txt
File renamed without changes.
File renamed without changes.
@@ -0,0 +1,3 @@
Bag-Software-Agent: bagit.py v1.5.4 <http://github.com/libraryofcongress/bagit-python>
Bagging-Date: 2016-12-23
Payload-Oxum: 0.3
@@ -0,0 +1,2 @@
BagIt-Version: 1.0
Tag-File-Character-Encoding: UTF-8
Empty file.
Empty file.
Empty file.
@@ -0,0 +1,3 @@
d41d8cd98f00b204e9800998ecf8427e data/bar
d41d8cd98f00b204e9800998ecf8427e data/foo
d41d8cd98f00b204e9800998ecf8427e data/ham
@@ -0,0 +1,2 @@
e3b0c44298fc1c149afbf4c8996fb92427ae41e4649b934ca495991b7852b855 data/bar
e3b0c44298fc1c149afbf4c8996fb92427ae41e4649b934ca495991b7852b855 data/foo
@@ -0,0 +1,3 @@
121f38b01566d73cc41abaf5f71cc6c6 bag-info.txt
eaa2c609ff6371712f623f5531945b44 bagit.txt
35a2deeb5237cad4460aff97718ae00f manifest-md5.txt

0 comments on commit f1a6807

Please sign in to comment.