From 4351d79d47bb91c631efb46a2ab7455f79ae4516 Mon Sep 17 00:00:00 2001 From: thomasthelen Date: Tue, 14 Mar 2023 20:29:35 -0700 Subject: [PATCH] Check for name collisions between files as they're added to the bag --- .../org/dataone/speedbagit/SpeedBagIt.java | 59 ++++++++++++++----- .../org/dataone/speedbagit/ProfilingTest.java | 7 ++- .../dataone/speedbagit/SpeedBagItTest.java | 35 ++++++++++- 3 files changed, 79 insertions(+), 22 deletions(-) diff --git a/src/main/java/org/dataone/speedbagit/SpeedBagIt.java b/src/main/java/org/dataone/speedbagit/SpeedBagIt.java index 4f1db82..dab06e8 100644 --- a/src/main/java/org/dataone/speedbagit/SpeedBagIt.java +++ b/src/main/java/org/dataone/speedbagit/SpeedBagIt.java @@ -128,7 +128,6 @@ public SpeedBagIt(double version, this.dataManifestFile = new HashMap<> (); this.tagManifestFile = new HashMap<> (); - this.properties = new Properties(); this.properties.load(Objects.requireNonNull(this.getClass(). getClassLoader().getResourceAsStream("speed-bagit.properties"))); @@ -138,19 +137,51 @@ public SpeedBagIt(double version, * Adds a stream of data to the bag. 
* * @param file: The stream representing a file or data that will be placed in the bag - * @param bagPath: The path, relative to the bag root where the file belongs - * @param checksum: A MessageDigest object that will hold the checksum + * @param bagPath: The path, relative to the bag root where the file belongs + * @param checksum: A MessageDigest object that will hold the checksum * @param isTagFile: Boolean set to True when the file is a tag file */ - public void addFile(InputStream file, String bagPath, MessageDigest checksum, boolean isTagFile) { + public void addFile(InputStream file, String bagPath, MessageDigest checksum, boolean isTagFile) + throws SpeedBagException { logger.debug(String.format("Adding %s to the bag", bagPath)); + // Reject the file when a previously added file of the same kind (tag vs data) already uses this path + if (this.hasPathCollisions(bagPath, isTagFile)) { + throw new SpeedBagException( + String.format("The %s file with path %s conflicts with another file.", isTagFile ? "tag" : "data", bagPath) + ); + } SpeedFile newFile = new SpeedFile(new SpeedStream(file, checksum), bagPath, isTagFile); - if (isTagFile) { - this.tagFiles.add(newFile); - } else { - this.dataFiles.add(newFile); + if (isTagFile) { + this.tagFiles.add(newFile); + } else { + this.dataFiles.add(newFile); + } + } + + /** + * Returns true when a previously added file of the same type (tag vs data file) already uses the given path. + * + * @param path: Path being checked against the previously added files + * @param isTagFile: True to check against the tag files; false to check against the data files + */ + private boolean hasPathCollisions(String path, boolean isTagFile) { + if (isTagFile) { + for (SpeedFile tagFile : this.tagFiles) { + if (Objects.equals(tagFile.getPath(), path)) { + return true; + } + } + } else { + for (SpeedFile dataFile : this.dataFiles) { + if (Objects.equals(dataFile.getPath(), path)) { + return true; + } + } + } + return false; } - } + + /** * Adds a stream of data to the bag. 
@@ -159,15 +190,10 @@ public void addFile(InputStream file, String bagPath, MessageDigest checksum, bo * @param bagPath: The path, relative to the bag root where the file belongs * @param isTagFile: Boolean set to True when the file is a tag file */ - public void addFile(InputStream file, String bagPath, boolean isTagFile) throws NoSuchAlgorithmException { + public void addFile(InputStream file, String bagPath, boolean isTagFile) throws NoSuchAlgorithmException, SpeedBagException { logger.debug(String.format("Adding %s to the bag", bagPath)); MessageDigest newDigest = MessageDigest.getInstance(this.checksumAlgorithm); - SpeedFile newFile = new SpeedFile(new SpeedStream(file, newDigest), bagPath, isTagFile); - if (isTagFile) { - this.tagFiles.add(newFile); - } else { - this.dataFiles.add(newFile); - } + this.addFile(file, bagPath, newDigest, isTagFile); } /** @@ -399,3 +425,4 @@ public static String bagFileToString(Map mapFile) { return builder.toString(); } } + diff --git a/src/test/java/org/dataone/speedbagit/ProfilingTest.java b/src/test/java/org/dataone/speedbagit/ProfilingTest.java index 0d3cc88..32c5f50 100644 --- a/src/test/java/org/dataone/speedbagit/ProfilingTest.java +++ b/src/test/java/org/dataone/speedbagit/ProfilingTest.java @@ -54,7 +54,7 @@ public class ProfilingTest { */ @Test @Disabled - public void testLargeFiles() throws IOException, NoSuchAlgorithmException { + public void testLargeFiles() throws IOException, NoSuchAlgorithmException, SpeedBagException { // Create 100, 1GB files GenerateFiles("largeFiles/", 100, 1000000000L); CreateBag("largeFiles/", "./bagged_data.zip"); @@ -66,7 +66,7 @@ public void testLargeFiles() throws IOException, NoSuchAlgorithmException { */ @Test @Disabled - public void testSmallFiles() throws IOException, NoSuchAlgorithmException { + public void testSmallFiles() throws IOException, NoSuchAlgorithmException, SpeedBagException { GenerateFiles("smallFiles/", 5000, 1000); CreateBag("smallFiles/", "./bagged_data.zip"); } @@ 
-118,7 +118,7 @@ public static void GenerateFiles(String targetDirectory, int fileCount, long tar * @param PayloadPath The path to the data directory that will be bagged * @param bagPath The path to the bagit archive that will be created */ - public static void CreateBag(String PayloadPath, String bagPath) throws IOException, NoSuchAlgorithmException { + public static void CreateBag(String PayloadPath, String bagPath) throws IOException, NoSuchAlgorithmException, SpeedBagException { SpeedBagIt bag = new SpeedBagIt(1.0, "MD5"); File dataDirectory = new File(PayloadPath); File[] directoryListing = dataDirectory.listFiles(); @@ -140,3 +140,4 @@ public static void CreateBag(String PayloadPath, String bagPath) throws IOExcept IOUtils.copy(bagStream, fos); } } + diff --git a/src/test/java/org/dataone/speedbagit/SpeedBagItTest.java b/src/test/java/org/dataone/speedbagit/SpeedBagItTest.java index 797bf4c..c0a2a3f 100644 --- a/src/test/java/org/dataone/speedbagit/SpeedBagItTest.java +++ b/src/test/java/org/dataone/speedbagit/SpeedBagItTest.java @@ -48,11 +48,13 @@ import org.junit.jupiter.api.io.TempDir; import org.apache.commons.io.IOUtils; import org.junit.jupiter.api.Test; + import static org.junit.jupiter.api.Assertions.assertEquals; import static org.junit.jupiter.api.Assertions.assertTrue; import static org.junit.jupiter.api.Assertions.fail; + /** * Unit tests for the SpeedBagIt class. Because this class is the main interface for * creating bags, most integrated unit tests are in this file. 
@@ -137,7 +139,7 @@ public void testCtorMetadata() throws IOException { * Helper method that creates a stock Bag * @return The SpeedBag object */ - public SpeedBagIt getStockBag() throws NoSuchAlgorithmException, IOException { + public SpeedBagIt getStockBag() throws NoSuchAlgorithmException, IOException, SpeedBagException { double bagVersion = 1.0; String checksumAlgorithm = "MD5"; Map bagMetadata = new HashMap<>(); @@ -400,7 +402,7 @@ public void testDataBagExport() { this.validateBagItFiles(zipFile, bagVersion, bag.getPayloadFileCount(), checksumAlgorithm); Files.delete(bagFilePath); - } catch (IOException | NoSuchAlgorithmException e) { + } catch (IOException | NoSuchAlgorithmException | SpeedBagException e) { fail(e); } } @@ -441,8 +443,35 @@ public void testMetadataBagExport() { // Make sure that the bag files are correct this.validateBagItFiles(zipFile, bagVersion, bag.getPayloadFileCount(), checksumAlgorithm); Files.delete(bagFilePath); - } catch (IOException | NoSuchAlgorithmException e) { + } catch (IOException | NoSuchAlgorithmException | SpeedBagException e) { fail(e); } } + + /** + * Tests that when two files are added with the same name, SpeedBagException is thrown. 
+ */ + @Test + void testDuplicateAddFile() throws IOException, NoSuchAlgorithmException, SpeedBagException { + double bagVersion = 1.0; + String checksumAlgorithm = "MD5"; + Map bagMetadata = new HashMap<>(); + SpeedBagIt bag = new SpeedBagIt(bagVersion, checksumAlgorithm, bagMetadata); + + String dataFile1 = "1234, 9876, 3845"; + String dataFile2 = "trees, cars, bridges"; + InputStream dataFile1Stream = new ByteArrayInputStream(dataFile1.getBytes(StandardCharsets.UTF_8)); + InputStream dataFile2Stream = new ByteArrayInputStream(dataFile2.getBytes(StandardCharsets.UTF_8)); + // Check standard data files + bag.addFile(dataFile1Stream, "data/data_file1.csv", false); + assertThrows(SpeedBagException.class, ()-> { + bag.addFile(dataFile2Stream, "data/data_file1.csv", false); + }); + // Check tag files + bag.addFile(dataFile1Stream, "tag/data_file1.csv", true); + assertThrows(SpeedBagException.class, ()-> { + bag.addFile(dataFile2Stream, "tag/data_file1.csv", true); + }); + } } +