From 378f591569b5365524bf483488561951e8bda0cd Mon Sep 17 00:00:00 2001 From: thomasthelen Date: Wed, 15 Mar 2023 21:28:09 -0700 Subject: [PATCH] Use HashMap for tracking SpeedFile objects and paths --- .../org/dataone/speedbagit/SpeedBagIt.java | 59 ++++++------------- .../org/dataone/speedbagit/ProfilingTest.java | 4 +- .../dataone/speedbagit/SpeedBagItTest.java | 17 +++--- 3 files changed, 28 insertions(+), 52 deletions(-) diff --git a/src/main/java/org/dataone/speedbagit/SpeedBagIt.java b/src/main/java/org/dataone/speedbagit/SpeedBagIt.java index dab06e8..6f3df67 100644 --- a/src/main/java/org/dataone/speedbagit/SpeedBagIt.java +++ b/src/main/java/org/dataone/speedbagit/SpeedBagIt.java @@ -33,10 +33,8 @@ import java.text.SimpleDateFormat; import java.time.LocalDateTime; import java.time.format.DateTimeFormatter; -import java.util.ArrayList; import java.util.Date; import java.util.HashMap; -import java.util.List; import java.util.Locale; import java.util.Map; import java.util.Objects; @@ -74,9 +72,10 @@ public class SpeedBagIt { // Map of key-values that go in the bagit.txt file public Map bagitMetadata; - // A list holding all of the files in the bag - private List dataFiles; - private List tagFiles; + // Containers for keeping track of tag & data files, keyed off of + // their relative file path + private HashMap dataFiles; + private HashMap tagFiles; // An ExecutorService to run the piped stream in another thread private static ExecutorService executor = null; @@ -101,8 +100,8 @@ public SpeedBagIt(double version, Map bagitMetadata) throws IOException { this.version = version; this.checksumAlgorithm = checksumAlgorithm; - this.dataFiles = new ArrayList<>(); - this.tagFiles = new ArrayList<>(); + this.dataFiles = new HashMap<>(); + this.tagFiles = new HashMap<>(); this.bagitMetadata = bagitMetadata; this.dataManifestFile = new HashMap<> (); this.tagManifestFile = new HashMap<> (); @@ -120,17 +119,7 @@ public SpeedBagIt(double version, */ public SpeedBagIt(double version, String checksumAlgorithm) throws IOException { - this.version = version; - this.checksumAlgorithm = checksumAlgorithm; - this.dataFiles = new ArrayList<>(); - this.tagFiles = new ArrayList<>(); - this.bagitMetadata = new HashMap<> (); - this.dataManifestFile = new HashMap<> (); - this.tagManifestFile = new HashMap<> (); - - this.properties = new Properties(); - this.properties.load(Objects.requireNonNull(this.getClass(). - getClassLoader().getResourceAsStream("speed-bagit.properties"))); + this(version, checksumAlgorithm, new HashMap<>()); } /** @@ -152,9 +141,9 @@ public void addFile(InputStream file, String bagPath, MessageDigest checksum, bo } SpeedFile newFile = new SpeedFile(new SpeedStream(file, checksum), bagPath, isTagFile); if (isTagFile) { - this.tagFiles.add(newFile); + this.tagFiles.put(bagPath, newFile); } else { - this.dataFiles.add(newFile); + this.dataFiles.put(bagPath, newFile); } } @@ -166,23 +155,13 @@ public void addFile(InputStream file, String bagPath, MessageDigest checksum, bo */ private boolean hasPathCollisions(String path, boolean isTagFile) { if (isTagFile) { - for (SpeedFile tagFile : this.tagFiles) { - if (Objects.equals(tagFile.getPath(), path)) { - return true; - } - } + return this.tagFiles.containsKey(path); } else { - for (SpeedFile dataFile : this.dataFiles) { - if (Objects.equals(dataFile.getPath(), path)) { - return true; - } - } + return this.dataFiles.containsKey(path); } - return false; } - /** * Adds a stream of data to the bag. * @@ -317,9 +296,9 @@ public void run() { String timeStamp = new SimpleDateFormat("yyyy.MM.dd.HH.mm.ss").format(new Date()); logger.info(String.format("Streaming bag at %s", timeStamp)); int totalSize = 0; - // Stream all of the files in the root 'data' directory + // Stream all the files in the root 'data' directory - for (SpeedFile streamingFile : dataFiles) { + for (SpeedFile streamingFile : dataFiles.values()) { try { streamFile(zos, streamingFile); String checksum = new String(streamingFile.getStream().getChecksum()); @@ -351,8 +330,8 @@ public void run() { fileStream = new ByteArrayInputStream(dataManifest.getBytes(StandardCharsets.UTF_8)); addFile(fileStream, fileName, MessageDigest.getInstance(checksumAlgorithm), true); - // Write all of the tag files - for (SpeedFile streamingFile : tagFiles) { + // Write all the tag files + for (SpeedFile streamingFile : tagFiles.values()) { try { streamFile(zos, streamingFile); String checksum = streamingFile.getStream().getChecksum(); @@ -394,11 +373,11 @@ public int getPayloadFileCount() { return this.dataFiles.size(); } /** - * Returns all of the tag files that have been added to + * Returns all the tag files that have been added to * the bag. - * @return List of tag files + * @return HashMap of tag files */ - public List getTagFiles() { + public HashMap getTagFiles() { return this.tagFiles; } @@ -407,7 +386,7 @@ public List getTagFiles() { * to the bag. These are the files that belong under data/ * @return List of data files */ - public List getDataFiles() { + public HashMap getDataFiles() { return this.dataFiles; } diff --git a/src/test/java/org/dataone/speedbagit/ProfilingTest.java b/src/test/java/org/dataone/speedbagit/ProfilingTest.java index 32c5f50..5b6672c 100644 --- a/src/test/java/org/dataone/speedbagit/ProfilingTest.java +++ b/src/test/java/org/dataone/speedbagit/ProfilingTest.java @@ -43,8 +43,8 @@ /** * A suite of tests that should be run under a profiler and ignored by CI systems and ordinary builds. - * These should be run before each release to ensure that emory management is - * sane (ie entire files aren't loaded into memory at once). + * These should be run before each release to ensure that memory management is + * working properly (ie entire files aren't loaded into memory at once). */ public class ProfilingTest { diff --git a/src/test/java/org/dataone/speedbagit/SpeedBagItTest.java b/src/test/java/org/dataone/speedbagit/SpeedBagItTest.java index c0a2a3f..174d3fd 100644 --- a/src/test/java/org/dataone/speedbagit/SpeedBagItTest.java +++ b/src/test/java/org/dataone/speedbagit/SpeedBagItTest.java @@ -49,10 +49,7 @@ import org.apache.commons.io.IOUtils; import org.junit.jupiter.api.Test; -import static org.junit.jupiter.api.Assertions.assertEquals; -import static org.junit.jupiter.api.Assertions.assertTrue; -import static org.junit.jupiter.api.Assertions.fail; - +import static org.junit.jupiter.api.Assertions.*; /** @@ -340,15 +337,15 @@ public void testAddFile() throws SpeedBagException, NoSuchAlgorithmException, IO SpeedBagIt bag = getStockBag(); - List dataFiles = bag.getDataFiles(); + HashMap dataFiles = bag.getDataFiles(); assert dataFiles.size() == 2; - for (SpeedFile dataFile: dataFiles) { + for (SpeedFile dataFile: dataFiles.values()) { assert expectedDataPaths.contains(dataFile.getPath()); } - List metadataFiles = bag.getTagFiles(); + HashMap metadataFiles = bag.getTagFiles(); assert metadataFiles.size() == 2; - for (SpeedFile tagFile: metadataFiles) { + for (SpeedFile tagFile: metadataFiles.values()) { assert expecteMetadataPaths.contains(tagFile.getPath()); } } @@ -356,14 +353,14 @@ public void testAddFile() throws SpeedBagException, NoSuchAlgorithmException, IO @Test public void testGetDataFiles() throws SpeedBagException, NoSuchAlgorithmException, IOException { SpeedBagIt bag = getStockBag(); - List dataFiles = bag.getDataFiles(); + HashMap dataFiles = bag.getDataFiles(); assert dataFiles.size() == 2; } @Test public void testGetTagFiles() throws SpeedBagException, NoSuchAlgorithmException, IOException { SpeedBagIt bag = getStockBag(); - List metadataFiles = bag.getTagFiles(); + HashMap metadataFiles = bag.getTagFiles(); assert metadataFiles.size() == 2; }