/
ManifestVerifier.java
180 lines (158 loc) · 8.73 KB
/
ManifestVerifier.java
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
package com.researchworx.cresco.plugins.gobjectIngestion.objectstorage;
import com.researchworx.cresco.library.plugin.core.CPlugin;
import com.researchworx.cresco.library.utilities.CLogger;
import gov.loc.repository.bagit.domain.Bag;
import gov.loc.repository.bagit.domain.Manifest;
import gov.loc.repository.bagit.domain.Version;
import gov.loc.repository.bagit.exceptions.FileNotInPayloadDirectoryException;
import gov.loc.repository.bagit.exceptions.InvalidBagitFileFormatException;
import gov.loc.repository.bagit.exceptions.MaliciousPathException;
import gov.loc.repository.bagit.exceptions.UnsupportedAlgorithmException;
import gov.loc.repository.bagit.hash.BagitAlgorithmNameToSupportedAlgorithmMapping;
import gov.loc.repository.bagit.hash.StandardBagitAlgorithmNameToSupportedAlgorithmMapping;
import gov.loc.repository.bagit.reader.ManifestReader;
import gov.loc.repository.bagit.util.PathUtils;
import gov.loc.repository.bagit.verify.CheckIfFileExistsTask;
import gov.loc.repository.bagit.verify.PayloadFileExistsInAllManifestsVistor;
import gov.loc.repository.bagit.verify.PayloadFileExistsInAtLeastOneManifestVistor;
import org.slf4j.helpers.MessageFormatter;
import java.io.IOException;
import java.nio.file.DirectoryStream;
import java.nio.file.Files;
import java.nio.file.Path;
import java.util.HashSet;
import java.util.ResourceBundle;
import java.util.Set;
import java.util.concurrent.*;
public class ManifestVerifier implements AutoCloseable {
private static CLogger logger = new CLogger(ManifestVerifier.class, new LinkedBlockingQueue<>(), "", "", "");
public static void setLogger(CPlugin plugin) {
logger = new CLogger(ManifestVerifier.class, plugin.getMsgOutQueue(), plugin.getRegion(), plugin.getAgent(), plugin.getPluginID(), CLogger.Level.Trace);
}
private static final ResourceBundle messages = ResourceBundle.getBundle("MessageBundle");
private transient final BagitAlgorithmNameToSupportedAlgorithmMapping nameMapping;
private transient final ExecutorService executor;
/**
* Create a PayloadVerifier using a cached thread pool and the
* {@link StandardBagitAlgorithmNameToSupportedAlgorithmMapping} mapping
*/
public ManifestVerifier(){
this(new StandardBagitAlgorithmNameToSupportedAlgorithmMapping(), Executors.newFixedThreadPool(Runtime.getRuntime().availableProcessors()));
}
/**
* Create a PayloadVerifier using a cached thread pool and a custom mapping
*
* @param nameMapping the mapping between BagIt algorithm name and the java supported algorithm
*/
public ManifestVerifier(final BagitAlgorithmNameToSupportedAlgorithmMapping nameMapping) {
this(nameMapping, Executors.newFixedThreadPool(Runtime.getRuntime().availableProcessors()));
}
/**
* Create a PayloadVerifier using a custom thread pool and the
* {@link StandardBagitAlgorithmNameToSupportedAlgorithmMapping} mapping
*
* @param executor the thread pool to use when doing work
*/
public ManifestVerifier(final ExecutorService executor) {
this(new StandardBagitAlgorithmNameToSupportedAlgorithmMapping(), executor);
}
/**
* Create a PayloadVerifier using a custom thread pool and a custom mapping
*
* @param nameMapping the mapping between BagIt algorithm name and the java supported algorithm
* @param executor the thread pool to use when doing work
*/
public ManifestVerifier(final BagitAlgorithmNameToSupportedAlgorithmMapping nameMapping, final ExecutorService executor) {
this.nameMapping = nameMapping;
this.executor = executor;
}
@Override
public void close() throws SecurityException{
//shutdown the thread pool so the resource isn't leaked
executor.shutdown();
}
/**
* Verify that all the files in the payload directory are listed in the payload manifest and
* all files listed in all manifests exist.
*
* @param bag the bag to check to check
* @param ignoreHiddenFiles to ignore hidden files unless they are specifically listed in a manifest
*
* @throws IOException if there is a problem reading a file
* @throws MaliciousPathException the path in the manifest was specifically crafted to cause harm
* @throws UnsupportedAlgorithmException if the algorithm used for the manifest is unsupported
* @throws InvalidBagitFileFormatException if any of the manifests don't conform to the bagit specification
* @throws FileNotInPayloadDirectoryException if a file is listed in a manifest but doesn't exist in the payload directory
* @throws InterruptedException if a thread is interrupted while doing work
*/
public void verifyManifests(final Bag bag, final boolean ignoreHiddenFiles)
throws IOException, MaliciousPathException, UnsupportedAlgorithmException,
InvalidBagitFileFormatException, FileNotInPayloadDirectoryException, InterruptedException {
final Set<Path> allFilesListedInManifests = getAllFilesListedInManifests(bag);
checkAllFilesListedInManifestExist(allFilesListedInManifests);
if (bag.getVersion().isOlder(new Version(1, 0))) {
checkAllFilesInPayloadDirAreListedInAtLeastOneAManifest(allFilesListedInManifests, PathUtils.getDataDir(bag), ignoreHiddenFiles);
} else {
CheckAllFilesInPayloadDirAreListedInAllManifests(bag.getPayLoadManifests(), PathUtils.getDataDir(bag), ignoreHiddenFiles);
}
}
/*
* get all the files listed in all the manifests
*/
private Set<Path> getAllFilesListedInManifests(final Bag bag)
throws IOException, MaliciousPathException, UnsupportedAlgorithmException, InvalidBagitFileFormatException {
logger.debug(messages.getString("all_files_in_manifests"));
final Set<Path> filesListedInManifests = new HashSet<>();
try(DirectoryStream<Path> directoryStream =
Files.newDirectoryStream(PathUtils.getBagitDir(bag.getVersion(), bag.getRootDir()))){
for (final Path path : directoryStream) {
final String filename = PathUtils.getFilename(path);
if (filename.startsWith("tagmanifest-") || filename.startsWith("manifest-")) {
logger.debug(messages.getString("get_listing_in_manifest"), path);
final Manifest manifest = ManifestReader.readManifest(nameMapping, path, bag.getRootDir(),
bag.getFileEncoding());
filesListedInManifests.addAll(manifest.getFileToChecksumMap().keySet());
}
}
}
return filesListedInManifests;
}
/*
* Make sure all the listed files actually exist
*/
@SuppressWarnings("PMD.AvoidInstantiatingObjectsInLoops")
private void checkAllFilesListedInManifestExist(final Set<Path> files) throws FileNotInPayloadDirectoryException, InterruptedException {
final CountDownLatch latch = new CountDownLatch(files.size());
final Set<Path> missingFiles = new ConcurrentSkipListSet<>();
logger.info(messages.getString("check_all_files_in_manifests_exist"));
for (final Path file : files) {
executor.execute(new CheckIfFileExistsTask(file, missingFiles, latch));
}
latch.await();
if (!missingFiles.isEmpty()) {
final String formattedMessage = messages.getString("missing_payload_files_error");
throw new FileNotInPayloadDirectoryException(MessageFormatter.format(formattedMessage, missingFiles).getMessage());
}
}
/*
* Make sure all files in the directory are in at least 1 manifest
*/
private static void checkAllFilesInPayloadDirAreListedInAtLeastOneAManifest(final Set<Path> filesListedInManifests,
final Path payloadDir, final boolean ignoreHiddenFiles) throws IOException {
logger.debug(messages.getString("checking_file_in_at_least_one_manifest"), payloadDir);
if (Files.exists(payloadDir)) {
Files.walkFileTree(payloadDir,
new PayloadFileExistsInAtLeastOneManifestVistor(filesListedInManifests, ignoreHiddenFiles));
}
}
/*
* as per the bagit-spec 1.0+ all files have to be listed in all manifests
*/
private static void CheckAllFilesInPayloadDirAreListedInAllManifests(final Set<Manifest> payLoadManifests,
final Path payloadDir, final boolean ignoreHiddenFiles) throws IOException {
logger.debug(messages.getString("checking_file_in_all_manifests"), payloadDir);
if (Files.exists(payloadDir)) {
Files.walkFileTree(payloadDir, new PayloadFileExistsInAllManifestsVistor(payLoadManifests, ignoreHiddenFiles));
}
}
}