Skip to content

Commit

Permalink
combined path checks for manifest and fetch files and added tests for…
Browse files Browse the repository at this point in the history
… malicious urls
  • Loading branch information
johnscancella committed Dec 16, 2016
1 parent 9b0db29 commit 30669a0
Show file tree
Hide file tree
Showing 12 changed files with 98 additions and 109 deletions.
9 changes: 5 additions & 4 deletions src/main/java/gov/loc/repository/bagit/domain/FetchItem.java
@@ -1,6 +1,7 @@
package gov.loc.repository.bagit.domain;

import java.net.URL;
import java.nio.file.Path;
import java.util.Objects;

/**
Expand All @@ -19,13 +20,13 @@ public final class FetchItem {
public final Long length;

/**
* The path relative to the /data directory
* The path where the fetched item should be put
*/
public final String path;
public final Path path;

private transient String cachedString;

public FetchItem(final URL url, final Long length, final String path){
public FetchItem(final URL url, final Long length, final Path path){
this.url = url;
this.length = length;
this.path = path;
Expand Down Expand Up @@ -64,7 +65,7 @@ public Long getLength() {
return length;
}

public String getPath() {
public Path getPath() {
return path;
}

Expand Down
@@ -0,0 +1,12 @@
package gov.loc.repository.bagit.exceptions;

/**
* Class to represent an error when the bag manifest file does not conform to the bagit specfication format
*/
public class InvalidBagitFileFormatException extends Exception {
private static final long serialVersionUID = 1L;

public InvalidBagitFileFormatException(final String message){
super(message);
}
}

This file was deleted.

This file was deleted.

62 changes: 26 additions & 36 deletions src/main/java/gov/loc/repository/bagit/reader/BagReader.java
Expand Up @@ -2,12 +2,15 @@

import java.io.BufferedReader;
import java.io.IOException;
import java.net.URI;
import java.net.URISyntaxException;
import java.net.URL;
import java.nio.charset.Charset;
import java.nio.charset.StandardCharsets;
import java.nio.file.DirectoryStream;
import java.nio.file.Files;
import java.nio.file.Path;
import java.nio.file.Paths;
import java.util.AbstractMap.SimpleImmutableEntry;
import java.util.ArrayList;
import java.util.HashMap;
Expand All @@ -22,8 +25,7 @@
import gov.loc.repository.bagit.domain.Manifest;
import gov.loc.repository.bagit.domain.Version;
import gov.loc.repository.bagit.exceptions.InvalidBagMetadataException;
import gov.loc.repository.bagit.exceptions.InvalidFetchFormatException;
import gov.loc.repository.bagit.exceptions.InvalidManifestFormatException;
import gov.loc.repository.bagit.exceptions.InvalidBagitFileFormatException;
import gov.loc.repository.bagit.exceptions.MaliciousPathException;
import gov.loc.repository.bagit.exceptions.UnparsableVersionException;
import gov.loc.repository.bagit.exceptions.UnsupportedAlgorithmException;
Expand Down Expand Up @@ -61,10 +63,9 @@ public BagReader(final BagitAlgorithmNameToSupportedAlgorithmMapping nameMapping
* @throws MaliciousPathException if there is path that is referenced in the manifest or fetch file that is outside the bag root directory
* @throws InvalidBagMetadataException if the metadata or bagit.txt file does not conform to the bagit spec
* @throws UnsupportedAlgorithmException if the manifest uses a algorithm that isn't supported
* @throws InvalidManifestFormatException if the manifest is not formatted properly
* @throws InvalidFetchFormatException if the fetch format does not follow the bagit specification
* @throws InvalidBagitFileFormatException if the manifest or fetch file is not formatted properly
*/
public Bag read(final Path rootDir) throws IOException, UnparsableVersionException, MaliciousPathException, InvalidBagMetadataException, UnsupportedAlgorithmException, InvalidManifestFormatException, InvalidFetchFormatException{
public Bag read(final Path rootDir) throws IOException, UnparsableVersionException, MaliciousPathException, InvalidBagMetadataException, UnsupportedAlgorithmException, InvalidBagitFileFormatException{
final Bag bag = new Bag();

//@Incubating
Expand Down Expand Up @@ -139,7 +140,7 @@ Version parseVersion(final String version) throws UnparsableVersionException{
/*
* Finds and reads all manifest files in the rootDir and adds them to the given bag.
*/
void readAllManifests(final Path rootDir, final Bag bag) throws IOException, MaliciousPathException, UnsupportedAlgorithmException, InvalidManifestFormatException{
void readAllManifests(final Path rootDir, final Bag bag) throws IOException, MaliciousPathException, UnsupportedAlgorithmException, InvalidBagitFileFormatException{
logger.info("Attempting to find and read manifests");
final DirectoryStream<Path> manifests = getAllManifestFiles(rootDir);

Expand Down Expand Up @@ -183,9 +184,9 @@ public boolean accept(final Path file) throws IOException {
* @throws IOException if there is a problem reading a file
* @throws MaliciousPathException if there is path that is referenced in the manifest that is outside the bag root directory
* @throws UnsupportedAlgorithmException if the manifest uses a algorithm that isn't supported
* @throws InvalidManifestFormatException if the manifest is not formatted properly
* @throws InvalidBagitFileFormatException if the manifest is not formatted properly
*/
public Manifest readManifest(final Path manifestFile, final Path bagRootDir, final Charset charset) throws IOException, MaliciousPathException, UnsupportedAlgorithmException, InvalidManifestFormatException{
public Manifest readManifest(final Path manifestFile, final Path bagRootDir, final Charset charset) throws IOException, MaliciousPathException, UnsupportedAlgorithmException, InvalidBagitFileFormatException{
logger.debug("Reading manifest [{}]", manifestFile);
final String alg = PathUtils.getFilename(manifestFile).split("[-\\.]")[1];
final SupportedAlgorithm algorithm = nameMapping.getSupportedAlgorithm(alg);
Expand All @@ -201,7 +202,7 @@ public Manifest readManifest(final Path manifestFile, final Path bagRootDir, fin
/*
* read the manifest file into a map of files and checksums
*/
Map<Path, String> readChecksumFileMap(final Path manifestFile, final Path bagRootDir, final Charset charset) throws IOException, MaliciousPathException, InvalidManifestFormatException{
Map<Path, String> readChecksumFileMap(final Path manifestFile, final Path bagRootDir, final Charset charset) throws IOException, MaliciousPathException, InvalidBagitFileFormatException{
final HashMap<Path, String> map = new HashMap<>();
final BufferedReader br = Files.newBufferedReader(manifestFile, charset);

Expand All @@ -222,22 +223,31 @@ Map<Path, String> readChecksumFileMap(final Path manifestFile, final Path bagRoo
/*
* Create the file and check it for various things, like starting with a *
*/
private Path createFileFromManifest(final Path bagRootDir, final String path) throws MaliciousPathException, InvalidManifestFormatException{
private Path createFileFromManifest(final Path bagRootDir, final String path) throws MaliciousPathException, InvalidBagitFileFormatException{
String fixedPath = path;
if(path.charAt(0) == '*'){
logger.warn("Encountered path that was created by non-bagit tool. Removing * from path. Please remove all * from manifest files!");
fixedPath = path.substring(1); //remove the * from the path
}

if(path.contains("\\")){
throw new InvalidManifestFormatException(ERROR_PREFIX + path + "] is invalid due to the use of the path separactor [\\]");
throw new InvalidBagitFileFormatException(ERROR_PREFIX + path + "] is invalid due to the use of the path separactor [\\]");
}

if(path.contains("~/")){
throw new MaliciousPathException(ERROR_PREFIX + path + "] is trying to be malicious and access a file outside the bag");
}

final Path file = bagRootDir.resolve(PathUtils.decodeFilname(fixedPath)).normalize();
fixedPath = PathUtils.decodeFilname(fixedPath);
Path file = bagRootDir.resolve(fixedPath).normalize();
if(fixedPath.startsWith("file://")){
try {
file = Paths.get(new URI(fixedPath));
} catch (URISyntaxException e) {
// TODO Auto-generated catch block
e.printStackTrace();
}
}

if(!file.normalize().startsWith(bagRootDir)){
throw new MaliciousPathException(ERROR_PREFIX + file + "] is outside the bag root directory of " + bagRootDir +
Expand Down Expand Up @@ -284,11 +294,11 @@ public List<SimpleImmutableEntry<String, String>> readBagMetadata(final Path roo
* @return a list of items to fetch
*
* @throws IOException if there is a problem reading a file
* @throws InvalidFetchFormatException if the fetch format does not follow the bagit specification
* @throws MaliciousPathException if the path was crafted to point outside the bag directory
* @throws InvalidBagitFileFormatException if the fetch format does not follow the bagit specification
*/
@SuppressWarnings("PMD.AvoidInstantiatingObjectsInLoops")
public List<FetchItem> readFetch(final Path fetchFile, final Charset encoding, final Path bagRootDir) throws IOException, MaliciousPathException, InvalidFetchFormatException{
public List<FetchItem> readFetch(final Path fetchFile, final Charset encoding, final Path bagRootDir) throws IOException, MaliciousPathException, InvalidBagitFileFormatException{
logger.info("Attempting to read [{}]", fetchFile);
final BufferedReader br = Files.newBufferedReader(fetchFile, encoding);
final List<FetchItem> itemsToFetch = new ArrayList<>();
Expand All @@ -299,12 +309,12 @@ public List<FetchItem> readFetch(final Path fetchFile, final Charset encoding, f
URL url = null;
while(line != null){
parts = line.split("\\s+", 3);
checkPath(parts[2], bagRootDir);
final Path path = createFileFromManifest(bagRootDir, parts[2]);
length = parts[1].equals("-") ? -1 : Long.decode(parts[1]);
url = new URL(parts[0]);

logger.debug("Read URL [{}] length [{}] path [{}] from fetch file [{}]", url, length, parts[2], fetchFile);
final FetchItem itemToFetch = new FetchItem(url, length, parts[2]);
final FetchItem itemToFetch = new FetchItem(url, length, path);
itemsToFetch.add(itemToFetch);

line = br.readLine();
Expand All @@ -313,26 +323,6 @@ public List<FetchItem> readFetch(final Path fetchFile, final Charset encoding, f
return itemsToFetch;
}

/*
* Check for malicious paths as well as invalid format
*/
private void checkPath(final String path, final Path bagRoot) throws MaliciousPathException, InvalidFetchFormatException{
if(path.contains("\\")){
logger.warn("Path [{}] contains [\\] which is not allowed by the bagit specification. Use [/] instead.", path);
throw new InvalidFetchFormatException(ERROR_PREFIX + path + "] uses invalid path separator [\\]. Use [/] instead.");
}

if(path.contains("~/")){
throw new MaliciousPathException(ERROR_PREFIX + path + "] is trying to be malicious by accessing a file outside the bag");
}

final Path finalPath = bagRoot.resolve(path).normalize();
if(!finalPath.startsWith(bagRoot)){
throw new MaliciousPathException(ERROR_PREFIX + path + "] is outside the bag root directory of " + bagRoot +
"! This is not allowed according to the bagit specification!");
}
}

/*
* Generic method to read key value pairs from the bagit files, like bagit.txt or bag-info.txt
*/
Expand Down
12 changes: 6 additions & 6 deletions src/main/java/gov/loc/repository/bagit/verify/BagVerifier.java
Expand Up @@ -26,7 +26,7 @@
import gov.loc.repository.bagit.domain.Version;
import gov.loc.repository.bagit.exceptions.CorruptChecksumException;
import gov.loc.repository.bagit.exceptions.FileNotInPayloadDirectoryException;
import gov.loc.repository.bagit.exceptions.InvalidManifestFormatException;
import gov.loc.repository.bagit.exceptions.InvalidBagitFileFormatException;
import gov.loc.repository.bagit.exceptions.InvalidPayloadOxumException;
import gov.loc.repository.bagit.exceptions.MaliciousPathException;
import gov.loc.repository.bagit.exceptions.MissingBagitFileException;
Expand Down Expand Up @@ -154,9 +154,9 @@ public void quicklyVerify(final Bag bag, final boolean ignoreHiddenFiles) throws
* @throws MaliciousPathException if there is path that is referenced in the manifest that is outside the bag root directory
* @throws VerificationException some other exception happened during processing so capture it here.
* @throws UnsupportedAlgorithmException if the manifest uses a algorithm that isn't supported
* @throws InvalidManifestFormatException if the manifest is not formatted properly
* @throws InvalidBagitFileFormatException if the manifest is not formatted properly
*/
public void isValid(final Bag bag, final boolean ignoreHiddenFiles) throws IOException, NoSuchAlgorithmException, MissingPayloadManifestException, MissingBagitFileException, MissingPayloadDirectoryException, FileNotInPayloadDirectoryException, InterruptedException, MaliciousPathException, CorruptChecksumException, VerificationException, UnsupportedAlgorithmException, InvalidManifestFormatException{
public void isValid(final Bag bag, final boolean ignoreHiddenFiles) throws IOException, NoSuchAlgorithmException, MissingPayloadManifestException, MissingBagitFileException, MissingPayloadDirectoryException, FileNotInPayloadDirectoryException, InterruptedException, MaliciousPathException, CorruptChecksumException, VerificationException, UnsupportedAlgorithmException, InvalidBagitFileFormatException{
logger.info("Checking if the bag with root directory [{}] is valid.", bag.getRootDir());
isComplete(bag, ignoreHiddenFiles);

Expand Down Expand Up @@ -218,11 +218,11 @@ void checkHashes(final Manifest manifest) throws CorruptChecksumException, Inter
* @throws InterruptedException if the threads are interrupted when checking if all files are listed in manifest(s)
* @throws MaliciousPathException if there is path that is referenced in the manifest that is outside the bag root directory
* @throws UnsupportedAlgorithmException if the manifest uses a algorithm that isn't supported
* @throws InvalidManifestFormatException if the manifest is not formatted properly
* @throws InvalidBagitFileFormatException if the manifest is not formatted properly
*/
public void isComplete(final Bag bag, final boolean ignoreHiddenFiles) throws
IOException, MissingPayloadManifestException, MissingBagitFileException, MissingPayloadDirectoryException,
FileNotInPayloadDirectoryException, InterruptedException, MaliciousPathException, UnsupportedAlgorithmException, InvalidManifestFormatException{
FileNotInPayloadDirectoryException, InterruptedException, MaliciousPathException, UnsupportedAlgorithmException, InvalidBagitFileFormatException{
logger.info("Checking if the bag with root directory [{}] is complete.", bag.getRootDir());

final Path dataDir = getDataDir(bag);
Expand Down Expand Up @@ -321,7 +321,7 @@ private void checkIfAtLeastOnePayloadManifestsExist(final Path rootDir, final Ve
/*
* get all the files listed in all the manifests
*/
private Set<Path> getAllFilesListedInManifests(final Bag bag) throws IOException, MaliciousPathException, UnsupportedAlgorithmException, InvalidManifestFormatException{
private Set<Path> getAllFilesListedInManifests(final Bag bag) throws IOException, MaliciousPathException, UnsupportedAlgorithmException, InvalidBagitFileFormatException{
logger.debug("Getting all files listed in the manifest(s)");
final Set<Path> filesListedInManifests = new HashSet<>();

Expand Down
25 changes: 13 additions & 12 deletions src/test/java/gov/loc/repository/bagit/domain/FetchItemTest.java
Expand Up @@ -2,6 +2,7 @@

import java.net.MalformedURLException;
import java.net.URL;
import java.nio.file.Paths;

import org.junit.Assert;
import org.junit.Before;
Expand All @@ -18,61 +19,61 @@ public void setup() throws MalformedURLException{

@Test
public void testToString() throws MalformedURLException{
FetchItem item = new FetchItem(url, 1l, "/foo");
FetchItem item = new FetchItem(url, 1l, Paths.get("/foo"));
String expected = "https://github.com/LibraryOfCongress/bagit-java 1 /foo";

assertEquals(expected, item.toString());
}

@Test
public void testHashCodeReturnsSameValueForEqualObjects(){
FetchItem item1 = new FetchItem(url, 1l, "/foo");
FetchItem item2 = new FetchItem(url, 1l, "/foo");
FetchItem item1 = new FetchItem(url, 1l, Paths.get("/foo"));
FetchItem item2 = new FetchItem(url, 1l, Paths.get("/foo"));

assertEquals(item1.hashCode(), item2.hashCode());
}

@Test
public void testHashCodeReturnsDifferentValueForDifferentObjects(){
FetchItem item1 = new FetchItem(url, 1l, "/foo");
FetchItem item2 = new FetchItem(url, 1l, "/bar");
FetchItem item1 = new FetchItem(url, 1l, Paths.get("/foo"));
FetchItem item2 = new FetchItem(url, 1l, Paths.get("/bar"));

assertNotEquals(item1.hashCode(), item2.hashCode());
}

@Test
public void testEqualsReturnsTrueWhenSameObject(){
FetchItem item = new FetchItem(url, 1l, "/foo");
FetchItem item = new FetchItem(url, 1l, Paths.get("/foo"));

assertTrue(item.equals(item));
}

@Test
public void testEqualsReturnsFalseWhenNull(){
FetchItem item = new FetchItem(url, 1l, "/foo");
FetchItem item = new FetchItem(url, 1l, Paths.get("/foo"));

assertFalse(item.equals(null));
}

@Test
public void testEqualsReturnsFalseWhenDifferentTypes(){
FetchItem item = new FetchItem(url, 1l, "/foo");
FetchItem item = new FetchItem(url, 1l, Paths.get("/foo"));

assertFalse(item.equals("foo"));
}

@Test
public void testEqualsReturnsTrueWhenSameValues(){
FetchItem item1 = new FetchItem(url, 1l, "/foo");
FetchItem item2 = new FetchItem(url, 1l, "/foo");
FetchItem item1 = new FetchItem(url, 1l, Paths.get("/foo"));
FetchItem item2 = new FetchItem(url, 1l, Paths.get("/foo"));

assertTrue(item1.equals(item2));
}

@Test
public void testEqualsReturnsFalseWhenDifferentValues(){
FetchItem item1 = new FetchItem(url, 1l, "/foo");
FetchItem item2 = new FetchItem(url, 1l, "/bar");
FetchItem item1 = new FetchItem(url, 1l, Paths.get("/foo"));
FetchItem item2 = new FetchItem(url, 1l, Paths.get("/bar"));

assertFalse(item1.equals(item2));
}
Expand Down
Expand Up @@ -3,8 +3,6 @@
import java.io.IOException;
import java.net.URL;
import java.nio.file.Files;
import java.nio.file.Path;
import java.nio.file.Paths;
import java.nio.file.StandardCopyOption;

import org.junit.Assert;
Expand All @@ -26,12 +24,10 @@ public class FetchHttpFileExample extends Assert {
@Test
public void fetchFileUsingJavaStandardLibrary() throws IOException{
//in actual usage you would iterate over the list of FetchItem in the Bag
FetchItem item = new FetchItem(new URL("https://en.wikipedia.org/wiki/Main_Page"), 0l, folder.newFile("Main_page.html").toString());
FetchItem item = new FetchItem(new URL("https://en.wikipedia.org/wiki/Main_Page"), 0l, folder.newFile("Main_page.html").toPath());
try{
Path path = Paths.get(item.path);

Files.copy(item.url.openStream(), path, StandardCopyOption.REPLACE_EXISTING);
assertTrue(Files.exists(path));
Files.copy(item.url.openStream(), item.path, StandardCopyOption.REPLACE_EXISTING);
assertTrue(Files.exists(item.path));
}
catch(Exception e){
e.printStackTrace();
Expand Down

0 comments on commit 30669a0

Please sign in to comment.