-
Notifications
You must be signed in to change notification settings - Fork 351
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
b3sum: Implement recursive file hashing #170
base: master
Are you sure you want to change the base?
Changes from all commits
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -2,7 +2,8 @@ use anyhow::{bail, ensure, Context, Result}; | |
use clap::{App, Arg}; | ||
use std::cmp; | ||
use std::convert::TryInto; | ||
use std::fs::File; | ||
use std::fs::{self, File}; | ||
use std::fs::metadata; | ||
use std::io; | ||
use std::io::prelude::*; | ||
use std::path::{Path, PathBuf}; | ||
|
@@ -22,6 +23,7 @@ const NUM_THREADS_ARG: &str = "num-threads"; | |
const RAW_ARG: &str = "raw"; | ||
const CHECK_ARG: &str = "check"; | ||
const QUIET_ARG: &str = "quiet"; | ||
const RECURSE_ARG: &str = "recurse"; | ||
|
||
struct Args { | ||
inner: clap::ArgMatches, | ||
|
@@ -114,6 +116,14 @@ impl Args { | |
Must be used with --check.", | ||
), | ||
) | ||
.arg( | ||
Arg::with_name(RECURSE_ARG) | ||
.long(RECURSE_ARG) | ||
.short("r") | ||
.help( | ||
"Recurse through any directories supplied", | ||
), | ||
) | ||
// wild::args_os() is equivalent to std::env::args_os() on Unix, | ||
// but on Windows it adds support for globbing. | ||
.get_matches_from(wild::args_os()); | ||
|
@@ -184,6 +194,10 @@ impl Args { | |
fn quiet(&self) -> bool { | ||
self.inner.is_present(QUIET_ARG) | ||
} | ||
|
||
fn recurse(&self) -> bool { | ||
self.inner.is_present(RECURSE_ARG) | ||
} | ||
} | ||
|
||
enum Input { | ||
|
@@ -496,26 +510,43 @@ fn parse_check_line(mut line: &str) -> Result<ParsedCheckLine> { | |
} | ||
|
||
fn hash_one_input(path: &Path, args: &Args) -> Result<()> { | ||
let mut input = Input::open(path, args)?; | ||
let output = input.hash(args)?; | ||
if args.raw() { | ||
write_raw_output(output, args)?; | ||
return Ok(()); | ||
} | ||
if args.no_names() { | ||
let md = metadata(path).unwrap(); | ||
if md.is_dir() && args.recurse() { | ||
let mut entries = fs::read_dir(path)? | ||
.map(|res| res.map(|e| e.path())) | ||
.collect::<Result<Vec<_>, io::Error>>()?; | ||
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
I can neither confirm nor deny this was copied from some documentation I looked up. 😁 |
||
|
||
//Sort the directory entries as the order they're returned is undefined | ||
entries.sort(); | ||
for entry in entries { | ||
let result = hash_one_input(&entry, &args); | ||
if let Err(e) = result { | ||
eprintln!("{}: {}: {}", NAME, path.to_string_lossy(), e); | ||
return Err(e); | ||
} | ||
} | ||
} else { | ||
let mut input = Input::open(path, args)?; | ||
let output = input.hash(args)?; | ||
if args.raw() { | ||
write_raw_output(output, args)?; | ||
return Ok(()); | ||
} | ||
if args.no_names() { | ||
write_hex_output(output, args)?; | ||
println!(); | ||
return Ok(()); | ||
} | ||
let FilepathString { | ||
filepath_string, | ||
is_escaped, | ||
} = filepath_to_string(path); | ||
if is_escaped { | ||
print!("\\"); | ||
} | ||
write_hex_output(output, args)?; | ||
println!(); | ||
return Ok(()); | ||
} | ||
let FilepathString { | ||
filepath_string, | ||
is_escaped, | ||
} = filepath_to_string(path); | ||
if is_escaped { | ||
print!("\\"); | ||
println!(" {}", filepath_string); | ||
} | ||
write_hex_output(output, args)?; | ||
println!(" {}", filepath_string); | ||
Ok(()) | ||
} | ||
|
||
|
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Have you thought about what will happen for directory entries that are symlinks in this case? It looks like if they're symlinks to directories, then they'll be opened as though they were files, which will probably cause an error. But in fixing this, we have to be careful not to infinitely loop on circular symlinks. The right thing to do here isn't clear to me, and we might want to look at what similar recursive tools do.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
No, good point, I hadn't considered that. My use-case is on Windows where symlinks are very unlikely to exist but I'll add another option
--follow-symlinks
to follow them, and investigate how to detect loops. IMHO it would make sense not to follow them by default but I don't mind doing the opposite and having a --no-follow-symlinks
option.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
I think we need something similar to JavaScript
WeakSet
to track symlinks, thereby avoiding infinite loops. Edit: found it
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Directory junctions have been commonly used by Microsoft to ensure backward compatibility when OS upgrades changed the directory structure, e.g. the XP => Vista migration created
%userprofile%/My Documents <<===>> %userprofile%/Documents
.