diff --git a/README.md b/README.md index d818beb2..abf01f0d 100644 --- a/README.md +++ b/README.md @@ -172,10 +172,8 @@ Thanks to [jwalk][jwalk], all there was left to do is to write a command-line in ### Limitations * Interactive mode only looks good in dark terminals (see [this issue](https://github.com/Byron/dua-cli/issues/13)) -* _Hard links_ are not understood, thus hard-linked files will possibly be counted multiple times. * _Symlinks_ are followed and we obtain the logical size of the file they point to. Ideally, we only count their actual size. -* _logical filesize_ is used instead of computed or estimating actual size on disk. * _easy fix_: file names in main window are not truncated if too large. They are cut off on the right. * There are plenty of examples in `tests/fixtures` which don't render correctly in interactive mode. This can be due to graphemes not interpreted correctly. With Chinese characters for instance, diff --git a/src/aggregate.rs b/src/aggregate.rs index 63fed08b..28a3ccdc 100644 --- a/src/aggregate.rs +++ b/src/aggregate.rs @@ -1,4 +1,4 @@ -use crate::{WalkOptions, WalkResult}; +use crate::{InodeFilter, WalkOptions, WalkResult}; use failure::Error; use std::borrow::Cow; use std::{fmt, io, path::Path}; @@ -20,6 +20,7 @@ pub fn aggregate( let mut total = 0; let mut num_roots = 0; let mut aggregates = Vec::new(); + let mut inodes = InodeFilter::default(); for path in paths.into_iter() { num_roots += 1; let mut num_bytes = 0u64; @@ -29,7 +30,7 @@ pub fn aggregate( match entry { Ok(entry) => { let file_size = match entry.metadata { - Some(Ok(ref m)) if !m.is_dir() => { + Some(Ok(ref m)) if !m.is_dir() && (options.count_links || inodes.add(m)) => { if options.apparent_size { m.len() } else { diff --git a/src/common.rs b/src/common.rs index 60d0ee3d..2d5fcbac 100644 --- a/src/common.rs +++ b/src/common.rs @@ -152,6 +152,7 @@ pub struct WalkOptions { /// for more information. 
pub threads: usize, pub byte_format: ByteFormat, + pub count_links: bool, pub apparent_size: bool, pub color: Color, pub sorting: TraversalSorting,
diff --git a/src/inodefilter.rs b/src/inodefilter.rs
new file mode 100644
index 00000000..1523f1c2
--- /dev/null
+++ b/src/inodefilter.rs
@@ -0,0 +1,111 @@
+// NOTE(review): `feature` is a crate-level attribute and may be ignored in a non-root
+// module; consider enabling it in the crate root instead - TODO confirm.
+#![cfg_attr(windows, feature(windows_by_handle))]
+
+use std::collections::HashMap;
+
+/// Remembers hard-linked files so that each one is counted only once.
+///
+/// Entries are keyed by `(device, inode)` because inode numbers are only unique within a
+/// single filesystem. The value is the number of links that have not been seen yet.
+#[derive(Debug, Default, Clone)]
+pub struct InodeFilter {
+    inner: HashMap<(u64, u64), u64>,
+}
+
+impl InodeFilter {
+    /// Returns `true` if the file behind `metadata` should be counted, `false` if it is
+    /// another link to a file that was already counted.
+    #[cfg(unix)]
+    pub fn add(&mut self, metadata: &std::fs::Metadata) -> bool {
+        use std::os::unix::fs::MetadataExt;
+
+        self.add_dev_inode((metadata.dev(), metadata.ino()), metadata.nlink())
+    }
+
+    /// Returns `true` if the file behind `metadata` should be counted, `false` if it is
+    /// another link to a file that was already counted. Files whose identity cannot be
+    /// determined are always counted.
+    #[cfg(windows)]
+    pub fn add(&mut self, metadata: &std::fs::Metadata) -> bool {
+        use std::os::windows::fs::MetadataExt;
+
+        match (
+            metadata.volume_serial_number(),
+            metadata.file_index(),
+            metadata.number_of_links(),
+        ) {
+            (Some(volume), Some(inode), Some(nlinks)) => {
+                self.add_dev_inode((u64::from(volume), inode), u64::from(nlinks))
+            }
+            _ => true,
+        }
+    }
+
+    /// Always returns `true`: hard links cannot be identified on this platform.
+    #[cfg(not(any(unix, windows)))]
+    pub fn add(&mut self, _metadata: &std::fs::Metadata) -> bool {
+        true
+    }
+
+    /// Same as `add_dev_inode` for callers without a device id; all inodes share device `0`.
+    pub fn add_inode(&mut self, inode: u64, nlinks: u64) -> bool {
+        self.add_dev_inode((0, inode), nlinks)
+    }
+
+    /// Records one sighting of `dev_inode`, a `(device, inode)` pair with `nlinks` links.
+    ///
+    /// Returns `true` when `nlinks <= 1` or when no entry exists yet for the file, and
+    /// `false` for further sightings. After the last expected link the entry is dropped.
+    pub fn add_dev_inode(&mut self, dev_inode: (u64, u64), nlinks: u64) -> bool {
+        if nlinks <= 1 {
+            return true;
+        }
+
+        match self.inner.get_mut(&dev_inode) {
+            Some(remaining) => {
+                *remaining -= 1;
+
+                if *remaining == 0 {
+                    self.inner.remove(&dev_inode);
+                }
+
+                false
+            }
+            None => {
+                self.inner.insert(dev_inode, nlinks - 1);
+                true
+            }
+        }
+    }
+}
+
+#[cfg(test)]
+mod tests {
+    use super::*;
+
+    #[test]
+    fn it_filters_inodes() {
+        let mut inodes = InodeFilter::default();
+
+        assert!(inodes.add_inode(1, 2));
+        assert!(!inodes.add_inode(1, 2));
+
+        assert!(inodes.add_inode(1, 3));
+        assert!(!inodes.add_inode(1, 3));
+        assert!(!inodes.add_inode(1, 3));
+
+        assert!(inodes.add_inode(1, 1));
+        assert!(inodes.add_inode(1, 1));
+    }
+
+    #[test]
+    fn it_keeps_devices_apart() {
+        let mut inodes = InodeFilter::default();
+
+        assert!(inodes.add_dev_inode((1, 7), 2));
+        assert!(inodes.add_dev_inode((2, 7), 2));
+        assert!(!inodes.add_dev_inode((1, 7), 2));
+        assert!(!inodes.add_dev_inode((2, 7), 2));
+    }
+}
diff --git a/src/interactive/app_test/utils.rs b/src/interactive/app_test/utils.rs index f4216657..d696f6c0 100644 --- a/src/interactive/app_test/utils.rs +++ b/src/interactive/app_test/utils.rs @@ -165,6 +165,7 @@ pub fn 
initialized_app_and_terminal_with_closure>( threads: 1, byte_format: ByteFormat::Metric, apparent_size: true, + count_links: false, color: Color::None, sorting: TraversalSorting::AlphabeticalByFileName, }, diff --git a/src/lib.rs b/src/lib.rs index 9c1c3f2b..ae6f953d 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -5,8 +5,10 @@ extern crate jwalk; mod aggregate; mod common; +mod inodefilter; pub mod traverse; pub use aggregate::aggregate; pub use common::*; +pub(crate) use inodefilter::InodeFilter; diff --git a/src/main.rs b/src/main.rs index df7639a5..c1643a47 100644 --- a/src/main.rs +++ b/src/main.rs @@ -30,6 +30,7 @@ fn run() -> Result<(), Error> { Color::None }, apparent_size: opt.apparent_size, + count_links: opt.count_links, sorting: TraversalSorting::None, }; let res = match opt.command { diff --git a/src/options.rs b/src/options.rs index 4dc1affd..e3e44a21 100644 --- a/src/options.rs +++ b/src/options.rs @@ -56,6 +56,10 @@ pub struct Args { #[structopt(short = "A", long = "apparent-size")] pub apparent_size: bool, + /// Count hard-linked files each time they are seen + #[structopt(short = "l", long = "count-links")] + pub count_links: bool, + /// One or more input files or directories. If unset, we will use all entries in the current working directory. 
#[structopt(parse(from_os_str))] pub input: Vec, diff --git a/src/traverse.rs b/src/traverse.rs index c9bb4a92..13b16230 100644 --- a/src/traverse.rs +++ b/src/traverse.rs @@ -1,4 +1,4 @@ -use crate::{get_size_or_panic, WalkOptions}; +use crate::{get_size_or_panic, InodeFilter, WalkOptions}; use failure::Error; use petgraph::{graph::NodeIndex, stable_graph::StableGraph, Directed, Direction}; use std::{ffi::OsString, path::PathBuf, time::Duration, time::Instant}; @@ -66,6 +66,7 @@ impl Traversal { let mut sizes_per_depth_level = Vec::new(); let mut current_size_at_depth = 0; let mut previous_depth = 0; + let mut inodes = InodeFilter::default(); let mut last_checked = Instant::now(); @@ -93,7 +94,7 @@ impl Traversal { entry.file_name }; let file_size = match entry.metadata { - Some(Ok(ref m)) if !m.is_dir() => { + Some(Ok(ref m)) if !m.is_dir() && (walk_options.count_links || inodes.add(m)) => { if walk_options.apparent_size { m.len() } else {