From 5b522946adb5bb71dd51068eee5f1136e6403b31 Mon Sep 17 00:00:00 2001 From: Thomas Hurst Date: Sat, 22 Feb 2020 04:51:14 +0000 Subject: [PATCH 1/3] Add hardlink tracking, and an option to disable it --- src/aggregate.rs | 5 +- src/common.rs | 1 + src/inodefilter.rs | 76 +++++++++++++++++++++++++++++++ src/interactive/app_test/utils.rs | 1 + src/lib.rs | 2 + src/main.rs | 1 + src/options.rs | 4 ++ src/traverse.rs | 5 +- 8 files changed, 91 insertions(+), 4 deletions(-) create mode 100644 src/inodefilter.rs diff --git a/src/aggregate.rs b/src/aggregate.rs index 63fed08b..b847de5c 100644 --- a/src/aggregate.rs +++ b/src/aggregate.rs @@ -1,4 +1,4 @@ -use crate::{WalkOptions, WalkResult}; +use crate::{WalkOptions, WalkResult, InodeFilter}; use failure::Error; use std::borrow::Cow; use std::{fmt, io, path::Path}; @@ -20,6 +20,7 @@ pub fn aggregate( let mut total = 0; let mut num_roots = 0; let mut aggregates = Vec::new(); + let mut inodes = InodeFilter::default(); for path in paths.into_iter() { num_roots += 1; let mut num_bytes = 0u64; @@ -29,7 +30,7 @@ pub fn aggregate( match entry { Ok(entry) => { let file_size = match entry.metadata { - Some(Ok(ref m)) if !m.is_dir() => { + Some(Ok(ref m)) if !m.is_dir() && (options.count_links || inodes.add(m)) => { if options.apparent_size { m.len() } else { diff --git a/src/common.rs b/src/common.rs index 60d0ee3d..2d5fcbac 100644 --- a/src/common.rs +++ b/src/common.rs @@ -152,6 +152,7 @@ pub struct WalkOptions { /// for more information. pub threads: usize, pub byte_format: ByteFormat, + pub count_links: bool, pub apparent_size: bool, pub color: Color, pub sorting: TraversalSorting, diff --git a/src/inodefilter.rs b/src/inodefilter.rs new file mode 100644 index 00000000..6db12e15 --- /dev/null +++ b/src/inodefilter.rs @@ -0,0 +1,76 @@ + +#![cfg_attr(windows, feature(windows_by_handle))] + +use std::collections::HashMap; + +#[derive(Debug, Default, Clone)] +pub struct InodeFilter { + inner: HashMap +} + +impl InodeFilter { + #[cfg(unix)] + pub fn add(&mut self, metadata: &std::fs::Metadata) -> bool { + use std::os::unix::fs::MetadataExt; + + self.add_inode(metadata.ino(), metadata.nlink()) + } + + #[cfg(windows)] + pub fn add(&mut self, metadata: &std::fs::Metadata) -> bool { + use std::os::windows::fs::MetadataExt; + + if let (Some(inode), Some(nlinks)) = (metadata.file_index(), metadata.number_of_links()) { + self.add_inode(inode, nlinks as u64) + } else { + true + } + } + + #[cfg(not(any(unix, windows)))] + pub fn add(&mut self, metadata: &std::fs::Metadata) -> bool { + true + } + + pub fn add_inode(&mut self, inode: u64, nlinks: u64) -> bool { + if nlinks <= 1 { + return true; + } + + match self.inner.get_mut(&inode) { + Some(count) => { + *count -= 1; + + if *count == 0 { + self.inner.remove(&inode); + } + + false + }, + None => { + self.inner.insert(inode, nlinks - 1); + true + } + } + } +} + +#[cfg(test)] +mod tests { + use super::*; + + #[test] + fn it_filters_inodes() { + let mut inodes = InodeFilter::default(); + + assert!(inodes.add_inode(1, 2)); + assert!(!inodes.add_inode(1, 2)); + + assert!(inodes.add_inode(1, 3)); + assert!(!inodes.add_inode(1, 3)); + assert!(!inodes.add_inode(1, 3)); + + assert!(inodes.add_inode(1, 1)); + assert!(inodes.add_inode(1, 1)); + } +} diff --git a/src/interactive/app_test/utils.rs b/src/interactive/app_test/utils.rs index f4216657..d696f6c0 100644 --- a/src/interactive/app_test/utils.rs +++ b/src/interactive/app_test/utils.rs @@ -165,6 +165,7 @@ pub fn initialized_app_and_terminal_with_closure>( threads: 1, byte_format: ByteFormat::Metric, apparent_size: true, + count_links: false, color: Color::None, sorting: TraversalSorting::AlphabeticalByFileName, }, diff --git a/src/lib.rs b/src/lib.rs index 9c1c3f2b..ae6f953d 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -5,8 +5,10 @@ extern crate jwalk; mod aggregate; mod common; +mod inodefilter; pub mod traverse; pub use aggregate::aggregate; pub use common::*; +pub(crate) use inodefilter::InodeFilter; diff --git a/src/main.rs b/src/main.rs index df7639a5..c1643a47 100644 --- a/src/main.rs +++ b/src/main.rs @@ -30,6 +30,7 @@ fn run() -> Result<(), Error> { Color::None }, apparent_size: opt.apparent_size, + count_links: opt.count_links, sorting: TraversalSorting::None, }; let res = match opt.command { diff --git a/src/options.rs b/src/options.rs index 4dc1affd..e3e44a21 100644 --- a/src/options.rs +++ b/src/options.rs @@ -56,6 +56,10 @@ pub struct Args { #[structopt(short = "A", long = "apparent-size")] pub apparent_size: bool, + /// Count hard-linked files each time they are seen + #[structopt(short = "l", long = "count-links")] + pub count_links: bool, + /// One or more input files or directories. If unset, we will use all entries in the current working directory. #[structopt(parse(from_os_str))] pub input: Vec, diff --git a/src/traverse.rs b/src/traverse.rs index c9bb4a92..6e866c21 100644 --- a/src/traverse.rs +++ b/src/traverse.rs @@ -1,4 +1,4 @@ -use crate::{get_size_or_panic, WalkOptions}; +use crate::{get_size_or_panic, WalkOptions, InodeFilter}; use failure::Error; use petgraph::{graph::NodeIndex, stable_graph::StableGraph, Directed, Direction}; use std::{ffi::OsString, path::PathBuf, time::Duration, time::Instant}; @@ -66,6 +66,7 @@ impl Traversal { let mut sizes_per_depth_level = Vec::new(); let mut current_size_at_depth = 0; let mut previous_depth = 0; + let mut inodes = InodeFilter::default(); let mut last_checked = Instant::now(); @@ -93,7 +94,7 @@ impl Traversal { entry.file_name }; let file_size = match entry.metadata { - Some(Ok(ref m)) if !m.is_dir() => { + Some(Ok(ref m)) if !m.is_dir() && (walk_options.count_links || inodes.add(m)) => { if walk_options.apparent_size { m.len() } else { From ba7b071af53444cf33ed6a11aae02b34bc26c82b Mon Sep 17 00:00:00 2001 From: Sebastian Thiel Date: Sat, 22 Feb 2020 13:30:43 +0800 Subject: [PATCH 2/3] cargo fmt --- src/aggregate.rs | 2 +- src/inodefilter.rs | 5 ++--- src/traverse.rs | 2 +- 3 files changed, 4 insertions(+), 5 deletions(-) diff --git a/src/aggregate.rs b/src/aggregate.rs index b847de5c..28a3ccdc 100644 --- a/src/aggregate.rs +++ b/src/aggregate.rs @@ -1,4 +1,4 @@ -use crate::{WalkOptions, WalkResult, InodeFilter}; +use crate::{InodeFilter, WalkOptions, WalkResult}; use failure::Error; use std::borrow::Cow; use std::{fmt, io, path::Path}; diff --git a/src/inodefilter.rs b/src/inodefilter.rs index 6db12e15..1523f1c2 100644 --- a/src/inodefilter.rs +++ b/src/inodefilter.rs @@ -1,11 +1,10 @@ - #![cfg_attr(windows, feature(windows_by_handle))] use std::collections::HashMap; #[derive(Debug, Default, Clone)] pub struct InodeFilter { - inner: HashMap + inner: HashMap, } impl InodeFilter { @@ -46,7 +45,7 @@ impl InodeFilter { } false - }, + } None => { self.inner.insert(inode, nlinks - 1); true diff --git a/src/traverse.rs b/src/traverse.rs index 6e866c21..13b16230 100644 --- a/src/traverse.rs +++ b/src/traverse.rs @@ -1,4 +1,4 @@ -use crate::{get_size_or_panic, WalkOptions, InodeFilter}; +use crate::{get_size_or_panic, InodeFilter, WalkOptions}; use failure::Error; use petgraph::{graph::NodeIndex, stable_graph::StableGraph, Directed, Direction}; use std::{ffi::OsString, path::PathBuf, time::Duration, time::Instant}; From 93b9e12a1de090d1c07968144f6d21061e6de50a Mon Sep 17 00:00:00 2001 From: Sebastian Thiel Date: Sat, 22 Feb 2020 13:30:50 +0800 Subject: [PATCH 3/3] Remove short-comings from README, as they are not present anymore --- README.md | 2 -- 1 file changed, 2 deletions(-) diff --git a/README.md b/README.md index d818beb2..abf01f0d 100644 --- a/README.md +++ b/README.md @@ -172,10 +172,8 @@ Thanks to [jwalk][jwalk], all there was left to do is to write a command-line in ### Limitations * Interactive mode only looks good in dark terminals (see [this issue](https://github.com/Byron/dua-cli/issues/13)) -* _Hard links_ are not understood, thus hard-linked files will possibly be counted multiple times. * _Symlinks_ are followed and we obtain the logical size of the file they point to. Ideally, we only count their actual size. -* _logical filesize_ is used instead of computed or estimating actual size on disk. * _easy fix_: file names in main window are not truncated if too large. They are cut off on the right. * There are plenty of examples in `tests/fixtures` which don't render correctly in interactive mode. This can be due to graphemes not interpreted correctly. With Chinese characters for instance,