diff --git a/mlar/Cargo.toml b/mlar/Cargo.toml index aafd5fc..afe3d85 100644 --- a/mlar/Cargo.toml +++ b/mlar/Cargo.toml @@ -27,6 +27,7 @@ zeroize = { version = "1", default-features = false} # Could be made optional / feature to enable (for binary size) tar = "0.4" rand_chacha = "0.3" +lru = "0" [dev-dependencies] assert_cmd = "2.0" diff --git a/mlar/src/main.rs b/mlar/src/main.rs index 31aea1b..240eee3 100644 --- a/mlar/src/main.rs +++ b/mlar/src/main.rs @@ -5,6 +5,7 @@ use curve25519_parser::{ use glob::Pattern; use hkdf::Hkdf; use humansize::{FormatSize, DECIMAL}; +use lru::LruCache; use mla::config::{ArchiveReaderConfig, ArchiveWriterConfig}; use mla::errors::{Error, FailSafeReadError}; use mla::helpers::linear_extract; @@ -25,7 +26,9 @@ use std::fmt; use std::fs::{self, read_dir, File}; use std::io::{self, BufRead}; use std::io::{Read, Seek, Write}; +use std::num::NonZeroUsize; use std::path::{Component, Path, PathBuf}; +use std::sync::Mutex; use tar::{Builder, Header}; use zeroize::Zeroize; @@ -443,19 +446,32 @@ fn create_file>( /// /// This wrapper is used to avoid opening all files simultaneously, potentially /// reaching the filesystem limit, but rather appending to file on-demand -/// This could be enhanced with a limited pool of active file, but this -/// optimisation doesn't seems necessary for now -struct FileWriter { +/// +/// A limited pool of active file, in a LRU cache, is used to avoid too many open-close +struct FileWriter<'a> { /// Target file for data appending path: PathBuf, + /// Reference on the cache + // A `Mutex` is used instead of a `RefCell` as `FileWriter` can be `Send` + cache: &'a Mutex>, } -impl Write for FileWriter { +/// Max number of fd simultaneously opened +pub const FILE_WRITER_POOL_SIZE: usize = 1000; + +impl<'a> Write for FileWriter<'a> { fn write(&mut self, buf: &[u8]) -> io::Result { - fs::OpenOptions::new() - .append(true) - .open(&self.path)? - .write(buf) + // Only one thread is using the FileWriter, safe to `.unwrap()` + let mut cache = self.cache.lock().unwrap(); + if !cache.contains(&self.path) { + let file = fs::OpenOptions::new().append(true).open(&self.path)?; + cache.put(self.path.clone(), file); + } + // Safe to `unwrap` here cause we ensure the element is in the cache (mono-threaded) + let file = cache.get_mut(&self.path).unwrap(); + file.write(buf) + + // `file` will be closed on deletion from the cache } fn flush(&mut self) -> io::Result<()> { @@ -575,11 +591,20 @@ fn extract(matches: &ArgMatches) -> Result<(), MlarError> { if verbose { println!("Extracting the whole archive using a linear extraction"); } + let cache = Mutex::new(LruCache::new( + NonZeroUsize::new(FILE_WRITER_POOL_SIZE).unwrap(), + )); let mut export: HashMap<&String, FileWriter> = HashMap::new(); for fname in &iter { match create_file(&output_dir, fname)? { Some((_file, path)) => { - export.insert(fname, FileWriter { path }); + export.insert( + fname, + FileWriter { + path, + cache: &cache, + }, + ); } None => continue, } diff --git a/mlar/tests/integration.rs b/mlar/tests/integration.rs index 0058998..9d37ddd 100644 --- a/mlar/tests/integration.rs +++ b/mlar/tests/integration.rs @@ -1248,3 +1248,130 @@ fn test_no_open_on_encrypt() { let assert = cmd.assert(); assert.failure(); } + +// This value should be bigger than FILE_WRITER_POOL_SIZE +const TEST_MANY_FILES_NB: usize = 2000; + +#[test] +fn test_extract_lot_files() { + let mlar_file = NamedTempFile::new("output.mla").unwrap(); + let mut rng: StdRng = SeedableRng::from_seed([0u8; 32]); + let mut files_archive_order = vec![]; + let mut files = vec![]; + const SIZE_FILE: usize = 10; + + // Create many files, filled with a few alphanumeric characters + for i in 1..TEST_MANY_FILES_NB { + let tmp_file = NamedTempFile::new(format!("file{}.bin", i)).unwrap(); + let data: Vec = Alphanumeric.sample_iter(&mut rng).take(SIZE_FILE).collect(); + tmp_file.write_binary(data.as_slice()).unwrap(); + + files_archive_order.push(tmp_file.path().to_path_buf()); + files.push(tmp_file); + } + + files.sort_by(|i1, i2| Ord::cmp(&i1.path(), &i2.path())); + + let mut testfs = TestFS { + files, + files_archive_order, + }; + + // `mlar create -l -o output.mla - + let mut cmd = Command::cargo_bin(UTIL).unwrap(); + cmd.arg("create") + .arg("-l") + .arg("-o") + .arg(mlar_file.path()) + .arg("-"); + + // Use "-" to avoid large command line (Windows limitation is about 8191 char) + let mut file_list = String::new(); + for file in &testfs.files { + file_list.push_str(format!("{}\n", file.path().to_string_lossy()).as_str()); + } + cmd.write_stdin(String::from(&file_list)); + + println!("{:?}", cmd); + let assert = cmd.assert(); + assert.success().stderr(String::from(&file_list)); + + let mut file_list = String::new(); + for file in &testfs.files { + file_list.push_str(format!("{}\n", file.path().to_string_lossy()).as_str()); + } + + // Test global (with all files) + + // `mlar extract -v -i output.mla -o ouput_dir -g '*'` + let output_dir = TempDir::new().unwrap(); + let mut cmd = Command::cargo_bin(UTIL).unwrap(); + cmd.arg("extract") + .arg("-v") + .arg("-i") + .arg(mlar_file.path()) + .arg("-o") + .arg(output_dir.path()) + .arg("-g") + .arg("*"); + + println!("{:?}", cmd); + let assert = cmd.assert(); + assert.success().stdout(file_list); + + ensure_directory_content(output_dir.path(), &testfs.files); + + // Test linear extraction of all files + + // `mlar extract -v -i output.mla -o ouput_dir` + let output_dir = TempDir::new().unwrap(); + let mut cmd = Command::cargo_bin(UTIL).unwrap(); + cmd.arg("extract") + .arg("-v") + .arg("-i") + .arg(mlar_file.path()) + .arg("-o") + .arg(output_dir.path()); + + println!("{:?}", cmd); + let assert = cmd.assert(); + assert + .success() + .stdout("Extracting the whole archive using a linear extraction\n"); + + ensure_directory_content(output_dir.path(), &testfs.files); + + // Test extraction of one file explicitly + // `mlar extract -v -i output.mla -o ouput_dir file1` + let one_filename = &testfs.files_archive_order[0]; + let mut one_file = Vec::new(); + loop { + match testfs.files.pop() { + None => { + break; + } + Some(ntf) => { + if ntf.path() == one_filename { + one_file.push(ntf); + } + } + } + } + let output_dir = TempDir::new().unwrap(); + let mut cmd = Command::cargo_bin(UTIL).unwrap(); + cmd.arg("extract") + .arg("-v") + .arg("-i") + .arg(mlar_file.path()) + .arg("-o") + .arg(output_dir.path()) + .arg(one_filename); + + println!("{:?}", cmd); + let assert = cmd.assert(); + assert + .success() + .stdout(format!("{}\n", one_filename.to_string_lossy())); + + ensure_directory_content(output_dir.path(), &one_file); +}