From fb0fdeb277d853fe2c1f4a7f13f26bf3efd15ab1 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Carbon=20=E7=A2=B3=E8=8B=AF?= <2779066456@qq.com> Date: Sun, 7 Dec 2025 23:36:40 +0800 Subject: [PATCH 1/2] =?UTF-8?q?feat:=20=F0=9F=98=84=20align=20API=20&=20op?= =?UTF-8?q?timize=20readdir=20speed?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- __test__/readdir.spec.ts | 46 ++++++++-- benchmark/readdir.ts | 15 +++- index.d.ts | 9 +- src/lib.rs | 4 +- src/read_dir.rs | 108 ------------------------ src/readdir.rs | 176 +++++++++++++++++++++++++++++++++++++++ 6 files changed, 235 insertions(+), 123 deletions(-) delete mode 100644 src/read_dir.rs create mode 100644 src/readdir.rs diff --git a/__test__/readdir.spec.ts b/__test__/readdir.spec.ts index 1466e85..b4206a2 100644 --- a/__test__/readdir.spec.ts +++ b/__test__/readdir.spec.ts @@ -1,20 +1,44 @@ import test from 'ava' import { readdirSync, readdir } from '../index.js' -test('sync: should list files in current directory', (t) => { +test('sync: should list files in current directory (strings by default)', (t) => { const files = readdirSync('.') t.true(Array.isArray(files)) t.true(files.length > 0) + // Verify it returns strings + t.is(typeof files[0], 'string') + + const packageJson = files.find((f) => f === 'package.json') + t.truthy(packageJson, 'Result should contain package.json') +}) + +test('sync: should return Dirent objects when withFileTypes is true', (t) => { + const files = readdirSync('.', { withFileTypes: true }) + + t.true(Array.isArray(files)) + t.true(files.length > 0) + // Verify Dirent structure - const packageJson = files.find((f) => f.name === 'package.json') + // We need to cast or check type because typescript might infer union type + const first = files[0] + if (typeof first === 'object') { + t.is(typeof first.name, 'string') + t.is(typeof first.isDir, 'boolean') + } else { + t.fail('Should return objects when withFileTypes is true') + } + + const packageJson = files.find((f) => typeof f !== 'string' && f.name === 'package.json') t.truthy(packageJson, 'Result should contain package.json') - t.is(packageJson?.isDir, false) - t.true(packageJson?.path.includes('package.json')) - const srcDir = files.find((f) => f.name === 'src') - if (srcDir) { + if (typeof packageJson !== 'string' && packageJson) { + t.is(packageJson.isDir, false) + } + + const srcDir = files.find((f) => typeof f !== 'string' && f.name === 'src') + if (srcDir && typeof srcDir !== 'string') { t.is(srcDir.isDir, true, 'src should be identified as a directory') } }) @@ -22,12 +46,14 @@ test('sync: should list files in current directory', (t) => { test('async: should list files in current directory', async (t) => { const files = await readdir('.') t.true(files.length > 0) - t.truthy(files.find((f) => f.name === 'package.json')) + t.is(typeof files[0], 'string') + t.truthy(files.find((f) => f === 'package.json')) }) test('concurrency: run with specific thread count', (t) => { const files = readdirSync('.', { concurrency: 4, + recursive: true, // concurrency only works with recursive/walk_dir }) t.true(files.length > 0) }) @@ -35,6 +61,7 @@ test('concurrency: run with specific thread count', (t) => { test('concurrency: run with high thread count (stress test)', (t) => { const files = readdirSync('.', { concurrency: 100, + recursive: true, }) t.true(files.length > 0) }) @@ -44,11 +71,12 @@ test('options: skip_hidden should filter out dotfiles', (t) => { // but based on your rust code, default is false) const allFiles = readdirSync('.', { skipHidden: false }) // Assuming this repo has a .git folder or similar - const hasHidden = allFiles.some((f) => f.name.startsWith('.')) + // files are strings now + const hasHidden = allFiles.some((f) => (typeof f === 'string' ? f : f.name).startsWith('.')) if (hasHidden) { const visibleFiles = readdirSync('.', { skipHidden: true }) - const hiddenRemains = visibleFiles.some((f) => f.name.startsWith('.')) + const hiddenRemains = visibleFiles.some((f) => (typeof f === 'string' ? f : f.name).startsWith('.')) t.false(hiddenRemains, 'Should not contain hidden files when skip_hidden is true') } else { t.pass('No hidden files found in root to test skipping') diff --git a/benchmark/readdir.ts b/benchmark/readdir.ts index 9d09421..f2e568c 100644 --- a/benchmark/readdir.ts +++ b/benchmark/readdir.ts @@ -11,18 +11,29 @@ console.log(`Benchmarking readdir on: ${dir}`) bench .add('Node.js fs.readdirSync', () => { + fs.readdirSync(dir) + }) + .add('Node.js fs.readdirSync (withFileTypes)', () => { fs.readdirSync(dir, { withFileTypes: true }) }) - .add('Node.js fs.readdirSync (recursive)', () => { + .add('Node.js fs.readdirSync (recursive, withFileTypes)', () => { fs.readdirSync(dir, { recursive: true, withFileTypes: true }) }) .add('hyper-fs readdirSync (default)', () => { readdirSync(dir) }) + .add('hyper-fs readdirSync (2 threads)', () => { + readdirSync(dir, { concurrency: 2 }) + }) + .add('hyper-fs readdirSync (2 threads, recursive)', () => { + readdirSync(dir, { concurrency: 2, recursive: true }) + }) .add('hyper-fs readdirSync (4 threads)', () => { readdirSync(dir, { concurrency: 4 }) }) - + .add('hyper-fs readdirSync (4 threads, recursive)', () => { + readdirSync(dir, { concurrency: 4, recursive: true }) + }) await bench.run() console.table(bench.table()) diff --git a/index.d.ts b/index.d.ts index 82f1276..b56ade1 100644 --- a/index.d.ts +++ b/index.d.ts @@ -2,7 +2,7 @@ /* eslint-disable */ export interface Dirent { name: string - path: string + parentPath: string isDir: boolean } @@ -11,6 +11,11 @@ export declare function readdir(path: string, options?: ReaddirOptions | undefin export interface ReaddirOptions { skipHidden?: boolean concurrency?: number + recursive?: boolean + withFileTypes?: boolean } -export declare function readdirSync(path: string, options?: ReaddirOptions | undefined | null): Array +export declare function readdirSync( + path: string, + options?: ReaddirOptions | undefined | null, +): Array | Array diff --git a/src/lib.rs b/src/lib.rs index 3fee200..d055b2c 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -10,7 +10,7 @@ #![deny(clippy::all)] // define modules -pub mod read_dir; +pub mod readdir; //export modules -pub use read_dir::*; +pub use readdir::*; diff --git a/src/read_dir.rs b/src/read_dir.rs deleted file mode 100644 index 3413a12..0000000 --- a/src/read_dir.rs +++ /dev/null @@ -1,108 +0,0 @@ -use jwalk::{Parallelism, WalkDir}; -use napi::bindgen_prelude::*; -use napi::Task; -use napi_derive::napi; -use std::path::Path; - -// basic usage -// ls('./node_modules') - -// advanced usage -// readdirSync('./src', { -// recursive: true, -// concurrency: 8, -// ignore: ['.git'], -// returnType: 'Tree' -// }); - -#[napi(object)] -#[derive(Clone)] -pub struct ReaddirOptions { - pub skip_hidden: Option, - pub concurrency: Option, -} - -#[napi(object)] // Similar to fs.Dirent -#[derive(Clone)] -pub struct Dirent { - pub name: String, - pub path: String, - pub is_dir: bool, -} - -// #[napi] // marco: expose the function to Node -fn ls(path: String, options: Option) -> Result> { - if !Path::new(&path).exists() { - return Err(Error::from_reason(format!( - "ENOENT: no such file or directory, scandir '{}'", - path - ))); - } - - let opts = options.unwrap_or(ReaddirOptions { - skip_hidden: Some(false), - concurrency: None, - }); - let skip_hidden = opts.skip_hidden.unwrap_or(false); - let root_path = if path.is_empty() { - Path::new(".") - } else { - Path::new(&path) - }; - let walk_dir = WalkDir::new(root_path) - .skip_hidden(skip_hidden) - .parallelism(match opts.concurrency { - Some(n) => Parallelism::RayonNewPool(n as usize), - None => Parallelism::RayonNewPool(0), - }); - - // TODO: maybe we'd better limit the max number of threads? - - let result = walk_dir - .into_iter() - .filter_map(|e| e.ok()) - .filter(|e| { - if skip_hidden { - !e.file_name().to_string_lossy().starts_with('.') - } else { - true - } - }) - .map(|e| Dirent { - name: e.file_name().to_string_lossy().to_string(), - path: e.path().to_string_lossy().to_string(), - is_dir: e.file_type().is_dir(), - }) - .collect(); - - Ok(result) -} - -#[napi(js_name = "readdirSync")] -pub fn readdir_sync(path: String, options: Option) -> Result> { - ls(path, options) -} - -// ========= async version ========= - -pub struct ReaddirTask { - pub path: String, - pub options: Option, -} - -impl Task for ReaddirTask { - type Output = Vec; - type JsValue = Vec; - - fn compute(&mut self) -> Result { - ls(self.path.clone(), self.options.clone()) - } - fn resolve(&mut self, _env: Env, output: Self::Output) -> Result { - Ok(output) - } -} - -#[napi(js_name = "readdir")] -pub fn readdir(path: String, options: Option) -> AsyncTask { - AsyncTask::new(ReaddirTask { path, options }) -} diff --git a/src/readdir.rs b/src/readdir.rs new file mode 100644 index 0000000..47d2745 --- /dev/null +++ b/src/readdir.rs @@ -0,0 +1,176 @@ +use jwalk::{Parallelism, WalkDir}; +use napi::bindgen_prelude::*; +use napi::Task; +use napi_derive::napi; +use std::fs; +use std::path::Path; + +// basic usage +// ls('./node_modules') + +// advanced usage +// readdirSync('./src', { +// recursive: true, +// concurrency: 8, +// ignore: ['.git'], +// }); + +#[napi(object)] +#[derive(Clone)] +pub struct ReaddirOptions { + pub skip_hidden: Option, + pub concurrency: Option, + pub recursive: Option, + pub with_file_types: Option, +} + +#[napi(object)] // Similar to fs.Dirent +#[derive(Clone)] +pub struct Dirent { + pub name: String, + pub parent_path: String, + pub is_dir: bool, +} + +// #[napi] // marco: expose the function to Node +fn ls( + path_str: String, + options: Option, +) -> Result, Vec>> { + let search_path_str = if path_str.is_empty() { "." } else { &path_str }; + let path = Path::new(search_path_str); + if !Path::new(&path).exists() { + return Err(Error::from_reason(format!( + "ENOENT: no such file or directory, scandir '{}'", + path.to_string_lossy() + ))); + } + let opts = options.unwrap_or(ReaddirOptions { + skip_hidden: Some(false), + concurrency: None, + recursive: Some(false), + with_file_types: Some(false), + }); + + let skip_hidden = opts.skip_hidden.unwrap_or(false); + let recursive = opts.recursive.unwrap_or(false); + let with_file_types = opts.with_file_types.unwrap_or(false); + + if !recursive { + let parent_path_val = search_path_str.to_string(); + let entries = fs::read_dir(path).map_err(|e| Error::from_reason(e.to_string()))?; + + if with_file_types { + let mut result = Vec::with_capacity(64); + for entry in entries { + let entry = entry.map_err(|e| Error::from_reason(e.to_string()))?; + let file_name = entry.file_name(); + let name_str = file_name.to_string_lossy(); + if skip_hidden && name_str.starts_with('.') { + continue; + } + result.push(Dirent { + name: name_str.to_string(), + parent_path: parent_path_val.clone(), + is_dir: entry.file_type().map(|t| t.is_dir()).unwrap_or(false), + }); + } + return Ok(Either::B(result)); + } else { + let mut result = Vec::with_capacity(64); + for entry in entries { + let entry = entry.map_err(|e| Error::from_reason(e.to_string()))?; + let file_name = entry.file_name(); + let name_str = file_name.to_string_lossy(); + if skip_hidden && name_str.starts_with('.') { + continue; + } + result.push(name_str.to_string()); + } + return Ok(Either::A(result)); + } + } + + let walk_dir = WalkDir::new(path) + .skip_hidden(skip_hidden) + .parallelism(match opts.concurrency { + Some(n) => Parallelism::RayonNewPool(n as usize), + None => Parallelism::RayonNewPool(0), + }); + + // TODO: maybe we'd better limit the max number of threads? + + if with_file_types { + let result = walk_dir + .into_iter() + .filter_map(|e| e.ok()) + .filter(|e| e.depth() > 0) + .map(|e| { + let p = e.path(); + let parent = p + .parent() + .unwrap_or(Path::new("")) + .to_string_lossy() + .to_string(); + + Dirent { + name: e.file_name().to_string_lossy().to_string(), + parent_path: parent, + is_dir: e.file_type().is_dir(), + } + }) + .collect(); + Ok(Either::B(result)) + } else { + // When recursive is true and withFileTypes is false, Node.js returns relative paths. + // jwalk entries have full paths. + // We need to strip the root path. + let root = path; + let result = walk_dir + .into_iter() + .filter_map(|e| e.ok()) + .filter(|e| e.depth() > 0) + .map(|e| { + // Get path relative to root + let p = e.path(); + match p.strip_prefix(root) { + Ok(relative) => relative.to_string_lossy().to_string(), + Err(_) => e.file_name().to_string_lossy().to_string(), // Fallback + } + }) + .collect(); + Ok(Either::A(result)) + } +} + +#[napi(js_name = "readdirSync")] +pub fn readdir_sync( + path: String, + options: Option, +) -> Result, Vec>> { + ls(path, options) +} + +// ========= async version ========= + +pub struct ReaddirTask { + pub path: String, + pub options: Option, +} + +impl Task for ReaddirTask { + type Output = Either, Vec>; + type JsValue = Either, Vec>; + + fn compute(&mut self) -> Result { + ls(self.path.clone(), self.options.clone()) + } + fn resolve(&mut self, _env: Env, output: Self::Output) -> Result { + Ok(output) + } +} + +#[napi(js_name = "readdir")] +pub fn readdir(path: String, options: Option) -> AsyncTask { + AsyncTask::new(ReaddirTask { path, options }) +} From 78010bc842de3c7f3c6a12f3d0d2c56b29b0e99a Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Carbon=20=E7=A2=B3=E8=8B=AF?= <2779066456@qq.com> Date: Mon, 8 Dec 2025 00:15:10 +0800 Subject: [PATCH 2/2] feat: modify test and bench file --- benchmark/readdir.ts | 15 ++++++++----- src/readdir.rs | 53 ++++++++++++++++++++++++-------------------- 2 files changed, 38 insertions(+), 30 deletions(-) diff --git a/benchmark/readdir.ts b/benchmark/readdir.ts index f2e568c..26797dc 100644 --- a/benchmark/readdir.ts +++ b/benchmark/readdir.ts @@ -22,18 +22,21 @@ bench .add('hyper-fs readdirSync (default)', () => { readdirSync(dir) }) - .add('hyper-fs readdirSync (2 threads)', () => { - readdirSync(dir, { concurrency: 2 }) + .add('hyper-fs readdirSync (withFileTypes)', () => { + readdirSync(dir, { withFileTypes: true }) }) - .add('hyper-fs readdirSync (2 threads, recursive)', () => { - readdirSync(dir, { concurrency: 2, recursive: true }) + .add('hyper-fs readdirSync (recursive)', () => { + readdirSync(dir, { recursive: true }) }) - .add('hyper-fs readdirSync (4 threads)', () => { - readdirSync(dir, { concurrency: 4 }) + .add('hyper-fs readdirSync (recursive, withFileTypes)', () => { + readdirSync(dir, { recursive: true, withFileTypes: true }) }) .add('hyper-fs readdirSync (4 threads, recursive)', () => { readdirSync(dir, { concurrency: 4, recursive: true }) }) + .add('hyper-fs readdirSync (4 threads, recursive, withFileTypes)', () => { + readdirSync(dir, { concurrency: 4, recursive: true, withFileTypes: true }) + }) await bench.run() console.table(bench.table()) diff --git a/src/readdir.rs b/src/readdir.rs index 47d2745..2c9722a 100644 --- a/src/readdir.rs +++ b/src/readdir.rs @@ -60,34 +60,40 @@ fn ls( let parent_path_val = search_path_str.to_string(); let entries = fs::read_dir(path).map_err(|e| Error::from_reason(e.to_string()))?; - if with_file_types { - let mut result = Vec::with_capacity(64); - for entry in entries { - let entry = entry.map_err(|e| Error::from_reason(e.to_string()))?; - let file_name = entry.file_name(); - let name_str = file_name.to_string_lossy(); - if skip_hidden && name_str.starts_with('.') { - continue; - } - result.push(Dirent { + let mut result_files = if with_file_types { + None + } else { + Some(Vec::with_capacity(64)) + }; + let mut result_dirents = if with_file_types { + Some(Vec::with_capacity(64)) + } else { + None + }; + + for entry in entries { + let entry = entry.map_err(|e| Error::from_reason(e.to_string()))?; + let file_name = entry.file_name(); + let name_str = file_name.to_string_lossy(); + if skip_hidden && name_str.starts_with('.') { + continue; + } + + if let Some(ref mut list) = result_dirents { + list.push(Dirent { name: name_str.to_string(), parent_path: parent_path_val.clone(), is_dir: entry.file_type().map(|t| t.is_dir()).unwrap_or(false), }); + } else if let Some(ref mut list) = result_files { + list.push(name_str.to_string()); } - return Ok(Either::B(result)); + } + + if with_file_types { + return Ok(Either::B(result_dirents.unwrap())); } else { - let mut result = Vec::with_capacity(64); - for entry in entries { - let entry = entry.map_err(|e| Error::from_reason(e.to_string()))?; - let file_name = entry.file_name(); - let name_str = file_name.to_string_lossy(); - if skip_hidden && name_str.starts_with('.') { - continue; - } - result.push(name_str.to_string()); - } - return Ok(Either::A(result)); + return Ok(Either::A(result_files.unwrap())); } } @@ -123,8 +129,7 @@ fn ls( Ok(Either::B(result)) } else { // When recursive is true and withFileTypes is false, Node.js returns relative paths. - // jwalk entries have full paths. - // We need to strip the root path. + // But jwalk entries have full paths, We need to strip the root path. let root = path; let result = walk_dir .into_iter()