From 28ea1ab758d068faf939111ebdd649e16c42d589 Mon Sep 17 00:00:00 2001 From: dhoegh Date: Wed, 21 Oct 2015 19:37:55 +0200 Subject: [PATCH] Implement walkdir. fixes #8814 --- base/exports.jl | 1 + base/file.jl | 60 ++++++++++++++++++++++++++ doc/stdlib/file.rst | 19 +++++++++ test/file.jl | 101 ++++++++++++++++++++++++++++++++++++++++++++ 4 files changed, 181 insertions(+) diff --git a/base/exports.jl b/base/exports.jl index af5c8d856e038..b4ed2a7f601f8 100644 --- a/base/exports.jl +++ b/base/exports.jl @@ -1298,6 +1298,7 @@ export tempname, touch, uperm, + walkdir, # external processes ## TODO: whittle down these exports. detach, diff --git a/base/file.jl b/base/file.jl index 94f753d8b93d8..1f7f2aba1a8f7 100644 --- a/base/file.jl +++ b/base/file.jl @@ -260,3 +260,63 @@ function readdir(path::AbstractString) end readdir() = readdir(".") + +""" + walkdir(dir; topdown=true, follow_symlinks=false, onerror=throw) + +The walkdir method returns an iterator that walks the directory tree of a directory. The iterator returns a tuple containing +`(rootpath, dirs, files)`. The directory tree can be traversed top-down or bottom-up. If walkdir encounters a SystemError +it will raise the error. A custom error handling function can be provided through the `onerror` keyword argument; the function +is called with a SystemError as argument. 
+ + for (root, dirs, files) in walkdir(".") + println("Directories in \$root") + for dir in dirs + println(joinpath(root, dir)) # path to directories + end + println("Files in \$root") + for file in files + println(joinpath(root, file)) # path to files + end + end + +""" +function walkdir(root; topdown=true, follow_symlinks=false, onerror=throw) + content = nothing + try + content = readdir(root) + catch err + isa(err, SystemError) || throw(err) + onerror(err) + #Need to return an empty task to skip the current root folder + return Task(()->()) + end + dirs = Array(eltype(content), 0) + files = Array(eltype(content), 0) + for name in content + if isdir(joinpath(root, name)) + push!(dirs, name) + else + push!(files, name) + end + end + + function _it() + if topdown + produce(root, dirs, files) + end + for dir in dirs + path = joinpath(root,dir) + if follow_symlinks || !islink(path) + for (root_l, dirs_l, files_l) in walkdir(path, topdown=topdown, follow_symlinks=follow_symlinks, onerror=onerror) + produce(root_l, dirs_l, files_l) + end + end + end + if !topdown + produce(root, dirs, files) + end + end + Task(_it) +end + diff --git a/doc/stdlib/file.rst b/doc/stdlib/file.rst index c3be7218ca012..bc3759d950369 100644 --- a/doc/stdlib/file.rst +++ b/doc/stdlib/file.rst @@ -28,6 +28,25 @@ Returns the files and directories in the directory ``dir`` (or the current working directory if not given). +.. function:: walkdir(dir; topdown=true, follow_symlinks=false, onerror=throw) + + .. Docstring generated from Julia source + + The walkdir method returns an iterator that walks the directory tree of a directory. The iterator returns a tuple containing ``(rootpath, dirs, files)``\ . The directory tree can be traversed top-down or bottom-up. If walkdir encounters a SystemError it will raise the error. A custom error handling function can be provided through the ``onerror`` keyword argument; the function is called with a SystemError as argument. + + .. 
code-block:: julia + + for (root, dirs, files) in walkdir(".") + println("Directories in $root") + for dir in dirs + println(joinpath(root, dir)) # path to directories + end + println("Files in $root") + for file in files + println(joinpath(root, file)) # path to files + end + end + .. function:: mkdir(path, [mode]) .. Docstring generated from Julia source diff --git a/test/file.jl b/test/file.jl index bd08ef3215f49..a9c17d0433239 100644 --- a/test/file.jl +++ b/test/file.jl @@ -855,6 +855,107 @@ end @test_throws ArgumentError download("good", "ba\0d") @test_throws ArgumentError download("ba\0d", "good") +################### +# walkdir # +################### + +dirwalk = mktempdir() +cd(dirwalk) do + for i=1:2 + mkdir("sub_dir$i") + open("file$i", "w") do f end + + mkdir(joinpath("sub_dir1", "subsub_dir$i")) + touch(joinpath("sub_dir1", "file$i")) + end + touch(joinpath("sub_dir2", "file_dir2")) + has_symlinks = @unix? true : (Base.windows_version() >= Base.WINDOWS_VISTA_VER) + follow_symlink_vec = has_symlinks ? [true, false] : [false] + has_symlinks && symlink(abspath("sub_dir2"), joinpath("sub_dir1", "link")) + for follow_symlinks in follow_symlink_vec + task = walkdir(".", follow_symlinks=follow_symlinks) + root, dirs, files = consume(task) + @test root == "." + @test dirs == ["sub_dir1", "sub_dir2"] + @test files == ["file1", "file2"] + + root, dirs, files = consume(task) + @test root == joinpath(".", "sub_dir1") + @test dirs == (has_symlinks ? 
["link", "subsub_dir1", "subsub_dir2"] : ["subsub_dir1", "subsub_dir2"]) + @test files == ["file1", "file2"] + + root, dirs, files = consume(task) + if follow_symlinks + @test root == joinpath(".", "sub_dir1", "link") + @test dirs == [] + @test files == ["file_dir2"] + root, dirs, files = consume(task) + end + for i=1:2 + @test root == joinpath(".", "sub_dir1", "subsub_dir$i") + @test dirs == [] + @test files == [] + root, dirs, files = consume(task) + end + + @test root == joinpath(".", "sub_dir2") + @test dirs == [] + @test files == ["file_dir2"] + end + + for follow_symlinks in follow_symlink_vec + task = walkdir(".", follow_symlinks=follow_symlinks, topdown=false) + root, dirs, files = consume(task) + if follow_symlinks + @test root == joinpath(".", "sub_dir1", "link") + @test dirs == [] + @test files == ["file_dir2"] + root, dirs, files = consume(task) + end + for i=1:2 + @test root == joinpath(".", "sub_dir1", "subsub_dir$i") + @test dirs == [] + @test files == [] + root, dirs, files = consume(task) + end + @test root == joinpath(".", "sub_dir1") + @test dirs == (has_symlinks ? ["link", "subsub_dir1", "subsub_dir2"] : ["subsub_dir1", "subsub_dir2"]) + @test files == ["file1", "file2"] + + root, dirs, files = consume(task) + @test root == joinpath(".", "sub_dir2") + @test dirs == [] + @test files == ["file_dir2"] + + root, dirs, files = consume(task) + @test root == "." + @test dirs == ["sub_dir1", "sub_dir2"] + @test files == ["file1", "file2"] + end + #test of error handling + task_error = walkdir(".") + task_noerror = walkdir(".", onerror=x->x) + root, dirs, files = consume(task_error) + @test root == "." + @test dirs == ["sub_dir1", "sub_dir2"] + @test files == ["file1", "file2"] + + rm(joinpath("sub_dir1"), recursive=true) + @test_throws SystemError consume(task_error) # throws an error because sub_dir1 do not exist + + root, dirs, files = consume(task_noerror) + @test root == "." 
+ @test dirs == ["sub_dir1", "sub_dir2"] + @test files == ["file1", "file2"] + + root, dirs, files = consume(task_noerror) # skips sub_dir1 as it no longer exists + @test root == joinpath(".", "sub_dir2") + @test dirs == [] + @test files == ["file_dir2"] + +end +rm(dirwalk, recursive=true) + ############ # Clean up # ############