From 28ea1ab758d068faf939111ebdd649e16c42d589 Mon Sep 17 00:00:00 2001
From: dhoegh <dhoegh91@gmail.com>
Date: Wed, 21 Oct 2015 19:37:55 +0200
Subject: [PATCH] Implement walkdir. fixes #8814

---
 base/exports.jl     |   1 +
 base/file.jl        |  60 ++++++++++++++++++++++++++
 doc/stdlib/file.rst |  19 +++++++++
 test/file.jl        | 101 ++++++++++++++++++++++++++++++++++++++++++++
 4 files changed, 181 insertions(+)

diff --git a/base/exports.jl b/base/exports.jl
index af5c8d856e038..b4ed2a7f601f8 100644
--- a/base/exports.jl
+++ b/base/exports.jl
@@ -1298,6 +1298,7 @@ export
     tempname,
     touch,
     uperm,
+    walkdir,
 
 # external processes ## TODO: whittle down these exports.
     detach,
diff --git a/base/file.jl b/base/file.jl
index 94f753d8b93d8..1f7f2aba1a8f7 100644
--- a/base/file.jl
+++ b/base/file.jl
@@ -260,3 +260,63 @@ function readdir(path::AbstractString)
 end
 
 readdir() = readdir(".")
+
+"""
+    walkdir(dir; topdown=true, follow_symlinks=false, onerror=throw)
+
+Return an iterator that walks the directory tree of `dir`. Each iteration yields a tuple `(rootpath, dirs, files)`.
+The tree is traversed top-down by default and bottom-up when `topdown=false`. Symbolic links to directories are listed
+in `dirs` but are only descended into when `follow_symlinks=true`. A `SystemError` raised while reading a directory is
+passed to `onerror`, which throws it by default; if a custom `onerror` returns, that directory is skipped.
+
+    for (root, dirs, files) in walkdir(".")
+        println("Directories in \$root")
+        for dir in dirs
+            println(joinpath(root, dir)) # path to directories
+        end
+        println("Files in \$root")
+        for file in files
+            println(joinpath(root, file)) # path to files
+        end
+    end
+
+"""
+function walkdir(root; topdown=true, follow_symlinks=false, onerror=throw)
+    content = nothing  # assigned inside the try block below
+    try
+        content = readdir(root)  # runs when walkdir is called, not when the returned task runs
+    catch err
+        isa(err, SystemError) || throw(err)  # anything other than a SystemError always propagates
+        onerror(err)  # the default onerror=throw raises the error here
+        # onerror returned: hand back an empty task so this root folder is skipped
+        return Task(()->())
+    end
+    dirs = Array(eltype(content), 0)  # entries for which isdir is true
+    files = Array(eltype(content), 0)  # all remaining entries
+    for name in content
+        if isdir(joinpath(root, name))  # classify each entry by its full path
+            push!(dirs, name)
+        else
+            push!(files, name)
+        end
+    end
+
+    function _it()  # task body: produces this directory and everything produced by its subdirectories
+        if topdown  # top-down: this directory comes before its subdirectories
+            produce(root, dirs, files)
+        end
+        for dir in dirs
+            path = joinpath(root,dir)
+            if follow_symlinks || !islink(path)  # symlinked directories are only entered when follow_symlinks is set
+                for (root_l, dirs_l, files_l) in walkdir(path, topdown=topdown, follow_symlinks=follow_symlinks, onerror=onerror)
+                    produce(root_l, dirs_l, files_l)  # forward every tuple from the nested walk
+                end
+            end
+        end
+        if !topdown  # bottom-up: this directory comes after its subdirectories
+            produce(root, dirs, files)
+        end
+    end
+    Task(_it)  # returned to the caller, who drives it (the tests use consume, the docstring a for loop)
+end
+
diff --git a/doc/stdlib/file.rst b/doc/stdlib/file.rst
index c3be7218ca012..bc3759d950369 100644
--- a/doc/stdlib/file.rst
+++ b/doc/stdlib/file.rst
@@ -28,6 +28,25 @@
 
    Returns the files and directories in the directory ``dir`` (or the current working directory if not given).
 
+.. function:: walkdir(dir; topdown=true, follow_symlinks=false, onerror=throw)
+
+   .. Docstring generated from Julia source
+
+   Return an iterator that walks the directory tree of ``dir``\ . Each iteration yields a tuple ``(rootpath, dirs, files)``\ . The tree is traversed top-down by default and bottom-up when ``topdown=false``\ . Symbolic links to directories are listed in ``dirs`` but are only descended into when ``follow_symlinks=true``\ . A ``SystemError`` raised while reading a directory is passed to ``onerror``\ , which throws it by default; if a custom ``onerror`` returns, that directory is skipped.
+
+   .. code-block:: julia
+
+       for (root, dirs, files) in walkdir(".")
+           println("Directories in $root")
+           for dir in dirs
+               println(joinpath(root, dir)) # path to directories
+           end
+           println("Files in $root")
+           for file in files
+               println(joinpath(root, file)) # path to files
+           end
+       end
+
 .. function:: mkdir(path, [mode])
 
    .. Docstring generated from Julia source
diff --git a/test/file.jl b/test/file.jl
index bd08ef3215f49..a9c17d0433239 100644
--- a/test/file.jl
+++ b/test/file.jl
@@ -855,6 +855,107 @@ end
 @test_throws ArgumentError download("good", "ba\0d")
 @test_throws ArgumentError download("ba\0d", "good")
 
+###################
+#     walkdir     #
+###################
+
+dirwalk = mktempdir()
+cd(dirwalk) do
+    for i=1:2  # build the fixture tree inside the temporary directory
+        mkdir("sub_dir$i")
+        open("file$i", "w") do f end
+
+        mkdir(joinpath("sub_dir1", "subsub_dir$i"))
+        touch(joinpath("sub_dir1", "file$i"))
+    end
+    touch(joinpath("sub_dir2", "file_dir2"))
+    has_symlinks = @unix? true : (Base.windows_version() >= Base.WINDOWS_VISTA_VER)
+    follow_symlink_vec = has_symlinks ? [true, false] : [false]  # follow_symlinks=true is only exercised when a link is created
+    has_symlinks && symlink(abspath("sub_dir2"), joinpath("sub_dir1", "link"))  # sub_dir1/link points at sub_dir2
+    for follow_symlinks in follow_symlink_vec  # top-down traversal (the default)
+        task = walkdir(".", follow_symlinks=follow_symlinks)
+        root, dirs, files = consume(task)
+        @test root == "."
+        @test dirs == ["sub_dir1", "sub_dir2"]
+        @test files == ["file1", "file2"]
+
+        root, dirs, files = consume(task)
+        @test root == joinpath(".", "sub_dir1")
+        @test dirs == (has_symlinks ? ["link", "subsub_dir1", "subsub_dir2"] : ["subsub_dir1", "subsub_dir2"])
+        @test files == ["file1", "file2"]
+
+        root, dirs, files = consume(task)
+        if follow_symlinks  # the linked directory is only visited when links are followed
+            @test root == joinpath(".", "sub_dir1", "link")
+            @test dirs == []
+            @test files == ["file_dir2"]
+            root, dirs, files = consume(task)
+        end
+        for i=1:2
+            @test root == joinpath(".", "sub_dir1", "subsub_dir$i")
+            @test dirs == []
+            @test files == []
+            root, dirs, files = consume(task)
+        end
+
+        @test root == joinpath(".", "sub_dir2")
+        @test dirs == []
+        @test files == ["file_dir2"]
+    end
+
+    for follow_symlinks in follow_symlink_vec  # bottom-up traversal
+        task = walkdir(".", follow_symlinks=follow_symlinks, topdown=false)
+        root, dirs, files = consume(task)
+        if follow_symlinks  # the linked directory is only visited when links are followed
+            @test root == joinpath(".", "sub_dir1", "link")
+            @test dirs == []
+            @test files == ["file_dir2"]
+            root, dirs, files = consume(task)
+        end
+        for i=1:2
+            @test root == joinpath(".", "sub_dir1", "subsub_dir$i")
+            @test dirs == []
+            @test files == []
+            root, dirs, files = consume(task)
+        end
+        @test root == joinpath(".", "sub_dir1")
+        @test dirs ==  (has_symlinks ? ["link", "subsub_dir1", "subsub_dir2"] : ["subsub_dir1", "subsub_dir2"])
+        @test files == ["file1", "file2"]
+
+        root, dirs, files = consume(task)
+        @test root == joinpath(".", "sub_dir2")
+        @test dirs == []
+        @test files == ["file_dir2"]
+
+        root, dirs, files = consume(task)
+        @test root == "."
+        @test dirs == ["sub_dir1", "sub_dir2"]
+        @test files == ["file1", "file2"]
+    end
+    # Error handling: both walks are created while sub_dir1 still exists
+    task_error = walkdir(".")
+    task_noerror = walkdir(".", onerror=x->x)  # onerror that returns instead of throwing
+    root, dirs, files = consume(task_error)
+    @test root == "."
+    @test dirs == ["sub_dir1", "sub_dir2"]
+    @test files == ["file1", "file2"]
+
+    rm(joinpath("sub_dir1"), recursive=true)
+    @test_throws SystemError consume(task_error) # throws because sub_dir1 no longer exists
+
+    root, dirs, files = consume(task_noerror)
+    @test root == "."
+    @test dirs == ["sub_dir1", "sub_dir2"]
+    @test files == ["file1", "file2"]
+
+    root, dirs, files = consume(task_noerror) # skips sub_dir1 because it no longer exists
+    @test root == joinpath(".", "sub_dir2")
+    @test dirs == []
+    @test files == ["file_dir2"]
+
+end
+rm(dirwalk, recursive=true)
+
 ############
 # Clean up #
 ############