refactor load_function kwarg for collect_results

The purpose of this commit is to incorporate comments from the pull request. It removes the anonymous function wrapper from the `load_function` defaults, except in the JLD2 method of `to_data_row`, which requires the default to be an anonymous function so that the "r" parameter can be fixed in the call. It also reverts all the autoformatting. As before, a test was added to `update_results_tests.jl`. All tests passed, 589 of 589.
NuclearPowerNerd · Aug 21, 2024 · 6e6ff07 · 6e6ff07
1 parent 71dd90b
commit 6e6ff07
Show file tree

Hide file tree

Showing 2 changed files with 91 additions and 84 deletions.
diff --git a/src/result_collection.jl b/src/result_collection.jl
@@ -50,7 +50,7 @@ See also [`collect_results`](@ref).
 * `black_list = [:gitcommit, :gitpatch, :script]`: List of keys not to include from result-file.
 * `special_list = []`: List of additional (derived) key-value pairs
   to put in `df` as explained below.
-* `load_function = (filename) -> wload(filename)`: function for loading data from file. This is useful in the event you have data saved as a struct. When loaded from file it will be as a one-element `Dict` which is not what you want passed to the dataframe. Instead, you'd rather have the fields of the struct to be available as columns of the dataframe. In that case you can use this function to ensure the struct is converted to a dict before being processed by `collect_results!`. For example, `load_function = (filename) -> struct2dict(wload(filename)["my_key"])`.
+* `load_function = wload`: Function used to load the data from each file. Defaults to `wload`. You may want to specify a custom load function, for example, if you store results as a struct and want the fields of the struct to form the columns of the dataframe. The struct is saved to file as a one-element dictionary, so by default the dataframe would only have a single column. To work around this, you can convert the struct to a dictionary by specifying `load_function = (filename) -> struct2dict(wload(filename)["mykey"])`. This way `collect_results` will receive a `Dict` whose keys are the fields of the struct.
 
 `special_list` is a `Vector` where each entry
 is a derived quantity to be included in `df`. There are two types of entries.
@@ -84,15 +84,15 @@ Base.showerror(io::IO, e::InvalidResultsCollection) = print(io, e.msg)
 
 
 function collect_results!(filename, folder;
-    valid_filetypes=[".bson", "jld", ".jld2"],
-    subfolders=false,
-    rpath=nothing,
-    verbose=true,
-    update=false,
-    newfile=false, # keyword only for defining collect_results without !
-    rinclude=[r""],
-    rexclude=[r"^\b$"],
-    load_function=(filename) -> wload(filename),
+    valid_filetypes = [".bson", "jld", ".jld2"],
+    subfolders = false,
+    rpath = nothing,
+    verbose = true,
+    update = false,
+    newfile = false, # keyword only for defining collect_results without !
+    rinclude = [r""],
+    rexclude = [r"^\b$"],
+    load_function = wload,
     kwargs...)
 
     @assert all(eltype(r) <: Regex for r in (rinclude, rexclude)) "Elements of `rinclude` and `rexclude` must be Regex expressions."
@@ -234,24 +234,15 @@ is_valid_file(file, valid_filetypes) =
     any(endswith(file, v) for v in valid_filetypes)
 
 # Use wload per default when nothing else is available
-function to_data_row(
-    file::File;
-    load_function=(filename) -> wload(filename),
-    kwargs...
-)
+function to_data_row(file::File; load_function=wload, kwargs...)
     fpath = filename(file)
-    @debug "Opening $fpath with fallback wload."
+    @debug "Opening $(filename(file)) with fallback wload."
     return to_data_row(load_function(fpath), fpath; kwargs...)
 end
 # Specialize for JLD2 files, can do much faster mmapped access
-function to_data_row(
-    file::File{format"JLD2"};
-    load_function=(filename) -> JLD2.jldopen(filename, "r"),
-    kwargs...
-)
+function to_data_row(file::File{format"JLD2"}; load_function=(filename) -> JLD2.jldopen(filename, "r"), kwargs...)
     fpath = filename(file)
-    @debug "Opening $fpath with jldopen."
-
+    @debug "Opening $(filename(file)) with jldopen."
     data = load_function(fpath)
     return to_data_row(data, fpath; kwargs...)
 end

diff --git a/test/update_results_tests.jl b/test/update_results_tests.jl
@@ -4,71 +4,87 @@ using BSON, DataFrames, FileIO, JLD2
 
 @testset "Collect Results ($ending)" for ending ∈ ["bson", "jld2"]
 
-    ###############################################################################
-    #                        Setup Folder structure                               #
-    ###############################################################################
-    # %%
-    cd(@__DIR__)
-    isdir("testdir") && rm("testdir", recursive=true)
-    mkdir("testdir")
-    initialize_project("testdir"; git=false)
-    quickactivate("testdir")
-
-    ###############################################################################
-    #                           Create Dummy Data                                 #
-    ###############################################################################
-    mkdir(datadir("results"))
-    cd(datadir("results"))
-
-    d = Dict("a" => 1, "b" => "2", "c" => rand(10))
-    DrWatson.wsave(savename(d) * "." * ending, d)
-
-    d = Dict("a" => 3, "b" => "4", "c" => rand(10), "d" => Float64)
-    DrWatson.wsave(savename(d) * "." * ending, d)
-
-    d = Dict("a" => 3, "c" => rand(10), "d" => Float64)
-    DrWatson.wsave(savename(d) * "." * ending, d)
-
-    mkdir("subfolder")
-    cd("subfolder")
+###############################################################################
+#                        Setup Folder structure                               #
+###############################################################################
+# %%
+cd(@__DIR__)
+isdir("testdir") && rm("testdir", recursive=true)
+mkdir("testdir")
+initialize_project("testdir"; git = false)
+quickactivate("testdir")
+
+###############################################################################
+#                           Create Dummy Data                                 #
+###############################################################################
+mkdir(datadir("results"))
+cd(datadir("results"))
+
+d = Dict("a" => 1, "b" => "2", "c" => rand(10))
+DrWatson.wsave(savename(d)*"."*ending, d)
+
+d = Dict("a" => 3, "b" => "4", "c" => rand(10), "d" => Float64)
+DrWatson.wsave(savename(d)*"."*ending, d)
+
+d = Dict("a" => 3, "c" => rand(10), "d" => Float64)
+DrWatson.wsave(savename(d)*"."*ending, d)
+
+mkdir("subfolder")
+cd("subfolder")
+
+d = Dict("a" => 4., "b" => "twenty" , "d" => Int)
+DrWatson.wsave(savename(d)*"."*ending, d)
+
+###############################################################################
+#                           Collect Data Into DataFrame                       #
+###############################################################################
+using Statistics
+special_list = [ :lv_mean => data -> mean(data["c"]),
+                :lv_var  => data -> var(data["c"])]
+
+black_list = ["c"]
+
+folder = datadir("results")
+
+defaultname = joinpath(dirname(folder), "results_$(basename(folder))."*ending)
+isfile(defaultname) && rm(defaultname)
+cres = collect_results!(defaultname, folder;
+    subfolders = true, special_list=special_list, black_list = black_list)
+
+@test size(cres) == (4, 6)
+for n in ("a", "b", "lv_mean")
+    @test n ∈ String.(names(cres))
+end
+@test "c" ∉ names(cres)
+@test all(startswith.(cres[!,"path"], projectdir()))
+
+relpathname = joinpath(dirname(folder), "results_relpath_$(basename(folder))."*ending)
+cres_relpath = collect_results!(relpathname, folder;
+    subfolders = true, special_list=special_list, black_list = black_list,
+    rpath = projectdir())
+@info all(startswith.(cres[!,"path"], "data"))
+
+struct dummy
+    a::Float64
+    b::Int64
+    c::Matrix{Float64}
+end
+_dummy_matrix = rand(3,3)
+_dummy = dummy(1.0, 1, _dummy_matrix)
+wsave(datadir("dummy.jld2"), "dummy", _dummy)
 
-    d = Dict("a" => 4.0, "b" => "twenty", "d" => Int)
-    DrWatson.wsave(savename(d) * "." * ending, d)
+actual_dataframe = collect_results(datadir(), rinclude=[r"dummy.jld2"], load_function=(filename) -> struct2dict(wload(filename)["dummy"]))
+_dataframe_vector = Vector{Union{Missing, Matrix{Float64}}}(undef, 1)
+_dataframe_vector[1] = _dummy_matrix
+expected_dataframe = DataFrame(a = 1.0, b = 1, c = _dataframe_vector, path = datadir("dummy.jld2"))
 
-    ###############################################################################
-    #                           Collect Data Into DataFrame                       #
-    ###############################################################################
-    using Statistics
-    special_list = [:lv_mean => data -> mean(data["c"]),
-        :lv_var => data -> var(data["c"])]
+@test actual_dataframe == expected_dataframe
 
-    black_list = ["c"]
+###############################################################################
+#                           Trailing slash in foldername                      #
+###############################################################################
 
-    folder = datadir("results")
-
-    defaultname = joinpath(dirname(folder), "results_$(basename(folder))." * ending)
-    isfile(defaultname) && rm(defaultname)
-    cres = collect_results!(defaultname, folder;
-        subfolders=true, special_list=special_list, black_list=black_list)
-
-    @test size(cres) == (4, 6)
-    for n in ("a", "b", "lv_mean")
-        @test n ∈ String.(names(cres))
-    end
-    @test "c" ∉ names(cres)
-    @test all(startswith.(cres[!, "path"], projectdir()))
-
-    relpathname = joinpath(dirname(folder), "results_relpath_$(basename(folder))." * ending)
-    cres_relpath = collect_results!(relpathname, folder;
-        subfolders=true, special_list=special_list, black_list=black_list,
-        rpath=projectdir())
-    @info all(startswith.(cres[!, "path"], "data"))
-
-    struct testDummy
-        a::Float64
-        b::Int64
-        c::Matrix{Float64}
-    end
+df = collect_results!(datadir("results/"))      # This would produce the incorrect file. (Issue#181)
 
     fname = "dummy.jld2"
     dummymat = Float64[1 2 3; 0 0 0; 4 5 6]