Skip to content

Commit

Permalink
refactor load_function kwarg for collect_results
Browse files Browse the repository at this point in the history
The purpose of this commit is to incorporate comments from the pull request:
 - removes the anonymous function wrapper, except in the JLD2 method of
`to_data_row`, whose default must remain an anonymous function so that the "r"
argument can be fixed in the call.
 - reverts all the autoformatting

As before, a test was added to `update_results_tests.jl`. All tests
passed, 589 of 589.
  • Loading branch information
NuclearPowerNerd committed Aug 21, 2024
1 parent 71dd90b commit 6e6ff07
Show file tree
Hide file tree
Showing 2 changed files with 91 additions and 84 deletions.
37 changes: 14 additions & 23 deletions src/result_collection.jl
Original file line number Diff line number Diff line change
Expand Up @@ -50,7 +50,7 @@ See also [`collect_results`](@ref).
* `black_list = [:gitcommit, :gitpatch, :script]`: List of keys not to include from result-file.
* `special_list = []`: List of additional (derived) key-value pairs
to put in `df` as explained below.
* `load_function = (filename) -> wload(filename)`: function for loading data from file. This is useful in the event you have data saved as a struct. When loaded from file it will be as a one-element `Dict` which is not what you want passed to the dataframe. Instead, you'd rather have the fields of the struct to be available as columns of the dataframe. In that case you can use this function to ensure the struct is converted to a dict before being processed by `collect_results!`. For example, `load_function = (filename) -> struct2dict(wload(filename)["my_key"])`.
* `load_function = wload`: Function used to load each result file; defaults to `wload`. A custom load function is useful, for example, if you store results as a struct and want the fields of the struct to form the columns of the dataframe. The struct is saved to file as a one-element dictionary, so by default the dataframe would have only a single column. To work around this, convert it to a dictionary by specifying `load_function = (filename) -> struct2dict(wload(filename)["mykey"])`. This way `collect_results` will receive a `Dict` whose keys are the fields of the struct.
`special_list` is a `Vector` where each entry
is a derived quantity to be included in `df`. There are two types of entries.
Expand Down Expand Up @@ -84,15 +84,15 @@ Base.showerror(io::IO, e::InvalidResultsCollection) = print(io, e.msg)


function collect_results!(filename, folder;
valid_filetypes=[".bson", "jld", ".jld2"],
subfolders=false,
rpath=nothing,
verbose=true,
update=false,
newfile=false, # keyword only for defining collect_results without !
rinclude=[r""],
rexclude=[r"^\b$"],
load_function=(filename) -> wload(filename),
valid_filetypes = [".bson", "jld", ".jld2"],
subfolders = false,
rpath = nothing,
verbose = true,
update = false,
newfile = false, # keyword only for defining collect_results without !
rinclude = [r""],
rexclude = [r"^\b$"],
load_function = wload,
kwargs...)

@assert all(eltype(r) <: Regex for r in (rinclude, rexclude)) "Elements of `rinclude` and `rexclude` must be Regex expressions."
Expand Down Expand Up @@ -234,24 +234,15 @@ is_valid_file(file, valid_filetypes) =
any(endswith(file, v) for v in valid_filetypes)

# Use wload per default when nothing else is available
function to_data_row(
file::File;
load_function=(filename) -> wload(filename),
kwargs...
)
function to_data_row(file::File; load_function=wload, kwargs...)
fpath = filename(file)
@debug "Opening $fpath with fallback wload."
@debug "Opening $(filename(file)) with fallback wload."
return to_data_row(load_function(fpath), fpath; kwargs...)
end
# Specialize for JLD2 files, can do much faster mmapped access
function to_data_row(
file::File{format"JLD2"};
load_function=(filename) -> JLD2.jldopen(filename, "r"),
kwargs...
)
function to_data_row(file::File{format"JLD2"}; load_function=(filename) -> JLD2.jldopen(filename, "r"), kwargs...)
fpath = filename(file)
@debug "Opening $fpath with jldopen."

@debug "Opening $(filename(file)) with jldopen."
data = load_function(fpath)
return to_data_row(data, fpath; kwargs...)
end
Expand Down
138 changes: 77 additions & 61 deletions test/update_results_tests.jl
Original file line number Diff line number Diff line change
Expand Up @@ -4,71 +4,87 @@ using BSON, DataFrames, FileIO, JLD2

@testset "Collect Results ($ending)" for ending ∈ ["bson", "jld2"]

###############################################################################
# Setup Folder structure #
###############################################################################
# %%
cd(@__DIR__)
isdir("testdir") && rm("testdir", recursive=true)
mkdir("testdir")
initialize_project("testdir"; git=false)
quickactivate("testdir")

###############################################################################
# Create Dummy Data #
###############################################################################
mkdir(datadir("results"))
cd(datadir("results"))

d = Dict("a" => 1, "b" => "2", "c" => rand(10))
DrWatson.wsave(savename(d) * "." * ending, d)

d = Dict("a" => 3, "b" => "4", "c" => rand(10), "d" => Float64)
DrWatson.wsave(savename(d) * "." * ending, d)

d = Dict("a" => 3, "c" => rand(10), "d" => Float64)
DrWatson.wsave(savename(d) * "." * ending, d)

mkdir("subfolder")
cd("subfolder")
###############################################################################
# Setup Folder structure #
###############################################################################
# %%
cd(@__DIR__)
isdir("testdir") && rm("testdir", recursive=true)
mkdir("testdir")
initialize_project("testdir"; git = false)
quickactivate("testdir")

###############################################################################
# Create Dummy Data #
###############################################################################
mkdir(datadir("results"))
cd(datadir("results"))

d = Dict("a" => 1, "b" => "2", "c" => rand(10))
DrWatson.wsave(savename(d)*"."*ending, d)

d = Dict("a" => 3, "b" => "4", "c" => rand(10), "d" => Float64)
DrWatson.wsave(savename(d)*"."*ending, d)

d = Dict("a" => 3, "c" => rand(10), "d" => Float64)
DrWatson.wsave(savename(d)*"."*ending, d)

mkdir("subfolder")
cd("subfolder")

d = Dict("a" => 4., "b" => "twenty" , "d" => Int)
DrWatson.wsave(savename(d)*"."*ending, d)

###############################################################################
# Collect Data Into DataFrame #
###############################################################################
using Statistics
special_list = [ :lv_mean => data -> mean(data["c"]),
:lv_var => data -> var(data["c"])]

black_list = ["c"]

folder = datadir("results")

defaultname = joinpath(dirname(folder), "results_$(basename(folder))."*ending)
isfile(defaultname) && rm(defaultname)
cres = collect_results!(defaultname, folder;
subfolders = true, special_list=special_list, black_list = black_list)

@test size(cres) == (4, 6)
for n in ("a", "b", "lv_mean")
@test n ∈ String.(names(cres))
end
@test "c" ∉ names(cres)
@test all(startswith.(cres[!,"path"], projectdir()))

relpathname = joinpath(dirname(folder), "results_relpath_$(basename(folder))."*ending)
cres_relpath = collect_results!(relpathname, folder;
subfolders = true, special_list=special_list, black_list = black_list,
rpath = projectdir())
@info all(startswith.(cres[!,"path"], "data"))

struct dummy
a::Float64
b::Int64
c::Matrix{Float64}
end
_dummy_matrix = rand(3,3)
_dummy = dummy(1.0, 1, _dummy_matrix)
wsave(datadir("dummy.jld2"), "dummy", _dummy)

d = Dict("a" => 4.0, "b" => "twenty", "d" => Int)
DrWatson.wsave(savename(d) * "." * ending, d)
actual_dataframe = collect_results(datadir(), rinclude=[r"dummy.jld2"], load_function=(filename) -> struct2dict(wload(filename)["dummy"]))
_dataframe_vector = Vector{Union{Missing, Matrix{Float64}}}(undef, 1)
_dataframe_vector[1] = _dummy_matrix
expected_dataframe = DataFrame(a = 1.0, b = 1, c = _dataframe_vector, path = datadir("dummy.jld2"))

###############################################################################
# Collect Data Into DataFrame #
###############################################################################
using Statistics
special_list = [:lv_mean => data -> mean(data["c"]),
:lv_var => data -> var(data["c"])]
@test actual_dataframe == expected_dataframe

black_list = ["c"]
###############################################################################
# Trailing slash in foldername #
###############################################################################

folder = datadir("results")

defaultname = joinpath(dirname(folder), "results_$(basename(folder))." * ending)
isfile(defaultname) && rm(defaultname)
cres = collect_results!(defaultname, folder;
subfolders=true, special_list=special_list, black_list=black_list)

@test size(cres) == (4, 6)
for n in ("a", "b", "lv_mean")
@test n ∈ String.(names(cres))
end
@test "c" ∉ names(cres)
@test all(startswith.(cres[!, "path"], projectdir()))

relpathname = joinpath(dirname(folder), "results_relpath_$(basename(folder))." * ending)
cres_relpath = collect_results!(relpathname, folder;
subfolders=true, special_list=special_list, black_list=black_list,
rpath=projectdir())
@info all(startswith.(cres[!, "path"], "data"))

struct testDummy
a::Float64
b::Int64
c::Matrix{Float64}
end
df = collect_results!(datadir("results/")) # This would produce the incorrect file. (Issue#181)

fname = "dummy.jld2"
dummymat = Float64[1 2 3; 0 0 0; 4 5 6]
Expand Down

0 comments on commit 6e6ff07

Please sign in to comment.