From 5bae5d78f20d7f0ad44c92e781be7f7220a25e19 Mon Sep 17 00:00:00 2001 From: Johanni Brea Date: Tue, 6 Dec 2022 11:02:50 +0100 Subject: [PATCH 1/2] use Scratch.jl --- Project.toml | 8 +++++--- src/OpenML.jl | 10 +++++++--- src/data.jl | 22 +++++----------------- test/data.jl | 23 +++++++++++++++-------- 4 files changed, 32 insertions(+), 31 deletions(-) diff --git a/Project.toml b/Project.toml index a44b0d9..40ff7ee 100644 --- a/Project.toml +++ b/Project.toml @@ -1,7 +1,7 @@ name = "OpenML" uuid = "8b6db2d4-7670-4922-a472-f9537c81ab66" authors = ["Diego Arenas ", "Anthony D. Blaom "] -version = "0.3.0" +version = "0.2.0" [deps] ARFFFiles = "da404889-ca92-49ff-9e8b-0aa6b4d38dc8" @@ -9,16 +9,18 @@ HTTP = "cd3eb016-35fb-5094-929b-558a96fad6f3" JSON = "682c06a0-de6a-54ab-a142-c8b1cf79cde6" Markdown = "d6f4376e-aef5-505a-96c1-9c027394607a" Pkg = "44cfe95a-1eb2-52ea-b672-e2afdf69b78f" +Scratch = "6c6a2e73-6563-6170-7368-637461726353" [compat] ARFFFiles = "1.4.1" -HTTP = "0.8, 0.9,1" +HTTP = "0.8, 0.9, 1" JSON = "0.21" julia = "1" [extras] Tables = "bd369af6-aec1-5ad0-b16a-f7cc5008161c" Test = "8dfed614-e22c-5e08-85e1-65c5234f0b40" +Logging = "56ddb016-857b-54e1-b83d-db4d58db5568" [targets] -test = ["Tables", "Test"] +test = ["Tables", "Test", "Logging"] diff --git a/src/OpenML.jl b/src/OpenML.jl index b1fda67..c66396e 100644 --- a/src/OpenML.jl +++ b/src/OpenML.jl @@ -4,12 +4,16 @@ using HTTP using JSON import ARFFFiles using Markdown -if VERSION > v"1.3.0" - using Pkg.Artifacts -end +using Scratch export OpenML +download_cache = "" + include("data.jl") +function __init__() + global download_cache = @get_scratch!("datasets") +end + end # module diff --git a/src/data.jl b/src/data.jl index 1cf5af3..e3bf2ef 100644 --- a/src/data.jl +++ b/src/data.jl @@ -67,24 +67,12 @@ peek_table = OpenML.load(61, maxbytes = 1024) # load only the first 1024 bytes o ``` """ function load(id::Int; maxbytes = nothing) - if VERSION > v"1.3.0" - dir = first(Artifacts.artifacts_dirs()) - toml = joinpath(dir, "OpenMLArtifacts.toml") - hash = artifact_hash(string(id), toml) - if hash === nothing || !artifact_exists(hash) - hash = Artifacts.create_artifact() do artifact_dir - url = load_Dataset_Description(id)["data_set_description"]["url"] - download(url, joinpath(artifact_dir, "$id.arff")) - end - bind_artifact!(toml, string(id), hash) - end - filename = joinpath(artifact_path(hash), "$id.arff") - else - url = load_Dataset_Description(id)["data_set_description"]["url"] - filename = tempname() - download(url, filename) + fname = joinpath(download_cache, "$id.arff") + if !isfile(fname) + @info "Downloading dataset $id." + download(load_Dataset_Description(id)["data_set_description"]["url"], fname) end - ARFFFiles.load(x -> ARFFFiles.readcolumns(x; maxbytes = maxbytes), filename) + ARFFFiles.load(x -> ARFFFiles.readcolumns(x; maxbytes = maxbytes), fname) end diff --git a/test/data.jl b/test/data.jl index 9f33b7c..32df589 100644 --- a/test/data.jl +++ b/test/data.jl @@ -40,15 +40,22 @@ end @test length(filters_test["data"]["dataset"][1]) == offset end -if VERSION > v"1.3.0" - using Pkg - @testset "artifacts" begin - dir = first(Pkg.Artifacts.artifacts_dirs()) - toml = joinpath(dir, "OpenMLArtifacts.toml") - hash = Pkg.Artifacts.artifact_hash("61", toml) - @test Pkg.Artifacts.artifact_exists(hash) +@testset "scratch" begin + OpenML.load(61) + fname = joinpath(OpenML.download_cache, "61.arff") + @test isfile(fname) + using Logging + io = IOBuffer() + logger = SimpleLogger(io) + with_logger(logger) do + OpenML.load(61) end + @test match(r"Downloading dataset 61", String(take!(io))) === nothing + rm(fname) + with_logger(logger) do + OpenML.load(61) + end + @test match(r"Downloading dataset 61", String(take!(io))) !== nothing end end -true From fd0a277753c8a0f39b4ae153dfc5e78b3373a2dc Mon Sep 17 00:00:00 2001 From: Johanni Brea Date: Tue, 6 Dec 2022 11:06:10 +0100 Subject: [PATCH 2/2] bump version --- Project.toml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Project.toml b/Project.toml index 40ff7ee..339f25d 100644 --- a/Project.toml +++ b/Project.toml @@ -1,7 +1,7 @@ name = "OpenML" uuid = "8b6db2d4-7670-4922-a472-f9537c81ab66" authors = ["Diego Arenas ", "Anthony D. Blaom "] -version = "0.2.0" +version = "0.3.1" [deps] ARFFFiles = "da404889-ca92-49ff-9e8b-0aa6b4d38dc8"