Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
8 changes: 5 additions & 3 deletions Project.toml
Original file line number Diff line number Diff line change
@@ -1,24 +1,26 @@
name = "OpenML"
uuid = "8b6db2d4-7670-4922-a472-f9537c81ab66"
authors = ["Diego Arenas <darenasc@gmail.com>", "Anthony D. Blaom <anthony.blaom@gmail.com>"]
version = "0.3.0"
version = "0.3.1"

[deps]
ARFFFiles = "da404889-ca92-49ff-9e8b-0aa6b4d38dc8"
HTTP = "cd3eb016-35fb-5094-929b-558a96fad6f3"
JSON = "682c06a0-de6a-54ab-a142-c8b1cf79cde6"
Markdown = "d6f4376e-aef5-505a-96c1-9c027394607a"
Pkg = "44cfe95a-1eb2-52ea-b672-e2afdf69b78f"
Scratch = "6c6a2e73-6563-6170-7368-637461726353"

[compat]
ARFFFiles = "1.4.1"
HTTP = "0.8, 0.9,1"
HTTP = "0.8, 0.9, 1"
JSON = "0.21"
julia = "1"

[extras]
Tables = "bd369af6-aec1-5ad0-b16a-f7cc5008161c"
Test = "8dfed614-e22c-5e08-85e1-65c5234f0b40"
Logging = "56ddb016-857b-54e1-b83d-db4d58db5568"

[targets]
test = ["Tables", "Test"]
test = ["Tables", "Test", "Logging"]
10 changes: 7 additions & 3 deletions src/OpenML.jl
Original file line number Diff line number Diff line change
Expand Up @@ -4,12 +4,16 @@ using HTTP
using JSON
import ARFFFiles
using Markdown
if VERSION > v"1.3.0"
using Pkg.Artifacts
end
using Scratch

export OpenML

# Directory in which downloaded dataset files are cached. It starts out as an
# empty placeholder and is reassigned in `__init__` to the path returned by
# `@get_scratch!("datasets")`; `load` joins "<id>.arff" onto it.
download_cache = ""

include("data.jl")

# Module initialisation hook: resolve the "datasets" scratch space for this
# package and store its path in the module-level `download_cache` global.
function __init__()
    global download_cache
    download_cache = @get_scratch!("datasets")
end

end # module
22 changes: 5 additions & 17 deletions src/data.jl
Original file line number Diff line number Diff line change
Expand Up @@ -67,24 +67,12 @@ peek_table = OpenML.load(61, maxbytes = 1024) # load only the first 1024 bytes o
```
"""
function load(id::Int; maxbytes = nothing)
# NOTE(review): this span appears to interleave two implementations whose diff
# markers were lost (the hunk header reports "5 additions & 17 deletions"):
#   * an older one that stores the dataset as a Pkg artifact (or in a temp
#     file) and binds `filename`;
#   * a newer one that caches "<id>.arff" under `download_cache` and binds
#     `fname`.
# As written the `if`/`end` pairs do not balance. Confirm which lines are live
# before relying on this block.

# --- older path: artifact-backed storage -----------------------------------
if VERSION > v"1.3.0"
# Look up an artifact bound to the name string(id) in "OpenMLArtifacts.toml",
# located in the first artifacts directory.
dir = first(Artifacts.artifacts_dirs())
toml = joinpath(dir, "OpenMLArtifacts.toml")
hash = artifact_hash(string(id), toml)
# No binding yet, or the bound artifact is not on disk: create it by
# downloading the dataset's URL into "<id>.arff", then record the binding.
if hash === nothing || !artifact_exists(hash)
hash = Artifacts.create_artifact() do artifact_dir
url = load_Dataset_Description(id)["data_set_description"]["url"]
download(url, joinpath(artifact_dir, "$id.arff"))
end
bind_artifact!(toml, string(id), hash)
end
filename = joinpath(artifact_path(hash), "$id.arff")
else
# Fallback branch: download into a fresh temporary file on every call.
url = load_Dataset_Description(id)["data_set_description"]["url"]
filename = tempname()
download(url, filename)
# --- newer path: scratch-space cache ----------------------------------------
# The dataset is cached as "<id>.arff" inside `download_cache`; it is only
# downloaded (and the download logged via @info) when that file is absent.
# NOTE(review): only `isfile` is checked, so a truncated file left behind by
# an interrupted download would be treated as a valid cache entry — verify
# how `download` cleans up on failure.
fname = joinpath(download_cache, "$id.arff")
if !isfile(fname)
@info "Downloading dataset $id."
download(load_Dataset_Description(id)["data_set_description"]["url"], fname)
end
# Parse the ARFF file; `maxbytes` is forwarded to `ARFFFiles.readcolumns`.
# The first call reads `filename` (older path), the second `fname` (newer path).
ARFFFiles.load(x -> ARFFFiles.readcolumns(x; maxbytes = maxbytes), filename)
ARFFFiles.load(x -> ARFFFiles.readcolumns(x; maxbytes = maxbytes), fname)
end


Expand Down
23 changes: 15 additions & 8 deletions test/data.jl
Original file line number Diff line number Diff line change
Expand Up @@ -40,15 +40,22 @@ end
@test length(filters_test["data"]["dataset"][1]) == offset
end

if VERSION > v"1.3.0"
using Pkg
@testset "artifacts" begin
dir = first(Pkg.Artifacts.artifacts_dirs())
toml = joinpath(dir, "OpenMLArtifacts.toml")
hash = Pkg.Artifacts.artifact_hash("61", toml)
@test Pkg.Artifacts.artifact_exists(hash)
# Exercise the on-disk dataset cache: loading a dataset that is already cached
# must not log a download, while removing the cached file must trigger one.
@testset "scratch" begin
    using Logging

    # A first load ensures the file for dataset 61 exists in the cache directory.
    OpenML.load(61)
    cached = joinpath(OpenML.download_cache, "61.arff")
    @test isfile(cached)

    # Route log records into a buffer so the download message can be inspected.
    buf = IOBuffer()
    capture = SimpleLogger(buf)
    notice = r"Downloading dataset 61"

    # File present: the download message must be absent from the log.
    with_logger(() -> OpenML.load(61), capture)
    @test !occursin(notice, String(take!(buf)))

    # File removed: the next load has to download again and log it.
    rm(cached)
    with_logger(() -> OpenML.load(61), capture)
    @test occursin(notice, String(take!(buf)))
end

end
true