Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 1 addition & 3 deletions Project.toml
Original file line number Diff line number Diff line change
@@ -1,21 +1,19 @@
name = "OpenML"
uuid = "8b6db2d4-7670-4922-a472-f9537c81ab66"
authors = ["Diego Arenas <darenasc@gmail.com>", "Anthony D. Blaom <anthony.blaom@gmail.com>"]
version = "0.1.1"
version = "0.2.0"

[deps]
ARFFFiles = "da404889-ca92-49ff-9e8b-0aa6b4d38dc8"
HTTP = "cd3eb016-35fb-5094-929b-558a96fad6f3"
JSON = "682c06a0-de6a-54ab-a142-c8b1cf79cde6"
Markdown = "d6f4376e-aef5-505a-96c1-9c027394607a"
Pkg = "44cfe95a-1eb2-52ea-b672-e2afdf69b78f"
ScientificTypes = "321657f4-b219-11e9-178b-2701a2544e81"

[compat]
ARFFFiles = "1.3"
HTTP = "0.8, 0.9"
JSON = "0.21"
ScientificTypes = "2"
julia = "1"

[extras]
Expand Down
3 changes: 1 addition & 2 deletions src/OpenML.jl
Original file line number Diff line number Diff line change
@@ -1,9 +1,8 @@
module OpenML
module OpenML

using HTTP
using JSON
import ARFFFiles
import ScientificTypes: Continuous, Count, Textual, Multiclass, coerce, autotype
using Markdown
if VERSION > v"1.3.0"
using Pkg.Artifacts
Expand Down
35 changes: 19 additions & 16 deletions src/data.jl
Original file line number Diff line number Diff line change
Expand Up @@ -28,34 +28,35 @@ function load_Dataset_Description(id::Int; api_key::String="")
println("No access granted. This dataset is not shared with you.")
end
catch e
println("Error occurred : $e")
println("Error occurred. Check if there exists a dataset with id $id.")
println("See e.g. OpenML.list_datasets()\n")
println(e)
return nothing
end
return nothing
end

"""
OpenML.load(id; parser = :arff)
OpenML.load(id)

Load the OpenML dataset with specified `id`, from those listed by
[`list_datasets`](@ref) or on the [OpenML site](https://www.openml.org/search?type=data).
With `parser = :arff` (default) the ARFFFiles.jl parser is used.
With `parser = :auto` the output of the ARFFFiles parser is coerced to
automatically detected scientific types.

Datasets are saved as julia artifacts so that they persist locally once loaded.
Datasets are saved as julia artifacts so that they persist locally once loaded.

Returns a table.

# Examples

```julia
using DataFrames
table = OpenML.load(61);
df = DataFrame(table);
table = OpenML.load(61)
df = DataFrame(table) # transform to a DataFrame
using ScientificTypes
df2 = coerce(df, autotype(df)) # coerce to automatically detected scientific types
```
"""
function load(id::Int; parser = :arff)
function load(id::Int)
if VERSION > v"1.3.0"
dir = first(Artifacts.artifacts_dirs())
toml = joinpath(dir, "OpenMLArtifacts.toml")
Expand All @@ -73,12 +74,7 @@ function load(id::Int; parser = :arff)
filename = tempname()
download(url, filename)
end
data = ARFFFiles.load(filename)
if parser == :auto
return coerce(data, autotype(data))
else
return data
end
ARFFFiles.load(filename)
end


Expand Down Expand Up @@ -321,7 +317,14 @@ julia> OpenML.describe_dataset(6)
cited above for more details.
```
"""
describe_dataset(id) = Markdown.parse(load_Dataset_Description(id)["data_set_description"]["description"])
function describe_dataset(id)
    # Fetch the dataset description for `id` and render its text as Markdown.
    # Returns the parsed Markdown when a textual description is available,
    # otherwise the plain string "No description found.".

    # `load_Dataset_Description` returns `nothing` when the lookup fails (it
    # prints its own diagnostics), so stop here rather than indexing into
    # `nothing`, which would raise a `MethodError`.
    response = load_Dataset_Description(id)
    response === nothing && return "No description found."

    description = response["data_set_description"]["description"]

    # The description entry may not be a string; only text is parsed as Markdown.
    description isa AbstractString || return "No description found."
    return Markdown.parse(description)
end

# Flow API

Expand Down
8 changes: 4 additions & 4 deletions test/data.jl
Original file line number Diff line number Diff line change
Expand Up @@ -41,12 +41,12 @@ end
end

if VERSION > v"1.3.0"
using Pkg.Artifacts
using Pkg
@testset "artifacts" begin
dir = first(Artifacts.artifacts_dirs())
dir = first(Pkg.Artifacts.artifacts_dirs())
toml = joinpath(dir, "OpenMLArtifacts.toml")
hash = artifact_hash("61", toml)
@test artifact_exists(hash)
hash = Pkg.Artifacts.artifact_hash("61", toml)
@test Pkg.Artifacts.artifact_exists(hash)
end
end

Expand Down