From 6e60a1994b2d964fed174bd747dc9b9e91662e24 Mon Sep 17 00:00:00 2001 From: Prasidh Srikumar <49596933+Mobius1D@users.noreply.github.com> Date: Wed, 28 Jul 2021 21:33:36 +0530 Subject: [PATCH] Expand to d4rl-pybullet (#416) * Expand to d4rl-pybullet Support d4rl-pybullet and make a few other changes. * Update NEWS.md and ci.yml Co-authored-by: Jun Tian --- .cspell/cspell.json | 3 +- NEWS.md | 3 +- src/ReinforcementLearningDatasets/README.md | 4 +- .../src/ReinforcementLearningDatasets.jl | 4 +- .../src/d4rl/register.jl | 26 +++++------ .../src/d4rl_pybullet/register.jl | 33 ++++++++++++++ .../src/{d4rl/d4rl_dataset.jl => dataset.jl} | 44 ++++++++++--------- src/ReinforcementLearningDatasets/src/init.jl | 4 ++ .../test/d4rl_pybullet.jl | 32 ++++++++++++++ .../test/{d4rl/d4rl_dataset.jl => dataset.jl} | 26 +++++------ .../test/runtests.jl | 3 +- 11 files changed, 128 insertions(+), 54 deletions(-) create mode 100644 src/ReinforcementLearningDatasets/src/d4rl_pybullet/register.jl rename src/ReinforcementLearningDatasets/src/{d4rl/d4rl_dataset.jl => dataset.jl} (71%) create mode 100644 src/ReinforcementLearningDatasets/src/init.jl create mode 100644 src/ReinforcementLearningDatasets/test/d4rl_pybullet.jl rename src/ReinforcementLearningDatasets/test/{d4rl/d4rl_dataset.jl => dataset.jl} (86%) diff --git a/.cspell/cspell.json b/.cspell/cspell.json index 03bd4c8b4..291bffbcd 100644 --- a/.cspell/cspell.json +++ b/.cspell/cspell.json @@ -97,7 +97,8 @@ "Thibaut", "boxoban", "DATADEPS", - "umaze" + "umaze", + "pybullet" ], "ignoreWords": [], "minWordLength": 5, diff --git a/NEWS.md b/NEWS.md index 82bbc21ff..4dd58c42b 100644 --- a/NEWS.md +++ b/NEWS.md @@ -19,7 +19,8 @@ #### v0.1.0 -- Add functionality for fetching d4rl datasets as an iterable D4RLDataSet. Credits: https://arxiv.org/abs/2004.07219 +- Add functionality for fetching d4rl datasets as an iterable DataSet. Credits: https://arxiv.org/abs/2004.07219 +- This supports d4rl and d4rl-pybullet datasets. 
- Uses DataDeps for data dependency management. ## ReinforcementLearning.jl@v0.9.0 diff --git a/src/ReinforcementLearningDatasets/README.md b/src/ReinforcementLearningDatasets/README.md index 46480e0db..f423e8cc5 100644 --- a/src/ReinforcementLearningDatasets/README.md +++ b/src/ReinforcementLearningDatasets/README.md @@ -2,6 +2,8 @@ A package to create, manage, store and retrieve datasets for Offline Reinforcement Learning using ReinforcementLearning.jl package. +- This package uses DataDeps.jl to fetch and manage datasets. + ### Note: -The package is under active development and for now it supports only d4rl datasets. \ No newline at end of file +The package is under active development and for now it supports d4rl and d4rl-pybullet datasets. \ No newline at end of file diff --git a/src/ReinforcementLearningDatasets/src/ReinforcementLearningDatasets.jl b/src/ReinforcementLearningDatasets/src/ReinforcementLearningDatasets.jl index 55f721c89..fe861bd7b 100644 --- a/src/ReinforcementLearningDatasets/src/ReinforcementLearningDatasets.jl +++ b/src/ReinforcementLearningDatasets/src/ReinforcementLearningDatasets.jl @@ -6,6 +6,8 @@ export RLDatasets using DataDeps include("d4rl/register.jl") -include("d4rl/d4rl_dataset.jl") +include("d4rl_pybullet/register.jl") +include("init.jl") +include("dataset.jl") end \ No newline at end of file diff --git a/src/ReinforcementLearningDatasets/src/d4rl/register.jl b/src/ReinforcementLearningDatasets/src/d4rl/register.jl index 0ca3aaa39..d307aa560 100644 --- a/src/ReinforcementLearningDatasets/src/d4rl/register.jl +++ b/src/ReinforcementLearningDatasets/src/d4rl/register.jl @@ -1,13 +1,13 @@ -export DATASET_URLS -export REF_MAX_SCORE -export REF_MIN_SCORE +export D4RL_DATASET_URLS +export D4RL_REF_MAX_SCORE +export D4RL_REF_MIN_SCORE """ This file holds the registration information for d4rl datasets. It also registers the information in DataDeps for further use in this package. 
""" -const DATASET_URLS = Dict{String, String}( +const D4RL_DATASET_URLS = Dict{String, String}( "maze2d-open-v0" => "http://rail.eecs.berkeley.edu/datasets/offline_rl/maze2d/maze2d-open-sparse.hdf5", "maze2d-umaze-v1" => "http://rail.eecs.berkeley.edu/datasets/offline_rl/maze2d/maze2d-umaze-sparse-v1.hdf5", "maze2d-medium-v1" => "http://rail.eecs.berkeley.edu/datasets/offline_rl/maze2d/maze2d-medium-sparse-v1.hdf5", @@ -100,7 +100,7 @@ const DATASET_URLS = Dict{String, String}( ) -const REF_MIN_SCORE = Dict{String, Float32}( +const D4RL_REF_MIN_SCORE = Dict{String, Float32}( "maze2d-open-v0" => 0.01 , "maze2d-umaze-v1" => 23.85 , "maze2d-medium-v1" => 13.13 , @@ -184,7 +184,7 @@ const REF_MIN_SCORE = Dict{String, Float32}( "bullet-maze2d-large-v0"=> 1.820000, ) -const REF_MAX_SCORE = Dict{String, Float32}( +const D4RL_REF_MAX_SCORE = Dict{String, Float32}( "maze2d-open-v0" => 20.66 , "maze2d-umaze-v1" => 161.86 , "maze2d-medium-v1" => 277.39 , @@ -269,13 +269,13 @@ const REF_MAX_SCORE = Dict{String, Float32}( ) # give a prompt for flow and carla tasks -# add checksums -function __init__() - for ds in keys(DATASET_URLS) +function d4rl_init() + repo = "d4rl" + for ds in keys(D4RL_DATASET_URLS) register( DataDep( - "d4rl-" * ds, + repo*"-"* ds, """ Credits: https://arxiv.org/abs/2004.07219 The following dataset is fetched from the d4rl. 
@@ -283,10 +283,10 @@ function __init__() Dataset information: Name: $(ds) - $(if ds in keys(REF_MAX_SCORE) "MAXIMUM_SCORE: " * string(REF_MAX_SCORE[ds]) end) - $(if ds in keys(REF_MIN_SCORE) "MINIMUM_SCORE: " * string(REF_MIN_SCORE[ds]) end) + $(if ds in keys(D4RL_REF_MAX_SCORE) "MAXIMUM_SCORE: " * string(D4RL_REF_MAX_SCORE[ds]) end) + $(if ds in keys(D4RL_REF_MIN_SCORE) "MINIMUM_SCORE: " * string(D4RL_REF_MIN_SCORE[ds]) end) """, #check if the MAX and MIN score part is even necessary and make the log file prettier - DATASET_URLS[ds], + D4RL_DATASET_URLS[ds], ) ) end diff --git a/src/ReinforcementLearningDatasets/src/d4rl_pybullet/register.jl b/src/ReinforcementLearningDatasets/src/d4rl_pybullet/register.jl new file mode 100644 index 000000000..90f828e03 --- /dev/null +++ b/src/ReinforcementLearningDatasets/src/d4rl_pybullet/register.jl @@ -0,0 +1,33 @@ +export D4RL_PYBULLET_URLS + +const D4RL_PYBULLET_URLS = Dict( + "hopper-bullet-mixed-v0" => "https://www.dropbox.com/s/xv3p0h7dzgxt8xb/hopper-bullet-mixed-v0.hdf5?dl=1", + "walker2d-bullet-random-v0" => "https://www.dropbox.com/s/1gwcfl2nmx6878m/walker2d-bullet-random-v0.hdf5?dl=1", + "hopper-bullet-medium-v0" => "https://www.dropbox.com/s/w22kgzldn6eng7j/hopper-bullet-medium-v0.hdf5?dl=1", + "walker2d-bullet-mixed-v0" => "https://www.dropbox.com/s/i4u2ii0d85iblou/walker2d-bullet-mixed-v0.hdf5?dl=1", + "halfcheetah-bullet-mixed-v0" => "https://www.dropbox.com/s/scj1rqun963aw90/halfcheetah-bullet-mixed-v0.hdf5?dl=1", + "halfcheetah-bullet-random-v0" => "https://www.dropbox.com/s/jnvpb1hp60zt2ak/halfcheetah-bullet-random-v0.hdf5?dl=1", + "walker2d-bullet-medium-v0" => "https://www.dropbox.com/s/v0f2kz48b1hw6or/walker2d-bullet-medium-v0.hdf5?dl=1", + "hopper-bullet-random-v0" => "https://www.dropbox.com/s/bino8ojd7iq4p4d/hopper-bullet-random-v0.hdf5?dl=1", + "ant-bullet-random-v0" => "https://www.dropbox.com/s/2xpmh4wk2m7i8xh/ant-bullet-random-v0.hdf5?dl=1", + "halfcheetah-bullet-medium-v0" => 
"https://www.dropbox.com/s/v4xgssp1w968a9l/halfcheetah-bullet-medium-v0.hdf5?dl=1", + "ant-bullet-medium-v0" => "https://www.dropbox.com/s/6n79kwd94xthr1t/ant-bullet-medium-v0.hdf5?dl=1", + "ant-bullet-mixed-v0" => "https://www.dropbox.com/s/pmy3dzab35g4whk/ant-bullet-mixed-v0.hdf5?dl=1" +) + +function d4rl_pybullet_init() + repo = "d4rl-pybullet" + for ds in keys(D4RL_PYBULLET_URLS) + register( + DataDep( + repo* "-" * ds, + """ + Credits: https://github.com/takuseno/d4rl-pybullet + The following dataset is fetched from the d4rl-pybullet. + """, + D4RL_PYBULLET_URLS[ds], + ) + ) + end + nothing +end \ No newline at end of file diff --git a/src/ReinforcementLearningDatasets/src/d4rl/d4rl_dataset.jl b/src/ReinforcementLearningDatasets/src/dataset.jl similarity index 71% rename from src/ReinforcementLearningDatasets/src/d4rl/d4rl_dataset.jl rename to src/ReinforcementLearningDatasets/src/dataset.jl index 6557edce3..1571a2de2 100644 --- a/src/ReinforcementLearningDatasets/src/d4rl/d4rl_dataset.jl +++ b/src/ReinforcementLearningDatasets/src/dataset.jl @@ -7,15 +7,16 @@ import Base: iterate, length, IteratorEltype export dataset export SARTS export SART -export D4RLDataSet +export DataSet -const SARTS = (:state, :action, :reward, :terminals, :next_state) -const SART = (:state, :action, :reward, :terminals) +const SARTS = (:state, :action, :reward, :terminal, :next_state) +const SART = (:state, :action, :reward, :terminal) """ -Represents a iterable dataset from d4rl with the following fields: +Represents a iterable dataset with the following fields: `dataset`: Dict{Symbol, Any}, representation of the dataset as a Dictionary with style as `style` +`repo`: String, the repository from which the dataset is taken `size`: Integer, the size of the dataset `batch_size`: Integer, the size of the batches returned by `iterate`. `style`: Tuple, the type of the NamedTuple, for now SARTS and SART is supported. 
@@ -23,8 +24,9 @@ Represents a iterable dataset from d4rl with the following fields: `meta`: Dict, the metadata provided along with the dataset `is_shuffle`: Bool, determines if the batches returned by `iterate` are shuffled. """ -struct D4RLDataSet{T<:AbstractRNG} +struct DataSet{T<:AbstractRNG} dataset::Dict{Symbol, Any} + repo::String size::Integer batch_size::Integer style::Tuple @@ -39,11 +41,12 @@ end """ dataset(dataset::String; style::Tuple, rng<:AbstractRNG, is_shuffle::Bool, max_iters::Int64, batch_size::Int64) -Creates a dataset of enclosed in a D4RLDataSet type and other related metadata for the `dataset` that is passed. -The dataset type is an iterable that fetches batches when used in a for loop for convenience during offline training. +Creates a dataset enclosed in a `DataSet` type and other related metadata for the `dataset` that is passed. +The `DataSet` type is an iterable that fetches batches when used in a for loop for convenience during offline training. -`dataset`: Name of the D4RLDataSet dataset. -`style`: the style of the iterator and the Dict inside D4RLDataSet that is returned. +`dataset`: String, name of the dataset. +`repo`: Name of the repository of the dataset. +`style`: the style of the iterator and the Dict inside DataSet that is returned. `rng`: StableRNG `max_iters`: maximum number of iterations for the iterator. `is_shuffle`: whether the dataset is shuffled or not. `true` by default. @@ -52,19 +55,20 @@ The dataset type is an iterable that fetches batches when used in a for loop for The returned type is an infinite iterator which can be called using `iterate` and will return batches as specified in the dataset. 
""" function dataset(dataset::String; - style=SARTS, + style=SARTS, + repo = "d4rl", rng = StableRNG(123), is_shuffle = true, batch_size=256 ) try - @datadep_str "d4rl-"*dataset + @datadep_str repo*"-"*dataset catch throw("The provided dataset is not available") end - path = @datadep_str "d4rl-"*dataset + path = @datadep_str repo*"-"*dataset @assert length(readdir(path)) == 1 file_name = readdir(path)[1] @@ -79,7 +83,7 @@ function dataset(dataset::String; dataset = Dict{Symbol, Any}() meta = Dict{String, Any}() - N_samples = size(data["terminals"])[1] + N_samples = size(data["observations"])[2] for (key, d_key) in zip(["observations", "actions", "rewards", "terminals"], Symbol.(["state", "action", "reward", "terminal"])) dataset[d_key] = data[key] @@ -91,11 +95,11 @@ function dataset(dataset::String; end end - return D4RLDataSet(dataset, N_samples, batch_size, style, rng, meta, is_shuffle) + return DataSet(dataset, repo, N_samples, batch_size, style, rng, meta, is_shuffle) end -function iterate(ds::D4RLDataSet, state = 0) +function iterate(ds::DataSet, state = 0) rng = ds.rng batch_size = ds.batch_size size = ds.size @@ -127,9 +131,9 @@ function iterate(ds::D4RLDataSet, state = 0) end -take(ds::D4RLDataSet, n::Integer) = take(ds.dataset, n) -length(ds::D4RLDataSet) = ds.size -IteratorEltype(::Type{D4RLDataSet}) = EltypeUnknown() # see if eltype can be known (not sure about carla and adroit) +take(ds::DataSet, n::Integer) = take(ds.dataset, n) +length(ds::DataSet) = ds.size +IteratorEltype(::Type{DataSet}) = EltypeUnknown() # see if eltype can be known (not sure about carla and adroit) function verify(data::Dict{String, Any}) @@ -137,6 +141,6 @@ function verify(data::Dict{String, Any}) @assert (key in keys(data)) "Expected keys not present in data" end N_samples = size(data["observations"])[2] - @assert size(data["rewards"]) == (N_samples,) - @assert size(data["terminals"]) == (N_samples,) + @assert size(data["rewards"]) == (N_samples,) || size(data["rewards"]) == 
(1, N_samples) + @assert size(data["terminals"]) == (N_samples,) || size(data["terminals"]) == (1, N_samples) end \ No newline at end of file diff --git a/src/ReinforcementLearningDatasets/src/init.jl b/src/ReinforcementLearningDatasets/src/init.jl new file mode 100644 index 000000000..166ef6d82 --- /dev/null +++ b/src/ReinforcementLearningDatasets/src/init.jl @@ -0,0 +1,4 @@ +function __init__() + RLDatasets.d4rl_init() + RLDatasets.d4rl_pybullet_init() +end \ No newline at end of file diff --git a/src/ReinforcementLearningDatasets/test/d4rl_pybullet.jl b/src/ReinforcementLearningDatasets/test/d4rl_pybullet.jl new file mode 100644 index 000000000..a146647c7 --- /dev/null +++ b/src/ReinforcementLearningDatasets/test/d4rl_pybullet.jl @@ -0,0 +1,32 @@ +using Base: batch_size_err_str +@testset "d4rl_pybullet" begin + ds = dataset( + "hopper-bullet-mixed-v0"; + repo="d4rl-pybullet", + style = style, + rng = rng, + is_shuffle = true, + batch_size = batch_size + ) + + n_s = 15 + n_a = 3 + + N_samples = 59345 + + data_dict = ds.dataset + + @test size(data_dict[:state]) == (n_s, N_samples) + @test size(data_dict[:action]) == (n_a, N_samples) + @test size(data_dict[:reward]) == (1, N_samples) + @test size(data_dict[:terminal]) == (1, N_samples) + + for sample in Iterators.take(ds, 3) + @test typeof(sample) <: NamedTuple{SARTS} + @test size(sample[:state]) == (n_s, batch_size) + @test size(sample[:action]) == (n_a, batch_size) + @test size(sample[:reward]) == (1, batch_size) || size(sample[:reward]) == (batch_size,) + @test size(sample[:terminal]) == (1, batch_size) || size(sample[:terminal]) == (batch_size,) + end + +end \ No newline at end of file diff --git a/src/ReinforcementLearningDatasets/test/d4rl/d4rl_dataset.jl b/src/ReinforcementLearningDatasets/test/dataset.jl similarity index 86% rename from src/ReinforcementLearningDatasets/test/d4rl/d4rl_dataset.jl rename to src/ReinforcementLearningDatasets/test/dataset.jl index dd9da8304..1787ce544 100644 --- 
a/src/ReinforcementLearningDatasets/test/d4rl/d4rl_dataset.jl +++ b/src/ReinforcementLearningDatasets/test/dataset.jl @@ -1,13 +1,14 @@ n_s = 11 n_a = 3 -N_samples = 200919 batch_size = 256 style = SARTS rng = StableRNG(123) -@testset "dataset_d4rl_shuffle" begin +# TO-DO make functions to make tests modular and more widely applicable +@testset "dataset_shuffle" begin ds = dataset( "hopper-medium-replay-v0"; + repo="d4rl", style = style, rng = rng, is_shuffle = true, @@ -15,18 +16,15 @@ rng = StableRNG(123) ) data_dict = ds.dataset + N_samples = size(data_dict[:state])[2] @test size(data_dict[:state]) == (n_s, N_samples) @test size(data_dict[:action]) == (n_a, N_samples) @test size(data_dict[:reward]) == (N_samples,) @test size(data_dict[:terminal]) == (N_samples,) - i = 1 - - for sample in ds - if i > 5 break end + for sample in Iterators.take(ds, 3) @test typeof(sample) <: NamedTuple - i += 1 end sample1 = iterate(ds) @@ -42,7 +40,7 @@ rng = StableRNG(123) @test length(iters) == 2 for iter in iters - @test typeof(iter) <: NamedTuple + @test typeof(iter) <: NamedTuple{SARTS} end @test iter1 != iter2 @@ -54,7 +52,7 @@ rng = StableRNG(123) end -@testset "dataset_d4rl" begin +@testset "dataset" begin ds = dataset( "hopper-medium-replay-v0"; style = style, @@ -63,20 +61,16 @@ end batch_size = batch_size ) - data_dict = ds.dataset + N_samples = size(data_dict[:state])[2] @test size(data_dict[:state]) == (n_s, N_samples) @test size(data_dict[:action]) == (n_a, N_samples) @test size(data_dict[:reward]) == (N_samples,) @test size(data_dict[:terminal]) == (N_samples,) - i = 1 - - for sample in ds - if i > 5 break end - @test typeof(sample) <: NamedTuple - i += 1 + for sample in Iterators.take(ds, 3) + @test typeof(sample) <: NamedTuple{SARTS} end sample1 = iterate(ds) diff --git a/src/ReinforcementLearningDatasets/test/runtests.jl b/src/ReinforcementLearningDatasets/test/runtests.jl index c20124cd2..8b3b7994c 100644 --- a/src/ReinforcementLearningDatasets/test/runtests.jl 
+++ b/src/ReinforcementLearningDatasets/test/runtests.jl @@ -6,5 +6,6 @@ using Test ENV["DATADEPS_ALWAYS_ACCEPT"] = "true" @testset "ReinforcementLearningDatasets.jl" begin - include("d4rl/d4rl_dataset.jl") + include("dataset.jl") + include("d4rl_pybullet.jl") end