Skip to content

Commit

Permalink
MultiAgent Proposal (#861)
Browse files Browse the repository at this point in the history
* add scrap

* broken, but moving forward

* tweaks

* more tweaks

* rough sketch now works

* Add multiagent section

* Try to get compilation to work

* test passes

* Drop file

* format files

* Fix random policy fix

* fix indentation

* add missing tags

* Handle final state

* Add sequential run function, drop seqenv

* drop seqenv

* Drop seq env

* Fix iteration

* Try adding multiagent hook

* Multiplayer fixes

* add export

* Tests pass

* Apply suggestions from code review

Co-authored-by: Henri Dehaybe <47037088+HenriDeh@users.noreply.github.com>

* add tests

* add shortcut

* Tweak tests

* drop function

* tests pass

* tweak dispatch

* expand tests

* fix tests

* Try new approach to players / iteration

* Tweak iteration approach

* drop current player iterator

* Add MultiAgentRL to ci/cd

* Add back CurrentPlayerIterator

* Try next_player! interface

* Fix test

* call functions with player as argument

* Fix reward function

* Fix reward info

* tests seem to pass...

* fix ci

* Fix cicd

* fix cicd

* fix import

* update has changed action

* fix gh action syntax

* Add to CI

* upgrade codecov to v3

* fix installation order

* fix install order

* RLEnv Tests pass

* Fix env tests Rock/Paper/Scissors

* Drop new package, add to RLCore

* drop multiagent

* Fix imports

* add random policy support for multiagent

* drop multi

* fixes

* Drop last traces of MultiAgent package

* Add RLCore dep to RLEnvs

* Fix env test errors

* Move next_player! to RLBase

* drop excess end

* Index on board, not on env
(so that current_player is not relevant after terminal state)

* Fix things

* Fix test

* Fix reset! function

* Expand tests

* Expand tests

* Fix tests

* Relax hook req

* Tweak tests

* Fix loop / call

* Fix test

* Fix tests

* Fix test

* Env tests

* add missing test

* Fix incomplete test

* fix test

* Fix import error

* Add more boilerplate tests, fix bug

* Fix import

* fix testset

* Fix test

* Add docstrings

* Add docstring

* Add multi-agent doc info

* Namespace fix

---------

Co-authored-by: Henri Dehaybe <47037088+HenriDeh@users.noreply.github.com>
  • Loading branch information
jeremiahpslewis and HenriDeh committed Apr 25, 2023
1 parent e2da341 commit 5143f60
Show file tree
Hide file tree
Showing 22 changed files with 624 additions and 182 deletions.
89 changes: 52 additions & 37 deletions .github/workflows/ci.yml
Original file line number Diff line number Diff line change
Expand Up @@ -12,7 +12,7 @@ on:
push:
branches:
- main
tags: '*'
tags: "*"
workflow_dispatch:
jobs:
test:
Expand All @@ -22,13 +22,13 @@ jobs:
fail-fast: false
matrix:
version:
- '1'
- "1"
os:
- ubuntu-latest
arch:
- x64
python-version:
- '3.8'
- "3.8"
steps:
- uses: actions/checkout@v3
with:
Expand All @@ -54,45 +54,51 @@ jobs:
${{ runner.os }}-test-
${{ runner.os }}-
- uses: marceloprado/has-changed-path@v1
- name: Get changed files
id: RLBase-changed
uses: tj-actions/changed-files@v35
with:
paths: src/ReinforcementLearningBase
files: |
src/ReinforcementLearningBase/**
- name: Test RLBase
if: (steps.RLBase-changed.outputs.changed == 'true') || (contains(github.event.pull_request.labels.*.name, 'Integration Test'))
if: (steps.RLBase-changed.outputs.any_changed == 'true') || (contains(github.event.pull_request.labels.*.name, 'Integration Test'))
run: |
julia --color=yes -e '
using Pkg;
Pkg.develop(path="src/ReinforcementLearningBase")
Pkg.test("ReinforcementLearningBase", coverage=true)'
- uses: julia-actions/julia-processcoverage@v1
- uses: codecov/codecov-action@v1
- uses: codecov/codecov-action@v3

- uses: marceloprado/has-changed-path@v1
- name: Get changed files
id: RLCore-changed
uses: tj-actions/changed-files@v35
with:
paths: src/ReinforcementLearningCore
files: |
src/ReinforcementLearningCore/**
- name: Test RLCore
if: (steps.RLBase-changed.outputs.changed == 'true') || (steps.RLCore-changed.outputs.changed == 'true') || (contains(github.event.pull_request.labels.*.name, 'Integration Test'))
if: (steps.RLBase-changed.outputs.any_changed == 'true') || (steps.RLCore-changed.outputs.any_changed == 'true') || (contains(github.event.pull_request.labels.*.name, 'Integration Test'))
run: |
julia --color=yes -e '
using Pkg;
Pkg.develop(path="src/ReinforcementLearningBase")
Pkg.develop(path="src/ReinforcementLearningEnvironments")
Pkg.develop(path="src/ReinforcementLearningCore")
Pkg.develop(path="src/ReinforcementLearningEnvironments")
Pkg.test("ReinforcementLearningCore", coverage=true)'
- uses: julia-actions/julia-processcoverage@v1
- uses: codecov/codecov-action@v1
- uses: codecov/codecov-action@v3

- uses: marceloprado/has-changed-path@v1
- name: Get changed files
id: RLZoo-changed
uses: tj-actions/changed-files@v35
with:
paths: src/ReinforcementLearningZoo
files: |
src/ReinforcementLearningZoo/**
- name: Test RLZoo
if: (steps.RLBase-changed.outputs.changed == 'true') || (steps.RLCore-changed.outputs.changed == 'true') || (steps.RLZoo-changed.outputs.changed == 'true') || (contains(github.event.pull_request.labels.*.name, 'Integration Test'))
if: (steps.RLBase-changed.outputs.any_changed == 'true') || (steps.RLCore-changed.outputs.any_changed == 'true') || (steps.RLZoo-changed.outputs.any_changed == 'true') || (contains(github.event.pull_request.labels.*.name, 'Integration Test'))
run: |
julia --color=yes -e '
using Pkg;
Expand All @@ -102,15 +108,17 @@ jobs:
Pkg.develop(path="src/ReinforcementLearningZoo")
Pkg.test("ReinforcementLearningZoo", coverage=true)'
- uses: julia-actions/julia-processcoverage@v1
- uses: codecov/codecov-action@v1
- uses: codecov/codecov-action@v3

- uses: marceloprado/has-changed-path@v1
- name: Get changed files
id: RLEnvironments-changed
uses: tj-actions/changed-files@v35
with:
paths: src/ReinforcementLearningEnvironments
files: |
src/ReinforcementLearningEnvironments/**
- name: Test RLEnvironments
if: (steps.RLBase-changed.outputs.changed == 'true') || (steps.RLCore-changed.outputs.changed == 'true') || (steps.RLEnvironments-changed.outputs.changed == 'true') || (contains(github.event.pull_request.labels.*.name, 'Integration Test'))
if: (steps.RLBase-changed.outputs.any_changed == 'true') || (steps.RLCore-changed.outputs.any_changed == 'true') || (steps.RLEnvironments-changed.outputs.any_changed == 'true') || (contains(github.event.pull_request.labels.*.name, 'Integration Test'))
run: |
julia --color=yes -e '
using Pkg;
Expand All @@ -119,30 +127,34 @@ jobs:
Pkg.develop(path="src/ReinforcementLearningEnvironments")
Pkg.test("ReinforcementLearningEnvironments", coverage=true)'
- uses: julia-actions/julia-processcoverage@v1
- uses: codecov/codecov-action@v1
- uses: marceloprado/has-changed-path@v1
- uses: codecov/codecov-action@v3

- name: Get changed files
id: RLDatasets-changed
uses: tj-actions/changed-files@v35
with:
paths: src/ReinforcementLearningDatasets
files: |
src/ReinforcementLearningDatasets/**
# - name: Test RLDatasets
# if: (steps.RLDatasets-changed.outputs.changed == 'true') || (contains(github.event.pull_request.labels.*.name, 'Integration Test'))
# if: (steps.RLDatasets-changed.outputs.any_changed == 'true') || (contains(github.event.pull_request.labels.*.name, 'Integration Test'))
# run: |
# julia --color=yes -e '
# using Pkg;
# Pkg.develop(path="src/ReinforcementLearningDatasets")
# Pkg.test("ReinforcementLearningDatasets", coverage=true)'
# - uses: julia-actions/julia-processcoverage@v1
# - uses: codecov/codecov-action@v1
# - uses: codecov/codecov-action@v3

- uses: marceloprado/has-changed-path@v1
- name: Get changed files
id: RLExperiments-changed
uses: tj-actions/changed-files@v35
with:
paths: src/ReinforcementLearningExperiments
files: |
src/ReinforcementLearningExperiments/**
- name: Test RLExperiments
if: (steps.RLBase-changed.outputs.changed == 'true') || (steps.RLCore-changed.outputs.changed == 'true') || (steps.RLZoo-changed.outputs.changed == 'true') || (steps.RLExperiments-changed.outputs.changed == 'true') || (contains(github.event.pull_request.labels.*.name, 'Integration Test'))
if: (steps.RLBase-changed.outputs.any_changed == 'true') || (steps.RLCore-changed.outputs.any_changed == 'true') || (steps.RLZoo-changed.outputs.any_changed == 'true') || (steps.RLExperiments-changed.outputs.any_changed == 'true') || (contains(github.event.pull_request.labels.*.name, 'Integration Test'))
run: |
julia --color=yes -e '
using Pkg
Expand All @@ -154,31 +166,34 @@ jobs:
Pkg.build("ReinforcementLearningExperiments")
Pkg.test("ReinforcementLearningExperiments", coverage=true)'
- uses: julia-actions/julia-processcoverage@v1
- uses: codecov/codecov-action@v1
- uses: codecov/codecov-action@v3

docs:
name: Documentation
runs-on: ubuntu-latest
steps:
- uses: actions/checkout@v3
with:
fetch-depth: 0
- uses: marceloprado/has-changed-path@v1
- name: Get changed files
id: documentation-changed
uses: tj-actions/changed-files@v35
with:
paths: docs
files: |
docs/**
- name: Setup python
uses: actions/setup-python@v4
with:
python-version: ${{ matrix.python-version }}
architecture: ${{ matrix.arch }}
- if: (steps.documentation-changed.outputs.changed == 'true')
- if: (steps.documentation-changed.outputs.any_changed == 'true')
run: python -m pip install --user matplotlib
- uses: julia-actions/setup-julia@v1
with:
version: '1'
version: "1"
- name: Build homepage
if: (steps.documentation-changed.outputs.changed == 'true')
if: (steps.documentation-changed.outputs.any_changed == 'true')
run: |
cd docs/homepage
julia --project --color=yes -e '
Expand All @@ -187,7 +202,7 @@ jobs:
using Franklin;
optimize()' > build.log
- name: Make sure homepage is generated without error
if: (steps.documentation-changed.outputs.changed == 'true')
if: (steps.documentation-changed.outputs.any_changed == 'true')
run: |
if grep -1 "Franklin Warning" build.log; then
echo "Franklin reported a warning"
Expand All @@ -196,7 +211,7 @@ jobs:
echo "Franklin did not report a warning"
fi
- name: Build docs
if: (steps.documentation-changed.outputs.changed == 'true')
if: (steps.documentation-changed.outputs.any_changed == 'true')
run: |
cd docs
julia --project --color=yes -e '
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -384,7 +384,9 @@ In offline reinforcement learning, we often assume the experience is prepared ah

### 3.4 Multi-Agent Reinforcement Learning

In our initial workflow, there's only one agent interacting with the environment. To expand it to the multi-agent setting, a policy wrapper of `MultiAgentPolicy` is added. At each stage, it fetch necessary information and forward the `env` to its children. Then based on the current player of the `env`, it selects the right child and generate an action properly.
In our initial workflow, there's only one agent interacting with the environment. To expand it to the multi-agent setting, a `MultiAgentPolicy` wrapper and a `MultiAgentHook` are added. At each stage, the policy fetches the necessary information and forwards the `env` to its children. Then, based on the current player of the `env`, it selects the right child and generates an action properly.

There are two `MultiAgent` cases, `Sequential` and `Simultaneous`. For `Sequential` environments, `RLBase.next_player!` and `current_player` must be implemented so that the `Base.run` loop knows the order of play. For `Simultaneous`, the working assumption is that all players provided in `MultiAgentPolicy` play every turn. Two basic examples are provided, `TicTacToeEnv` and `RockPaperScissorsEnv`.

#### Counterfactual Regret Minimization

Expand All @@ -398,10 +400,10 @@ For each policy in our package, we provide at least an `Experiment` to make sure

It's hard to imagine that it's been years since we created this package. The following tips are what we learned during this period:

1. Keep interfaces stupid simple and minimal
1. Keep interfaces simple and minimal

Adding new APIs is very cheap, but soon you will be the only one who knows
how to use them. Keeping APIs stupid simple and minimal will force you
how to use them. Keeping APIs simple and minimal will force you to
rethink your existing design and come up with a more natural one. Actually,
the multi-dispatch in Julia encourages you to generalize the interfaces as
much as possible.
Expand Down
10 changes: 9 additions & 1 deletion src/ReinforcementLearningBase/src/CommonRLInterface.jl
Original file line number Diff line number Diff line change
Expand Up @@ -94,4 +94,12 @@ ActionStyle(env::RLBaseEnv) =
CRL.provided(CRL.valid_actions, env.env) ? FullActionSet() : MinimalActionSet()

current_player(env::RLBaseEnv) = CRL.player(env.env)
players(env::RLBaseEnv) = CRL.players(env.env)
players(env::RLBaseEnv) = CRL.players(env.env)

"""
    next_player!(env::AbstractEnv)

Advance `env` to the next player. This is a no-op for single-player and
simultaneous games; `Sequential` `MultiAgent` games should add a method for
their concrete environment type.
"""
next_player!(env::AbstractEnv) = nothing
15 changes: 12 additions & 3 deletions src/ReinforcementLearningBase/src/base.jl
Original file line number Diff line number Diff line change
Expand Up @@ -109,6 +109,9 @@ function test_interfaces!(env)
@test state(Y) == state(X)
@test reward(Y) == reward(X)
@test is_terminated(Y) == is_terminated(X)

RLBase.next_player!(X)
RLBase.next_player!(Y)
end
end

Expand Down Expand Up @@ -151,17 +154,21 @@ function test_interfaces!(env)
reset!(env)
rewards = [0.0 for p in players(env)]
while !is_terminated(env)
if InformationStyle(env) === PERFECT_INFORMATION
for p in players(env)
for p in players(env)
if InformationStyle(env) === PERFECT_INFORMATION
@test state(env) == state(env, p)
end
# TODO: Make this test more specific...
@test !(RLBase.legal_action_space(env, p) isa Nothing)
@test !(RLBase.legal_action_space(env) isa Nothing)
end
a = rand(rng, legal_action_space(env))
a = rand(rng, RLBase.legal_action_space(env))
env(a)
for (i, p) in enumerate(players(env))
@test state(env, p) state_space(env, p)
rewards[i] += reward(env, p)
end
next_player!(env)
end
# even the game is already terminated
# make sure each player can still get some necessary info
Expand Down Expand Up @@ -207,6 +214,8 @@ function test_runnable!(env, n=1000; rng=Random.GLOBAL_RNG)
s = state(env)
@test s in S
env(a)
next_player!(env)

if is_terminated(env)
reset!(env)
end
Expand Down
3 changes: 2 additions & 1 deletion src/ReinforcementLearningBase/src/interface.jl
Original file line number Diff line number Diff line change
Expand Up @@ -102,7 +102,7 @@ abstract type AbstractNumAgentStyle <: AbstractEnvStyle end
`n` must be ≥ 2.
"""
function MultiAgent(n::Integer)
if n < 0
if n < 1
throw(ArgumentError("number of agents must be > 1, get $n"))
elseif n == 1
throw(ArgumentError("do you mean `SINGLE_AGENT`?"))
Expand Down Expand Up @@ -561,6 +561,7 @@ Treat the `env` as a game tree. Create an independent child after applying
@api function child(env::AbstractEnv, action)
new_env = copy(env)
new_env(action)
next_player!(new_env) # NoOp for simultaneous games and single player games
new_env
end

Expand Down
42 changes: 42 additions & 0 deletions src/ReinforcementLearningBase/test/interface.jl
Original file line number Diff line number Diff line change
@@ -0,0 +1,42 @@
# Unit tests for the RLBase interface: MultiAgent construction, state styles,
# and the default single-player AbstractEnv behavior.
using ReinforcementLearningBase
using Test

# Minimal single-player environment used to exercise the default interface methods.
struct TestEnv <: RLBase.AbstractEnv
    state::Int
end

function RLBase.state(env::TestEnv, ::Observation{Any}, ::DefaultPlayer)
    return env.state
end

@testset "MultiAgent" begin
    @test MultiAgent(2) isa MultiAgent
    @test_throws ArgumentError MultiAgent(1)
    @test_throws ArgumentError MultiAgent(-1)
end

# NOTE(review): the four testsets below originally omitted `@test`, so their
# `isa` expressions were evaluated and discarded without being checked.
@testset "InformationSet" begin
    @test InformationSet() isa RLBase.AbstractStateStyle
end

@testset "InternalState" begin
    @test InternalState() isa RLBase.AbstractStateStyle
end

@testset "Observation" begin
    @test Observation() isa RLBase.AbstractStateStyle
end

@testset "EpisodeStyle" begin
    @test EpisodeStyle(TestEnv(10)) isa RLBase.AbstractEpisodeStyle
end

@testset "AbstractEnv" begin
    @test TestEnv(10) isa RLBase.AbstractEnv
    @test TestEnv(10) == TestEnv(10)
    @test Base.hash(TestEnv(10), UInt64(0)) == Base.hash(TestEnv(10), UInt64(0))
end

@testset "players" begin
    @test simultaneous_player(TestEnv(10)) == SimultaneousPlayer()
    @test RLBase.players(TestEnv(10)) == (DefaultPlayer(),)
end
3 changes: 2 additions & 1 deletion src/ReinforcementLearningBase/test/runtests.jl
Original file line number Diff line number Diff line change
Expand Up @@ -8,5 +8,6 @@ using POMDPs
using POMDPTools: Deterministic

@testset "ReinforcementLearningBase" begin
include("CommonRLInterface.jl")
include("CommonRLInterface.jl")
include("interface.jl")
end
3 changes: 3 additions & 0 deletions src/ReinforcementLearningCore/src/policies/agent/agent.jl
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
# Aggregates the agent policy implementations for inclusion by the parent
# module; files are loaded in the order listed.
include("base.jl")
include("agent_srt_cache.jl")
include("multi_agent.jl")
Loading

0 comments on commit 5143f60

Please sign in to comment.