Skip to content

Commit

Permalink
MultiAgent Proposal (#861)
Browse files Browse the repository at this point in the history
* add scrap

* broken, but moving forward

* tweaks

* more tweaks

* rough sketch now works

* Add multiagent section

* Try to get compilation to work

* test passes

* Drop file

* format files

* Fix random policy fix

* fix indentation

* add missing tags

* Handle final state

* Add sequential run function, drop seqenv

* drop seqenv

* Drop seq env

* Fix iteration

* Try adding multiagent hook

* Multiplayer fixes

* add export

* Tests pass

* Apply suggestions from code review

Co-authored-by: Henri Dehaybe <47037088+HenriDeh@users.noreply.github.com>

* add tests

* add shortcut

* Tweak tests

* drop function

* tests pass

* tweak dispatch

* expand tests

* fix tests

* Try new approach to players / iteration

* Tweak iteration approach

* drop current player iterator

* Add MultiAgentRL to ci/cd

* Add back CurrentPlayerIterator

* Try next_player! interface

* Fix test

* call functions with player as argument

* Fix reward function

* Fix reward info

* tests seem to pass...

* fix ci

* Fix cicd

* fix cicd

* fix import

* update has changed action

* fix gh action syntax

* Add to CI

* upgrade codecov to v3

* fix installation order

* fix install order

* RLEnv Tests pass

* Fix env tests Rock/Paper/Scissors

* Drop new package, add to RLCore

* drop multiagent

* Fix imports

* add random policy support for multiagent

* drop multi

* fixes

* Drop last traces of MultiAgent package

* Add RLCore dep to RLEnvs

* Fix env test errors

* Move next_player! to RLBase

* drop excess end

* Index on board, not on env
(so that current_player is not relevant after terminal state)

* Fix things

* Fix test

* Fix reset! function

* Expand tests

* Expand tests

* Fix tests

* Relax hook req

* Tweak tests

* Fix loop / call

* Fix test

* Fix tests

* Fix test

* Env tests

* add missing test

* Fix incomplete test

* fix test

* Fix import error

* Add more boilerplate tests, fix bug

* Fix import

* fix testset

* Fix test

* Add docstrings

* Add docstring

* Add multi-agent doc info

* Namespace fix

---------

Co-authored-by: Henri Dehaybe <47037088+HenriDeh@users.noreply.github.com>
  • Loading branch information
jeremiahpslewis and HenriDeh committed Apr 25, 2023
1 parent e2da341 commit 5143f60
Show file tree
Hide file tree
Showing 22 changed files with 624 additions and 182 deletions.
89 changes: 52 additions & 37 deletions .github/workflows/ci.yml
Original file line number Diff line number Diff line change
Expand Up @@ -12,7 +12,7 @@ on:
push:
branches:
- main
tags: '*'
tags: "*"
workflow_dispatch:
jobs:
test:
Expand All @@ -22,13 +22,13 @@ jobs:
fail-fast: false
matrix:
version:
- '1'
- "1"
os:
- ubuntu-latest
arch:
- x64
python-version:
- '3.8'
- "3.8"
steps:
- uses: actions/checkout@v3
with:
Expand All @@ -54,45 +54,51 @@ jobs:
${{ runner.os }}-test-
${{ runner.os }}-
- uses: marceloprado/has-changed-path@v1
- name: Get changed files
id: RLBase-changed
uses: tj-actions/changed-files@v35
with:
paths: src/ReinforcementLearningBase
files: |
src/ReinforcementLearningBase/**
- name: Test RLBase
if: (steps.RLBase-changed.outputs.changed == 'true') || (contains(github.event.pull_request.labels.*.name, 'Integration Test'))
if: (steps.RLBase-changed.outputs.any_changed == 'true') || (contains(github.event.pull_request.labels.*.name, 'Integration Test'))
run: |
julia --color=yes -e '
using Pkg;
Pkg.develop(path="src/ReinforcementLearningBase")
Pkg.test("ReinforcementLearningBase", coverage=true)'
- uses: julia-actions/julia-processcoverage@v1
- uses: codecov/codecov-action@v1
- uses: codecov/codecov-action@v3

- uses: marceloprado/has-changed-path@v1
- name: Get changed files
id: RLCore-changed
uses: tj-actions/changed-files@v35
with:
paths: src/ReinforcementLearningCore
files: |
src/ReinforcementLearningCore/**
- name: Test RLCore
if: (steps.RLBase-changed.outputs.changed == 'true') || (steps.RLCore-changed.outputs.changed == 'true') || (contains(github.event.pull_request.labels.*.name, 'Integration Test'))
if: (steps.RLBase-changed.outputs.any_changed == 'true') || (steps.RLCore-changed.outputs.any_changed == 'true') || (contains(github.event.pull_request.labels.*.name, 'Integration Test'))
run: |
julia --color=yes -e '
using Pkg;
Pkg.develop(path="src/ReinforcementLearningBase")
Pkg.develop(path="src/ReinforcementLearningEnvironments")
Pkg.develop(path="src/ReinforcementLearningCore")
Pkg.develop(path="src/ReinforcementLearningEnvironments")
Pkg.test("ReinforcementLearningCore", coverage=true)'
- uses: julia-actions/julia-processcoverage@v1
- uses: codecov/codecov-action@v1
- uses: codecov/codecov-action@v3

- uses: marceloprado/has-changed-path@v1
- name: Get changed files
id: RLZoo-changed
uses: tj-actions/changed-files@v35
with:
paths: src/ReinforcementLearningZoo
files: |
src/ReinforcementLearningZoo/**
- name: Test RLZoo
if: (steps.RLBase-changed.outputs.changed == 'true') || (steps.RLCore-changed.outputs.changed == 'true') || (steps.RLZoo-changed.outputs.changed == 'true') || (contains(github.event.pull_request.labels.*.name, 'Integration Test'))
if: (steps.RLBase-changed.outputs.any_changed == 'true') || (steps.RLCore-changed.outputs.any_changed == 'true') || (steps.RLZoo-changed.outputs.any_changed == 'true') || (contains(github.event.pull_request.labels.*.name, 'Integration Test'))
run: |
julia --color=yes -e '
using Pkg;
Expand All @@ -102,15 +108,17 @@ jobs:
Pkg.develop(path="src/ReinforcementLearningZoo")
Pkg.test("ReinforcementLearningZoo", coverage=true)'
- uses: julia-actions/julia-processcoverage@v1
- uses: codecov/codecov-action@v1
- uses: codecov/codecov-action@v3

- uses: marceloprado/has-changed-path@v1
- name: Get changed files
id: RLEnvironments-changed
uses: tj-actions/changed-files@v35
with:
paths: src/ReinforcementLearningEnvironments
files: |
src/ReinforcementLearningEnvironments/**
- name: Test RLEnvironments
if: (steps.RLBase-changed.outputs.changed == 'true') || (steps.RLCore-changed.outputs.changed == 'true') || (steps.RLEnvironments-changed.outputs.changed == 'true') || (contains(github.event.pull_request.labels.*.name, 'Integration Test'))
if: (steps.RLBase-changed.outputs.any_changed == 'true') || (steps.RLCore-changed.outputs.any_changed == 'true') || (steps.RLEnvironments-changed.outputs.any_changed == 'true') || (contains(github.event.pull_request.labels.*.name, 'Integration Test'))
run: |
julia --color=yes -e '
using Pkg;
Expand All @@ -119,30 +127,34 @@ jobs:
Pkg.develop(path="src/ReinforcementLearningEnvironments")
Pkg.test("ReinforcementLearningEnvironments", coverage=true)'
- uses: julia-actions/julia-processcoverage@v1
- uses: codecov/codecov-action@v1
- uses: marceloprado/has-changed-path@v1
- uses: codecov/codecov-action@v3

- name: Get changed files
id: RLDatasets-changed
uses: tj-actions/changed-files@v35
with:
paths: src/ReinforcementLearningDatasets
files: |
src/ReinforcementLearningDatasets/**
# - name: Test RLDatasets
# if: (steps.RLDatasets-changed.outputs.changed == 'true') || (contains(github.event.pull_request.labels.*.name, 'Integration Test'))
# if: (steps.RLDatasets-changed.outputs.any_changed == 'true') || (contains(github.event.pull_request.labels.*.name, 'Integration Test'))
# run: |
# julia --color=yes -e '
# using Pkg;
# Pkg.develop(path="src/ReinforcementLearningDatasets")
# Pkg.test("ReinforcementLearningDatasets", coverage=true)'
# - uses: julia-actions/julia-processcoverage@v1
# - uses: codecov/codecov-action@v1
# - uses: codecov/codecov-action@v3

- uses: marceloprado/has-changed-path@v1
- name: Get changed files
id: RLExperiments-changed
uses: tj-actions/changed-files@v35
with:
paths: src/ReinforcementLearningExperiments
files: |
src/ReinforcementLearningExperiments/**
- name: Test RLExperiments
if: (steps.RLBase-changed.outputs.changed == 'true') || (steps.RLCore-changed.outputs.changed == 'true') || (steps.RLZoo-changed.outputs.changed == 'true') || (steps.RLExperiments-changed.outputs.changed == 'true') || (contains(github.event.pull_request.labels.*.name, 'Integration Test'))
if: (steps.RLBase-changed.outputs.any_changed == 'true') || (steps.RLCore-changed.outputs.any_changed == 'true') || (steps.RLZoo-changed.outputs.any_changed == 'true') || (steps.RLExperiments-changed.outputs.any_changed == 'true') || (contains(github.event.pull_request.labels.*.name, 'Integration Test'))
run: |
julia --color=yes -e '
using Pkg
Expand All @@ -154,31 +166,34 @@ jobs:
Pkg.build("ReinforcementLearningExperiments")
Pkg.test("ReinforcementLearningExperiments", coverage=true)'
- uses: julia-actions/julia-processcoverage@v1
- uses: codecov/codecov-action@v1
- uses: codecov/codecov-action@v3

docs:
name: Documentation
runs-on: ubuntu-latest
steps:
- uses: actions/checkout@v3
with:
fetch-depth: 0
- uses: marceloprado/has-changed-path@v1
- name: Get changed files
id: documentation-changed
uses: tj-actions/changed-files@v35
with:
paths: docs
files: |
docs/**
- name: Setup python
uses: actions/setup-python@v4
with:
python-version: ${{ matrix.python-version }}
architecture: ${{ matrix.arch }}
- if: (steps.documentation-changed.outputs.changed == 'true')
- if: (steps.documentation-changed.outputs.any_changed == 'true')
run: python -m pip install --user matplotlib
- uses: julia-actions/setup-julia@v1
with:
version: '1'
version: "1"
- name: Build homepage
if: (steps.documentation-changed.outputs.changed == 'true')
if: (steps.documentation-changed.outputs.any_changed == 'true')
run: |
cd docs/homepage
julia --project --color=yes -e '
Expand All @@ -187,7 +202,7 @@ jobs:
using Franklin;
optimize()' > build.log
- name: Make sure homepage is generated without error
if: (steps.documentation-changed.outputs.changed == 'true')
if: (steps.documentation-changed.outputs.any_changed == 'true')
run: |
if grep -1 "Franklin Warning" build.log; then
echo "Franklin reported a warning"
Expand All @@ -196,7 +211,7 @@ jobs:
echo "Franklin did not report a warning"
fi
- name: Build docs
if: (steps.documentation-changed.outputs.changed == 'true')
if: (steps.documentation-changed.outputs.any_changed == 'true')
run: |
cd docs
julia --project --color=yes -e '
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -384,7 +384,9 @@ In offline reinforcement learning, we often assume the experience is prepared ah

### 3.4 Multi-Agent Reinforcement Learning

In our initial workflow, there's only one agent interacting with the environment. To expand it to the multi-agent setting, a policy wrapper of `MultiAgentPolicy` is added. At each stage, it fetch necessary information and forward the `env` to its children. Then based on the current player of the `env`, it selects the right child and generate an action properly.
In our initial workflow, there's only one agent interacting with the environment. To expand it to the multi-agent setting, a `MultiAgentPolicy` wrapper and a `MultiAgentHook` are added. At each stage, the policy fetches the necessary information and forwards the `env` to its children. Then, based on the current player of the `env`, it selects the right child and generates an action properly.

There are two `MultiAgent` cases, `Sequential` and `Simultaneous`. For `Sequential` environments, `RLBase.next_player!` and `current_player` must be implemented so that the `Base.run` loop knows the order of play. For `Simultaneous`, the working assumption is that all players provided in `MultiAgentPolicy` play every turn. Two basic examples are provided, `TicTacToeEnv` and `RockPaperScissorsEnv`.

#### Counterfactual Regret Minimization

Expand All @@ -398,10 +400,10 @@ For each policy in our package, we provide at least an `Experiment` to make sure

It's hard to imagine that it's been years since we created this package. The following tips are what we learned during this period:

1. Keep interfaces stupid simple and minimal
1. Keep interfaces simple and minimal

Adding new APIs is very cheap, but soon you will be the only one who knows
how to use them. Keeping APIs stupid simple and minimal will force you
how to use them. Keeping APIs simple and minimal will force you to
rethink your existing design and come up with a more natural one. Actually,
the multi-dispatch in Julia encourages you to generalize the interfaces as
much as possible.
Expand Down
10 changes: 9 additions & 1 deletion src/ReinforcementLearningBase/src/CommonRLInterface.jl
Original file line number Diff line number Diff line change
Expand Up @@ -94,4 +94,12 @@ ActionStyle(env::RLBaseEnv) =
CRL.provided(CRL.valid_actions, env.env) ? FullActionSet() : MinimalActionSet()

current_player(env::RLBaseEnv) = CRL.player(env.env)
players(env::RLBaseEnv) = CRL.players(env.env)
players(env::RLBaseEnv) = CRL.players(env.env)

"""
    next_player!(env::AbstractEnv)

Advance `env` to the next player. This is a no-op for single-player and
simultaneous games; `Sequential` `MultiAgent` games should add a method for
their concrete environment type.
"""
next_player!(env::AbstractEnv) = nothing
15 changes: 12 additions & 3 deletions src/ReinforcementLearningBase/src/base.jl
Original file line number Diff line number Diff line change
Expand Up @@ -109,6 +109,9 @@ function test_interfaces!(env)
@test state(Y) == state(X)
@test reward(Y) == reward(X)
@test is_terminated(Y) == is_terminated(X)

RLBase.next_player!(X)
RLBase.next_player!(Y)
end
end

Expand Down Expand Up @@ -151,17 +154,21 @@ function test_interfaces!(env)
reset!(env)
rewards = [0.0 for p in players(env)]
while !is_terminated(env)
if InformationStyle(env) === PERFECT_INFORMATION
for p in players(env)
for p in players(env)
if InformationStyle(env) === PERFECT_INFORMATION
@test state(env) == state(env, p)
end
# TODO: Make this test more specific...
@test !(RLBase.legal_action_space(env, p) isa Nothing)
@test !(RLBase.legal_action_space(env) isa Nothing)
end
a = rand(rng, legal_action_space(env))
a = rand(rng, RLBase.legal_action_space(env))
env(a)
for (i, p) in enumerate(players(env))
@test state(env, p) state_space(env, p)
rewards[i] += reward(env, p)
end
next_player!(env)
end
# even the game is already terminated
# make sure each player can still get some necessary info
Expand Down Expand Up @@ -207,6 +214,8 @@ function test_runnable!(env, n=1000; rng=Random.GLOBAL_RNG)
s = state(env)
@test s in S
env(a)
next_player!(env)

if is_terminated(env)
reset!(env)
end
Expand Down
3 changes: 2 additions & 1 deletion src/ReinforcementLearningBase/src/interface.jl
Original file line number Diff line number Diff line change
Expand Up @@ -102,7 +102,7 @@ abstract type AbstractNumAgentStyle <: AbstractEnvStyle end
`n` must be ≥ 2.
"""
function MultiAgent(n::Integer)
if n < 0
if n < 1
throw(ArgumentError("number of agents must be > 1, get $n"))
elseif n == 1
throw(ArgumentError("do you mean `SINGLE_AGENT`?"))
Expand Down Expand Up @@ -561,6 +561,7 @@ Treat the `env` as a game tree. Create an independent child after applying
@api function child(env::AbstractEnv, action)
new_env = copy(env)
new_env(action)
next_player!(new_env) # NoOp for simultaneous games and single player games
new_env
end

Expand Down
42 changes: 42 additions & 0 deletions src/ReinforcementLearningBase/test/interface.jl
Original file line number Diff line number Diff line change
@@ -0,0 +1,42 @@
# Unit tests for the RLBase interface: MultiAgent construction, state styles,
# and the default single-player AbstractEnv behavior.
using ReinforcementLearningBase
using Test

# Minimal single-player environment used to exercise the default interface methods.
struct TestEnv <: RLBase.AbstractEnv
    state::Int
end

function RLBase.state(env::TestEnv, ::Observation{Any}, ::DefaultPlayer)
    return env.state
end

@testset "MultiAgent" begin
    @test MultiAgent(2) isa MultiAgent
    @test_throws ArgumentError MultiAgent(1)
    @test_throws ArgumentError MultiAgent(-1)
end

# NOTE(review): the four testsets below originally omitted `@test`, so their
# `isa` expressions were evaluated and discarded without being checked.
@testset "InformationSet" begin
    @test InformationSet() isa RLBase.AbstractStateStyle
end

@testset "InternalState" begin
    @test InternalState() isa RLBase.AbstractStateStyle
end

@testset "Observation" begin
    @test Observation() isa RLBase.AbstractStateStyle
end

@testset "EpisodeStyle" begin
    @test EpisodeStyle(TestEnv(10)) isa RLBase.AbstractEpisodeStyle
end

@testset "AbstractEnv" begin
    @test TestEnv(10) isa RLBase.AbstractEnv
    @test TestEnv(10) == TestEnv(10)
    @test Base.hash(TestEnv(10), UInt64(0)) == Base.hash(TestEnv(10), UInt64(0))
end

@testset "players" begin
    @test simultaneous_player(TestEnv(10)) == SimultaneousPlayer()
    @test RLBase.players(TestEnv(10)) == (DefaultPlayer(),)
end
3 changes: 2 additions & 1 deletion src/ReinforcementLearningBase/test/runtests.jl
Original file line number Diff line number Diff line change
Expand Up @@ -8,5 +8,6 @@ using POMDPs
using POMDPTools: Deterministic

@testset "ReinforcementLearningBase" begin
include("CommonRLInterface.jl")
include("CommonRLInterface.jl")
include("interface.jl")
end
3 changes: 3 additions & 0 deletions src/ReinforcementLearningCore/src/policies/agent/agent.jl
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
# Aggregates the agent policy implementations for inclusion by the parent
# module; files are loaded in the order listed.
include("base.jl")
include("agent_srt_cache.jl")
include("multi_agent.jl")
Loading

0 comments on commit 5143f60

Please sign in to comment.