Skip to content

Commit

Permalink
add custom env examples
Browse files Browse the repository at this point in the history
add custom env examples
  • Loading branch information
huangshiyu13 committed Aug 24, 2023
2 parents 72bc4f7 + ff78415 commit 0abdce9
Show file tree
Hide file tree
Showing 14 changed files with 547 additions and 7 deletions.
2 changes: 2 additions & 0 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -63,6 +63,8 @@ Currently, the features supported by OpenRL include:

- Importing models and datasets from [Hugging Face](https://huggingface.co/)

- [Tutorial](https://openrl-docs.readthedocs.io/en/latest/custom_env/index.html) on how to integrate user-defined environments into OpenRL.

- Support for models such as LSTM, GRU, Transformer etc.

- Multiple training acceleration methods including automatic mixed precision training and data collection with half
Expand Down
1 change: 1 addition & 0 deletions README_zh.md
Original file line number Diff line number Diff line change
Expand Up @@ -53,6 +53,7 @@ OpenRL基于PyTorch进行开发,目标是为强化学习研究社区提供一
- 支持自然语言任务(如对话任务)的强化学习训练
- 支持[竞技场](https://openrl-docs.readthedocs.io/zh/latest/arena/index.html)功能,可以在多智能体对抗性环境中方便地对各种智能体进行评测。
- 支持从[Hugging Face](https://huggingface.co/)上导入模型和数据
- 提供用户自有环境接入OpenRL的[详细教程](https://openrl-docs.readthedocs.io/zh/latest/custom_env/index.html)。
- 支持LSTM,GRU,Transformer等模型
- 支持多种训练加速,例如:自动混合精度训练,半精度策略网络收集数据等
- 支持用户自定义训练模型、奖励模型、训练数据以及环境
Expand Down
8 changes: 8 additions & 0 deletions examples/custom_env/README.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,8 @@
# Integrate user-defined environments into OpenRL


Here, we provide several toy examples to show how to add user-defined environments into OpenRL.

- `gymnasium_env.py`: a simple example to show how to create a Gymnasium environment and integrate it into OpenRL.
- `openai_gym_env.py`: a simple example to show how to create an OpenAI Gym environment and integrate it into OpenRL.
- `pettingzoo_env.py`: a simple example to show how to create a PettingZoo environment and integrate it into OpenRL.
80 changes: 80 additions & 0 deletions examples/custom_env/gymnasium_env.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,80 @@
#!/usr/bin/env python
# -*- coding: utf-8 -*-
# Copyright 2023 The OpenRL Authors.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# https://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

""""""

from typing import Any, Dict, Optional

import gymnasium as gym
from gymnasium import spaces
from gymnasium.envs.registration import EnvSpec, register
from gymnasium.utils import seeding
from train_and_test import train_and_test

from openrl.envs.common import make


class IdentityEnv(gym.Env):
    """Minimal Gymnasium environment used to demonstrate OpenRL integration.

    The observation is a single random integer in ``[0, dim)`` and every
    step yields a constant reward of 1; episodes last ``ep_length`` steps.
    """

    spec = EnvSpec("IdentityEnv")

    def __init__(self, **kwargs):
        # dim is both the number of discrete actions and the range of the
        # randomly generated state value.
        self.dim = 2
        self.observation_space = spaces.Discrete(1)
        self.action_space = spaces.Discrete(self.dim)
        self.ep_length = 5  # fixed episode length, in steps
        self.current_step = 0

    def reset(
        self,
        *,
        seed: Optional[int] = None,
        options: Optional[Dict[str, Any]] = None,
    ):
        """Reset the episode counter and draw a fresh state.

        Returns the ``(observation, info)`` pair required by the
        Gymnasium reset API.
        """
        if seed is not None:
            self.seed(seed)
        self.current_step = 0
        self.generate_state()
        return self.state, {}

    def step(self, action):
        """Advance one step; the action is ignored and reward is always 1.

        NOTE(review): this returns the legacy 4-tuple
        ``(obs, reward, done, info)`` rather than Gymnasium's 5-tuple
        ``(obs, reward, terminated, truncated, info)`` — presumably
        OpenRL's ``make`` wrapper accepts the legacy form; confirm.
        """
        reward = 1
        self.generate_state()
        self.current_step += 1
        done = self.current_step >= self.ep_length
        return self.state, reward, done, {}

    def generate_state(self) -> None:
        # Use the lazily-initializing ``np_random`` property instead of the
        # raw ``_np_random`` attribute: gymnasium initializes ``_np_random``
        # to None, so an unseeded reset() previously crashed with
        # AttributeError here.
        self.state = [self.np_random.integers(0, self.dim)]

    def render(self, mode: str = "human") -> None:
        # Nothing to visualize for this toy environment.
        pass

    def seed(self, seed: Optional[int] = None) -> None:
        # Explicitly (re)seed the environment's RNG.
        if seed is not None:
            self._np_random, seed = seeding.np_random(seed)

    def close(self):
        # No resources to release.
        pass


# Register the custom environment under a namespaced id; the entry point
# "module:Class" lets Gymnasium import IdentityEnv from this file by name.
register(
    id="Custom_Env/IdentityEnv",
    entry_point="gymnasium_env:IdentityEnv",
)

# Create 10 parallel copies of the environment via OpenRL's vectorized make().
env = make("Custom_Env/IdentityEnv", env_num=10)

# Train an agent on the vectorized env and evaluate it (helper defined in
# train_and_test.py alongside this example).
train_and_test(env)
80 changes: 80 additions & 0 deletions examples/custom_env/openai_gym_env.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,80 @@
#!/usr/bin/env python
# -*- coding: utf-8 -*-
# Copyright 2023 The OpenRL Authors.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# https://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

""""""

from typing import Any, Dict, Optional

import gym
from gym import spaces
from gym.envs.registration import EnvSpec, register
from gym.utils import seeding
from train_and_test import train_and_test

from openrl.envs.common import make


class IdentityEnv(gym.Env):
    """Minimal OpenAI Gym (legacy API) environment for the OpenRL example.

    The observation is a single random integer in ``[0, dim)`` and every
    step yields a constant reward of 1; episodes last ``ep_length`` steps.
    """

    spec = EnvSpec("IdentityEnv-v1")

    def __init__(self, **kwargs):
        # dim is both the number of discrete actions and the range of the
        # randomly generated state value.
        self.dim = 2
        self.observation_space = spaces.Discrete(1)
        self.action_space = spaces.Discrete(self.dim)
        self.ep_length = 5  # fixed episode length, in steps
        self.current_step = 0
        # Ensure the RNG exists even if reset() is called without a seed;
        # previously an unseeded reset() crashed in generate_state() because
        # _np_random was never created.
        self.seed()

    def reset(
        self,
        *,
        seed: Optional[int] = None,
        options: Optional[Dict[str, Any]] = None,
    ):
        """Reset the episode counter and draw a fresh state.

        Returns only the observation, per the legacy (pre-0.26) Gym API
        that the GymV21Environment wrapper expects.
        """
        if seed is not None:
            self.seed(seed)
        self.current_step = 0
        self.generate_state()
        return self.state

    def step(self, action):
        """Advance one step; the action is ignored and reward is always 1.

        Returns the legacy 4-tuple ``(obs, reward, done, info)``.
        """
        reward = 1
        self.generate_state()
        self.current_step += 1
        done = self.current_step >= self.ep_length
        return self.state, reward, done, {}

    def generate_state(self) -> None:
        # randint's upper bound is exclusive, so (0, self.dim) samples
        # 0..dim-1, matching action_space; the previous (0, self.dim - 1)
        # always produced 0.
        # NOTE(review): assumes gym's seeding returns a RandomState (which
        # has .randint); newer gym versions return a Generator — confirm
        # against the pinned gym version.
        self.state = [self._np_random.randint(0, self.dim)]

    def render(self, mode: str = "human") -> None:
        # Nothing to visualize for this toy environment.
        pass

    def seed(self, seed: Optional[int] = None) -> None:
        # seeding.np_random(None) draws entropy from the OS, so this always
        # creates a valid RNG whether or not an explicit seed is given.
        self._np_random, seed = seeding.np_random(seed)

    def close(self):
        # No resources to release.
        pass


# Register the custom environment with OpenAI Gym; the entry point
# "module:Class" lets Gym import IdentityEnv from this file by name.
register(
    id="Custom_Env/IdentityEnv-v1",
    entry_point="openai_gym_env:IdentityEnv",
)

# The "GymV21Environment-v0:" prefix tells OpenRL's make() to wrap the
# legacy Gym (v0.21-style) API into the modern interface; env_num=10
# creates 10 parallel copies.
env = make("GymV21Environment-v0:Custom_Env/IdentityEnv-v1", env_num=10)

# Train an agent on the vectorized env and evaluate it (helper defined in
# train_and_test.py alongside this example).
train_and_test(env)
35 changes: 35 additions & 0 deletions examples/custom_env/pettingzoo_env.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,35 @@
#!/usr/bin/env python
# -*- coding: utf-8 -*-
# Copyright 2023 The OpenRL Authors.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# https://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

""""""


from rock_paper_scissors import RockPaperScissors
from train_and_test import train_and_test

from openrl.envs.common import make
from openrl.envs.PettingZoo.registration import register
from openrl.selfplay.wrappers.random_opponent_wrapper import RandomOpponentWrapper

# Register the PettingZoo environment class with OpenRL under the name
# "RockPaperScissors" so make() can construct it.
register("RockPaperScissors", RockPaperScissors)
# Create 10 parallel copies; the opponent side of this two-player game is
# driven by RandomOpponentWrapper (random legal moves), which reduces
# training to a single-agent problem.
env = make(
    "RockPaperScissors",
    env_num=10,
    opponent_wrappers=[RandomOpponentWrapper],
)
# Initial reset; obs/info are unused here — presumably train_and_test()
# resets again internally (confirm against train_and_test.py).
obs, info = env.reset()

train_and_test(env)

0 comments on commit 0abdce9

Please sign in to comment.