Skip to content

Commit

Permalink
add custom env examples
Browse files Browse the repository at this point in the history
add custom env examples
  • Loading branch information
huangshiyu13 committed Aug 24, 2023
2 parents 72bc4f7 + ff78415 commit 0abdce9
Show file tree
Hide file tree
Showing 14 changed files with 547 additions and 7 deletions.
2 changes: 2 additions & 0 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -63,6 +63,8 @@ Currently, the features supported by OpenRL include:

- Importing models and datasets from [Hugging Face](https://huggingface.co/)

- [Tutorial](https://openrl-docs.readthedocs.io/en/latest/custom_env/index.html) on how to integrate user-defined environments into OpenRL.

- Support for models such as LSTM, GRU, Transformer etc.

- Multiple training acceleration methods including automatic mixed precision training and data collection with half
Expand Down
1 change: 1 addition & 0 deletions README_zh.md
Original file line number Diff line number Diff line change
Expand Up @@ -53,6 +53,7 @@ OpenRL基于PyTorch进行开发,目标是为强化学习研究社区提供一
- 支持自然语言任务(如对话任务)的强化学习训练
- 支持[竞技场](https://openrl-docs.readthedocs.io/zh/latest/arena/index.html)功能,可以在多智能体对抗性环境中方便地对各种智能体进行评测。
- 支持从[Hugging Face](https://huggingface.co/)上导入模型和数据
- 提供用户自有环境接入OpenRL的[详细教程](https://openrl-docs.readthedocs.io/zh/latest/custom_env/index.html)。
- 支持LSTM,GRU,Transformer等模型
- 支持多种训练加速,例如:自动混合精度训练,半精度策略网络收集数据等
- 支持用户自定义训练模型、奖励模型、训练数据以及环境
Expand Down
8 changes: 8 additions & 0 deletions examples/custom_env/README.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,8 @@
# Integrate user-defined environments into OpenRL


Here, we provide several toy examples to show how to add user-defined environments into OpenRL.

- `gymnasium_env.py`: a simple example to show how to create a Gymnasium environment and integrate it into OpenRL.
- `openai_gym_env.py`: a simple example to show how to create an OpenAI Gym environment and integrate it into OpenRL.
- `pettingzoo_env.py`: a simple example to show how to create a PettingZoo environment and integrate it into OpenRL.
80 changes: 80 additions & 0 deletions examples/custom_env/gymnasium_env.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,80 @@
#!/usr/bin/env python
# -*- coding: utf-8 -*-
# Copyright 2023 The OpenRL Authors.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# https://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

""""""

from typing import Any, Dict, Optional

import gymnasium as gym
from gymnasium import spaces
from gymnasium.envs.registration import EnvSpec, register
from gymnasium.utils import seeding
from train_and_test import train_and_test

from openrl.envs.common import make


class IdentityEnv(gym.Env):
    """Minimal Gymnasium environment used to demonstrate OpenRL integration.

    The observation is a single random integer in ``[0, dim)`` and every
    step yields a constant reward of 1; episodes last ``ep_length`` steps.
    """

    spec = EnvSpec("IdentityEnv")

    def __init__(self, **kwargs):
        # dim is both the number of discrete actions and the range of the
        # randomly generated state value.
        self.dim = 2
        self.observation_space = spaces.Discrete(1)
        self.action_space = spaces.Discrete(self.dim)
        self.ep_length = 5  # fixed episode length, in steps
        self.current_step = 0

    def reset(
        self,
        *,
        seed: Optional[int] = None,
        options: Optional[Dict[str, Any]] = None,
    ):
        """Reset the episode counter and draw a fresh state.

        Returns the ``(observation, info)`` pair required by the
        Gymnasium reset API.
        """
        if seed is not None:
            self.seed(seed)
        self.current_step = 0
        self.generate_state()
        return self.state, {}

    def step(self, action):
        """Advance one step; the action is ignored and reward is always 1.

        NOTE(review): this returns the legacy 4-tuple
        ``(obs, reward, done, info)`` rather than Gymnasium's 5-tuple
        ``(obs, reward, terminated, truncated, info)`` — presumably
        OpenRL's ``make`` wrapper accepts the legacy form; confirm.
        """
        reward = 1
        self.generate_state()
        self.current_step += 1
        done = self.current_step >= self.ep_length
        return self.state, reward, done, {}

    def generate_state(self) -> None:
        # Use the lazily-initializing ``np_random`` property instead of the
        # raw ``_np_random`` attribute: gymnasium initializes ``_np_random``
        # to None, so an unseeded reset() previously crashed with
        # AttributeError here.
        self.state = [self.np_random.integers(0, self.dim)]

    def render(self, mode: str = "human") -> None:
        # Nothing to visualize for this toy environment.
        pass

    def seed(self, seed: Optional[int] = None) -> None:
        # Explicitly (re)seed the environment's RNG.
        if seed is not None:
            self._np_random, seed = seeding.np_random(seed)

    def close(self):
        # No resources to release.
        pass


# Register the custom environment under a namespaced id; the entry point
# "module:Class" lets Gymnasium import IdentityEnv from this file by name.
register(
    id="Custom_Env/IdentityEnv",
    entry_point="gymnasium_env:IdentityEnv",
)

# Create 10 parallel copies of the environment via OpenRL's vectorized make().
env = make("Custom_Env/IdentityEnv", env_num=10)

# Train an agent on the vectorized env and evaluate it (helper defined in
# train_and_test.py alongside this example).
train_and_test(env)
80 changes: 80 additions & 0 deletions examples/custom_env/openai_gym_env.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,80 @@
#!/usr/bin/env python
# -*- coding: utf-8 -*-
# Copyright 2023 The OpenRL Authors.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# https://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

""""""

from typing import Any, Dict, Optional

import gym
from gym import spaces
from gym.envs.registration import EnvSpec, register
from gym.utils import seeding
from train_and_test import train_and_test

from openrl.envs.common import make


class IdentityEnv(gym.Env):
    """Minimal OpenAI Gym (legacy API) environment for the OpenRL example.

    The observation is a single random integer in ``[0, dim)`` and every
    step yields a constant reward of 1; episodes last ``ep_length`` steps.
    """

    spec = EnvSpec("IdentityEnv-v1")

    def __init__(self, **kwargs):
        # dim is both the number of discrete actions and the range of the
        # randomly generated state value.
        self.dim = 2
        self.observation_space = spaces.Discrete(1)
        self.action_space = spaces.Discrete(self.dim)
        self.ep_length = 5  # fixed episode length, in steps
        self.current_step = 0
        # Ensure the RNG exists even if reset() is called without a seed;
        # previously an unseeded reset() crashed in generate_state() because
        # _np_random was never created.
        self.seed()

    def reset(
        self,
        *,
        seed: Optional[int] = None,
        options: Optional[Dict[str, Any]] = None,
    ):
        """Reset the episode counter and draw a fresh state.

        Returns only the observation, per the legacy (pre-0.26) Gym API
        that the GymV21Environment wrapper expects.
        """
        if seed is not None:
            self.seed(seed)
        self.current_step = 0
        self.generate_state()
        return self.state

    def step(self, action):
        """Advance one step; the action is ignored and reward is always 1.

        Returns the legacy 4-tuple ``(obs, reward, done, info)``.
        """
        reward = 1
        self.generate_state()
        self.current_step += 1
        done = self.current_step >= self.ep_length
        return self.state, reward, done, {}

    def generate_state(self) -> None:
        # randint's upper bound is exclusive, so (0, self.dim) samples
        # 0..dim-1, matching action_space; the previous (0, self.dim - 1)
        # always produced 0.
        # NOTE(review): assumes gym's seeding returns a RandomState (which
        # has .randint); newer gym versions return a Generator — confirm
        # against the pinned gym version.
        self.state = [self._np_random.randint(0, self.dim)]

    def render(self, mode: str = "human") -> None:
        # Nothing to visualize for this toy environment.
        pass

    def seed(self, seed: Optional[int] = None) -> None:
        # seeding.np_random(None) draws entropy from the OS, so this always
        # creates a valid RNG whether or not an explicit seed is given.
        self._np_random, seed = seeding.np_random(seed)

    def close(self):
        # No resources to release.
        pass


# Register the custom environment with OpenAI Gym; the entry point
# "module:Class" lets Gym import IdentityEnv from this file by name.
register(
    id="Custom_Env/IdentityEnv-v1",
    entry_point="openai_gym_env:IdentityEnv",
)

# The "GymV21Environment-v0:" prefix tells OpenRL's make() to wrap the
# legacy Gym (v0.21-style) API into the modern interface; env_num=10
# creates 10 parallel copies.
env = make("GymV21Environment-v0:Custom_Env/IdentityEnv-v1", env_num=10)

# Train an agent on the vectorized env and evaluate it (helper defined in
# train_and_test.py alongside this example).
train_and_test(env)
35 changes: 35 additions & 0 deletions examples/custom_env/pettingzoo_env.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,35 @@
#!/usr/bin/env python
# -*- coding: utf-8 -*-
# Copyright 2023 The OpenRL Authors.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# https://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

""""""


from rock_paper_scissors import RockPaperScissors
from train_and_test import train_and_test

from openrl.envs.common import make
from openrl.envs.PettingZoo.registration import register
from openrl.selfplay.wrappers.random_opponent_wrapper import RandomOpponentWrapper

# Register the PettingZoo environment class with OpenRL under the name
# "RockPaperScissors" so make() can construct it.
register("RockPaperScissors", RockPaperScissors)
# Create 10 parallel copies; the opponent side of this two-player game is
# driven by RandomOpponentWrapper (random legal moves), which reduces
# training to a single-agent problem.
env = make(
    "RockPaperScissors",
    env_num=10,
    opponent_wrappers=[RandomOpponentWrapper],
)
# Initial reset; obs/info are unused here — presumably train_and_test()
# resets again internally (confirm against train_and_test.py).
obs, info = env.reset()

train_and_test(env)

0 comments on commit 0abdce9

Please sign in to comment.