
add custom env examples #212

Merged 1 commit on Aug 24, 2023
2 changes: 2 additions & 0 deletions README.md
@@ -63,6 +63,8 @@ Currently, the features supported by OpenRL include:

- Importing models and datasets from [Hugging Face](https://huggingface.co/)

- [Tutorial](https://openrl-docs.readthedocs.io/en/latest/custom_env/index.html) on how to integrate user-defined environments into OpenRL

- Support for models such as LSTM, GRU, and Transformer

- Multiple training acceleration methods, including automatic mixed precision training and data collection with half-precision policy networks
1 change: 1 addition & 0 deletions README_zh.md
@@ -53,6 +53,7 @@ OpenRL is developed on top of PyTorch, aiming to provide the reinforcement learning research community with a
- Reinforcement learning training for natural language tasks (such as dialogue tasks)
- An [Arena](https://openrl-docs.readthedocs.io/zh/latest/arena/index.html) feature for conveniently evaluating various agents in multi-agent adversarial environments
- Importing models and data from [Hugging Face](https://huggingface.co/)
- A [detailed tutorial](https://openrl-docs.readthedocs.io/zh/latest/custom_env/index.html) on integrating user-defined environments into OpenRL
- Support for models such as LSTM, GRU, and Transformer
- Multiple training acceleration methods, e.g. automatic mixed precision training and data collection with half-precision policy networks
- Support for user-defined training models, reward models, training data, and environments
8 changes: 8 additions & 0 deletions examples/custom_env/README.md
@@ -0,0 +1,8 @@
# Integrate user-defined environments into OpenRL


Here, we provide several toy examples showing how to integrate user-defined environments into OpenRL. Each script drives training and evaluation through a shared `train_and_test` helper (see the sketch after this list).

- `gymnasium_env.py`: a simple example to show how to create a Gymnasium environment and integrate it into OpenRL.
- `openai_gym_env.py`: a simple example to show how to create an OpenAI Gym environment and integrate it into OpenRL.
- `pettingzoo_env.py`: a simple example to show how to create a PettingZoo environment and integrate it into OpenRL.
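
The shared `train_and_test` helper is imported by all three scripts but is not shown in this diff. For orientation only, a minimal version written against OpenRL's documented PPO quickstart interface (`PPONet`/`PPOAgent`) could look like the sketch below; the `train_and_test.py` actually merged with this PR may differ.

```python
# train_and_test.py -- hypothetical sketch, NOT the file merged in this PR.
# Uses OpenRL's documented PPO quickstart API (PPONet / PPOAgent).
import numpy as np

from openrl.modules.common import PPONet as Net
from openrl.runners.common import PPOAgent as Agent


def train_and_test(env, total_time_steps: int = 20000):
    # Train a PPO agent on the vectorized environment.
    net = Net(env)
    agent = Agent(net)
    agent.train(total_time_steps=total_time_steps)

    # Evaluate: roll out one episode with the trained agent.
    agent.set_env(env)
    obs, info = env.reset()
    done = False
    while not np.any(done):
        action, _ = agent.act(obs, deterministic=True)
        obs, reward, done, info = env.step(action)
    env.close()
```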
80 changes: 80 additions & 0 deletions examples/custom_env/gymnasium_env.py
@@ -0,0 +1,80 @@
#!/usr/bin/env python
# -*- coding: utf-8 -*-
# Copyright 2023 The OpenRL Authors.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# https://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

""""""

from typing import Any, Dict, Optional

import gymnasium as gym
from gymnasium import spaces
from gymnasium.envs.registration import EnvSpec, register
from gymnasium.utils import seeding
from train_and_test import train_and_test

from openrl.envs.common import make


class IdentityEnv(gym.Env):
spec = EnvSpec("IdentityEnv")

    def __init__(self, **kwargs):
        self.dim = 2
        self.observation_space = spaces.Discrete(1)
        self.action_space = spaces.Discrete(self.dim)
        self.ep_length = 5  # episode length in steps
        self.current_step = 0
        # Default RNG so generate_state() works even if seed() is never called.
        self._np_random, _ = seeding.np_random()

def reset(
self,
*,
seed: Optional[int] = None,
options: Optional[Dict[str, Any]] = None,
):
if seed is not None:
self.seed(seed)
self.current_step = 0
self.generate_state()
return self.state, {}

    def step(self, action):
        reward = 1
        self.generate_state()
        self.current_step += 1
        done = self.current_step >= self.ep_length
        # Gymnasium's step API returns (obs, reward, terminated, truncated, info).
        return self.state, reward, done, False, {}

def generate_state(self) -> None:
self.state = [self._np_random.integers(0, self.dim)]

def render(self, mode: str = "human") -> None:
pass

def seed(self, seed: Optional[int] = None) -> None:
if seed is not None:
self._np_random, seed = seeding.np_random(seed)

def close(self):
pass


register(
    id="Custom_Env/IdentityEnv",
    entry_point="gymnasium_env:IdentityEnv",
)

if __name__ == "__main__":
    # Guard the driver code: resolving the entry_point above re-imports this
    # module, and the guard keeps training from running a second time.
    env = make("Custom_Env/IdentityEnv", env_num=10)
    train_and_test(env)
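
Before handing a custom environment to OpenRL, it can be exercised by hand to confirm it honors the Gymnasium API. A short sketch (illustrative, not part of the PR); it relies on `register` running at import time while the training driver above stays behind the `__main__` guard:

```python
# Sanity-check IdentityEnv against the Gymnasium 5-tuple step API.
from gymnasium_env import IdentityEnv  # importing also registers the env

env = IdentityEnv()
obs, info = env.reset(seed=0)
done = False
while not done:
    obs, reward, terminated, truncated, info = env.step(env.action_space.sample())
    done = terminated or truncated
env.close()
```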
80 changes: 80 additions & 0 deletions examples/custom_env/openai_gym_env.py
@@ -0,0 +1,80 @@
#!/usr/bin/env python
# -*- coding: utf-8 -*-
# Copyright 2023 The OpenRL Authors.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# https://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

""""""

from typing import Any, Dict, Optional

import gym
from gym import spaces
from gym.envs.registration import EnvSpec, register
from gym.utils import seeding
from train_and_test import train_and_test

from openrl.envs.common import make


class IdentityEnv(gym.Env):
spec = EnvSpec("IdentityEnv-v1")

    def __init__(self, **kwargs):
        self.dim = 2
        self.observation_space = spaces.Discrete(1)
        self.action_space = spaces.Discrete(self.dim)
        self.ep_length = 5  # episode length in steps
        self.current_step = 0
        # Default RNG so generate_state() works even if seed() is never called.
        self._np_random, _ = seeding.np_random()

def reset(
self,
*,
seed: Optional[int] = None,
options: Optional[Dict[str, Any]] = None,
):
if seed is not None:
self.seed(seed)
self.current_step = 0
self.generate_state()
return self.state

def step(self, action):
reward = 1
self.generate_state()
self.current_step += 1
done = self.current_step >= self.ep_length
return self.state, reward, done, {}

    def generate_state(self) -> None:
        # randint's upper bound is exclusive, so use self.dim to draw from {0, 1}.
        self.state = [self._np_random.randint(0, self.dim)]

def render(self, mode: str = "human") -> None:
pass

def seed(self, seed: Optional[int] = None) -> None:
if seed is not None:
self._np_random, seed = seeding.np_random(seed)

def close(self):
pass


register(
    id="Custom_Env/IdentityEnv-v1",
    entry_point="openai_gym_env:IdentityEnv",
)

if __name__ == "__main__":
    # Same guard as in gymnasium_env.py: the entry_point re-imports this module.
    env = make("GymV21Environment-v0:Custom_Env/IdentityEnv-v1", env_num=10)
    train_and_test(env)
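
The `GymV21Environment-v0:` prefix asks OpenRL to route the old-style Gym environment through a compatibility wrapper that converts the v0.21 conventions used above (bare `reset` return, 4-tuple `step`) into the Gymnasium 5-tuple API. Roughly what that resolves to, as a sketch assuming Gymnasium with the Shimmy compatibility package installed:

```python
# Sketch: wrapping the old-gym env with the GymV21 compatibility layer directly.
import gymnasium

import openai_gym_env  # noqa: F401  (registers Custom_Env/IdentityEnv-v1 with gym)

env = gymnasium.make("GymV21Environment-v0", env_id="Custom_Env/IdentityEnv-v1")
obs, info = env.reset(seed=0)  # Gymnasium-style reset, even for the old-gym env
```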
35 changes: 35 additions & 0 deletions examples/custom_env/pettingzoo_env.py
@@ -0,0 +1,35 @@
#!/usr/bin/env python
# -*- coding: utf-8 -*-
# Copyright 2023 The OpenRL Authors.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# https://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

""""""


from rock_paper_scissors import RockPaperScissors
from train_and_test import train_and_test

from openrl.envs.common import make
from openrl.envs.PettingZoo.registration import register
from openrl.selfplay.wrappers.random_opponent_wrapper import RandomOpponentWrapper

register("RockPaperScissors", RockPaperScissors)
env = make(
"RockPaperScissors",
env_num=10,
opponent_wrappers=[RandomOpponentWrapper],
)
obs, info = env.reset()

train_and_test(env)
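
`rock_paper_scissors.py` is imported above but not included in this diff. To make the example easier to follow, here is a hypothetical sketch of what such an environment can look like, modeled on the AEC rock-paper-scissors tutorial in the PettingZoo documentation; the file actually merged with this PR may differ.

```python
# rock_paper_scissors.py -- hypothetical sketch modeled on the PettingZoo
# docs' AEC tutorial; NOT necessarily the file merged in this PR.
from gymnasium import spaces
from pettingzoo import AECEnv
from pettingzoo.utils import agent_selector

ROCK, PAPER, SCISSORS, NONE = 0, 1, 2, 3
MAX_MOVES = 10
# (reward for player_0, reward for player_1) given both moves
REWARD_MAP = {
    (ROCK, ROCK): (0, 0), (ROCK, PAPER): (-1, 1), (ROCK, SCISSORS): (1, -1),
    (PAPER, ROCK): (1, -1), (PAPER, PAPER): (0, 0), (PAPER, SCISSORS): (-1, 1),
    (SCISSORS, ROCK): (-1, 1), (SCISSORS, PAPER): (1, -1), (SCISSORS, SCISSORS): (0, 0),
}


class RockPaperScissors(AECEnv):
    metadata = {"render_modes": ["human"], "name": "RockPaperScissors"}

    def __init__(self, render_mode=None):
        self.render_mode = render_mode
        self.possible_agents = ["player_0", "player_1"]

    def observation_space(self, agent):
        return spaces.Discrete(4)  # opponent's last move, or NONE

    def action_space(self, agent):
        return spaces.Discrete(3)

    def reset(self, seed=None, options=None):
        self.agents = self.possible_agents[:]
        self.rewards = {a: 0 for a in self.agents}
        self._cumulative_rewards = {a: 0 for a in self.agents}
        self.terminations = {a: False for a in self.agents}
        self.truncations = {a: False for a in self.agents}
        self.infos = {a: {} for a in self.agents}
        self.state = {a: NONE for a in self.agents}
        self.observations = {a: NONE for a in self.agents}
        self.num_moves = 0
        self._agent_selector = agent_selector(self.agents)
        self.agent_selection = self._agent_selector.next()

    def observe(self, agent):
        return self.observations[agent]

    def step(self, action):
        if self.terminations[self.agent_selection] or self.truncations[self.agent_selection]:
            self._was_dead_step(action)
            return
        agent = self.agent_selection
        self._cumulative_rewards[agent] = 0
        self.state[agent] = action
        if self._agent_selector.is_last():
            # Both players have moved: score the round and reveal the moves.
            p0, p1 = self.agents
            self.rewards[p0], self.rewards[p1] = REWARD_MAP[(self.state[p0], self.state[p1])]
            self.num_moves += 1
            self.truncations = {a: self.num_moves >= MAX_MOVES for a in self.agents}
            self.observations[p0], self.observations[p1] = self.state[p1], self.state[p0]
        else:
            self._clear_rewards()
        self.agent_selection = self._agent_selector.next()
        self._accumulate_rewards()

    def render(self):
        pass
```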