In [1]:
from time import sleep

import gym
import torch
import pyglet
import minihack

import numpy as np

from nle import nethack
from minihack import RewardManager
from gym.envs.classic_control import rendering

In [2]:
# notebook-wide configuration
# ---------------------------

# seed value used wherever randomness is seeded
SEED = 0

# the compass-direction movement actions exposed by NLE
MOVE_ACTIONS = tuple(nethack.CompassDirection)

# movement plus the commands used while navigating (open, kick, search)
NAVIGATE_ACTIONS = (
    *MOVE_ACTIONS,
    nethack.Command.OPEN,
    nethack.Command.KICK,
    nethack.Command.SEARCH,
)

# upper bound on the number of steps in a single episode
MAX_EPISODE_STEPS = 1000

In [3]:
# prefer a CUDA GPU when torch can see one, otherwise fall back to the CPU
_device_name = "cuda" if torch.cuda.is_available() else "cpu"
device = torch.device(_device_name)

In [4]:
# wrapper for rendering the env as an image
class RenderingWrapper(gym.Wrapper):
    """Gym wrapper that displays the env's ``'pixel'`` observation in a window.

    The most recent ``obs['pixel']`` returned by ``reset``/``step`` is cached
    and drawn by ``render(mode='human')`` through a ``SimpleImageViewer``.
    The wrapped env must therefore include ``'pixel'`` in its observations.
    """

    def __init__(self, env):
        """Wrap ``env`` and open a 1280x520 viewer window.

        Args:
            env: environment whose observations are dicts with a ``'pixel'`` entry.
        """
        # gym.Wrapper.__init__ already stores the wrapped env as self.env
        super().__init__(env)
        # latest frame to draw; stays None until reset()/step() provides one
        self.pixels = None
        self.viewer = rendering.SimpleImageViewer()
        self.viewer.width = 1280
        self.viewer.height = 520
        # NOTE(review): the window is created here rather than lazily by
        # `imshow`, so `viewer.isopen` may never be flipped to True by the
        # viewer itself -- confirm against the installed gym version before
        # relying on the value returned by render().
        self.viewer.window = pyglet.window.Window(
            width=self.viewer.width,
            height=self.viewer.height,
            display=self.viewer.display,
            vsync=False,
            resizable=True
        )

    def step(self, action):
        """Step the wrapped env and remember the new frame.

        Returns:
            The ``(obs, reward, done, info)`` tuple from the wrapped env, unchanged.
        """
        obs, reward, done, info = self.env.step(action)
        self.pixels = obs['pixel']
        return obs, reward, done, info

    def render(self, mode="human", **kwargs):
        """Draw the cached frame, or defer to the wrapped env for other modes.

        Args:
            mode: ``'human'`` draws the cached frame in the viewer window; any
                other mode is forwarded to the wrapped env.
            **kwargs: extra arguments forwarded to the wrapped env's ``render``
                for non-human modes.

        Returns:
            For ``'human'``: the viewer's ``isopen`` flag, or ``False`` once the
            wrapper has been closed. Otherwise whatever the wrapped env returns.
        """
        if mode != 'human':
            # forward the requested mode instead of silently falling back to
            # the wrapped env's default
            return self.env.render(mode=mode, **kwargs)

        if self.viewer is None:
            # close() was already called; there is no window left to draw into
            return False

        if self.pixels is not None:
            # only draw once reset()/step() has supplied a frame
            self.viewer.imshow(self.pixels)
        return self.viewer.isopen

    def reset(self, **kwargs):
        """Reset the wrapped env and remember the first frame.

        Args:
            **kwargs: forwarded to the wrapped env's ``reset``.

        Returns:
            The initial observation from the wrapped env, unchanged.
        """
        obs = self.env.reset(**kwargs)
        self.pixels = obs['pixel']
        return obs

    def close(self):
        """Close the viewer window and the wrapped env; later calls are no-ops."""
        if self.viewer is None:
            return
        # detach first so a failure below cannot leave a half-closed viewer behind
        viewer, self.viewer = self.viewer, None
        try:
            viewer.window.close()
            viewer.close()
        finally:
            # overriding close() bypasses gym.Wrapper.close, so the wrapped env
            # has to be released explicitly
            self.env.close()

In [5]:
# create the environment
# https://minihack.readthedocs.io/en/latest/envs/skills/quest.html

# setup the reward manager: one reward event per monster kill
# https://minihack.readthedocs.io/en/latest/getting-started/reward.html?highlight=RewardManager#reward-manager
reward_manager = RewardManager()
reward_manager.add_kill_event("minotaur", reward=10)
reward_manager.add_kill_event("goblin", reward=1)
reward_manager.add_kill_event("jackal", reward=1)
reward_manager.add_kill_event("giant rat", reward=1)

# make the environment
# "pixel" must stay in observation_keys: RenderingWrapper reads obs['pixel']
env = gym.make(
    "MiniHack-Quest-Hard-v0",
    actions=NAVIGATE_ACTIONS,
    reward_manager=reward_manager,
    observation_keys=("glyphs", "pixel"),
    # apply the configured episode length instead of leaving the constant unused
    max_episode_steps=MAX_EPISODE_STEPS,
)
env.seed(SEED)

# wrappers
env = RenderingWrapper(env)

In [6]:
# start the first episode; the returned observation is not needed here, so it
# is assigned to the throwaway name `_`
_ = env.reset()

In [7]:
# roll out random actions and render every frame
NUM_DEMO_STEPS = 100  # total number of environment steps to take
FRAME_DELAY = 0.05    # seconds to pause between rendered frames

# dedicated generator seeded with SEED so the action sequence is reproducible
action_rng = np.random.default_rng(SEED)

try:
    for _ in range(NUM_DEMO_STEPS):

        action = int(action_rng.integers(env.action_space.n))
        obs, reward, done, info = env.step(action)

        env.render()
        sleep(FRAME_DELAY)

        # stepping an env after `done` is undefined in gym; start a new episode
        if done:
            obs = env.reset()
finally:
    # release the window and the env even if a step raised
    env.close()