In [1]:
# PyDev console start-up: print the console/interpreter banner.
import sys

print('PyDev console: using IPython 7.5.0\n')
print('Python %s on %s' % (sys.version, sys.platform))

# Put the project directory on the import path
# (presumably so that the local `low_upper` module can be imported -- confirm).
sys.path.append('/home/dang/Documents/Doc/2021/mine')

In [2]:


import time
import gym
from stable_baselines3 import PPO
from stable_baselines3.common.vec_env import DummyVecEnv
from torch.utils.tensorboard import SummaryWriter
import minerl  # it's important to import minerl after SB3, otherwise model.save doesn't work...
import cv2
import numpy as np
from low_upper import hsv_bond, all_names, HMaxs_bond, HMins_bond, SMaxs_bond, SMins_bond, VMaxs_bond, VMins_bond

# wandb is an optional dependency: the name is bound to None when the
# package cannot be imported, so callers can test `wandb is None`.
try:
    import wandb
except ImportError:
    wandb = None

class PovOnlyObservation(gym.ObservationWrapper):
    """Turn a MineRL dict observation into one flat, normalised feature vector.

    The returned vector is the concatenation of:

    * a per-pixel material label plane (label index / ``max_name``),
    * the per-pixel HSV value channel (``V / 255``),
    * the inventory counts (count / ``max_item``, clipped to ``[0, 1]``).

    With a 64 x 64 ``pov`` image and 18 inventory entries this gives
    64*64 + 64*64 + 18 = 8210 features, which is the size declared in
    ``observation_space``.

    Label indices are assigned in the order coarse labels are first seen and
    are remembered in ``names_stack`` for the lifetime of the wrapper, so the
    encoding is stable across steps and episodes.
    """

    # Fine-grained colour-class names (entries of ``all_names``) grouped under
    # the coarse label used in the observation. Names that appear in no group
    # are used as their own label.
    _COARSE_GROUPS = (
        ('grass', ('grass', 'grass_1', 'grass_2', 'grass_3', 'grass_4',
                   'grass_5', 'grass_6', 'grass_7', 'grass_8', 'grass_9',
                   'grass_10', 'grass_11', 'grass_12')),
        ('leaf_1', ('leaf_1', 'leaf_2', 'leaf_3', 'leaf_3_far', 'leaf_4',
                    'leaf_4_extra', 'leaf_4_extra_2', 'leaf_5', 'leaf_6',
                    'leaf_7', 'leaf_8', 'leaf_4_extra_3', 'leaf_4_extra_4')),
        ('chunk', ('chunk_2_grey', 'chunk_2_brown', 'chunk_3_brown',
                   'chunk_3_brown_dark', 'chunk_4_grey', 'chunk_5_grey',
                   'chunk_6_brown', 'chunk_7_brown')),
        ('rock', ('rock', 'rock_2', 'rock_3', 'rock_dark_yellow',
                  'copper', 'rock_dark_brown')),
        ('water', ('water', 'water_deep')),
        ('dirt', ('dirt', 'dirt_far', 'dirt_water', 'red_dirt')),
        ('dirt', ('sand', 'sand_water')),
        # NOTE(review): the original chain listed ['water', 'water_under'] here
        # and mapped it to 'dirt'. 'water' is already handled above, so only
        # 'water_under' was affected. The mapping is preserved as-is; confirm
        # whether 'water_under' was really meant to be 'dirt' and not 'water'.
        ('dirt', ('water_under',)),
    )
    _COARSE_NAME = {fine: coarse
                    for coarse, fines in _COARSE_GROUPS
                    for fine in fines}

    def __init__(self, env):
        super().__init__(env)
        # 64 x 64 label plane + 64 x 64 value plane + 18 inventory counts.
        # The observation is a vector of floats, so it has to be declared as a
        # Box; a Discrete space would describe a single integer instead.
        self.observation_space = gym.spaces.Box(
            low=0.0, high=1.0, shape=(8210,), dtype=np.float32)
        # Coarse labels in first-seen order; a label's position is its index.
        self.names_stack = []
        # Normalisation constants for label indices and inventory counts.
        # Assumes fewer than `max_name` distinct labels ever occur -- TODO confirm.
        self.max_name = 64
        self.max_item = 50

    def observation(self, observation):
        """Encode ``observation`` as a flat float32 vector.

        Args:
            observation: dict with a ``'pov'`` RGB image and an
                ``'inventory'`` dict of item counts.

        Returns:
            1-D ``np.float32`` array: label plane, value plane and inventory,
            in that order.
        """
        image = observation['pov']
        hsv = cv2.cvtColor(image, cv2.COLOR_RGB2HSV)

        # Keep a trailing axis of length 1 so each channel broadcasts against
        # the per-class bounds (assumed to hold one entry per `all_names` item).
        hue = np.expand_dims(hsv[:, :, 0], axis=-1)
        sat = np.expand_dims(hsv[:, :, 1], axis=-1)
        val = np.expand_dims(hsv[:, :, 2], axis=-1)
        in_hue = np.logical_and(hue >= HMins_bond, hue <= HMaxs_bond)
        in_sat = np.logical_and(sat >= SMins_bond, sat <= SMaxs_bond)
        in_val = np.logical_and(val >= VMins_bond, val <= VMaxs_bond)
        matches = np.logical_and(np.logical_and(in_hue, in_sat), in_val)

        # First matching class per pixel. argmax of an all-False row is 0, so
        # pixels that match no class fall back to `all_names[0]`.
        class_idx = np.argmax(matches, axis=-1)

        # Visit each distinct class once, in the order it first appears when
        # scanning the image row by row, so `names_stack` grows in the same
        # order as a pixel-by-pixel scan would produce.
        distinct, first_seen = np.unique(class_idx.ravel(), return_index=True)
        labels = np.zeros(class_idx.shape)
        for cls in distinct[np.argsort(first_seen)]:
            fine_name = all_names[cls]
            coarse_name = self._COARSE_NAME.get(fine_name, fine_name)
            if coarse_name not in self.names_stack:
                self.names_stack.append(coarse_name)
            # Always use the 0-based position, both for new and known labels,
            # so a label keeps the same value from its very first pixel on.
            labels[class_idx == cls] = self.names_stack.index(coarse_name)

        label_plane = labels.ravel() / self.max_name
        value_plane = (hsv[:, :, 2] / 255).ravel()
        inventory = np.array(list(observation['inventory'].values())).ravel()
        inventory = np.clip(inventory / self.max_item, 0, 1)

        obs = np.concatenate([label_plane, value_plane, inventory])
        return obs.astype(np.float32)

class ActionShaping(gym.ActionWrapper):
    """Expose a small discrete action set on top of the wrapped env's dict actions.

    Each discrete action is a list of ``(key, value)`` overrides applied to the
    wrapped action space's no-op action. With ``always_attack=True`` every
    action additionally sets ``'attack'`` to 1.
    """

    def __init__(self, env, camera_angle=10, always_attack=False):
        super().__init__(env)

        self.camera_angle = camera_angle
        self.always_attack = always_attack

        angle = self.camera_angle
        self._actions = [
            [('attack', 1)],
            [('forward', 1)],
            # [('back', 1)],
            # [('left', 1)],
            # [('right', 1)],
            # [('jump', 1)],
            # [('forward', 1), ('attack', 1)],
            # [('craft', 'planks')],
            [('forward', 1), ('jump', 1)],
            # Camera moves: +/- angle on the first, then on the second component.
            [('camera', [-angle, 0])],
            [('camera', [angle, 0])],
            [('camera', [0, angle])],
            [('camera', [0, -angle])],
        ]

        # Full action dicts, indexed by the discrete action id.
        self.actions = [self._expand(overrides) for overrides in self._actions]
        self.action_space = gym.spaces.Discrete(len(self.actions))

    def _expand(self, overrides):
        """Return a no-op action of the wrapped env with ``overrides`` applied."""
        full_action = self.env.action_space.noop()
        for key, value in overrides:
            full_action[key] = value
        if self.always_attack:
            full_action['attack'] = 1
        return full_action

    def action(self, action):
        """Map a discrete action id to its prepared action dict."""
        return self.actions[action]

def track_exp(project_name=None):
    """Start a Weights & Biases run that mirrors the TensorBoard logs.

    ``wandb`` is an optional dependency. When it could not be imported
    (the module-level name is ``None``) a warning is emitted and tracking is
    skipped instead of failing with an ``AttributeError``.

    Args:
        project_name: W&B project to log to; passed through to ``wandb.init``.

    Returns:
        Whatever ``wandb.init`` returns, or ``None`` when wandb is unavailable.
    """
    if wandb is None:
        import warnings

        warnings.warn(
            "wandb is not installed; experiment tracking is disabled.",
            RuntimeWarning,
            stacklevel=2,
        )
        return None

    # Recorded with the run for reference only; nothing in this function
    # feeds these values into the training code.
    config = {
        "TRAIN_TIMESTEPS": 2000000,  # number of steps to train the agent for. At 70 FPS 2m steps take about 8 hours.
        "TRAIN_ENV": 'MineRLTreechop-v0',
        # training environment for the RL agent. Could use MineRLObtainDiamondDense-v0 here.
        "TRAIN_MODEL_NAME": 'potato',  # name to use when saving the trained agent.
        "TEST_MODEL_NAME": 'potato',  # name to use when loading the trained agent.
        "TEST_EPISODES": 10,  # number of episodes to test the agent for.
        "MAX_TEST_EPISODE_LEN": 18000,  # 18k is the default for MineRLObtainDiamond.
        "TREECHOP_STEPS": 2000,  # number of steps to run RL lumberjack for in evaluations.
        "RECORD_TRAINING_VIDEOS": False,  # if True, records videos of all episodes done during training.
        "RECORD_TEST_VIDEOS": False,  # if True, records videos of all episodes done during evaluation.
    }
    return wandb.init(
        anonymous="allow",
        project=project_name,
        config=config,
        sync_tensorboard=True,
        name='v1',
        monitor_gym=True,
        save_code=True,
    )


def make_env(idx, env_id='MineRLObtainDiamond-v0', record_video=False, run_name='v1'):
    """Return a factory that builds one wrapped MineRL environment.

    The environment itself is only created when the returned callable is
    invoked, which is the form ``DummyVecEnv`` expects.

    Args:
        idx: position of this environment in the vectorised env; videos are
            only ever recorded for index 0.
        env_id: gym id of the environment to create.
        record_video: when True, wrap environment 0 in ``gym.wrappers.Monitor``
            writing to ``train_videos/<run_name>``. Off by default, matching
            the previously hard-disabled recording.
        run_name: sub-directory name used for recorded videos.

    Returns:
        A zero-argument callable that creates and returns the wrapped env.
    """
    def thunk():
        env = gym.make(env_id)
        if idx == 0 and record_video:
            env = gym.wrappers.Monitor(env, f"train_videos/{run_name}")
        env = PovOnlyObservation(env)
        env = ActionShaping(env, always_attack=True)
        env = gym.wrappers.RecordEpisodeStatistics(env)  # record stats such as returns
        return env
    return thunk

track_exp(project_name="minerl")

env = DummyVecEnv([make_env(i) for i in range(1)])
try:
    # PovOnlyObservation produces a flat feature vector, not an image, so the
    # MLP policy is used here (as in the later cells) instead of 'CnnPolicy'.
    # For all the PPO hyperparameters you could tune see this:
    # https://github.com/DLR-RM/stable-baselines3/blob/6f822b9ed7d6e8f57e5a58059923a5b24e8db283/stable_baselines3/ppo/ppo.py#L16
    model = PPO('MlpPolicy', env, verbose=1, tensorboard_log=f"runs/{'v1'}")
    model.learn(total_timesteps=2000000)  # 2m steps is about 8h at 70 FPS
    model.save('ppo_first')
finally:
    # Close the environment even when training is interrupted or fails.
    # MineRL might throw an exception when closing on Windows, but it can be
    # ignored (the environment does close).
    try:
        env.close()
    except Exception as exc:
        print(f"Ignoring error while closing the environment: {exc!r}")

<IPython.core.display.HTML object>

In [3]:
env = DummyVecEnv([make_env(i) for i in range(1)])
try:
    # For all the PPO hyperparameters you could tune see this:
    # https://github.com/DLR-RM/stable-baselines3/blob/6f822b9ed7d6e8f57e5a58059923a5b24e8db283/stable_baselines3/ppo/ppo.py#L16
    model = PPO('MlpPolicy', env, verbose=1, tensorboard_log=f"runs/{'v1'}")
    model.learn(total_timesteps=2000000)  # 2m steps is about 8h at 70 FPS
    model.save('ppo_first')
finally:
    # Release the environment once training ends or fails, so that a later
    # cell creating a new environment does not leave this one running.
    # MineRL might throw an exception when closing on Windows; it is ignored.
    try:
        env.close()
    except Exception as exc:
        print(f"Ignoring error while closing the environment: {exc!r}")

In [4]:

track_exp(project_name="minerl")

env = DummyVecEnv([make_env(i) for i in range(1)])
try:
    # For all the PPO hyperparameters you could tune see this:
    # https://github.com/DLR-RM/stable-baselines3/blob/6f822b9ed7d6e8f57e5a58059923a5b24e8db283/stable_baselines3/ppo/ppo.py#L16
    model = PPO('MlpPolicy', env, verbose=1, tensorboard_log=f"runs/{'v1'}")
    model.learn(total_timesteps=2000000)  # 2m steps is about 8h at 70 FPS
    model.save('ppo_first')
finally:
    # Release the environment once training ends or fails.
    # MineRL might throw an exception when closing on Windows; it is ignored.
    try:
        env.close()
    except Exception as exc:
        print(f"Ignoring error while closing the environment: {exc!r}")