In [3]:
#
# From https://minerl.io/docs/environments/index.html#id9
#
import gym
from gym import spaces
import minerl
import logging
import numpy as np
import collections
from itertools import chain
%run DQNAgent.ipynb

In [4]:
# Device on which train() places observation, action and reward tensors (CPU only).
# NOTE(review): `torch` is not imported in the visible cells; presumably it is
# brought into scope by `%run DQNAgent.ipynb` -- confirm.
device = torch.device("cpu")

In [3]:
# Download the MineRLNavigateDense-v0 demonstration data into ./experiments.
# The recorded output shows a ~634 MB transfer.
# NOTE(review): minerl.data.make(...) further down is called without a data
# directory; presumably MINERL_DATA_ROOT must point at ./experiments -- confirm.
minerl.data.download('./experiments',experiment='MineRLNavigateDense-v0')

As of MineRL 0.3.0 automatic hash checking has been disabled.
Download: https://minerl.s3.amazonaws.com/v3/MineRLNavigateDense-v0.tar: 100%|▉| 633.0/633.56928 [00:26<00:00, 28.68MB/

'./experiments'

  colour=colour)
Download: https://minerl.s3.amazonaws.com/v3/MineRLNavigateDense-v0.tar: 100%|█| 634.0/633.56928 [00:39<00:00, 28.68MB/

In [2]:
# Live environment that train() resets and steps; its observation_space is also
# handed to DQNAgent when the agent is built.
env = gym.make("MineRLNavigateDense-v0") # A MineRLNavigateDense-v0 env

In [7]:
%run CameraConverter.ipynb
# Converter constructed with 45; only its `discrete_space` attribute is used here.
# NOTE(review): the meaning of 45 is defined in CameraConverter.ipynb -- confirm.
cc = CameraConverter(45)

# Action space given to the agent: every entry is a two-way switch except
# "camera", which takes the converter's discrete space.
modified_action_space = spaces.Dict({
    action_name: cc.discrete_space if action_name == "camera" else spaces.Discrete(2)
    for action_name in (
        "attack", "back", "camera", "forward", "jump",
        "left", "right", "sneak", "sprint",
    )
})

# Hyper-parameters forwarded verbatim to DQNAgent.
# NOTE(review): key semantics live in DQNAgent.ipynb; presumably EPS/EPS_DECAY
# drive exploration and the MEMORY_* keys configure the replay memory -- confirm.
agent_config = {
    "EPS": 0.99,
    "EPS_DECAY": 0.995,
    "GAMMA": 1,
    "MEMORY_CAPACITY": 100000,
    "MEMORY_ALPHA": 0.7,
    "MEMORY_BETA": 0.4,
    "MEMORY_EPSILON": 0.001,
    "BATCH_SIZE": 64,
}

agent = DQNAgent(
    env.observation_space,
    modified_action_space,
    agent_config,
)

In [14]:
# Train the agent in the environment, one episode at a time.
def _pov_to_tensor(obs):
    """Convert the observation's "pov" frame into a (1, C, H, W) float tensor on `device`."""
    # The frame is reshaped from height x width x channel in the original code;
    # a plain reshape to (3, 64, 64) interleaves the colour channels, so the
    # channel axis is moved to the front with a transpose instead.
    # NOTE(review): if DQNAgent.ipynb preprocesses frames with reshape anywhere,
    # it should be updated to match -- confirm.
    chw = np.ascontiguousarray(np.transpose(obs["pov"], (2, 0, 1)))
    return torch.from_numpy(chw).float().to(device).unsqueeze(0)


def _flatten_action(action):
    """Flatten an action dict into a 1-D int array, expanding nested dicts in place."""
    parts = []
    for value in action.values():
        if isinstance(value, dict):
            # e.g. a dict-valued "camera" entry: splice its components in order.
            parts.extend(np.atleast_1d(component) for component in value.values())
        else:
            parts.append(np.atleast_1d(value))
    return np.hstack(parts).astype(int)


def train(env, agent, config):
    """Run the agent in `env` for a number of episodes, storing and learning from transitions.

    Args:
        env: environment exposing `reset()` and `step(action)`; observations must
            provide the keys "pov" and "compassAngle".
        agent: object exposing `step`, `memory.store_transition`, `evaluate`,
            `agent_sync_networks`, `decay_epsilon` and an `epsilon` attribute.
        config: dict with the keys
            "NUM_EPISODES"  - number of episodes to run,
            "EVAL_INTERVAL" - `agent.evaluate` is called on every step whose
                              count is a multiple of this value,
            "SYNC_INTERVAL" - `agent.agent_sync_networks` is called after every
                              episode whose index is a multiple of this value.

    Returns:
        Tuple `(episodic_rewards, episodic_epsilon, episodic_moves)` of lists,
        one entry per episode: summed reward, the agent's epsilon before decay,
        and the number of environment steps taken.
    """
    num_episodes = config["NUM_EPISODES"]
    episodic_rewards = [0] * num_episodes
    episodic_epsilon = [0] * num_episodes
    episodic_moves = [0] * num_episodes

    for episode in range(num_episodes):
        obs = env.reset()
        done = False
        steps = 0
        # Feed the policy the live environment frame, not a batch from the
        # demonstration dataset loaded in a later cell.
        pov_tensor = _pov_to_tensor(obs)
        while not done:
            action = agent.step(pov_tensor)

            # The environment gets a copy whose camera entry is overridden by a
            # turn proportional to the current compass angle; the agent's own
            # action dict is what gets stored in memory.
            env_action = action.copy()
            env_action['camera'] = [0, 0.03 * obs["compassAngle"]]

            steps += 1
            obs, reward, done, _info = env.step(env_action)

            # A terminal transition is recorded with no next state.
            next_pov_tensor = None if done else _pov_to_tensor(obs)

            action_tensor = torch.from_numpy(_flatten_action(action)).to(device).float().unsqueeze(0)
            reward_tensor = torch.tensor([reward], device=device)
            agent.memory.store_transition(Transition(pov_tensor, action_tensor, next_pov_tensor, reward_tensor))
            if steps % config["EVAL_INTERVAL"] == 0:
                agent.evaluate(pov_tensor, action, next_pov_tensor, reward)

            pov_tensor = next_pov_tensor
            episodic_rewards[episode] += reward
            episodic_moves[episode] += 1

        if episode % config["SYNC_INTERVAL"] == 0:
            agent.agent_sync_networks()
        episodic_epsilon[episode] = agent.epsilon
        agent.decay_epsilon()
    return episodic_rewards, episodic_epsilon, episodic_moves

In [None]:
# Settings read by train():
#   EVAL_INTERVAL - agent.evaluate() runs on every step whose count is a multiple of this
#   SYNC_INTERVAL - agent.agent_sync_networks() runs after every episode whose index is a multiple of this
#   NUM_EPISODES  - number of episodes to run
train_config = {
    "EVAL_INTERVAL": 10,
    "SYNC_INTERVAL": 5,
    "NUM_EPISODES": 10
}
# train() returns per-episode reward totals, epsilon values and step counts.
rew, ep, moves = train(env, agent, train_config)

In [10]:
# Open the recorded MineRLNavigateDense-v0 demonstrations; the recorded cell
# output shows this is a minerl DataPipeline object.
# NOTE(review): no data directory is passed here; presumably it is resolved from
# the MINERL_DATA_ROOT environment variable -- confirm.
data = minerl.data.make("MineRLNavigateDense-v0")

<minerl.data.data_pipeline.DataPipeline object at 0x000002C46FA91A48>


In [25]:
# Peek at the demonstration data: request one epoch of batches (batch_size=1,
# seq_len=32) but print only the first batch's state and then stop.
# The loop variables (current_state, action, reward, next_state, done) remain
# defined as notebook globals after this cell runs.
for current_state, action, reward, next_state, done in data.batch_iter(num_epochs=1, batch_size=1, seq_len=32):
    print(current_state)
    break

100%|██████████████████████████████████████████████████████████████████████████████| 193/193 [00:00<00:00, 9882.44it/s]


{'compassAngle': array([[-43.5215664, -41.145048 , -37.93536  , -34.293996 , -30.5848224,
        -27.1476   , -24.223572 , -24.223572 , -22.0448412, -20.770164 ,
        -20.4110532, -20.91204  , -20.91204  , -22.399884 , -24.6656376,
        -27.4457376, -30.464748 , -33.4677024, -36.2040696, -38.50038  ,
        -40.2195744, -41.31306  , -41.8019904, -41.7582612, -41.2861932,
        -40.468392 , -39.4126812, -38.233152 , -37.0004508, -35.79246  ,
        -34.681356 , -33.724638 ]]), 'inventory': {'dirt': array([[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
        0, 0, 0, 0, 0, 0, 0, 0, 0, 0]])}, 'pov': array([[[[[42, 48, 39],
          [ 0,  3,  0],
          [ 0,  0,  0],
          ...,
          [ 4,  4, 12],
          [ 4,  4, 12],
          [ 4,  4, 12]],

         [[42, 48, 39],
          [ 0,  2,  0],
          [ 0,  0,  0],
          ...,
          [ 4,  4, 12],
          [ 4,  4, 12],
          [ 4,  4, 12]],

         [[38, 49, 34],
          [39, 50