In [125]:
from mlagents_envs.environment import UnityEnvironment

In [126]:
from pyvirtualdisplay import Display

In [150]:
display = Display(backend="xvnc", size=(64,64), visible=0, rfbport=5901)
display.start()

<pyvirtualdisplay.display.Display at 0x7f8c300bbbd0>

In [151]:
display.stop()

<pyvirtualdisplay.display.Display at 0x7f8c300bbbd0>

In [71]:
env = UnityEnvironment(file_name="RC.x86_64", seed=1, timeout_wait=1000)

UnityTimeOutException: The Unity environment took too long to respond. Make sure that :
	 The environment does not need user interaction to launch
	 The Agents' Behavior Parameters > Behavior Type is set to "Default"
	 The environment and the Python interface have compatible versions.

In [92]:
import os

# Example trajectory file path; Actor._act writes files with this naming scheme.
filepaths='./Datasets/50k/actor-26-traj-1.pkl'
# Create the directory that will contain the file. Calling makedirs on the
# full path would create a *directory* named "...traj-1.pkl", and a later
# gzip.open(filename, 'wb') on that same path would then fail.
os.makedirs(os.path.dirname(filepaths), exist_ok=True)

In [None]:
env.reset()

In [10]:
env.step()

# Env Utils

In [1]:
from typing import List
import torch
import torch.multiprocessing as mp
import numpy as np
import os.path
import shutil
from PIL import Image, ImageDraw
from collections import deque, defaultdict, namedtuple
import pdb

device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

class Counter:
  """Integer counter stored in an `mp.RawValue` and guarded by an `mp.Lock`."""

  def __init__(self, init_val: int = 0):
    # "i" is the ctypes type code for a signed int.
    self._val = mp.RawValue("i", init_val)
    self._lock = mp.Lock()

  def increment(self):
    """Add one to the counter while holding the lock."""
    self._lock.acquire()
    try:
      self._val.value = self._val.value + 1
    finally:
      self._lock.release()

  @property
  def value(self):
    """Current count, read while holding the lock."""
    self._lock.acquire()
    try:
      current = self._val.value
    finally:
      self._lock.release()
    return current

class Timer():
  """Running statistics over a stream of numbers.

  Keeps the last `maxsize` values for a windowed mean (`avg`) and a running
  sum/count for the mean over everything ever recorded (`global_avg`).
  """

  def __init__(self, maxsize=20):
    # deque(maxlen=...) silently drops the oldest value once it is full.
    self.values = deque(maxlen=maxsize)
    self.count = 0.
    self.sum = 0.

  def update(self, value):
    """Record one value in both the window and the running totals."""
    self.values.append(value)
    self.sum += value
    self.count += 1

  @property
  def avg(self):
    """Mean of the values currently in the window; 0.0 if nothing was recorded."""
    if not self.values:
      # np.mean of an empty sequence would emit a RuntimeWarning and give NaN.
      return 0.
    return np.mean(self.values)

  @property
  def global_avg(self):
    """Mean of every value ever recorded; 0.0 if nothing was recorded."""
    if self.count == 0:
      # Avoid ZeroDivisionError on a Timer that has not been updated yet
      # (MetricLogger creates such Timers on first key access).
      return 0.
    return self.sum / self.count

class MetricLogger():
  """Collection of named Timer objects that are created on first use."""

  def __init__(self):
    # defaultdict builds a fresh Timer the first time a key is looked up.
    self.values = defaultdict(Timer)

  def update(self, **kargs):
    """Feed each keyword's value to the Timer stored under that keyword."""
    for name in kargs:
      self.values[name].update(kargs[name])

  def __getitem__(self, key):
    """Return the Timer for `key` (created if it does not exist yet)."""
    timer = self.values[key]
    return timer

  def __setitem__(self, key, value):
    """Record `value` in the Timer for `key`; the Timer itself is not replaced."""
    timer = self.values[key]
    timer.update(value)

def dec_agent_pos_rot(pos_rot_vec):
  """Split a vector into (pos, rot): its first three entries and the rest."""
  split_at = 3
  return pos_rot_vec[:split_at], pos_rot_vec[split_at:]

def dec_building_pos_scale(pos_scale_vec):
  """Unpack a flat vector into two (9, 3) tables.

  Entries 0..26 become `pos` and entries 27..53 become `scale`; anything
  after index 53 is ignored.
  """
  rows, cols = 9, 3
  table_len = rows * cols  # 27 entries per table

  pos = pos_scale_vec[:table_len].reshape(rows, cols)
  scale = pos_scale_vec[table_len:2 * table_len].reshape(rows, cols)
  return pos, scale

def comp_min_distance_angle(agent_pos, building_pos, building_scale):
  """
  Find the building closest to the agent on the x-z plane.

  agent_pos: (3,)
  building_pos: (9, 3)
  building_scale: (9, 3)

  The number of buildings is the count of rows of `building_pos` whose
  coordinates do not sum to zero; only that many leading rows are searched.

  Returns (dis_min, theta):
    dis_min: centre distance minus the footprint half-diagonal, plus 5.
    theta: direction from the agent to that building in degrees, in [0, 360),
           measured from +x towards +z.

  Raises ValueError when there is no building or the angle is NaN.
  """

  num_building = np.sum(np.sum(building_pos, axis=1) != 0)
  if num_building == 0:
    raise ValueError("comp_min_distance_angle: no buildings in building_pos")

  dis = (agent_pos - building_pos)**2
  dis = np.sqrt(dis[:, 0] + dis[:, 2])

  # Half of the footprint diagonal on the x-z plane.
  building_radius = (building_scale / 2.)**2
  building_radius = np.sqrt(building_radius[:, 0] + building_radius[:, 2])

  dis = dis - building_radius + 5. # distance to the building surface

  dis_min_idx = np.argmin(dis[:num_building])

  min_building_pos = building_pos[dis_min_idx]
  dis_min = dis[dis_min_idx]

  x_offset = min_building_pos[0] - agent_pos[0]
  z_offset = min_building_pos[2] - agent_pos[2]
  # The epsilon keeps the division finite when the agent sits on the centre.
  planar_norm = np.sqrt(x_offset**2 + z_offset**2) + 1e-5
  cos_theta = x_offset / planar_norm
  sin_theta = z_offset / planar_norm

  theta = np.degrees(np.arcsin(sin_theta))
  if np.isnan(theta):
    # Fail loudly instead of dropping into pdb, which would block a worker
    # process that has no interactive terminal.
    raise ValueError(
      "comp_min_distance_angle: NaN angle for agent_pos={} and building_pos={}".format(
        agent_pos, min_building_pos))
  if cos_theta < 0.:
    # arcsin only covers [-90, 90]; mirror to reach the left half-plane.
    theta = 180. - theta

  theta = theta % 360.
  return dis_min, theta

def dec_top_down_map(building_pos_scale_color):
  """Rasterise building footprints into a 64x64x3 top-down image.

  The flat input is read as three (9, 3) tables: positions (entries 0..26),
  scales (27..53) and colours (54 onwards). World x/z coordinates are shifted
  by 50 and scaled by 64/100 to pixel coordinates. Buildings are composited
  in order of increasing height (scale column 1), so taller ones are painted
  last.
  """
  centers = building_pos_scale_color[:27].reshape(9, 3)
  sizes = building_pos_scale_color[27:54].reshape(9, 3)
  colors = building_pos_scale_color[54:].reshape(9, 3)

  # Rows whose coordinates sum to zero are treated as "no building".
  building_count = np.sum(np.sum(centers, axis=1) != 0)
  heights = sizes[:building_count, 1]

  # One layer per building, each holding only that building's footprint.
  layers = np.zeros((building_count, 64, 64, 3))
  for b in range(building_count):
    center_x = (centers[b, 0] + 50) / 100 * 64
    center_z = (centers[b, 2] + 50) / 100 * 64

    half_x = (sizes[b, 0] / 100 * 64) // 2
    half_z = (sizes[b, 2] / 100 * 64) // 2

    # Image rows grow downwards, so z is flipped (64 - ...).
    row_start = 64 - int(center_z + half_z)
    row_stop = 64 - int(center_z - half_z + 1)
    col_start = int(center_x - half_x)
    col_stop = int(center_x + half_x + 1)

    layers[b][row_start:row_stop, col_start:col_stop] = colors[b]

  canvas = np.zeros((64, 64, 3))
  for b in np.argsort(heights):
    layer = layers[b]
    # Where both canvas and layer are non-zero, the layer replaces the canvas.
    both_painted = (canvas > 0.) * (layer > 0.)
    canvas = canvas * (1. - both_painted) + layer

  return canvas

def plot_traj(map_top_down, agent_pos):
  """
  Draw the agent path as red line segments on an image of the map.

  agent_pos: T, 2

  `map_top_down` is multiplied by 255 and cast to uint8 before drawing.
  Positions are shifted by 50 and scaled by 64/100 to pixel coordinates.
  Returns the drawn image as an int32 array.
  """

  shifted = np.array(agent_pos) + 50

  def to_pixel(point):
    # The second coordinate is flipped because image rows grow downwards.
    return int(point[0] / 100 * 64), int((100 - point[1]-1) / 100 * 64)

  canvas = Image.fromarray((map_top_down * 255).astype(np.uint8))
  pen = ImageDraw.Draw(canvas)

  previous = shifted[0]
  for current in shifted[1:]:
    x0, y0 = to_pixel(previous)
    x1, y1 = to_pixel(current)
    pen.line((x0, y0, x1, y1), fill='red', width=1)
    previous = current

  return np.asarray(canvas, dtype='int32')

def plot_traj_step(map_top_down, agent_pos):
  """
  Render the trajectory incrementally, one frame per position.

  agent_pos: T, 2
  map_top_down: H, W, C

  Frame i shows the path drawn up to position i (frame 0 only contains a
  zero-length segment at the start point). Positions are shifted by 50 and
  scaled by 64/100 to pixel coordinates.

  Returns a float64 array with one frame per position, values divided by 255.
  """

  agent_pos = np.array(agent_pos) + 50

  im = Image.fromarray((map_top_down * 255).astype(np.uint8))
  draw = ImageDraw.Draw(im)
  num = agent_pos.shape[0]
  start = agent_pos[0]
  map_traj_list = []
  for i in range(num):
    end = agent_pos[i]
    draw.line(
      (int(start[0] / 100 * 64), int((100 - start[1]-1) / 100 * 64),
        int(end[0] / 100 * 64), int((100 - end[1]-1) / 100 * 64)),
      fill='red',
      width = 1,
    )
    start = end
    # Converting to int32 copies the pixels, so each frame is a snapshot.
    map_traj_list.append(np.asarray(im, dtype='int32'))

  # np.float was only an alias of the builtin float and no longer exists in
  # current NumPy releases; np.float64 is the same dtype.
  map_traj = np.array(map_traj_list).astype(np.float64) / 255. # B, H, W, C

  return map_traj

def agent_building_angle(agent_pos, agent_rot, building_pos, building_scale):
  """
  Direction from the agent to every building on the x-z plane, and how far
  each direction is from the agent's heading.

  agent_rot: scalar
  agent_pos: (3,)
  building_pos: (9, 3)
  building_scale: (9, 3)  (not used by the computation)

  The number of buildings is the count of rows of `building_pos` whose
  coordinates do not sum to zero; only that many leading rows are used.

  Returns (theta, angle_diff), both of length num_building:
    theta: direction to each building in degrees, in [0, 360), measured from
           +x towards +z.
    angle_diff: |heading - theta| with heading = (-agent_rot + 90) % 360.
                The difference is not wrapped, so it can exceed 180.

  Raises ValueError when any angle is NaN.
  """

  num_building = np.sum(np.sum(building_pos, axis=1) != 0)

  x_offset = building_pos[:num_building, 0] - agent_pos[0]
  z_offset = building_pos[:num_building, 2] - agent_pos[2]
  # The epsilon keeps the division finite when the agent sits on a centre.
  planar_norm = np.sqrt(x_offset**2 + z_offset**2) + 1e-5
  cos_theta = x_offset / planar_norm
  sin_theta = z_offset / planar_norm

  theta = np.degrees(np.arcsin(sin_theta))
  if np.isnan(theta).any():
    # Fail loudly instead of dropping into pdb, which would block a worker
    # process that has no interactive terminal.
    raise ValueError("agent_building_angle: NaN angle for agent_pos={}".format(agent_pos))

  # arcsin only covers [-90, 90]; mirror the entries in the left half-plane.
  theta = np.where(cos_theta < 0., 180. - theta, theta)
  theta = theta % 360.

  agent_rot_ = (-agent_rot + 90) % 360 # opposite rotation direction, agent facing y+ is 0 rotation
  angle_diff = abs(agent_rot_ - theta)

  return theta, angle_diff

In [131]:
import torch
# from actor_unity_gen_data import Actor
# from env_utils import *
import torch.multiprocessing as mp

import os
import pdb

# Utils

In [134]:
import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.nn.init as init

# In[ ]:


class ResidualBlock(nn.Module):
    """Two stacked Conv2dBlocks whose output is added to a one-block skip branch."""

    def __init__(self, in_channels, hid_channels, out_channels,
                 kernel_size_1, stride_1, padding_1,
                 kernel_size_2, stride_2, padding_2):
        super().__init__()

        first = Conv2dBlock(in_channels, hid_channels, kernel_size_1, stride_1, padding_1)
        second = Conv2dBlock(hid_channels, out_channels, kernel_size_2, stride_2, padding_2)
        self.main = nn.Sequential(first, second)

        # The skip branch is built with the *second* conv's kernel/stride/padding.
        self.skip = Conv2dBlock(in_channels, out_channels, kernel_size_2, stride_2, padding_2)

    def forward(self, x):
        """Return main(x) + skip(x)."""
        main_out = self.main(x)
        skip_out = self.skip(x)
        return main_out + skip_out


# In[22]:


class Conv2dBlock(nn.Module):
    """Bias-free convolution followed by 8-group group normalisation and ReLU."""

    def __init__(self, in_channels, out_channels, kernel_size, stride=1, padding=0):
        super().__init__()

        self.m = conv2d(in_channels, out_channels, kernel_size,
                        stride=stride, padding=padding, bias=False)
        # Per-channel scale and shift handed to F.group_norm in forward().
        self.weight = nn.Parameter(torch.ones(out_channels))
        self.bias = nn.Parameter(torch.zeros(out_channels))

    def forward(self, x):
        """Apply conv -> group norm (8 groups) -> ReLU."""
        features = self.m(x)
        normalized = F.group_norm(features, num_groups=8, weight=self.weight, bias=self.bias)
        return F.relu(normalized)


# In[ ]:


def conv2d(in_channels, out_channels, kernel_size, stride=1,
           padding=0, dilation=1, groups=1,
           bias=True, padding_mode='zeros',
           weight_init='kaiming'):
    """Build an nn.Conv2d and re-initialise its parameters.

    The weight uses Xavier-normal init when `weight_init == 'xavier'` and
    Kaiming-normal (ReLU gain) for any other value. When `bias` is truthy the
    bias is set to zeros.
    """
    layer = nn.Conv2d(
        in_channels, out_channels, kernel_size,
        stride=stride, padding=padding, dilation=dilation,
        groups=groups, bias=bias, padding_mode=padding_mode,
    )

    if weight_init != 'xavier':
        nn.init.kaiming_normal_(layer.weight, nonlinearity='relu')
    else:
        nn.init.xavier_normal_(layer.weight)

    if bias:
        nn.init.zeros_(layer.bias)

    return layer


# In[ ]:


class GaussianWithConv2d(nn.Module):
    """5x5 convolution that outputs the mean and std of a diagonal Gaussian."""

    def __init__(self, c_in, c_out):
        super().__init__()

        # One conv produces both halves: 2 * c_out output channels.
        self.enc = conv2d(c_in, 2 * c_out, 5, 1, 2, weight_init='xavier')

    def forward(self, inputs):
        # type: (Tensor) -> Tuple[Tensor, Tensor]
        stats = self.enc(inputs)
        mu, raw_std = torch.chunk(stats, 2, dim=1)
        # softplus keeps std positive; the epsilon keeps it away from zero.
        std = F.softplus(raw_std) + 1e-5
        return mu, std


# In[13]:


class GatedCNN(nn.Module):
  """Gated convolution: tanh(half A) * sigmoid(half B) of one 5x5 conv."""

  def __init__(self, input_size, hidden_size):
    super().__init__()

    # `input_size` is the channel count of cat([x, h]); the conv emits
    # 2 * hidden_size channels, split into a candidate and a gate.
    self.conv_h = nn.Conv2d(input_size, hidden_size*2, kernel_size=5, stride=1, padding=2)

  def forward(self, h, x):
    """Return the new state computed from state `h` and input `x`."""
    stacked = torch.cat([x, h], dim=1)
    candidate, gate = self.conv_h(stacked).chunk(2, dim=1)
    return torch.tanh(candidate) * torch.sigmoid(gate)
    
class AgentCoreBaseVisEncoder(nn.Module):
  """Zero-pad -> 3x3 conv -> 3x3 stride-2 max-pool -> ResidualBlock."""

  def __init__(self, c_in, c_out):
    super().__init__()

    # The stages are registered both as attributes and inside `net`.
    self.conv = nn.Conv2d(c_in, c_out, kernel_size=3, stride=1, padding=1)
    self.pad = nn.ZeroPad2d((0, 1, 0, 1))
    self.max_pool = nn.MaxPool2d(kernel_size=3, stride=2)
    self.residual = ResidualBlock(c_out, c_out, c_out, 3, 1, 1, 3, 1, 1)

    stages = [self.pad, self.conv, self.max_pool, self.residual]
    self.net = nn.Sequential(*stages)

  def forward(self, x):
    """Run the input through pad, conv, max-pool and the residual block."""
    encoded = self.net(x)
    return encoded

class AgentCoreDeepVisEncoder(nn.Module):
  """Three AgentCoreBaseVisEncoder stages followed by a 256-unit linear layer."""

  def __init__(self):
    super().__init__()

    self.block_1 = AgentCoreBaseVisEncoder(3, 16)
    self.block_2 = AgentCoreBaseVisEncoder(16, 32)
    self.block_3 = AgentCoreBaseVisEncoder(32, 32)

    # The linear layer expects 8*8*32 flattened features from block_3.
    self.fc = nn.Linear(8*8*32, 256)

    pipeline = [
      self.block_1,
      self.block_2,
      self.block_3,
      nn.Flatten(),
      nn.ReLU(),
      self.fc,
      nn.ReLU(),
    ]
    self.net = nn.Sequential(*pipeline)

  def forward(self, x):
    """Encode a batch of images into 256-dimensional feature vectors."""
    features = self.net(x)
    return features

def weight_init(m):
  """Re-draw the weight of an nn.Linear from a normal with std 0.04.

  Any other module type is left untouched; the bias is never modified.
  """
  if not isinstance(m, nn.Linear):
    return
  init.normal_(m.weight, std=0.04)

# Modules

In [135]:
import torch
import torch.nn as nn
import torch.nn.functional as F
# from utils import *
from torch.distributions import Categorical
import pdb


# In[ ]:
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

class Policy(nn.Module):
  """Recurrent actor-critic: visual encoder -> LSTM -> action logits and value.

  `hidden_dim` and `device` are read from the notebook-level globals when the
  module is constructed.
  """

  def __init__(self, action_dim):

    super().__init__()
    self.visual_enc = AgentCoreDeepVisEncoder().to(device)

    # batch_first=True: the LSTM takes (batch, time, feature) inputs.
    self.agent_core = nn.LSTM(256, hidden_dim, batch_first=True).to(device)

    self.action_layer = nn.Sequential(
      nn.Linear(hidden_dim, action_dim),
    ).to(device)

    self.value_layer = nn.Sequential(
      nn.Linear(hidden_dim, 1),
    ).to(device)
    self.action_dim = action_dim

    self.dist = Categorical

  def forward(self, x, done, states):
    """Unroll the policy over a batch of sequences.

    x: (B, T, C, H, W) frames.
    done: indexed as done[:, t:t+1] and multiplied into the LSTM state after
      every step, so an entry of 0 zeroes the state.
      NOTE(review): despite the name this acts as a keep-mask — confirm with
      the caller what 0 and 1 mean.
    states: (h, c) initial LSTM state, each (1, B, hidden_dim).

    Returns (states_h, states_c, value, action_logits) with shapes
    (B, T, hidden_dim), (B, T, hidden_dim), (B, T), (B, T, action_dim).
    """

    B, T, C, H, W = x.shape

    h, c = states # 1, B, H

    # Encode all frames in a single pass, then restore the time axis.
    x_t_enc = self.visual_enc(x.reshape(B*T, C, H, W)).reshape(B, T, -1)

    states_h = []
    states_c = []

    for t in range(T):

      o, (h, c) = self.agent_core(x_t_enc[:, t:t+1], (h, c))

      # (1, B, H) * (B, 1) broadcasts the mask over the feature dimension.
      h = h * done[:, t:t+1]
      c = c * done[:, t:t+1]

      states_h.append(h)
      states_c.append(c)

    states_h = torch.cat(states_h, dim=0).permute(1,0,2) # B, T, H
    states_c = torch.cat(states_c, dim=0).permute(1,0,2)

    value = self.value_layer(states_h) # B, T, 1
    action_logits = self.action_layer(states_h) # B, T, dim_action

    return states_h, states_c, value.squeeze(-1), action_logits

  def act(self, x_t, h, c, deterministic=False):
    """Choose an action for a single time step.

    x_t: (B, C, H, W) frames; h, c: LSTM state, each (1, B, hidden_dim).

    Returns (actions, log_prob, h, c). Sampled actions have shape (B,);
    deterministic actions keep a trailing dimension, shape (B, 1).
    """

    x_t_enc = self.visual_enc(x_t)

    # The LSTM is batch_first, so the single time step goes on dim 1. Putting
    # it on dim 0 would make the LSTM read the batch as a sequence of length
    # B; both layouts coincide only when B == 1.
    o, (h, c) = self.agent_core(x_t_enc.unsqueeze(1), (h, c))

    action_logits = self.action_layer(o.squeeze(1))

    dist = self.dist(logits=action_logits)

    if deterministic:
      # NOTE(review): keepdim=True gives (B, 1) actions; log_prob below then
      # broadcasts against the batch dimension when B > 1 — confirm intent.
      actions = torch.argmax(action_logits, dim=-1, keepdim=True)
    else:
      actions = dist.sample()

    log_prob = dist.log_prob(actions)

    return actions, log_prob, h, c


# In[ ]:


class MapDecoder(nn.Module):
  """Decode a flat feature vector into a 3-channel map with pixel-shuffle upsampling.

  `hidden_dim` and `device` are read from the notebook-level globals.
  """

  def __init__(self, ):

    super().__init__()

    # TODO: a ConvTranspose2d-based decoder (three 4x upsampling stages) was
    # the alternative considered here; the pixel-shuffle stack below is the
    # one in use.
    stages = [
      Conv2dBlock(hidden_dim*2, 256*4*4, 1, 1, 0),
      nn.PixelShuffle(4),
      Conv2dBlock(256, 128*4*4, 3, 1, 1),
      nn.PixelShuffle(4),
      Conv2dBlock(128, 64*4*4, 3, 1, 1),
      nn.PixelShuffle(4),
      nn.Conv2d(64, 3, 3, 1, 1),
      nn.Sigmoid(),
    ]
    self.dec = nn.Sequential(*stages).to(device)


  def forward(self, h, map_gt, done):
    """Return (map_pred, loss).

    `h` gets two trailing singleton dimensions before decoding. The loss is
    the per-sample summed squared error, multiplied by `done` and averaged
    over the batch.
    """
    map_pred = self.dec(h[..., None, None])
    per_sample_err = (map_pred - map_gt).pow(2).sum(dim=(1,2,3))
    loss = (per_sample_err * done).mean()
    return map_pred, loss


# In[ ]:


class PositionDecoder(nn.Module):
  """Linear classifier over position classes.

  `hidden_dim`, `pos_dim` and `device` are read from the notebook-level globals.
  """

  def __init__(self, ):

    super().__init__()

    self.dec = nn.Sequential(
      nn.Linear(hidden_dim*2, pos_dim),
    ).to(device)

  def forward(self, h, pos_gt, done):
    """Return (pos_acc, loss) for features `h` and ground truth `pos_gt`.

    The two columns of `pos_gt` are merged into one class index as
    first * 10 + second.
    NOTE(review): that yields indices up to 99, while the config in this
    notebook sets pos_dim to 25 for 'unity' — confirm the range of pos_gt.
    Entries where `done` is 0 compare as equal (0 == 0) in the accuracy and
    contribute 0 to the loss; both are averaged over the whole batch.
    """
    target_cls = pos_gt[:, 0]*10 + pos_gt[:, 1] # 0~99
    logits = self.dec(h)

    predicted = (logits.argmax(dim=1) * done).long()
    expected = (target_cls * done).long()
    pos_acc = (predicted == expected).float().mean()

    per_sample = F.cross_entropy(logits, target_cls.long(), reduction='none')
    loss = (per_sample * done).mean()
    return pos_acc, loss

class RotDecoder(nn.Module):
  """Linear classifier over rotation classes.

  `hidden_dim`, `rot_dim` and `device` are read from the notebook-level globals.
  """

  def __init__(self, ):

    super().__init__()

    self.dec = nn.Sequential(
      nn.Linear(hidden_dim*2, rot_dim),
    ).to(device)

  def forward(self, h, rot_gt, done):
    """Return (rot_acc, loss) for features `h` and class targets `rot_gt`.

    Entries where `done` is 0 compare as equal (0 == 0) in the accuracy and
    contribute 0 to the loss; both are averaged over the whole batch.
    """
    logits = self.dec(h)

    predicted = (logits.argmax(dim=1)*done).long()
    expected = (rot_gt*done).long()
    rot_acc = (predicted == expected).float().mean()

    per_sample = F.cross_entropy(logits, rot_gt.long(), reduction='none')
    loss = (per_sample * done).mean()
    return rot_acc, loss

# In[80]:


class ConvDraw(nn.Module):
  """Iterative latent-variable image model conditioned on a state vector.

  The image is built up over `num_steps` refinement steps. Each step samples
  a latent from the posterior (training) or the prior (inference), decodes
  it and adds the result to a running canvas.

  `hidden_dim`, `kappa` and `device` are read from the notebook-level
  globals when the module is constructed.
  """

  def __init__(self, ):

    super().__init__()

    # Encodes cat([image, reconstruction error]) (6 channels) with three
    # stride-2 convolutions.
    self.img_enc = nn.Sequential(
      Conv2dBlock(3*2, 16, 4, 2, 1),
      Conv2dBlock(16, 16, 4, 2, 1),
      Conv2dBlock(16, 64, 4, 2, 1),
    ).to(device)

    # Upsamples the decoder state to a 3-channel image with three 2x pixel shuffles.
    self.img_dec = nn.Sequential(
      Conv2dBlock(64, 32*2*2, 3, 1, 1),
      nn.PixelShuffle(2),
      Conv2dBlock(32, 32*2*2, 3, 1, 1),
      nn.PixelShuffle(2),
      nn.Conv2d(32, 3*2*2, 3, 1, 1),
      nn.PixelShuffle(2),
    ).to(device)

    # Expands the conditioning vector (given as a 1x1 map) with three
    # stride-2 transposed convolutions.
    self.sim_core_enc = nn.Sequential(
      nn.ConvTranspose2d(hidden_dim, 256, 4, 2, 1),
      nn.GroupNorm(32, 256),
      nn.ReLU(),
      nn.ConvTranspose2d(256, 128, 4, 2, 1),
      nn.GroupNorm(16, 128),
      nn.ReLU(),
      nn.ConvTranspose2d(128, 64, 4, 2, 1),
      nn.GroupNorm(8, 64),
      nn.ReLU(),
    ).to(device)

    self.num_steps = 8

    # One gated cell per refinement step for each of the three recurrences.
    self.decoder = nn.ModuleList([GatedCNN(64+32, 64).to(device) for _ in range(self.num_steps)])
    self.posterior = nn.ModuleList([GatedCNN(64*2, 64).to(device) for _ in range(self.num_steps)])
    self.prior = nn.ModuleList([GatedCNN(64*2, 64).to(device) for _ in range(self.num_steps)])

    self.gaussian_p = GaussianWithConv2d(64, 32).to(device)
    self.gaussian_q = GaussianWithConv2d(64, 32).to(device)

    # constraint() reads self.kappa; without this assignment the training
    # path failed with AttributeError.
    self.kappa = kappa


  def init_states(self, B):
    """Return zero-initialised (h_p, h_q, h_rec), each of shape (B, 64, 8, 8)."""

    h_p = torch.zeros(B, 64, 8, 8, device=device, dtype=torch.float)
    h_q = torch.zeros(B, 64, 8, 8, device=device, dtype=torch.float)
    h_rec = torch.zeros(B, 64, 8, 8, device=device, dtype=torch.float)

    return h_p, h_q, h_rec

  def forward(self, sim_core_h, x_t, is_training=True):
    """Reconstruct (training) or generate (inference) an image.

    sim_core_h: conditioning vectors, (B, hidden_dim).
    x_t: target images; also used for batch size, shape and dtype.

    Returns (rec_x, mean constraint, mean summed KL) when `is_training`,
    otherwise only rec_x.
    """

    B = x_t.shape[0]

    h_p, h_q, h_rec = self.init_states(B)

    sim_core_h = sim_core_h.unsqueeze(-1).unsqueeze(-1)
    sim_core_h_enc = self.sim_core_enc(sim_core_h)

    rec_err = x_t.new_zeros(x_t.shape)
    rec_x = x_t.new_zeros(x_t.shape)

    kl_loss = 0.

    for i in range(self.num_steps):

      if i == 0:
        # The first prior cell is applied four times before the first sample.
        for _ in range(4):
          h_p = self.prior[i](h_p, sim_core_h_enc)
      else:
        h_p = self.prior[i](h_p, sim_core_h_enc)

      if is_training:
        rec_err_enc = self.img_enc(torch.cat([x_t, rec_err], dim=1))
        h_q = self.posterior[i](h_q, rec_err_enc)

        mu_q, std_q = self.gaussian_q(h_q)
        mu_p, std_p = self.gaussian_p(h_p)
        z = self.sample(mu_q, std_q)
        kl_loss = kl_loss + self.cal_kl(mu_p, std_p, mu_q, std_q)

        h_rec = self.decoder[i](h_rec, z)
        x_hat = self.img_dec(h_rec)
        rec_x = rec_x + x_hat
        rec_err = x_t - rec_x

      else:

        mu_p, std_p = self.gaussian_p(h_p)
        z = self.sample(mu_p, std_p)

        h_rec = self.decoder[i](h_rec, z)
        x_hat = self.img_dec(h_rec)
        rec_x = rec_x + x_hat

      # NOTE(review): the sigmoid is applied to the running canvas on every
      # step, and rec_err above is computed before it. Confirm whether it was
      # meant to run once after the loop.
      rec_x = rec_x.sigmoid()

    if is_training:

      constraint = self.constraint(x_t, rec_x)
      return rec_x, constraint.mean(), kl_loss.sum(dim=(1,2,3)).mean()

    else:
      return rec_x

  def constraint(self, x, rec):
    """Per-sample summed squared error minus kappa * 64 * 64 * 3."""
    # kappa is pixel space error threshold
    return torch.sum(torch.pow(rec - x, 2), dim = (1,2,3)) - self.kappa * 64 * 64 * 3

  def sample(self, mu, std):
    """Draw mu + std * eps with eps ~ N(0, 1) of the same shape as mu."""

    noise = torch.empty_like(mu).normal_()

    return mu + std * noise

  def cal_kl(self, mu_p, std_p, mu_q, std_q):
    """Element-wise KL(q || p) between diagonal Gaussians."""

    var_ratio = (std_q / std_p) ** 2

    return 0.5 * (((mu_q - mu_p) / std_p) ** 2 + (var_ratio - 1) - var_ratio.log())

# Logger

In [136]:
from torch.utils.tensorboard import SummaryWriter
from enum import Enum
from threading import Thread
from queue import Empty


class SummaryType(Enum):
    """Kinds of summary that Statistics.run dispatches on."""
    SCALAR = 1
    HISTOGRAM = 2
    VIDEO = 3
    IMAGE = 4
    FIGURE = 5
    GRAPH = 6


# Not working asynchronously
class Statistics(Thread):
    """Writes the statistics of the async processes into a tensorboard

    Items are read from `statistics_queue` as (summary_type, tag, data)
    tuples and forwarded to a SummaryWriter. The global step written for each
    item is `nb_episodes.value * 100`.
    """

    def __init__(self, writer_dir, statistics_queue, nb_episodes):

        super(Statistics, self).__init__()

        # Set to True from outside to make run() stop once the queue stays
        # empty for a polling interval.
        self.exit = False

        self.stats_queue = statistics_queue
        self.nb_episodes = nb_episodes

        self._writer = SummaryWriter(log_dir=writer_dir)

    def run(self):
        """Drain the queue until `self.exit` is set and the queue is empty."""

        super(Statistics, self).run()

        try:
            while True:

                try:
                    summary_type, tag, data = self.stats_queue.get(timeout=1)
                except Empty:
                    # Stop only on an empty queue so pending logs are still written.
                    if self.exit:
                        break
                    continue

                # Push the informations
                step = self.nb_episodes.value

                frames = step * 100

                # Compare against the enum class itself; reaching members
                # through another member is not supported on every Python version.
                if summary_type == SummaryType.SCALAR:
                    self._writer.add_scalar(tag=tag, scalar_value=data, global_step=frames)

                elif summary_type == SummaryType.HISTOGRAM:
                    self._writer.add_histogram(
                        tag=tag, values=data, global_step=frames, bins="tensorflow"
                    )

                elif summary_type == SummaryType.FIGURE:
                    # The figure is the queued payload; the integer frame
                    # count used to be passed here by mistake.
                    self._writer.add_figure(tag=tag, figure=data, global_step=frames)

                elif summary_type == SummaryType.IMAGE:
                    if data.dim() > 3:
                        self._writer.add_images(
                            tag=tag, img_tensor=data, global_step=frames, dataformats="NCHW"
                        )
                    else:
                        self._writer.add_image(
                            tag=tag, img_tensor=data, global_step=frames, dataformats="CHW"
                        )

                elif summary_type == SummaryType.VIDEO:
                    self._writer.add_video(tag=tag, vid_tensor=data, global_step=frames, fps=4)

                elif summary_type == SummaryType.GRAPH:
                    self._writer.add_graph(model=data)
        finally:
            # Close the writer even when writing a summary raises.
            self._writer.close()

# Actor class

In [242]:
import torch
import torch.nn as nn
import torch.nn.functional as F
# import torch.multiprocessing as mp
# from modules import *
import numpy as np
import queue
# from logger import SummaryType
from mlagents_envs.environment import UnityEnvironment
from pyvirtualdisplay import Display
from torch.utils.tensorboard import SummaryWriter
import imageio
from PIL import Image, ImageDraw
from collections import deque, defaultdict, namedtuple
# from env_utils import *
import gzip
import pickle
from glob import glob
import pdb

device = torch.device("cpu")

# One recorded episode; the fields mirror the buffers kept by Memory:
# agent_view, map_top_down, a (action), pos, rot.
Trajectory = namedtuple(
    "Trajectory",
    "agent_view map_top_down a pos rot",
)

class Memory:
  """Fixed-length buffer holding one trajectory of `num_steps + 1` steps.

  Buffers (first dimension is the step index):
    agent_view:   (1 + num_steps, 3, 64, 64) agent camera frames
    map_top_down: (1 + num_steps, 3, 64, 64) top-down maps
    a:            (1 + num_steps, 1) actions
    pos:          (1 + num_steps, 2) x and z coordinates of the agent
    rot:          (1 + num_steps,) rotation
  """

  def __init__(self,id: int,num_steps,):
    self.id = id
    self.agent_view = torch.zeros(1 + num_steps, 3, 64, 64, dtype=torch.float)
    self.map_top_down = torch.zeros(1 + num_steps, 3, 64, 64, dtype=torch.float)
    self.a = torch.zeros(1 + num_steps, 1, dtype=torch.float)
    self.pos = torch.zeros(1 + num_steps, 2, dtype=torch.float)
    self.rot = torch.zeros(1 + num_steps, dtype=torch.float)
    # Index of the next slot that add() writes.
    self.step = 0
    self.num_steps = num_steps

  def add(
    self,
    agent_view: torch.Tensor,
    map_top_down: torch.Tensor,
    a: torch.Tensor,
    pos: torch.Tensor,
    rot: torch.Tensor,
  ):
    """Copy one step into slot `self.step` and advance the write index."""
    self.agent_view[self.step].copy_(agent_view)
    self.map_top_down[self.step].copy_(map_top_down)
    self.a[self.step].copy_(a)
    self.pos[self.step].copy_(pos)
    self.rot[self.step].copy_(rot)
    self.step += 1

  def reset(self):
    """Rewind the write index; buffer contents are overwritten by later adds."""
    self.step = 0

  def to(self, device):
    """Move every buffer to `device` and return self.

    Tensor.to returns a new tensor rather than modifying in place, so the
    results have to be assigned back; discarding them made this a no-op.
    """
    self.agent_view = self.agent_view.to(device)
    self.map_top_down = self.map_top_down.to(device)
    self.a = self.a.to(device)
    self.pos = self.pos.to(device)
    self.rot = self.rot.to(device)
    return self

  def enqueue(self):
    """Return a Trajectory holding clones of all buffers on the global `device`.

    Asserts that exactly `num_steps + 1` steps have been added.
    """
    assert self.step == (self.num_steps +1), "actor {} has mistake in traj length".format(self.id)
    return Trajectory(
      agent_view=self.agent_view.clone().to(device),
      map_top_down=self.map_top_down.clone().to(device),
      a=self.a.clone().to(device),
      pos=self.pos.clone().to(device),
      rot=self.rot.clone().to(device),
    )

  def save(self, filename):
    """Pickle the current trajectory into a gzip-compressed file at `filename`."""
    traj = self.enqueue()
    with gzip.open(filename, 'wb') as f:
      pickle.dump(traj, f)

class Actor(object):
  """Worker that drives one Unity environment and writes trajectories to disk.

  Each actor owns a Memory buffer and a process whose target is `_act`.
  `lu` (steps per trajectory) and `debug` are read from the notebook-level
  globals.
  """

  def __init__(
    self,
    id,
    q,
    nb_episodes,
    timeout=10
  ):

    self.id = id

    self.timeout = timeout

    self.q = q

    # Event that _act sets when data generation ends (normally or through a
    # KeyboardInterrupt), so the launching code can wait on it.
    self.completion = mp.Event()
    self.p = mp.Process(target=self._act,
                            name='actor_test')

    self.memory = Memory(id, lu)
    self.episode_counter = nb_episodes


  def _count_episode(self):
    """Increment the shared episode counter, holding its lock when it has one."""
    get_lock = getattr(self.episode_counter, 'get_lock', None)
    if get_lock is None:
      self.episode_counter.value += 1
    else:
      # `value += 1` is a read followed by a write and is not atomic across
      # processes.
      with get_lock():
        self.episode_counter.value += 1


  def _act(self):
    """Generate trajectories until this actor's slot holds 2000 of them.

    The actor id is remapped from the number of files already present under
    ./Datasets/50k/. The environment and the virtual display are always
    released on exit; `completion` is set after a normal finish or a
    KeyboardInterrupt, and any other exception propagates to the caller.
    """

    env = None
    display = None
    finished = False

    try:
      print("Actor {} started.".format(self.id))


      # Count the existing files of actors 1..25 and pick the
      # (self.id)-th slot that still has fewer than 2000 trajectories.
      file_prex = ['./Datasets/50k/actor-{}-'.format(i) for i in range(1, 26)]
      gened_files = [glob(f + '*') for f in file_prex]
      num_gened_files = np.array([len(f) for f in gened_files])
      less_than_2000 = np.where(num_gened_files < 2000)[0]

      id_old = less_than_2000[self.id-1] + 1
      traj_no = num_gened_files[less_than_2000[self.id-1]]
      self.id = less_than_2000[self.id-1] + 1 + 25 # self.id idx starts from 1

      print("actor-{} starts re-gen {} from traj-{}".format(self.id, id_old, traj_no))

      # todo: dataset
      virtual_display = Display(backend='xvnc', size=(64,64), visible=0, rfbport=5854+self.id)
      virtual_display.start()
      # Only a display that actually started is registered for cleanup.
      display = virtual_display
      env = UnityEnvironment(file_name="RC.x86_64", worker_id=self.id, seed=self.id, timeout_wait=1000)
      dtype = torch.float

      # The Memory buffer holds num_steps + 1 entries, so the episode length
      # is taken from it rather than being hard-coded.
      steps_per_traj = self.memory.num_steps

      while traj_no < 2000:

        traj_no += 1

        self.memory.reset()

        step = 0
        act_step = 0
        num_steps = 0
        action = np.array([1])
        action_prev = action
        env.reset()
        while step < steps_per_traj + 1:
          print("Entered into some while")

          decision, _ = env.get_steps('My Behavior?team=0')

          agent_view = decision.obs[0][0] # 64, 64, 3

          vector_obs = decision.obs[1]

          agent_pos, agent_rot = dec_agent_pos_rot(vector_obs[0, :6])
          building_pos, building_scale = dec_building_pos_scale(vector_obs[0, 6:])

          # Sample a new action once the previous one has been repeated
          # `num_steps` times; otherwise keep repeating it.
          if act_step >= num_steps:
            action, num_steps = self.sample_action(agent_rot, agent_pos,
                                              building_pos, building_scale)
            act_step = 0
            action_prev = action
          else:
            action = action_prev

          env.set_actions('My Behavior?team=0', action.reshape(1, 1))
          env.step()
          act_step = act_step + 1

          # todo: normalize position
          if step == 0:
            # The map is computed once per trajectory and reused for every step.
            map_top_down = dec_top_down_map(vector_obs[0, 6:])
            map_ground = np.array([239, 176, 131]) / 255. # ground color is hard coded
            map_top_down = map_top_down + (1. - (map_top_down > 0.)) * map_ground
            map_top_down = torch.tensor(map_top_down, device=device, dtype=dtype).permute(2, 0, 1)

          agent_view = torch.tensor(agent_view, device=device, dtype=dtype).permute(2, 0, 1) # unity output pixel value [0, 1]
          pos = np.array([agent_pos[0], agent_pos[2]])
          pos = torch.tensor(pos, device=device, dtype=dtype)
          rot = torch.tensor(agent_rot[1] % 360, device=device, dtype=dtype)

          self.memory.add(agent_view, map_top_down, torch.tensor(action).float(), pos, rot) # belief state at t, agent_view at t+1

          # NOTE(review): self.id is at least 26 after the remapping above,
          # so `self.id < 2` looks like it can never hold — confirm whether
          # these sample dumps are still wanted.
          if ((traj_no-1) % 1000 == 0) and (self.id < 2):
            if step == steps_per_traj:
              im = Image.fromarray((map_top_down * 255).permute(1,2,0).cpu().numpy().astype(np.uint8))
              im.save('./image_samples/map_top_down-actor{}-traj{}-step100.png'.format(self.id, traj_no))
              print("saved!!")
              traj_map = plot_traj(map_top_down.permute(1,2,0).cpu().numpy(), self.memory.pos.cpu().numpy())
              im = Image.fromarray((traj_map).astype(np.uint8))
              im.save('./image_samples/traj_map-actor{}-traj{}-step0.png'.format(self.id, traj_no))
              print("saved!!")
              imageio.mimsave('./image_samples/sample-actor{}-traj{}-step0.gif'.format(self.id, traj_no),
                              np.array(self.memory.agent_view.permute(0,2,3,1).cpu()).reshape(-1, 64, 64, 3)*255.)
              print("saved!!")
          step += 1

        self._count_episode()

        # Trajectories go to disk instead of the queue, so the former
        # retry-on-queue.Full loop could never repeat and has been dropped.
        file_name = './Datasets/50k/actor-{}-traj-{}.pkl'.format(self.id, traj_no)
        self.memory.save(file_name)
        # self.q.put(self.memory.enqueue(), timeout=self.timeout)
        if traj_no % 100 == 0:
          print("Actor {} upload trajectory {} successfully.".format(self.id, traj_no))

      finished = True

    except KeyboardInterrupt:
      # An interrupt is treated as a regular shutdown.
      finished = True

    finally:
      # Release whatever was created, even if start-up failed half-way.
      try:
        if env is not None:
          env.close()
      finally:
        if display is not None:
          display.stop()
      if finished:
        self.completion.set()

  def sample_action(self, agent_rot, agent_pos, building_pos, building_scale):
    """Pick an action and how many steps to repeat it.

    Starts with action 1 (forward) repeated once. While the simulated next
    position leaves the |x|,|z| <= 35 area, comes close to a building the
    agent is facing, or the agent faces away from every building, a random
    action in 1..5 is drawn and simulated instead. After more than ten
    redraws a final random action is returned without further checks.

    Returns (action, num_steps); action is an array of shape (1,).
    """
    min_dis, angle = comp_min_distance_angle(agent_pos, building_pos, building_scale)

    def _sample_action(building_angle, agent_rot, agent_pos,
                      building_pos, building_scale, vel=1.5):
      num_steps = 1
      agent_pos_x = agent_pos[0]
      agent_pos_z = agent_pos[2]

      # Default candidate: one step straight ahead.
      action = np.array([1])
      vel_x = np.cos((90 - agent_rot[1]) / 180 * np.pi) * vel
      vel_z = np.sin((90 - agent_rot[1]) / 180 * np.pi) * vel

      pos_x = agent_pos_x + vel_x
      pos_z = agent_pos_z + vel_z

      agent_pos_ = np.array([pos_x, agent_pos[1], pos_z])

      min_dis, building_angle = comp_min_distance_angle(agent_pos_, building_pos, building_scale)


      agent_rot_ = (-agent_rot[1] + 90) % 360 # opposite rotation direction, agent facing y+ is 0 rotation
      angle_diff = abs(agent_rot_ - building_angle)

      # NOTE(review): agent_building_angle applies (-rot + 90) % 360 itself,
      # and agent_rot_ has already been converted here — confirm whether the
      # raw rotation should be passed instead.
      _, angle_diff_all = agent_building_angle(agent_pos_, agent_rot_, building_pos, building_scale)
      go_out = (angle_diff_all > 90) * (angle_diff_all < 260)

      i = 0
      while (abs(pos_z) > 35) or (abs(pos_x) > 35) or ((min_dis < 7.) and (angle_diff < 50 or angle_diff > 300)) or go_out.all():
        action_prob = np.array([0.2, 0.2, .2, 0.2, 0.2])
        action = np.random.choice(np.arange(5), size=1, p=action_prob) + 1

        if (action == 4) or (action == 5):
          # Near a corner on both axes: force the longest in-place turn. The
          # comparison has to be made on the absolute value (it used to take
          # abs() of the boolean, which ignored negative corners).
          if (abs(pos_z) > 33) and (abs(pos_x) > 33):
            num_steps = 6
          else:
            num_steps = np.random.choice([1, 2, 3, 4, 5, 6], p=np.array([0.15, 0.15, 0.15, 0.15, 0.15, 0.25]))
        else:
          num_steps = 1

        # Actions 1-3 move forward (straight / turning +-30 degrees);
        # actions 4-5 rotate in place.
        if action == 1:
          rot = 0
          vel = 1.5
        if action == 2:
          rot = 30
          vel = 1.5
        if action == 3:
          rot = -30
          vel = 1.5
        if action == 4:
          rot = 30 * num_steps
          vel = 0
        if action == 5:
          rot = -30 * num_steps
          vel = 0

        agent_rot_ = agent_rot[1]
        pos_x_ = agent_pos_x
        pos_z_ = agent_pos_z
        min_dis_ = min_dis

        if action == 2 or action == 3:
          rot = rot / 5
          for _ in range(5): # unity takes 5 small steps
            agent_rot_ = agent_rot_ + rot

            vel_x = np.cos((90 - agent_rot_) / 180 * np.pi) * vel / 5.
            vel_z = np.sin((90 - agent_rot_) / 180 * np.pi) * vel / 5.
            pos_x_ = pos_x_ + vel_x
            pos_z_ = pos_z_ + vel_z

          rot = rot * 5

        else:
          agent_rot_ = agent_rot_ + rot

          vel_x = np.cos((90 - agent_rot_) / 180 * np.pi) * vel
          vel_z = np.sin((90 - agent_rot_) / 180 * np.pi) * vel

          pos_x_ = pos_x_ + vel_x
          pos_z_ = pos_z_ + vel_z

        agent_pos_ = np.array([pos_x_, agent_pos[1], pos_z_])
        min_dis_, building_angle =  comp_min_distance_angle(agent_pos_, building_pos, building_scale)

        agent_rot_ = (-agent_rot_ + 90) % 360 # opposite rotation direction, agent facing y+ is 0 rotation
        angle_diff = abs(agent_rot_ - building_angle)

        _, angle_diff_all = agent_building_angle(agent_pos_, agent_rot_, building_pos, building_scale)
        go_out = (angle_diff_all > 90) * (angle_diff_all < 260)

        pos_x = pos_x_
        pos_z = pos_z_
        min_dis = min_dis_

        if i > 10:
          # Give up validating: return one more random draw. num_steps keeps
          # the value chosen in this iteration.
          action_prob = np.array([0.2, 0.2, .2, .2, .2])
          action = np.random.choice(np.arange(5), size=1, p=action_prob) + 1
          break
        i += 1

      return action , num_steps

    return _sample_action(angle, agent_rot, agent_pos, building_pos, building_scale)


  def start(self):
    """Run `_act` inline when the global `debug` is set, else start the process."""
    if debug:
      self._act()
    else:
      self.p.start()


  def terminate(self):
    """Terminate the worker process."""
    self.p.terminate()


  def join(self):
    """Wait for the worker process to exit."""

    self.p.join()

In [243]:
# Bounded queue handed to every Actor (stored as Actor.q).
q = mp.Queue(maxsize = 25)
# Lock-protected counter; no other cell shown in this notebook reads it.
update_counter = Counter(init_val=0)
# Shared episode counter; each Actor increments it once per finished trajectory.
nb_episodes = mp.Value("i", 0)

In [244]:
actors = []
num_actors=6

In [245]:
# Actor ids start at 1 because Actor._act indexes with `self.id-1`.
for i in range(num_actors):
    actors.append(Actor(i+1, q,
                        nb_episodes))

In [246]:
actors

[<__main__.Actor at 0x7f8bec18b810>,
 <__main__.Actor at 0x7f8bec2fd090>,
 <__main__.Actor at 0x7f8bec18b2d0>,
 <__main__.Actor at 0x7f8bec19bc90>,
 <__main__.Actor at 0x7f8bec18bb50>,
 <__main__.Actor at 0x7f8bec19bb90>]

In [240]:
display = Display(backend="xvnc", size=(64,64), visible=0, rfbport=5879)
display.start()

<pyvirtualdisplay.display.Display at 0x7f8bec278e10>

In [241]:
display.stop()

<pyvirtualdisplay.display.Display at 0x7f8bec278e10>

In [3]:
for a in actors:
    a.start()

In [None]:
for a in actors:
    a.completion.wait()

In [None]:
for a in actors:
    a.terminate()

In [None]:
for a in actors:
    a.join()

In [117]:
env.close()

AttributeError: 'str' object has no attribute 'close'

In [118]:
display.stop()

$DISPLAY was already unset.


<pyvirtualdisplay.display.Display at 0x7f8c30147d90>

In [12]:
# Notebook-wide configuration. Several classes defined in earlier cells read
# these names as globals (hidden_dim, pos_dim, rot_dim, lu, debug), so this
# cell has to be executed before they are instantiated.
# NOTE(review): `env` is rebound to a string here; a later `env.close()` on
# what used to be a UnityEnvironment then fails with the AttributeError shown
# in this notebook's outputs.
env = 'unity' # deepmindlab

log_path = './log/' + env + '/mem/50k'
exp_name ='exp'
num_actors = 6

queue_maxsize = 25
# Steps per trajectory: Actor sizes its Memory buffer with this value.
lu = 100
lo = 32
traj_no_total = 2000
max_update = 1000000
batch_size = 24

rho_bar = 1.
c_bar = 1.
gamma = 0.99
clip_reward = False

grad_clip_norm = 10.
init_lr = 0.0002
epoch = 1000

policy_loss_c = 1.
v_loss_c = 0.5
entropy_c = 0.00015
model_loss_c = 0.1

action_repeat = 6
action_dim = 9
# LSTM / decoder width read by Policy, MapDecoder, PositionDecoder, RotDecoder and ConvDraw.
hidden_dim = 512
act_emb_dim = 128
max_frame = 6e4

latent_dim = 16
draw_step = 8

# memory
code_size = 1024
memory_size = 64
dim_s = 128

map_dec_type = 'pixel' # deconv-complex, deconv_simple
mem_type = 'static'

model_save_path = 'model_saved'

# Pixel-space error threshold (see the comment in ConvDraw.constraint).
kappa = 1e-3

# Number of output classes for PositionDecoder (pos_dim) and RotDecoder (rot_dim).
if env == 'unity':
  map_size = [100., 100.]
  pos_dim = (100 // 20)**2 # 5X5 grids
else:
  map_size = [1000., 1000.]
  pos_dim = 1000 // 10
rot_dim = 360 // 18
# When True, Actor.start runs _act in the current process instead of spawning one.
debug = False

level_cache_dir = '/tmp/level_cache'


In [40]:
env.close()

AttributeError: 'str' object has no attribute 'close'

In [41]:
display.stop()

$DISPLAY was already unset.


<pyvirtualdisplay.display.Display at 0x7f8c3673aed0>