# Agent

In [None]:
import torch
import torch.nn.functional as F

import numpy as np
import os 
  
import cv2
   
import torch.nn as nn 
from torch.distributions import Normal

  
from collections import OrderedDict



# Bounds of the commands the policy output is rescaled into
# (one entry per action dimension); see action_rescale().
action_low = np.array([0.0, 0.0])
action_high = np.array([0.5, 0.5])

action_dim = 2
obs_shape = (64, 64, 3)

DEVICE = "cpu"

# Clamp range applied to the policy's predicted log standard deviation.
LOG_SIG_MAX = 2
LOG_SIG_MIN = -20
# Small constant that keeps the log in the tanh correction away from log(0).
epsilon = 1e-6


class AttrDict(dict):
    """Dictionary whose keys can also be read and written as attributes.

    Defined here because no import in this file provides ``AttrDict``;
    without it the ``sac_args`` definition below raises ``NameError``.
    """

    def __getattr__(self, name):
        try:
            return self[name]
        except KeyError as err:
            # Preserve getattr()/hasattr() semantics for missing keys.
            raise AttributeError(name) from err

    def __setattr__(self, name, value):
        self[name] = value


# Hyper-parameters used to build the SAC policy network.
sac_args = AttrDict({
    'hidden_size': 512,
    'input_dim': 32,
})



def action_rescale(action):
    """Map an action from [-1, 1] onto [action_low, action_high].

    The input is shifted to [0, 2], halved to [0, 1], stretched by the width
    of the target interval and offset by its lower bound. The result is then
    clipped so values outside [-1, 1] still land inside the bounds.
    """
    span = action_high - action_low
    scaled = action_low + (action + 1.0) * 0.5 * span
    return np.clip(scaled, action_low, action_high)
 




# Initialize Policy weights
def weights_init_(m):
    """Initialise one module in place; intended for ``nn.Module.apply``.

    ``nn.Linear`` layers get Xavier-uniform weights (gain 1) and, when they
    have one, a zero bias. Every other module type is left untouched.
    """
    if not isinstance(m, nn.Linear):
        return

    torch.nn.init.xavier_uniform_(m.weight, gain=1)
    # Layers built with ``bias=False`` carry ``bias = None``; initialising
    # that would raise, so skip it.
    if m.bias is not None:
        torch.nn.init.constant_(m.bias, 0)

  



class ConvBlock(nn.Module):
    """Conv2d -> BatchNorm2d -> LeakyReLU building block."""

    def __init__(
        self,
        in_channels: int,
        out_channels: int,
        kernel_size: int,
        stride: int = 2,
        padding: int = 1,
        slope: float = 0.2,
    ):
        """Create the three layers.

        ``stride`` and ``padding`` are forwarded to the convolution;
        ``slope`` is the negative slope of the LeakyReLU.
        """
        super().__init__()
        # The attribute names become state-dict key prefixes, so they must
        # stay as they are for saved checkpoints to load.
        self.conv = nn.Conv2d(
            in_channels,
            out_channels,
            kernel_size,
            stride=stride,
            padding=padding,
        )
        self.bn = nn.BatchNorm2d(out_channels)
        self.relu = nn.LeakyReLU(negative_slope=slope)

    def forward(self, x):
        """Apply convolution, batch normalisation and activation in order."""
        convolved = self.conv(x)
        normalised = self.bn(convolved)
        return self.relu(normalised)

 

class Encoder(nn.Module):
    """Convolutional image encoder producing a ``z_dim``-sized latent vector.

    Four stride-2 convolution stages are followed by a linear projection.
    The projection expects 4096 flattened features, i.e. the 256x4x4 map
    the stack yields for a 3x64x64 input.
    """

    def __init__(self, z_dim: int = 32):
        super().__init__()

        # Trailing comments give the (channels x height x width) output of
        # each stage for a 3x64x64 input.
        stages = [
            ("conv1", nn.Conv2d(3, 32, 4, stride=2, padding=1)),
            ("relu1", nn.LeakyReLU(0.2)),  # 32x32x32
            ("block1", ConvBlock(32, 64, 4, stride=2, padding=1, slope=0.2)),  # 64x16x16
            ("block2", ConvBlock(64, 128, 4, stride=2, padding=1, slope=0.2)),  # 128x8x8
            ("block3", ConvBlock(128, 256, 4, stride=2, padding=1, slope=0.2)),  # 256x4x4
        ]
        self.encoder = nn.Sequential(OrderedDict(stages))

        self.fc = nn.Linear(4096, z_dim)

    def forward(self, x):
        """Encode a batch of images into latent vectors of width ``z_dim``."""
        feature_map = self.encoder(x)
        # Flatten each sample's feature map to the 4096 values ``fc`` expects.
        flat = feature_map.view(-1, 4096)
        return self.fc(flat)


class GaussianPolicy(nn.Module):
    """Tanh-squashed Gaussian policy over an image plus a velocity vector."""

    def __init__(self, num_inputs, num_actions, hidden_dim):
        """Build the encoder, the two-layer trunk and the two output heads.

        num_inputs: width of the encoder's latent vector.
        num_actions: action dimension. The first linear layer takes
            ``num_inputs + num_actions`` features, so the ``vel`` tensor
            concatenated in ``forward`` must be ``num_actions`` wide.
        hidden_dim: width of both hidden layers.
        """
        super().__init__()

        self.enc = Encoder(num_inputs)

        # Trunk over the concatenation [latent, vel].
        self.linear1 = nn.Linear(num_inputs + num_actions, hidden_dim)
        self.linear2 = nn.Linear(hidden_dim, hidden_dim)

        # Heads: mean and log standard deviation, one value per action dim.
        self.mean_linear = nn.Linear(hidden_dim, num_actions)
        self.log_std_linear = nn.Linear(hidden_dim, num_actions)

        # Re-initialises every nn.Linear in the module tree, including the
        # encoder's projection layer.
        self.apply(weights_init_)

    def forward(self, state, vel):
        """Return ``(mean, log_std)`` of the pre-squash Gaussian.

        ``log_std`` is clamped to ``[LOG_SIG_MIN, LOG_SIG_MAX]``.
        """
        latent = self.enc(state)
        features = torch.cat((latent, vel), dim=1)

        hidden = F.relu(self.linear1(features))
        hidden = F.relu(self.linear2(hidden))

        mean = self.mean_linear(hidden)
        log_std = self.log_std_linear(hidden).clamp(min=LOG_SIG_MIN, max=LOG_SIG_MAX)
        return mean, log_std

    def sample(self, state, vel):
        """Draw a reparameterised action.

        Returns ``(action, log_prob, tanh(mean))`` with shapes
        (b, act_dim), (b, 1), (b, act_dim).
        """
        mean, log_std = self.forward(state, vel)
        gaussian = Normal(mean, log_std.exp())

        # rsample() keeps the draw differentiable (mean + std * N(0, 1)).
        pre_tanh = gaussian.rsample()
        action = torch.tanh(pre_tanh)

        # Correct the Gaussian log-density for the tanh squashing, then sum
        # over the action dimensions.
        squash_correction = torch.log(1 - action.pow(2) + epsilon)
        log_prob = (gaussian.log_prob(pre_tanh) - squash_correction).sum(1, keepdim=True)

        return action, log_prob, torch.tanh(mean)

 
 
class SAC(object):
    """Inference-side wrapper around the SAC Gaussian policy."""

    def __init__(self):
        """Build the policy network on ``DEVICE`` with freshly initialised weights."""
        self.policy = GaussianPolicy(sac_args.input_dim, action_dim, sac_args.hidden_size).to(DEVICE)

    def select_action(self, obs, vel):
        '''
            Sample one action from the policy for a single observation.

            obs: BGR, (3, 64, 64).
            vel: (2,), concatenated with the encoded observation.

            Returns the sampled action rescaled by action_rescale(), shape (2,).
        '''

        obs = torch.FloatTensor(obs).to(DEVICE).unsqueeze(0) # (1, 3, 64, 64).
        vel = torch.FloatTensor(vel).to(DEVICE).unsqueeze(0) # (1, 2).

        assert tuple(obs.shape) == (1, 3, 64, 64)

        # No gradients are needed to pick an action, so skip building the
        # autograd graph on every camera frame.
        # NOTE(review): the policy is never switched to eval(), so the
        # encoder's BatchNorm layers run in training mode here; confirm
        # whether that matches how the checkpoint was trained.
        with torch.no_grad():
            action, _, _ = self.policy.sample(obs, vel)

        action = action.detach().cpu().numpy() # (1, 2).
        assert action.shape == (1, action_dim)

        return action_rescale(action[0]) # (2,).

    def load(self, dir_name, name):
        """Restore the policy weights from the checkpoint ``dir_name``/``name``.

        The checkpoint must be a dict with a ``'policy'`` state-dict entry.
        """
        path = os.path.join(dir_name, name)

        # map_location remaps tensors saved on another device (e.g. a GPU
        # used for training) onto DEVICE; without it loading fails on a
        # machine that lacks the original device.
        # NOTE: torch.load unpickles the file, so only load trusted checkpoints.
        checkpoint = torch.load(path, map_location=DEVICE)

        self.policy.load_state_dict(checkpoint['policy'])

        print(f'loaded ckpt: {path}')


 

In [None]:
# Build the agent, then restore the policy weights from the checkpoint file.
agent = SAC()

agent.load('Python-Wrapper/ckpt', 'model_600-r2800')


# U-Net

In [None]:

class UNet(object):
    """Placeholder for the U-Net semantic-segmentation model (not implemented)."""

    def __init__(self):
        """Create the placeholder; no network is built yet."""

    def load(self, dir_name, name):
        """Placeholder for restoring weights from ``dir_name``/``name``; a no-op for now."""

    def __call__(self, img):
        """Segment ``img``; not implemented yet.

        The preprocessing step calls the instance directly (``unet(img)``).
        Raising here gives a clear error instead of the opaque
        "'UNet' object is not callable" TypeError.
        """
        raise NotImplementedError(
            "UNet semantic segmentation is not implemented yet; "
            "implement UNet.__call__ before running the control loop."
        )
      



In [None]:
# Instantiate the segmentation model used by preprocess().
unet = UNet()

# Empty directory and file name: UNet.load is still a stub, so nothing is read.
unet.load('', '')


# Camera

In [None]:
import traitlets
from IPython.display import display
import ipywidgets.widgets as widgets
from jetbot import Camera, bgr8_to_jpeg

# Camera singleton delivering 224x224 frames, plus a same-sized image widget for the live preview.
camera = Camera.instance(width=224, height=224)
image = widgets.Image(format='jpeg', width=224, height=224)
# NOTE(review): neither slider is read by the control loop in this file; they are only displayed.
blocked_slider = widgets.FloatSlider(description='blocked', min=0.0, max=1.0, orientation='vertical')
speed_slider = widgets.FloatSlider(description='speed', min=0.0, max=0.5, value=0.0, step=0.01, orientation='horizontal')

# One-way link: each new camera frame is converted with bgr8_to_jpeg and pushed to the image widget.
camera_link = traitlets.dlink((camera, 'value'), (image, 'value'), transform=bgr8_to_jpeg)

# Layout: preview and 'blocked' slider side by side, speed slider underneath.
display(widgets.VBox([widgets.HBox([image, blocked_slider]), speed_slider]))

# Robot

In [None]:

from jetbot import Robot


# Create the robot handle and command zero speed so it starts at rest.
robot = Robot() 
robot.left(speed=0)
robot.right(speed=0)



# Run

In [None]:

import cv2
import numpy as np
import torch
import torchvision

import time

from PIL import Image
 

 
# Initial value of the velocity vector that update() passes to agent.select_action() as `vel`.
cur_vel = np.array([0.0, 0.0])


def preprocess(camera_value):
    '''
        Turn a raw camera frame into the policy's observation.

        camera_value: (h, w, c) image array, BGR.

        The frame is resized to 64x64 with bilinear interpolation, reordered
        to (c, h, w) and passed through the global U-Net for semantic
        segmentation. Returns whatever the U-Net returns.
    '''
    resized = Image.fromarray(camera_value).resize((64, 64), Image.BILINEAR)

    # (64, 64, 3) -> (3, 64, 64): channels first.
    channels_first = np.array(resized).transpose(2, 0, 1)

    # TODO: Implement unet semantic segmentation.
    return unet(channels_first)

  
  
def update(change):
    """Camera callback: run one control step for the frame in ``change['new']``.

    Preprocesses the frame, asks the agent for the next action given the
    previous one, stores it in the global ``cur_vel`` and sends it to the
    motors.
    """
    # cur_vel is rebound below, so it must be declared global; otherwise
    # Python treats it as a local and reading it raises UnboundLocalError.
    global robot, agent, cur_vel

    cur_obs = preprocess(change['new'])

    cur_vel = agent.select_action(cur_obs, cur_vel)

    # Robot.left()/Robot.right() are turn-in-place helpers that each set BOTH
    # motors, so calling them back to back discards the first command.
    # set_motors drives the two wheels independently.
    # NOTE(review): assumes the action is (left wheel, right wheel) speed - confirm.
    robot.set_motors(float(cur_vel[0]), float(cur_vel[1]))

    time.sleep(0.001)  # If performance is not good, try tuning this delay.
        

# Run one control step on the current frame so a failure shows up here rather than inside the callback.
update({'new': camera.value})  # we call the function once to initialize

# From here on every new camera frame triggers update().
camera.observe(update, names='value')  # this attaches the 'update' function to the 'value' traitlet of our camera


In [None]:
# Detach the control callback first so no new motor commands are issued.
camera.unobserve(update, names='value')
time.sleep(0.1)  # add a small sleep to make sure frames have finished processing

# Then command zero speed.
robot.left(speed=0)
robot.right(speed=0)

# Clean up

In [None]:
import time

# Stop the robot before the camera is shut down.
robot.stop()

camera_link.unlink()  # don't stream to browser (will still run camera)
# camera_link.link()  # stream to browser (won't run camera)

# Shut the camera down.
camera.stop()
