In [186]:
import torch
import torch.nn as nn
import torch.nn.utils as nn_utils
import torch.nn.functional as F
import torch.optim as optim
import numpy as np

import sys

sys.path.append('../src')

from control.abstract_control import Controller

In [305]:
class NnA2CController(nn.Module, Controller):
    """Actor-critic controller driven by stacked camera frames.

    A shared convolutional feature extractor feeds two fully connected heads:
    ``actor_net`` (two outputs, read by ``control`` as steer and gas/brake)
    and ``critic_net`` (one scalar output).
    """

    def __init__(self, frames_shape):
        """Sets up all of the neural networks necessary for the controller to work

        Parameters
        ----------
        frames_shape
            the shape of the multidimensional carla data fetched from an agent's sensor,
            given without the batch dimension; ``frames_shape[0]`` is used as the number
            of input channels of the first convolution

        Attributes
        ----------
        conv_net: nn.Sequential
            a neural network responsible for extracting the current state of the environment,
            whose output is meant to serve as input for policy and critic networks
        conv_out_size: int
            number of features produced by ``conv_net`` for a single frame of ``frames_shape``
        actor_net: nn.Sequential
            a neural network responsible for the current agent's policy (2 outputs)
        critic_net: nn.Sequential
            a neural network producing a single scalar per input; the original notes describe
            it as approximating the advantage function regarding the action space
            (NOTE(review): in textbook A2C the critic estimates the state value - confirm intent)
        """
        super(NnA2CController, self).__init__()

        # LeNet-inspired net, upscaled due to carla frames being bigger than MNIST digits ;)
        self.conv_net = nn.Sequential(
            nn.Conv2d(in_channels=frames_shape[0], out_channels=128, kernel_size=7, stride=1),
            nn.Tanh(),
            nn.AvgPool2d(kernel_size=2),
            nn.Conv2d(in_channels=128, out_channels=64, kernel_size=5, stride=1),
            nn.Tanh(),
            nn.AvgPool2d(kernel_size=4),
            nn.Conv2d(in_channels=64, out_channels=120, kernel_size=3, stride=1),
            nn.Tanh()
        )

        # Probe the conv stack once with a single dummy frame to size the dense heads.
        # no_grad keeps this throwaway pass out of autograd; with a batch of one,
        # numel() equals the flattened per-sample feature count.
        with torch.no_grad():
            probe = self.conv_net(torch.zeros(1, *frames_shape))
        self.conv_out_size = probe.numel()

        self.actor_net = nn.Sequential(
            nn.Linear(self.conv_out_size, 512),
            nn.ReLU(),
            # 2 returned values being the action taken: control() reads index 0 as the
            # steering value and index 1 as the gas/brake pedal value
            nn.Linear(512, 2)
        )

        self.critic_net = nn.Sequential(
            nn.Linear(self.conv_out_size, 512),
            nn.ReLU(),
            nn.Linear(512, 1)
        )

    def forward(self, sensor_data):
        """Runs the shared conv stack and both heads on a batch of frames

        Parameters
        ----------
        sensor_data
            batched tensor whose first dimension is the batch size and whose remaining
            dimensions match the ``frames_shape`` the controller was built with

        Returns
        -------
        tuple
            ``(actor_out, critic_out)``: the outputs of ``actor_net`` and ``critic_net``
            for the flattened convolutional features
        """
        batch_size = sensor_data.size()[0]
        # flatten everything except the batch dimension before the linear layers
        conv_output = self.conv_net(sensor_data).view(batch_size, -1)
        return self.actor_net(conv_output), self.critic_net(conv_output)

    def control(self, state):
        """Computes the suggested action and the critic output for one observation

        Parameters
        ----------
        state: dict
            must provide ``'depth'`` and ``'rgb'`` tensors, which are concatenated along
            dimension 1 before being fed to the network. Other entries (e.g. ``'location'``,
            ``'velocity'``, ``'yaw'``) are currently ignored: only camera data is used
            for starters.

        Returns
        -------
        tuple
            ``(actions, advantage)`` where ``actions`` is a dict with the keys ``'steer'``
            and ``'gas_brake'`` taken from the first batch element of the actor output,
            and ``advantage`` is the first batch element of the critic output
        """
        frames = torch.cat((state['depth'], state['rgb']), 1)
        actor_out, critic_out = self.forward(frames)

        # only the first element of the batch is turned into an action
        actions = {
            'steer': actor_out[0][0],
            'gas_brake':  actor_out[0][1],
        }

        advantage = critic_out[0][0]

        return actions, advantage

In [306]:
# Build a controller for frames with 8 input channels and a 75x100 spatial size,
# then show how many features the conv stack yields for one such frame.
nn_controller = NnA2CController(frames_shape=[8, 75, 100])
nn_controller.conv_out_size

4800

In [307]:
# Dummy observation used to smoke-test the controller: scalar pose values plus
# all-zero depth and rgb frames of shape (1, 4, 75, 100) each.
state = dict(
    location=(1, 33),
    velocity=5,
    yaw=12,
    depth=torch.zeros(1, 4, 75, 100),
    rgb=torch.zeros(1, 4, 75, 100),
)
state

{'location': (1, 33),
 'velocity': 5,
 'yaw': 12,
 'depth': tensor([[[[0., 0., 0.,  ..., 0., 0., 0.],
           [0., 0., 0.,  ..., 0., 0., 0.],
           [0., 0., 0.,  ..., 0., 0., 0.],
           ...,
           [0., 0., 0.,  ..., 0., 0., 0.],
           [0., 0., 0.,  ..., 0., 0., 0.],
           [0., 0., 0.,  ..., 0., 0., 0.]],
 
          [[0., 0., 0.,  ..., 0., 0., 0.],
           [0., 0., 0.,  ..., 0., 0., 0.],
           [0., 0., 0.,  ..., 0., 0., 0.],
           ...,
           [0., 0., 0.,  ..., 0., 0., 0.],
           [0., 0., 0.,  ..., 0., 0., 0.],
           [0., 0., 0.,  ..., 0., 0., 0.]],
 
          [[0., 0., 0.,  ..., 0., 0., 0.],
           [0., 0., 0.,  ..., 0., 0., 0.],
           [0., 0., 0.,  ..., 0., 0., 0.],
           ...,
           [0., 0., 0.,  ..., 0., 0., 0.],
           [0., 0., 0.,  ..., 0., 0., 0.],
           [0., 0., 0.,  ..., 0., 0., 0.]],
 
          [[0., 0., 0.,  ..., 0., 0., 0.],
           [0., 0., 0.,  ..., 0., 0., 0.],
           [0., 0., 0., 

In [308]:
# Run one control step on the dummy observation; the result is the
# (actions, advantage) tuple returned by NnA2CController.control.
output = nn_controller.control(state=state)
output

({'steer': tensor(0.0530, grad_fn=<SelectBackward>),
  'gas_brake': tensor(-0.0371, grad_fn=<SelectBackward>)},
 tensor(0.0406, grad_fn=<SelectBackward>))

In [275]:
# Display the module repr to inspect the layer structure of the controller.
nn_controller

NnA2CController(
  (conv_net): Sequential(
    (0): Conv2d(8, 128, kernel_size=(7, 7), stride=(1, 1))
    (1): Tanh()
    (2): AvgPool2d(kernel_size=2, stride=2, padding=0)
    (3): Conv2d(128, 64, kernel_size=(5, 5), stride=(1, 1))
    (4): Tanh()
    (5): AvgPool2d(kernel_size=4, stride=4, padding=0)
    (6): Conv2d(64, 120, kernel_size=(3, 3), stride=(1, 1))
    (7): Tanh()
  )
  (actor_net): Sequential(
    (0): Linear(in_features=4800, out_features=512, bias=True)
    (1): ReLU()
    (2): Linear(in_features=512, out_features=2, bias=True)
  )
  (critic_net): Sequential(
    (0): Linear(in_features=4800, out_features=512, bias=True)
    (1): ReLU()
    (2): Linear(in_features=512, out_features=1, bias=True)
  )
)