In [1]:
import numpy as np
from dataset import SymbolicDataset

import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
from torch.utils.data import DataLoader
import pytorch_lightning as pl
from transformers import BertConfig, BertModel

%env CUDA_VISIBLE_DEVICES=3

env: CUDA_VISIBLE_DEVICES=3


In [2]:
dataset = SymbolicDataset('/zfsauton/datasets/ArgoRL/brianyan/carla_data/')
dataloader = DataLoader(dataset, batch_size=1, shuffle=True)

100%|██████████| 458/458 [00:02<00:00, 184.26it/s]

Number of samples: 743036





In [3]:
sample = next(iter(dataloader))
obs, action, reward, done, next_obs = sample
ego_features = obs['ego_features']
bounding_box = ego_features['bounding_box']

other_features = obs['vehicle_features']
other_idx = list(other_features.keys())[0]
other_bounding_box = other_features[other_idx]['bounding_box']

In [4]:
x = torch.tensor(bounding_box)[::2]
y = torch.tensor(other_bounding_box)[::2]

def positional_encoding(p, L=10):
    return torch.stack([torch.sin(2**i * np.pi * p) for i in range(L)] + [torch.cos(2**i * np.pi * p) for i in range(L)], dim=-1)

In [78]:
class TransformerAgent(pl.LightningModule):
    def __init__(self, embedding_size=128):
        super().__init__()

        self.embedding_size = embedding_size

        config = BertConfig(
            vocab_size=1, # we do our own embeddings
            num_attention_heads=8,
            hidden_size=self.embedding_size,
            intermediate_size=512,
        )
        self.model = BertModel(config)

        self.action_predictor = nn.Sequential(
            nn.Linear(embedding_size, 128),
            nn.ReLU(),
            nn.Linear(128, 2),
            nn.Tanh()
        )

        self.segment_embedding = nn.Embedding(3, 128)

        self.vehicle_encoder = nn.Sequential(
            nn.Linear(2, 128),
            nn.ReLU(),
            nn.Linear(128, self.embedding_size)
        )

        self.ego_encoder = nn.Sequential(
            nn.Linear(3, 128),
            nn.ReLU(),
            nn.Linear(128, self.embedding_size)
        )

        self.waypoint_encoder = nn.Linear(1, 128)

    def make_ego_token(self, obs_dict):
        ego_features = obs_dict['ego_features']
        ego_vehicle_token = self.make_vehicle_token(ego_features)
        ego_encodings = self.ego_encoder(
            torch.FloatTensor([obs_dict['light'], obs_dict['next_orientation'], obs_dict['dist_to_trajectory']]).cuda()
        )
        ego_token = ego_vehicle_token + ego_encodings
        return ego_token

    def make_vehicle_token(self, actor_features, is_ego=False):
        positions = torch.tensor(actor_features['bounding_box'])[::2]
        positional_encodings = positional_encoding(positions, L=32).cuda().reshape(-1,128)

        feature_encodings = self.vehicle_encoder(
            torch.FloatTensor([actor_features['theta'], actor_features['speed']]).cuda()
        )

        token = feature_encodings + positional_encodings
        return token

    def make_waypoint_tokens(self, waypoints):
        waypoints = torch.tensor(waypoints)[:,:2]
        positional_encodings = positional_encoding(waypoints, L=32).cuda().reshape(-1,128)

        feature_encodings = self.waypoint_encoder(
            torch.FloatTensor([i for i in range(len(waypoints))]).reshape(-1,1).cuda()
        )
        token = feature_encodings + positional_encodings
        return token

    def forward(self, obs):
        ego_token = self.make_ego_token(obs)
        if obs['vehicle_features']:
            vehicle_tokens = torch.stack([self.make_vehicle_token(obs['vehicle_features'][idx]) for idx in obs['vehicle_features']]).reshape(-1,128)
        else:
            vehicle_tokens = torch.zeros((0,128)).cuda()
        waypoint_tokens = self.make_waypoint_tokens(obs['next_waypoints'])
        tokens = torch.cat([ego_token, vehicle_tokens, waypoint_tokens], dim=0)

        output = self.model(
            inputs_embeds=tokens[None].float()
        )
        pooler_output = output[1]
        pred_action = agent.action_predictor(pooler_output)

        return pred_action

    def training_step(self, batch, batch_idx, optimizer_idx):
        obs, action, reward, done, next_obs = batch
        pred_action = self.forward(obs)
        loss = F.mse_loss(pred_action, action)
        self.log('train/bc_loss', loss)
        return loss

    def validation_step(self, batch, batch_idx):
        obs, action, reward, done, next_obs = batch
        pred_action = self.forward(obs)
        loss = F.mse_loss(pred_action, action)
        self.log('val/bc_loss', loss)
        return loss

    def configure_optimizers(self):
        return optim.Adam(self.parameters(), lr=1e-4)


In [79]:
agent = TransformerAgent().cuda()

In [80]:
for obs, action, reward, done, next_obs in dataloader:
    pred_action = agent.forward(obs)
    print(pred_action)

tensor([[ 0.0860, -0.0394]], device='cuda:0', grad_fn=<TanhBackward>)
tensor([[0.0212, 0.0019]], device='cuda:0', grad_fn=<TanhBackward>)
tensor([[0.0380, 0.0086]], device='cuda:0', grad_fn=<TanhBackward>)
tensor([[ 0.1101, -0.0357]], device='cuda:0', grad_fn=<TanhBackward>)
tensor([[0.0474, 0.0219]], device='cuda:0', grad_fn=<TanhBackward>)
tensor([[ 0.1028, -0.0358]], device='cuda:0', grad_fn=<TanhBackward>)
tensor([[ 0.0414, -0.0039]], device='cuda:0', grad_fn=<TanhBackward>)
tensor([[ 0.1183, -0.0194]], device='cuda:0', grad_fn=<TanhBackward>)
tensor([[ 0.1230, -0.0267]], device='cuda:0', grad_fn=<TanhBackward>)
tensor([[0.0211, 0.0021]], device='cuda:0', grad_fn=<TanhBackward>)
tensor([[0.0009, 0.0131]], device='cuda:0', grad_fn=<TanhBackward>)
tensor([[ 0.0052, -0.0036]], device='cuda:0', grad_fn=<TanhBackward>)
tensor([[ 0.1067, -0.0504]], device='cuda:0', grad_fn=<TanhBackward>)
tensor([[ 0.1199, -0.0358]], device='cuda:0', grad_fn=<TanhBackward>)
tensor([[ 0.0968, -0.0547]], d

KeyboardInterrupt: 