In [1]:
%load_ext autoreload
from glob import glob
import os
from pathlib import Path
import random

from einops import rearrange
import matplotlib.pyplot as plt
import numpy as np
from PIL import Image
import torch
from torch.nn import L1Loss
from torch.optim.lr_scheduler import ReduceLROnPlateau
from tqdm.auto import tqdm

from models.perciever import Perceiver
from models.perciever_rnn import PerceiverRNN, MLPPredictor
from data_prep.nvidia import NvidiaDatasetRNN


  from .autonotebook import tqdm as notebook_tqdm


In [2]:
# Sanity check: open one frame from the first recording directory that glob
# returns and turn its pixels into a (height, width, channels) tensor.
# dataset_path = 'data_stuff/rally-estonia-cropped-antialias'
dataset_path = '/gpfs/space/projects/rally2023/rally-estonia-cropped-antialias'
dataset_subdirs = glob(dataset_path + '/*')
dataset_subdir = dataset_subdirs[0]
print(dataset_subdir)
image_paths = glob(dataset_subdir + '/front_wide/*.png')
image = Image.open(image_paths[0])
# getdata() yields the pixels as one flat, row-by-row sequence
pixels = list(image.getdata())
print(len(pixels))
pixel_tens = torch.Tensor(pixels)
print(pixel_tens.shape)
# PIL reports size as (width, height); read it from the image instead of
# hard-coding 68 x 264 so frames with other dimensions reshape correctly.
# The trailing -1 keeps whatever channel count the flat tensor carries.
image_width, image_height = image.size
pixel_tens = pixel_tens.view(image_height, image_width, -1)
print(pixel_tens.shape)

/gpfs/space/projects/rally2023/rally-estonia-cropped-antialias/2021-05-20-12-36-10_e2e_sulaoja_20_30
17952
torch.Size([17952, 3])
torch.Size([68, 264, 3])


In [3]:
# Prepend a batch axis of size 1 so the single frame looks like a batch of one.
batch = pixel_tens.unsqueeze(0)
print(batch.dtype)

torch.float32


In [4]:
# Latent width used by the Perceiver; the steering head below reuses the same
# value so the two cannot drift apart when the latent size is tuned.
LATENT_DIM = 512

pmodel = Perceiver(
    input_channels = 3,          # number of channels for each token of the input
    input_axis = 2,              # number of axis for input data (2 for images, 3 for video)
    num_freq_bands = 6,          # number of freq bands, with original value (2 * K + 1)
    max_freq = 10.,              # maximum frequency, hyperparameter depending on how fine the data is
    depth = 1,                   # depth of net. The shape of the final attention mechanism will be:
                                 #   depth * (cross attention -> self_per_cross_attn * self attention)
    num_latents = 256,           # number of latents, or induced set points, or centroids. different papers giving it different names
    latent_dim = LATENT_DIM,     # latent dimension
    cross_heads = 1,             # number of heads for cross attention. paper said 1
    latent_heads = 8,            # number of heads for latent self attention, 8
    cross_dim_head = 64,         # number of dimensions per cross attention head
    latent_dim_head = 64,        # number of dimensions per latent self attention head
    num_classes = 1,             # output number of classes
    attn_dropout = 0.,
    ff_dropout = 0.,
    weight_tie_layers = False,   # whether to weight tie layers (optional, as indicated in the diagram)
    fourier_encode_data = True,  # whether to auto-fourier encode the data, using the input_axis given. defaults to True, but can be turned off if you are fourier encoding the data yourself
    self_per_cross_attn = 2      # number of self attention blocks per cross attention
)

# NOTE(review): the first argument is presumably the head's input width (the
# Perceiver latent size) and 64 its hidden width - confirm against
# models/perciever_rnn.py.
steering_classifier = MLPPredictor(LATENT_DIM, 64)

In [5]:
BATCH_SIZE=512
NUM_WORKERS=2
SEQ_LENGTH=8
STRIDE=4

# RNN dataloader

# dataset_path = Path('./data_stuff/rally-estonia-cropped-antialias')
dataset_path = Path('/gpfs/space/projects/rally2023/rally-estonia-cropped-antialias')
random.seed(42)
# os.listdir returns entries in arbitrary order, so sort before the seeded
# shuffle; otherwise the same seed can produce a different split per run/machine.
data_dirs = sorted(os.listdir(dataset_path))
random.shuffle(data_dirs)
# First 40% of the shuffled recordings are used for training and the next 10%
# for validation; the remaining half is not used by this cell.
split_index1 = int(0.4 * len(data_dirs))
split_index2 = int(0.5 * len(data_dirs))

train_paths = [dataset_path / dir_name for dir_name in data_dirs[:split_index1]]
valid_paths = [dataset_path / dir_name for dir_name in data_dirs[split_index1:split_index2]]

train_dataset = NvidiaDatasetRNN(train_paths, SEQ_LENGTH, STRIDE)
train_loader = torch.utils.data.DataLoader(train_dataset, batch_size=BATCH_SIZE, shuffle=True,
                                                   num_workers=NUM_WORKERS, pin_memory=True,
                                                   persistent_workers=True, collate_fn=train_dataset.collate_fn)
print('Validation set...')
valid_dataset = NvidiaDatasetRNN(valid_paths, SEQ_LENGTH, STRIDE)
print('Dataset ready')
# Collate with the validation dataset's own collate_fn rather than borrowing
# the training dataset's bound method.
valid_loader = torch.utils.data.DataLoader(valid_dataset, batch_size=BATCH_SIZE, shuffle=False,
                                                   num_workers=NUM_WORKERS, pin_memory=True,
                                                   persistent_workers=False, collate_fn=valid_dataset.collate_fn)

[NvidiaDataset] Using default transform: Compose(
    <data_prep.nvidia.Normalize object at 0x2b44f0964fa0>
)
/gpfs/space/projects/rally2023/rally-estonia-cropped-antialias/2021-10-14-13-08-51_e2e_rec_vahi_backwards: length=13442, filtered=0
/gpfs/space/projects/rally2023/rally-estonia-cropped-antialias/2021-10-11-17-20-12_e2e_rec_backwards: length=68954, filtered=5
/gpfs/space/projects/rally2023/rally-estonia-cropped-antialias/2021-06-07-14-06-31_e2e_rec_ss6: length=3003, filtered=0
/gpfs/space/projects/rally2023/rally-estonia-cropped-antialias/2021-05-20-12-51-29_e2e_sulaoja_20_30: length=5393, filtered=0
/gpfs/space/projects/rally2023/rally-estonia-cropped-antialias/2021-06-10-13-19-22_e2e_ss4_backwards: length=23844, filtered=1
/gpfs/space/projects/rally2023/rally-estonia-cropped-antialias/2021-10-11-16-06-44_e2e_rec_ss2: length=81250, filtered=1
/gpfs/space/projects/rally2023/rally-estonia-cropped-antialias/2021-09-24-12-02-32_e2e_rec_ss10_3: length=8142, filtered=0
/gpfs/space/pr

In [8]:
# RNN forward pass: draw one training batch and reorder it to be time-major.

batch = next(iter(train_loader))
frame_dict, steering = batch[0], batch[1]

# Per the original notes the loader yields images as (B, T, C, H, W);
# move time to the front and channels to the end -> (T, B, H, W, C).
images = rearrange(frame_dict['image'], 'b t c h w -> t b h w c')
# Targets go from (B, T) to (T, B) so they can be indexed per time step too.
targets = rearrange(steering, 'b t -> t b')
# No recurrent state exists before the first time step.
latents = None
# for t in range(images.shape[0]):
#     t_img_batch = images[t]
#     steering, latents = percieverRNN.forward(t_img_batch)

ValueError: Caught ValueError in DataLoader worker process 0.
Original Traceback (most recent call last):
  File "/gpfs/space/home/okuu/rally-challenge-24/venv/lib/python3.8/site-packages/torch/utils/data/_utils/worker.py", line 308, in _worker_loop
    data = fetcher.fetch(index)  # type: ignore[possibly-undefined]
  File "/gpfs/space/home/okuu/rally-challenge-24/venv/lib/python3.8/site-packages/torch/utils/data/_utils/fetch.py", line 51, in fetch
    data = [self.dataset[idx] for idx in possibly_batched_index]
  File "/gpfs/space/home/okuu/rally-challenge-24/venv/lib/python3.8/site-packages/torch/utils/data/_utils/fetch.py", line 51, in <listcomp>
    data = [self.dataset[idx] for idx in possibly_batched_index]
  File "/gpfs/space/home/okuu/rally-challenge-24/data_prep/nvidia.py", line 172, in __getitem__
    target[0, :] = target_values
ValueError: could not broadcast input array from shape (104,) into shape (8,)


In [9]:
# Display the shape of the rearranged (time-major) targets tensor.
targets.shape

torch.Size([8, 16])

In [7]:
# Show every frame of the first sequence in a freshly drawn training batch.
tens = next(iter(train_loader))[0]['image'][0]
for frame in tens:
    # imshow wants the channel axis last, so rotate axis 0 to the end
    plt.imshow(frame.permute(1, 2, 0))
    plt.axis('off')
    plt.show()

ValueError: Caught ValueError in DataLoader worker process 0.
Original Traceback (most recent call last):
  File "/gpfs/space/home/okuu/rally-challenge-24/venv/lib/python3.8/site-packages/torch/utils/data/_utils/worker.py", line 308, in _worker_loop
    data = fetcher.fetch(index)  # type: ignore[possibly-undefined]
  File "/gpfs/space/home/okuu/rally-challenge-24/venv/lib/python3.8/site-packages/torch/utils/data/_utils/fetch.py", line 51, in fetch
    data = [self.dataset[idx] for idx in possibly_batched_index]
  File "/gpfs/space/home/okuu/rally-challenge-24/venv/lib/python3.8/site-packages/torch/utils/data/_utils/fetch.py", line 51, in <listcomp>
    data = [self.dataset[idx] for idx in possibly_batched_index]
  File "/gpfs/space/home/okuu/rally-challenge-24/data_prep/nvidia.py", line 172, in __getitem__
    target[0, :] = target_values
ValueError: could not broadcast input array from shape (112,) into shape (8,)


In [17]:
# Optimisation hyper-parameters and the directory that receives saved weights.
LEARNING_RATE = 1e-3
DECAY = 1e-02
SAVE_DIR = Path('trained_models')

# Use the GPU when one is visible, otherwise fall back to the CPU.
if torch.cuda.is_available():
    device = torch.device('cuda')
else:
    device = torch.device('cpu')
print("Using device:", device)

criterion = L1Loss().to(device)
model = PerceiverRNN(pmodel, steering_classifier).to(device)

# AdamW settings collected in one place; everything except lr and
# weight_decay is spelled out explicitly rather than left implicit.
adamw_options = dict(
    lr=LEARNING_RATE,
    betas=(0.9, 0.999),
    eps=1e-08,
    weight_decay=DECAY,
    amsgrad=False,
)
optimizer = torch.optim.AdamW(model.parameters(), **adamw_options)

Using device: cuda


In [20]:
def calculate_whiteness(steering_angles, fps):
    """Return the root-mean-square rate of change of a steering signal.

    Consecutive differences of ``steering_angles`` are scaled by ``fps``
    (turning a per-frame change into a per-second change), squared, averaged
    and square-rooted.
    """
    # change between each sample and its successor, expressed per second
    angle_rate = (steering_angles[1:] - steering_angles[:-1]) * fps
    mean_square_rate = (angle_rate ** 2).mean()
    return np.sqrt(mean_square_rate)

def calculate_open_loop_metrics(predicted_steering, true_steering, fps):
    """Compare predicted and reference steering signals.

    Both inputs are multiplied by 180 / pi before comparison, i.e. they are
    treated as radians and the reported numbers are in degrees.

    Args:
        predicted_steering: array-like of predicted steering values.
        true_steering: array-like of reference steering values, same length.
        fps: frame rate forwarded to ``calculate_whiteness``.

    Returns:
        dict with keys ``mae``, ``rmse``, ``bias``, ``max``, ``whiteness``
        (of the predictions) and ``expert_whiteness`` (of the reference).
    """
    # np.asarray is a no-op for ndarrays and lets plain sequences be passed too
    predicted_degrees = np.asarray(predicted_steering) / np.pi * 180
    true_degrees = np.asarray(true_steering) / np.pi * 180

    errors_signed = predicted_degrees - true_degrees # positive means error to the left
    errors = np.abs(errors_signed)
    whiteness = calculate_whiteness(predicted_degrees, fps)
    expert_whiteness = calculate_whiteness(true_degrees, fps)

    return {
        'mae': errors.mean(),
        'rmse': np.sqrt((errors ** 2).mean()),
        'bias': errors_signed.mean(),
        'max': errors.max(),
        'whiteness': whiteness,
        'expert_whiteness': expert_whiteness
    }

def calculate_metrics(fps, predictions, valid_loader):
    """Compute open-loop metrics overall and per turn-signal group.

    The reference steering angles and the ``turn_signal`` column are read from
    ``valid_loader.dataset.frames``. On top of the overall metrics the result
    carries ``left_mae``, ``straight_mae`` and ``right_mae`` for rows whose
    ``turn_signal`` is 0, 1 and 2 respectively; a group without rows gets 0.
    """
    frames_df = valid_loader.dataset.frames
    true_steering_angles = frames_df.steering_angle.to_numpy()
    metrics = calculate_open_loop_metrics(predictions, true_steering_angles, fps=fps)

    # (turn_signal value, result key) - listed in the order the keys are added
    signal_groups = ((0, "left_mae"), (1, "straight_mae"), (2, "right_mae"))
    for signal_value, mae_key in signal_groups:
        selected = frames_df["turn_signal"] == signal_value
        if not selected.any():
            metrics[mae_key] = 0
            continue
        group_metrics = calculate_open_loop_metrics(
            predictions[selected], true_steering_angles[selected], fps=fps)
        metrics[mae_key] = group_metrics["mae"]

    return metrics

In [18]:
def train_epoch(model, loader, optimizer, criterion, progress_bar, epoch):
    """Run one training pass over ``loader`` and return the mean batch loss.

    Each batch is unrolled along its first axis: ``model`` is called once per
    step with that step's inputs and the latents returned by the previous
    step. The per-step losses are summed and back-propagated once per batch.

    Args:
        model: callable as ``model(input_frame, latents) -> (predictions, latents)``.
        loader: iterable with ``len()`` yielding ``(data, target_values, condition_mask)``
            where ``data['image']`` holds the inputs. ``condition_mask`` is not used.
        optimizer: optimizer stepping ``model``'s parameters.
        criterion: loss taking ``(predictions, targets)``.
        progress_bar: object with ``update`` and ``set_description`` (e.g. tqdm).
        epoch: zero-based epoch index, used only in the progress text.

    Returns:
        Sum-over-steps loss averaged over the batches in ``loader``
        (0.0 for an empty loader).
    """
    running_loss = 0.0
    model.train()

    # Follow the model's own device instead of depending on a notebook global.
    first_param = next(model.parameters(), None)
    model_device = first_param.device if first_param is not None else torch.device('cpu')

    for i, (data, target_values, condition_mask) in enumerate(loader):
        optimizer.zero_grad()

        # NOTE(review): the loop below indexes axis 0 as time, i.e. it expects
        # (T, B, H, W, C) inputs and (T, B) targets. The forward-pass cell
        # describes the loader output as batch-major and rearranges it first -
        # confirm that collate_fn already returns time-major tensors.
        inputs = data['image'].to(model_device)
        target_values = target_values.to(model_device)

        latents = None
        step_losses = []

        for t in range(inputs.size(0)):
            predictions, latents = model(inputs[t], latents)
            # Loss for the current time step
            step_losses.append(criterion(predictions.squeeze(), target_values[t]))

        if step_losses:
            # A single backward over the summed loss yields the same gradients
            # as one backward per step, without needing retain_graph=True.
            total_loss = torch.stack(step_losses).sum()
            total_loss.backward()
            optimizer.step()
            running_loss += total_loss.item()

        progress_bar.update(1)
        pbar_description = f'epoch {epoch+1} | train loss: {(running_loss / (i + 1)):.4f}'
        progress_bar.set_description(pbar_description)

    # max(..., 1) avoids a ZeroDivisionError on an empty loader
    avg_loss = running_loss / max(len(loader), 1)

    return avg_loss

In [19]:
def evaluate(model, loader, criterion, progress_bar, epoch, train_loss):
    """Run ``model`` over ``loader`` without gradients.

    Each batch is unrolled along its first axis exactly as in training:
    ``model(input_frame, latents)`` is called once per step and the latents it
    returns are fed into the next step.

    Args:
        model: callable as ``model(input_frame, latents) -> (predictions, latents)``.
        loader: iterable with ``len()`` yielding ``(data, target_values, condition_mask)``
            where ``data['image']`` holds the inputs. ``condition_mask`` is not used.
        criterion: loss taking ``(predictions, targets)``.
        progress_bar: object with ``update`` and ``set_description`` (e.g. tqdm).
        epoch: zero-based epoch index, used only in the progress text.
        train_loss: training loss of this epoch, used only in the progress text.

    Returns:
        ``(total_loss, result)``: the per-step losses summed and divided by the
        number of batches, and a numpy array built from the collected predictions.
    """
    epoch_loss = 0.0
    model.eval()
    all_predictions = []

    # Follow the model's own device instead of depending on a notebook global.
    first_param = next(model.parameters(), None)
    model_device = first_param.device if first_param is not None else torch.device('cpu')

    with torch.no_grad():
        for i, (data, target_values, condition_mask) in enumerate(loader):
            inputs = data['image'].to(model_device)
            target_values = target_values.to(model_device)

            latents = None
            sequence_predictions = []

            for t in range(inputs.size(0)):
                predictions, latents = model(inputs[t], latents)
                sequence_predictions.append(predictions)
                epoch_loss += criterion(predictions.squeeze(), target_values[t]).item()

            # torch.stack raises on an empty list, so skip zero-length sequences
            if sequence_predictions:
                # NOTE(review): this appends one row per time step, each holding
                # the whole batch. Rows from a shorter final batch have a
                # different length, and the resulting layout is not the
                # one-value-per-frame vector that calculate_metrics compares
                # against the frame table - confirm the intended shape.
                all_predictions.extend(torch.stack(sequence_predictions).cpu().squeeze().numpy())

            progress_bar.update(1)
            progress_bar.set_description(f'epoch {epoch + 1} | train loss: {train_loss:.4f} | valid loss: {(epoch_loss / (i + 1)):.4f}')

    # max(..., 1) avoids a ZeroDivisionError on an empty loader
    total_loss = epoch_loss / max(len(loader), 1)
    result = np.array(all_predictions)
    return total_loss, result

In [None]:
def train(train_loader, valid_loader, n_epoch, patience=10, fps=30, load_checkpoint=False):
    """Train with per-epoch validation, checkpointing and early stopping.

    Reads these notebook-level names: ``model``, ``optimizer``, ``criterion``,
    ``SAVE_DIR``, ``wandb_logging``, ``wandb`` and ``convert_pt_to_onnx``.
    NOTE(review): the last three are not defined in the visible part of the
    notebook - make sure the cells that provide them run first.

    Args:
        train_loader: loader passed to ``train_epoch``.
        valid_loader: loader passed to ``evaluate`` and ``calculate_metrics``.
        n_epoch: total number of epochs, including epochs already stored in a
            loaded checkpoint.
        patience: stop once this many consecutive epochs fail to improve the
            validation loss.
        fps: frame rate forwarded to ``calculate_metrics``.
        load_checkpoint: when True, restore model, optimizer, scheduler and
            bookkeeping from ``SAVE_DIR / "checkpoint-2.pth.tar"``.

    Returns:
        ``(best_valid_loss, train_loss_list, valid_loss_list)``.
    """
    # NOTE(review): the resume file name is fixed, while checkpoints are written
    # below as "checkpoint-<epoch index>.pth.tar" - confirm this is the file to resume from.
    CHECKPOINT_FILENAME="checkpoint-2.pth.tar"

    scheduler = ReduceLROnPlateau(optimizer, 'min', patience=2, factor=0.1)

    if not load_checkpoint:
        best_valid_loss = float('inf')
        epochs_of_no_improve = 0
        epochs_passed = 0
        train_loss_list = []
        valid_loss_list = []
    else:
        checkpoint = torch.load(SAVE_DIR / CHECKPOINT_FILENAME)
        model.load_state_dict(checkpoint['state_dict'])
        optimizer.load_state_dict(checkpoint['optimizer'])
        scheduler.load_state_dict(checkpoint['scheduler'])
        epochs_passed = checkpoint['epoch']
        best_valid_loss = checkpoint['best_valid_loss']
        epochs_of_no_improve = checkpoint['epochs_of_no_improve']
        train_loss_list = checkpoint['train_loss_list']
        valid_loss_list = checkpoint['valid_loss_list']

    # torch.save does not create missing directories
    SAVE_DIR.mkdir(parents=True, exist_ok=True)

    if wandb_logging:
        wandb.watch(model, criterion)

    # Only the epochs that are still missing are run after a resume.
    for epoch in range(n_epoch - epochs_passed):

        progress_bar = tqdm(total=len(train_loader), smoothing=0)
        train_loss = train_epoch(model, train_loader, optimizer, criterion, progress_bar, epoch)
        train_loss_list.append(train_loss)

        # Reuse the same bar for the validation pass.
        progress_bar.reset(total=len(valid_loader))
        valid_loss, predictions = evaluate(model, valid_loader, criterion, progress_bar, epoch, train_loss)
        valid_loss_list.append(valid_loss)

        scheduler.step(valid_loss)

        if valid_loss < best_valid_loss:
            best_valid_loss = valid_loss

            torch.save(model.state_dict(), SAVE_DIR / "best.pt")

            epochs_of_no_improve = 0
            best_loss_marker = '*'
        else:
            epochs_of_no_improve += 1
            best_loss_marker = ''

        # 'epoch' stores the number of completed epochs including those from a
        # resumed run (the original referenced an undefined name, epoch_passed).
        torch.save({
            'epoch': epoch + epochs_passed + 1,
            'state_dict': model.state_dict(),
            'optimizer': optimizer.state_dict(),
            'scheduler': scheduler.state_dict(),
            'best_valid_loss': best_valid_loss,
            'epochs_of_no_improve': epochs_of_no_improve,
            'train_loss_list': train_loss_list,
            'valid_loss_list': valid_loss_list
        }, SAVE_DIR / f"checkpoint-{epoch+epochs_passed}.pth.tar")

        metrics = calculate_metrics(fps, predictions, valid_loader)
        whiteness = metrics['whiteness']
        mae = metrics['mae']
        left_mae = metrics['left_mae']
        straight_mae = metrics['straight_mae']
        right_mae = metrics['right_mae']
        progress_bar.set_description(f'{best_loss_marker}epoch {epoch + 1}'
                                        f' | train loss: {train_loss:.4f}'
                                        f' | valid loss: {valid_loss:.4f}'
                                        f' | whiteness: {whiteness:.4f}'
                                        f' | mae: {mae:.4f}'
                                        f' | l_mae: {left_mae:.4f}'
                                        f' | s_mae: {straight_mae:.4f}'
                                        f' | r_mae: {right_mae:.4f}')

        if wandb_logging:
            metrics['epoch'] = epoch + 1
            metrics['train_loss'] = train_loss
            metrics['valid_loss'] = valid_loss
            wandb.log(metrics)

        # >= rather than ==: a resumed run may start with a counter that is
        # already past the requested patience.
        if epochs_of_no_improve >= patience:
            print(f'Early stopping, on epoch: {epoch + 1}.')
            break

    torch.save(model.state_dict(), SAVE_DIR / "last.pt")
    pt_models = [f'{SAVE_DIR}/last.pt', f'{SAVE_DIR}/best.pt']

    # Export both the final and the best weights; the converter's return value is not needed.
    for pt_model_path in pt_models:
        convert_pt_to_onnx(pt_model_path)

    return best_valid_loss, train_loss_list, valid_loss_list