## Data gathering and UNet model implementation

### Imports

In [None]:
import os
from google.colab import drive
from google.cloud import storage
drive.mount('/content/drive')

import pandas as pd
import numpy as np

from PIL import Image
from io import BytesIO
from matplotlib import pyplot as plt
import ipyplot

from keras.models import Model, load_model
from keras.layers import Input, BatchNormalization, Activation, Dense, Dropout
from keras.layers.core import Lambda, RepeatVector, Reshape
from keras.layers.convolutional import Conv2D, Conv2DTranspose
from keras.layers.pooling import MaxPooling2D, GlobalMaxPool2D
from keras.layers import Concatenate, add
from keras.callbacks import EarlyStopping, ModelCheckpoint, ReduceLROnPlateau
from keras.optimizers import Adam

import torch
import torch.nn as nn

from skimage.io import imread
from torch.utils import data

from torchvision.transforms.functional import pad

from skimage.transform import resize

from transforms import (
    ComposeDouble,
    FunctionWrapperDouble,
    create_dense_target,
    normalize,
)

import torch.nn.functional as F

from torch.cuda.amp import autocast, GradScaler
import torch.optim as optim
import time

### Grabbing data from Google bucket
First, we set up the credentials and the storage client.

In [None]:
# Google Cloud Storage settings used to fetch the dataset.
bucket_name = 'waymo_perception_data'  # replace with the name of your own bucket
google_application_credentials = '/content/drive/MyDrive/Colab Notebooks/food-V2.json'

# Export the key-file path before the client is built
# (presumably storage.Client() reads it from this environment variable).
os.environ.update(GOOGLE_APPLICATION_CREDENTIALS=google_application_credentials)
client = storage.Client()


Then, we iterate through two different folders in the bucket: one holds the camera images and the other holds the segmentation labels. To keep memory usage manageable, we only read some of the files. Finally, we merge the two tables on their shared key columns so that each image is connected with its appropriate segmentation label.

In [None]:
def _load_parquet_folder(bucket, prefix, max_files=10, temp_path="temp_file.parquet"):
    """Download up to `max_files` parquet blobs under `prefix` and concatenate them.

    Args:
        bucket: bucket object as returned by ``client.get_bucket``.
        prefix: folder prefix handed to ``bucket.list_blobs``.
        max_files: maximum number of blobs to read, to keep memory use bounded.
        temp_path: local scratch file each blob is downloaded to before parsing.

    Returns:
        One DataFrame with the rows of every non-empty file that was read. If
        every file was empty, the last (empty) frame is returned; if no file
        was found at all, an empty DataFrame is returned.
    """
    from itertools import islice

    frames = []
    try:
        # islice stops after `max_files` blobs without requesting any further ones.
        for blob in islice(bucket.list_blobs(prefix=prefix), max_files):
            blob.download_to_filename(temp_path)
            frames.append(pd.read_parquet(temp_path))
    finally:
        # The scratch file is only needed while parsing; do not leave it behind.
        if os.path.exists(temp_path):
            os.remove(temp_path)

    non_empty = [frame for frame in frames if not frame.empty]
    if non_empty:
        # A single concat avoids re-copying the accumulated rows for every file.
        return pd.concat(non_empty)
    return frames[-1] if frames else pd.DataFrame()


bucket = client.get_bucket(bucket_name)

#--------- Get image data ------------
folder_name = 'training/camera_image/'
df_image = _load_parquet_folder(bucket, folder_name)

# --------- Get segmented data ----------
df_seg = _load_parquet_folder(bucket, 'training/camera_segmentation/')

# The merged dataframe: pair every camera image with the label that shares its keys
merge_keys = ['key.segment_context_name', 'key.frame_timestamp_micros', 'key.camera_name']
df_tot = pd.merge(df_image, df_seg, on=merge_keys)[
    ['[CameraImageComponent].image', '[CameraSegmentationLabelComponent].panoptic_label']
]
# Keep at most 100 random pairs; capping at len(df_tot) avoids a ValueError
# when the merge produces fewer than 100 rows.
df_tot = df_tot.sample(min(100, len(df_tot)), ignore_index=True)

print("There are" , len(df_tot) ,"images") #checking the results of the merge
df_tot.columns = ['image','seg_label'] #rename columns for typing ease

# Help clear up some space
del df_image
del df_seg

Let's preview the images to see that the dataframes merged correctly.

In [None]:
# Decode the image / label pair stored in row 3 and report basic metadata for each.
previews = []
for column in ('image', 'seg_label'):
    picture = Image.open(BytesIO(df_tot[column][3]))
    print(picture.format, picture.size, picture.mode)
    previews.append(picture)
img, seg = previews

# Show the camera image next to its segmentation label
ipyplot.plot_images(previews, max_images=2, img_width=500)

### UNET Model and Helper Functions
UNet is a convolutional neural network (CNN) architecture that works well for segmentation tasks, even with small batches of images. First, we transform the data so that it works well with PyTorch. A custom collate function handles the case where tensors of uneven size are batched together before being fed into the model.

In [None]:
class SegmentationDataSet(data.Dataset):
    """Dataset of (image, label) pairs stored as encoded image bytes.

    Each item is decoded with PIL, optionally passed through `transform`, and
    returned as a ``(float32 tensor, long tensor)`` pair.

    Args:
        inputs: sequence of encoded input images (bytes readable by PIL).
        targets: sequence of encoded label images, aligned with `inputs`.
        transform: optional callable invoked as ``transform(x, y)`` on the
            decoded numpy arrays; it must return the transformed ``(x, y)``
            pair as numpy arrays.
    """

    def __init__(self,
                 inputs: list,
                 targets: list,
                 transform=None
                 ):
        # Materialise as plain lists so __getitem__ is always positional.
        # A pandas Series would otherwise be indexed by label and raise
        # KeyError whenever its index is not exactly 0..n-1.
        self.inputs = list(inputs)
        self.targets = list(targets)
        self.transform = transform
        self.inputs_dtype = torch.float32
        self.targets_dtype = torch.long

    def __len__(self):
        """Return the number of samples."""
        return len(self.inputs)

    def __getitem__(self,
                    index: int):
        """Decode, transform and tensorise the sample at position `index`."""
        # Select the sample
        input_ID = self.inputs[index]
        target_ID = self.targets[index]

        # Load input and target
        x = np.array(Image.open(BytesIO(input_ID)))
        y = np.array(Image.open(BytesIO(target_ID)))

        # Preprocessing
        if self.transform is not None:
            x, y = self.transform(x, y)

        # Typecasting
        x = torch.from_numpy(x).type(self.inputs_dtype)
        y = torch.from_numpy(y).type(self.targets_dtype)

        return x, y
    
# Custom collate function to handle varying tensor sizes
def custom_collate(batch):
    """Collate (image, label) pairs of varying size into padded batch tensors.

    Every image and label is padded on its right and bottom edges up to the
    largest height and width found in the batch.

    Args:
        batch: non-empty list of ``(img, label)`` tensor pairs, where ``img`` is
            indexed as ``[C, H, W]`` and ``label`` as ``[H, W]``.

    Returns:
        ``(batch_images, batch_labels)``: images stacked to ``[N, C, H, W]``
        (padded with 0) and labels stacked to ``[N, 1, H, W]`` (padded with 255).
    """
    # Get the maximum height and width in the current batch
    max_height = max(img.shape[1] for img, _ in batch)
    max_width = max(img.shape[2] for img, _ in batch)

    padded_images = []
    padded_labels = []
    for img, label in batch:
        # F.pad orders the amounts as (left, right, top, bottom) over the last
        # two dimensions, so the extra columns go on the right and the extra
        # rows on the bottom. torchvision's `pad` orders them
        # (left, top, right, bottom), which put the width difference on top.
        img_padded = F.pad(
            img,
            (0, max_width - img.shape[2], 0, max_height - img.shape[1]),
            value=0,
        )
        label_padded = F.pad(
            label,
            (0, max_width - label.shape[1], 0, max_height - label.shape[0]),
            value=255,
        )

        padded_images.append(img_padded)
        # Add a channel dimension so labels stack to [N, 1, H, W]
        padded_labels.append(label_padded.unsqueeze(0))

    # Stack the padded images and labels into a batch
    batch_images = torch.stack(padded_images, dim=0)
    batch_labels = torch.stack(padded_labels, dim=0)

    return batch_images, batch_labels

Next, we apply a pipeline of transformations to the training data. This is to help prevent data leakage, as well as streamline our process. We resize the images to be smaller in order to prevent any memory issues when developing our model. 

Consider adding more transformations in order to add robustness to the model.

In [None]:
# training transformations and augmentations
# Training transformations and augmentations, listed in the order they are composed.

# Resize the camera image to 640 x 960 with 3 channels.
resize_image = FunctionWrapperDouble(resize,
                                     input=True,
                                     target=False,
                                     output_shape=(640, 960, 3))

# Resize the label map to the same spatial size; order=0 without anti-aliasing
# and with preserve_range=True keeps the original label values.
resize_label = FunctionWrapperDouble(resize,
                                     input=False,
                                     target=True,
                                     output_shape=(640, 960),
                                     order=0,
                                     anti_aliasing=False,
                                     preserve_range=True)

# Label-only step (presumably remaps label ids to a dense range -- see transforms.py).
densify_label = FunctionWrapperDouble(create_dense_target, input=False, target=True)

# Move the image's last (channel) axis to the front.
channels_first = FunctionWrapperDouble(np.moveaxis, input=True, target=False, source=-1, destination=0)

# Applied with FunctionWrapperDouble's default input/target flags.
normalize_step = FunctionWrapperDouble(normalize)

transforms = ComposeDouble([
    resize_image,
    resize_label,
    densify_label,
    channels_first,
    normalize_step,
])

Here, we check that our SegmentationDataSet and transforms work well and can be fed into a dataloader, alongside our custom collate function.

In [None]:
# Wrap the sampled image / label columns in the dataset, applying the transform pipeline.
training_dataset = SegmentationDataSet(df_tot['image'], df_tot['seg_label'], transforms)

# Small shuffled batches; custom_collate pads samples to a common size.
loader_options = dict(batch_size=2, shuffle=True, collate_fn=custom_collate)
training_dataloader = data.DataLoader(training_dataset, **loader_options)

In [None]:
# Pull one batch from a fresh iterator (shuffled, so each run shows different samples).
batch_iterator = iter(training_dataloader)
x, y = next(batch_iterator)

# x is the image batch [N,C,H,W]; custom_collate adds a channel axis to y,
# so y is expected to be [N,1,H,W].
print(f'x = shape: {x.shape}; type: {x.dtype}')
print(f'x = min: {x.min()}; max: {x.max()}')
print(f'y = shape: {y.shape}; class: {y.unique()}; type: {y.dtype}')

The above cell block can be run multiple times in order to give a preview of the transformed data.

### Developing the UNet model architecture

In [None]:
class conv_block(nn.Module):
    """Two consecutive 3x3 convolutions, each followed by batch norm and ReLU.

    With ``padding=1`` the spatial size is preserved; only the channel count
    changes from `in_c` to `out_c`.
    """

    def __init__(self, in_c, out_c):
        super().__init__()
        # First stage: in_c -> out_c
        self.conv1 = nn.Conv2d(in_c, out_c, kernel_size=3, padding=1)
        self.bn1 = nn.BatchNorm2d(out_c)
        # Second stage: out_c -> out_c
        self.conv2 = nn.Conv2d(out_c, out_c, kernel_size=3, padding=1)
        self.bn2 = nn.BatchNorm2d(out_c)
        # One stateless activation shared by both stages
        self.relu = nn.ReLU()

    def forward(self, inputs):
        """Apply conv -> batch norm -> ReLU twice and return the feature map."""
        hidden = self.relu(self.bn1(self.conv1(inputs)))
        return self.relu(self.bn2(self.conv2(hidden)))


class encoder_block(nn.Module):
    """Encoder stage: a conv_block followed by 2x2 max pooling."""

    def __init__(self, in_c, out_c):
        super().__init__()
        self.conv = conv_block(in_c, out_c)
        self.pool = nn.MaxPool2d((2, 2))

    def forward(self, inputs):
        """Return ``(features, pooled)``.

        `features` is the conv_block output before pooling (used as the skip
        connection); `pooled` is the same map after 2x2 max pooling.
        """
        features = self.conv(inputs)
        return features, self.pool(features)

class decoder_block(nn.Module):
    """Decoder stage: upsample, concatenate the skip connection, then conv_block."""

    def __init__(self, in_c, out_c):
        super().__init__()
        # A 2x2 transposed convolution with stride 2 doubles height and width.
        self.up = nn.ConvTranspose2d(in_c, out_c, kernel_size=2, stride=2, padding=0)
        # The skip connection contributes another out_c channels.
        self.conv = conv_block(out_c+out_c, out_c)

    def forward(self, inputs, skip):
        """Upsample `inputs`, join it with `skip` on the channel axis, and convolve."""
        upsampled = self.up(inputs)
        merged = torch.cat([upsampled, skip], dim=1)
        return self.conv(merged)

class build_unet(nn.Module):
    """UNet with four encoder stages, a bottleneck and four decoder stages.

    Takes a 3-channel input and returns one output channel per class
    (`num_classes`) from a final 1x1 convolution, with no activation applied.
    """

    def __init__(self, num_classes=1):
        super().__init__()

        # Encoder: the channel count doubles at every stage
        self.e1 = encoder_block(3, 64)
        self.e2 = encoder_block(64, 128)
        self.e3 = encoder_block(128, 256)
        self.e4 = encoder_block(256, 512)

        # Bottleneck
        self.b = conv_block(512, 1024)

        # Decoder: mirrors the encoder
        self.d1 = decoder_block(1024, 512)
        self.d2 = decoder_block(512, 256)
        self.d3 = decoder_block(256, 128)
        self.d4 = decoder_block(128, 64)

        # Classifier: a 1x1 convolution sized by the requested number of classes
        self.num_classes = num_classes
        self.outputs = nn.Conv2d(64, self.num_classes, kernel_size=1, padding=0)

    def forward(self, inputs):
        """Run the encoder, bottleneck and decoder and return the classifier output."""
        # Encoder: keep every pre-pooling feature map as a skip connection
        skip1, down1 = self.e1(inputs)
        skip2, down2 = self.e2(down1)
        skip3, down3 = self.e3(down2)
        skip4, down4 = self.e4(down3)

        # Bottleneck
        bottleneck = self.b(down4)

        # Decoder: consume the skip connections from deepest to shallowest
        up = self.d1(bottleneck, skip4)
        up = self.d2(up, skip3)
        up = self.d3(up, skip2)
        up = self.d4(up, skip1)

        # Classifier
        return self.outputs(up)

Next, we create our own loss function and set up the variables for hyperparameter tuning.

In [None]:
def dice_loss(predicted, target, smooth=1.0):
    """Return the Dice loss, ``1 - Dice coefficient``, over all elements.

    The coefficient is ``(2 * sum(predicted * target) + smooth) /
    (sum(predicted) + sum(target) + smooth)``.

    Args:
        predicted: tensor of predictions. No activation is applied here.
            NOTE(review): presumably these should be probabilities or masks
            in [0, 1]; the tuning loop passes raw model outputs -- confirm.
        target: tensor of targets, broadcastable against `predicted`.
        smooth: smoothing term added to numerator and denominator so the
            ratio stays defined when both tensors sum to zero.

    Returns:
        Scalar tensor holding the loss.
    """
    intersection = torch.sum(predicted * target)
    union = torch.sum(predicted) + torch.sum(target)
    dice_coefficient = (2.0 * intersection + smooth) / (union + smooth)
    return 1.0 - dice_coefficient
# Candidate loss functions, keyed by the name used when reporting results.
# Insertion order is the order in which the tuning loop tries them.
loss_functions = dict(
    DiceLoss=dice_loss,  # the custom loss defined above
    CrossEntropyLoss=nn.CrossEntropyLoss(),
    BCEWithLogitsLoss=nn.BCEWithLogitsLoss(),
)

# Grid of hyperparameter values to try
hyperparameters = dict(
    lr=[0.001, 0.01, 0.0001],
    batch_size=[2, 4, 8],
)

# Dataset shared by every tuning run
training_dataset = SegmentationDataSet(df_tot['image'], df_tot['seg_label'], transform=transforms)

Hyperparameter tuning loop. The results are saved to Google Drive for future analysis, and the loop is written to keep GPU memory usage low.

In [None]:

def _train_one_epoch(model, dataloader, criterion, optimizer, scaler, device, use_amp):
    """Run one optimisation pass over `dataloader` and return the mean batch loss.

    Args:
        model: network to train (switched to training mode here).
        dataloader: yields ``(inputs, labels)`` batches.
        criterion: callable ``criterion(outputs, labels)`` returning a scalar loss.
        optimizer: optimiser stepping the model parameters.
        scaler: GradScaler used to scale the loss and step the optimiser.
        device: device the batches are moved to.
        use_amp: whether to run the forward pass under autocast.

    Returns:
        The average of the per-batch losses as a Python float.
    """
    model.train()
    running_loss = 0.0

    for inputs, labels in dataloader:
        # .to() is a no-op when the tensors already live on `device`
        inputs = inputs.to(device)
        labels = labels.to(device)

        optimizer.zero_grad()

        # Mixed precision is only enabled when requested (i.e. on CUDA)
        with autocast(enabled=use_amp):
            outputs = model(inputs)
            # The losses used here expect targets in the same dtype as the outputs.
            # NOTE(review): under autocast this may be float16; confirm the label
            # values fit that range.
            labels = labels.to(outputs.dtype)
            loss = criterion(outputs, labels)

        scaler.scale(loss).backward()
        scaler.step(optimizer)
        scaler.update()

        # .item() keeps a plain float, so no loss tensor is held beyond the batch
        running_loss += loss.item()

    return running_loss / len(dataloader)


num_epochs = 2

# Hyperparameter tuning loop

best_loss = float("inf")
best_loss_function = None
best_hyperparameters = None
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
# Mixed precision only makes sense on the GPU; on CPU it is switched off cleanly
use_amp = device.type == 'cuda'

for loss_name, loss_function in loss_functions.items():
    results = []
    for lr in hyperparameters["lr"]:
        for batch_size in hyperparameters["batch_size"]:

            training_dataloader = data.DataLoader(dataset=training_dataset,
                                                  batch_size=batch_size,
                                                  shuffle=True,
                                                  collate_fn=custom_collate
                                                  )

            # Fresh model and optimiser for every configuration.
            # NOTE(review): with a single output channel, CrossEntropyLoss takes
            # its softmax over one class -- confirm that run is meaningful.
            model = build_unet(num_classes=1).to(device)
            optimizer = optim.Adam(model.parameters(), lr=lr)

            criterion = loss_function
            # Initialize the GradScaler for mixed-precision training
            scaler = GradScaler(enabled=use_amp)

            # One average loss per epoch, stored as plain floats so the CSV is readable
            epoch_losses = []

            # Epoch loop
            for epoch in range(num_epochs):
                start_time = time.time()
                average_loss = _train_one_epoch(
                    model, training_dataloader, criterion, optimizer, scaler, device, use_amp
                )
                epoch_time = time.time() - start_time
                epoch_losses.append(average_loss)

                # Print progress with the epoch time
                print(f"Epoch [{epoch + 1}/{num_epochs}], Loss: {average_loss:.4f}, Epoch Time: {epoch_time:.2f} seconds")

            # Update best hyperparameters if the final epoch's loss is better
            if average_loss < best_loss:
                best_loss = average_loss
                best_loss_function = loss_name
                best_hyperparameters = {"lr": lr, "batch_size": batch_size}

            # Save hyperparameter tuning information and loss
            results.append({
                "Loss Function": loss_name,
                "Learning Rate": lr,
                "Batch Size": batch_size,
                "Loss per Epoch": epoch_losses,
            })

    # Convert the results list to a DataFrame
    results_df = pd.DataFrame(results)

    # Save the results for this loss function to a CSV file
    str_name = loss_name + "hyperparameter_tuning_results.csv"
    print("Saving all of "+loss_name)
    results_df.to_csv("/content/drive/MyDrive/Colab Notebooks/"+str_name, index=False)
print(f"Best loss function: {best_loss_function}")
print(f"Best hyperparameters: {best_hyperparameters}")