## 0. Set up

In [12]:
import numpy as np
import os
from PIL import Image
import matplotlib.pyplot as plt
import pandas as pd
import cv2
import datetime
from functools import partial
from glob import glob
import math
import multiprocessing
from pathlib import Path
import random
from typing import Any, Dict, Optional
from tqdm.auto import tqdm


import torch
import torchvision
from torch.amp import autocast # allows for differentr datatypes (torch.32/ torch.16)
from torchvision import transforms as T
from torchvision.models.detection.faster_rcnn import FastRCNNPredictor
from torchvision.models.detection.mask_rcnn import MaskRCNNPredictor
import torchvision.transforms.functional as F
from torchvision.ops import masks_to_boxes
from cjm_pytorch_utils.core import pil_to_tensor, tensor_to_pil, get_torch_device, set_seed, denorm_img_tensor, move_data_to_device
from torchvision.utils import draw_bounding_boxes, draw_segmentation_masks
from cjm_psl_utils.core import download_file, file_extract, get_source_code
from cjm_pandas_utils.core import markdown_to_pandas, convert_to_numeric, convert_to_string
from torch.utils.data import Dataset, DataLoader
from torchtnt.utils import get_module_summary
from torchvision.utils import draw_bounding_boxes




import imageio.v3 as iio
import ipympl
import skimage as ski

# Import Mask R-CNN
from torchvision.models.detection import maskrcnn_resnet50_fpn_v2, MaskRCNN
from torchvision.models.detection import MaskRCNN_ResNet50_FPN_V2_Weights

In [13]:
device = get_torch_device()
dtype = torch.float32
torch.cuda.empty_cache()

In [14]:
# set up projects's directory
# project's name
project_name = f"pytorch-buildings-maskrcnn"

# path for project's folder
project_dir = Path(f"/{project_name}/")

# create the project directory
project_dir.mkdir(parents=True, exist_ok=True)

## 1. LOAD DATA

In [15]:
idx = 21

### 1.1 Load images

In [16]:
## Load images

images_dir = "C:/Users/kubaw/Desktop/DELFT/CORE/satellite_predictions/Dataset2/Images/Destroyed"  
image_filenames = sorted(os.listdir(images_dir))


In [17]:
# apply transform to the images (normalise)
def load_images_from_folder(images_dir):
    images = []
    transform = T.Compose([T.ToTensor()])
    for filename in os.listdir(images_dir):
        if filename.endswith('.jpeg'):
            img = Image.open(os.path.join(images_dir, filename))
            img_t = transform(img)
            images.append(img_t)
    return images

image_tensors = load_images_from_folder(images_dir)

len(image_tensors)

114

### 1.2 Make image_ids

In [18]:
def generate_index_list(image_tensors):
    num_images = len(image_tensors)
    return list(range(num_images))

image_ids = generate_index_list(image_tensors)

### 1.3 Load Masks

In [19]:
mask_dir = "C:/Users/kubaw/Desktop/DELFT/CORE/satellite_predictions/Dataset2/Masks/Destroyed"  
mask_filenames = sorted(os.listdir(mask_dir))
mask_arrays = []


def load_masks_from_folder(mask_dir):
    masks = []
    for filename in os.listdir(mask_dir):
        if filename.endswith('.jpeg'):
            mask = Image.open(os.path.join(mask_dir, filename))
            mask_arr = np.array(mask)
            masks.append(mask_arr)
    return masks

mask_arrays = load_masks_from_folder(mask_dir)
mask_arrays[0].shape

(64, 64, 3)

## 2. Transform masks

From 64 x 64 x 3 arrays to 64 x 64 booleans for each instance 

In [20]:
def mask_to_2dim(mask_arrays):
    
    # takes a lists of 3 dim numpy arrays, 
    # returns 2dim torch tensor in form of 0 for background, 1 for undamaged, 2 for damaged
    masks_transformed = []
    

    # Define color thresholds for blue and magenta
    blue_lower = np.array([0, 0, 100], dtype=np.uint8)
    blue_upper = np.array([80, 80, 255], dtype=np.uint8)

    magenta_lower = np.array([120, 0, 120], dtype=np.uint8)
    magenta_upper = np.array([255, 100, 255], dtype=np.uint8)
    
    for mask_arr in mask_arrays: 
        
        
        # Create masks for blue and magenta regions
        blue_mask = cv2.inRange(mask_arr, blue_lower, blue_upper)
        magenta_mask = cv2.inRange(mask_arr, magenta_lower, magenta_upper)

        # Combine the masks to get the final transformed mask
        transformed_mask = np.zeros_like(blue_mask)
        transformed_mask[blue_mask > 0] = 1  # Object 1 (Blue)
        transformed_mask[magenta_mask > 0] = 2  # Object 2 (Magenta)
        
        masks_transformed.append(transformed_mask)
        
    # transform to torch tensor
            
    masks_array = np.array(masks_transformed)
    masks_tensor = torch.from_numpy(masks_array)
    
    return masks_tensor



In [21]:
masks_transformed = mask_to_2dim(mask_arrays)
masks_transformed[0].shape

torch.Size([64, 64])

In [22]:
def masks_2_dim_to_booleans(masks_transformed):
    masks_binary = []

    for mask in masks_transformed:
            # use Connected Component Analysis to extract all objects from the image
        
            mask_np, count = ski.measure.label(mask, connectivity=1, return_num=True)
            mask_test = torch.from_numpy(np.array(mask_np))

            # We get the unique colors, as these would be the object ids.
            mask_obj_ids = torch.unique(mask_test)

            # first id is the background, so remove it.
            mask_obj_ids = mask_obj_ids[1:]
        
            # split the color-encoded mask into a set of boolean masks.
            mask_boolean = mask_test == mask_obj_ids[:, None, None]
        
            masks_binary.append(mask_boolean)
    
    return masks_binary
    

In [23]:
masks_binary = masks_2_dim_to_booleans(masks_transformed)
masks_binary[0].shape

torch.Size([4, 64, 64])

## 3. Create bounding boxes with labels

In [110]:
def mask_to_box(masks_transformed):
    
    """
    args: 3 dim array of image masks 
    returns: tuple of bounding boxes, tuple of boxes labels
    """
    
    boxes = []
    labels = []

    for mask in masks_transformed:
        
        labels_mask = []
        
        ### we split the masks into damaged and andamaged tessors
        standing_list = []
        # We get the unique colors, as these would be the object ids.
        obj_ids = torch.unique(mask)

        # first id is the background, so remove it.
        obj_ids = obj_ids[1:]

        if len(obj_ids) == 1:

            # split the color-encoded mask into a set of boolean masks.
            standing = mask == obj_ids[:, None, None]
            standing_tensor = standing.int()
    
        elif len(obj_ids) == 2:
    
            separate_masks = mask == obj_ids[:, None, None]
            standing, collapsed = torch.split(separate_masks, 1, dim=0)
            standing_tensor, collapsed_tensor = standing.int(), collapsed.int()
        
        ### standing buildings
        
        # use Connected Component Analysis to extract all objects from the image

        standing_np, count = ski.measure.label(standing_tensor, connectivity=1, return_num=True)
        standing_test = torch.from_numpy(np.array(standing_np))

        # We get the unique colors, as these would be the object ids.
        standing_obj_ids = torch.unique(standing_test)

        # first id is the background, so remove it.
        standing_obj_ids = standing_obj_ids[1:]

        # split the color-encoded mask into a set of boolean masks.
        standing_boolean = standing_test == standing_obj_ids[:, None, None]
        
        #make boxes (x1, x2, y1, y2)
        standing_boxes_test = masks_to_boxes(standing_boolean)
        
        #label standing boxes

        label1 = 1
        standing_list = [(row) for row in standing_boxes_test]

        for i in range(len(standing_list)):
            labels_mask.append(label1)

        
        ### collapsed buildings
        
        collapsed_list = []
        
        # use Connected Component Analysis to extract all objects from the image
        
        collapsed_np, count = ski.measure.label(collapsed_tensor, connectivity=1, return_num=True)
        collapsed_test = torch.from_numpy(np.array(collapsed_np))

        # We get the unique colors, as these would be the object ids.
        collapsed_obj_ids = torch.unique(collapsed_test)

        # first id is the background, so remove it.
        collapsed_obj_ids = collapsed_obj_ids[1:]
        
        # split the color-encoded mask into a set of boolean masks.
        collapsed_boolean = collapsed_test == collapsed_obj_ids[:, None, None]
        
        #make boxes (x1, x2, y1, y2)
        collapsed_boxes_test = masks_to_boxes(collapsed_boolean)
        
        #make tuple
        collapsed_list = [(row) for row in collapsed_boxes_test]

        #label collapsed boxes

        label2 = 2
        collapsed_list = [(row) for row in collapsed_boxes_test]
        
        for i in range(len(collapsed_list)):
            labels_mask.append(label2)
        
        
        ### append boxes list
        both_lists = standing_list + collapsed_list
        boxes.append(both_lists)
        
        # make boxes to torch.int64 datatype
        
        boxes_int64 = []
        
        for box in boxes:
            
            tensor_box = torch.from_numpy(np.array(box)).to(torch.int64)
            boxes_int64.append(tensor_box)

        
        ### append labels list
        
        labels.append(labels_mask)

        labels_int64 = []
        for label in labels:
            
            tensor_label = torch.from_numpy(np.array(label)).to(torch.int64)
            labels_int64.append(tensor_label)        
        
        
    return boxes_int64, labels_int64


In [112]:
labeled_boxes = mask_to_box(masks_transformed)
boxes, labels = labeled_boxes
boxes[1].dtype

torch.int64

## 4. Concatinate data

In [115]:
## SO FAR WE HAVE

# boxes
len(boxes)

# labels
len(labels)

# masks
len(masks_binary)

# image_ids
len(image_ids)

# images 
len(image_tensors)

114

In [26]:
def concatinate_data(images, boxes, masks, labels): 
    # IN: image tensors, box tensors, binary mask tensors, box labels tensor
    # OUT: list of the image tensor and dictionary of target values at each index
    keys = ['boxes', 'masks', 'labels']
    dataset_targets = []
    for b, l, m in zip(boxes, labels, masks):
    
        data_dict = {
            'boxes': b,
            'labels': l,
            'masks': m
        }
        dataset_targets.append(data_dict)
        
    dataset_fin = []

    for i in range(len(dataset_targets)):
        # Create a tuple with the dictionary and the array
        combined_data = (images[i], dataset_targets[i])
        dataset_fin.append(combined_data)    
    
    return dataset_fin

In [27]:
dataset_fin = concatinate_data(image_tensors, boxes, masks_binary, labels)

NameError: name 'boxes' is not defined

In [132]:
def test_train_split(dataset):
    # Shuffle the list randomly
    random.shuffle(dataset_fin)
    dataset_sh = dataset_fin.copy()

    split_index = int(0.75 * len(dataset_sh))

    # Split the list
    train_dataset = dataset_sh[:split_index]
    test_dataset = dataset_sh[split_index:]
    return train_dataset, test_dataset

train_dataset, test_dataset = test_train_split(dataset_fin)
len(train_dataset)

85

## 5. Make Dataloaders

In [133]:
# Set the training batch size
bs = 10

# Set the number of worker processes for loading data ! multiprocessing.cpu_count()//2 !
num_workers = multiprocessing.cpu_count()//2

def collate_fn(batch):
    return tuple(zip(*batch))

# Define parameters for DataLoader
data_loader_params = {
    'batch_size': bs,  # Batch size for data loading
    'num_workers': num_workers,  # Number of subprocesses to use for data loading
    'collate_fn': collate_fn,
    'pin_memory': True,
    'pin_memory_device': device
}

#Create DataLoader for training data. Data is shuffled for every epoch.
train_dataloader = DataLoader(train_dataset, **data_loader_params, shuffle=True)

# Create DataLoader for validation data. Shuffling is not necessary for validation data.
valid_dataloader = DataLoader(test_dataset, **data_loader_params)


pd.Series({
    'Number of batches in train DataLoader:': len(train_dataloader),
    'Number of batches in validation DataLoader:': len(valid_dataloader)}
).to_frame().style.hide(axis='columns')



0,1
Number of batches in train DataLoader:,9
Number of batches in validation DataLoader:,3


## 7. Loading the Mask R-CNN Model

In [137]:
classes = 3

# Initialize a Mask R-CNN model with pretrained weights
model = maskrcnn_resnet50_fpn_v2(weights='DEFAULT')

# Get the number of input features for the classifier
in_features_box = model.roi_heads.box_predictor.cls_score.in_features
in_features_mask = model.roi_heads.mask_predictor.conv5_mask.in_channels

# Get the numbner of output channels for the Mask Predictor
dim_reduced = model.roi_heads.mask_predictor.conv5_mask.out_channels

# Replace the box predictor
model.roi_heads.box_predictor = FastRCNNPredictor(in_channels=in_features_box, num_classes=classes)

# Replace the mask predictor
model.roi_heads.mask_predictor = MaskRCNNPredictor(in_channels=in_features_mask, dim_reduced=dim_reduced, num_classes=classes)

In [138]:
# Set the model's device and data type
model.to(device=device, dtype=dtype);

# Add attributes to store the device and model name for later reference
model.device = device
model.name = 'maskrcnn_resnet50_fpn_v2'

### 7.2 Summarize the model

In [139]:
def model_summarize(model):

    test_inp = torch.randn(1, 3, 256, 256).to(device)

    summary_df = markdown_to_pandas(f"{get_module_summary(model.eval(), [test_inp])}")

    # # Filter the summary to only contain Conv2d layers and the model
    summary_df = summary_df[summary_df.index == 0]

    return summary_df.drop(['In size', 'Out size', 'Contains Uninitialized Parameters?'], axis=1)

model_summarize(model)



Unnamed: 0,Type,# Parameters,# Trainable Parameters,Size (bytes),Forward FLOPs
0,MaskRCNN,45.9 M,45.7 M,183 M,331 G


## 8. Fine-tuning the model

### 8.1 Define the training loop

In [140]:
# the following fx performs a single pass through the training set

def run_epoch(model, dataloader, optimizer, lr_scheduler, device, scaler, is_training):

    """
    Function to run a single training or evaluation epoch.
    
    IN:
        model: A PyTorch model to train or evaluate.
        dataloader: A PyTorch DataLoader providing the data.
        optimizer: The optimizer to use for training the model.
        loss_func: The loss function used for training.
        device: The device (CPU or GPU) to run the model on.
        scaler: Gradient scaler for mixed-precision training.
        is_training: Boolean flag indicating whether the model is in training or evaluation mode.
    
    Returns:
        The average loss for the epoch.
        """
    # Set the model to training mode
    model.train()
    
    epoch_loss = 0  # Initialize the total loss for this epoch
    progress_bar = tqdm(total=len(dataloader), desc="Train" if is_training else "Eval")  # Initialize a progress bar
    
    # Loop over the data
    for batch_id, (inputs, targets) in enumerate(dataloader):
        # Move inputs and targets to the specified device
        inputs = torch.stack(inputs).to(device)
        
        # Forward pass with Automatic Mixed Precision (AMP) context manager
        # provides convenience methods where some operations use the float32 datatype and other operations use float16
        with autocast(torch.device(device).type):
            if is_training:
                losses = model(inputs.to(device), move_data_to_device(targets, device))
            else:
                with torch.no_grad():
                    losses = model(inputs.to(device), move_data_to_device(targets, device))
                    
            # Compute the loss
            loss = sum([loss for loss in losses.values()])  # Sum up the losses

        # If in training mode, backpropagate the error and update the weights
        if is_training:
            if scaler:
                scaler.scale(loss).backward()
                scaler.step(optimizer)
                old_scaler = scaler.get_scale()
                scaler.update()
                new_scaler = scaler.get_scale()
                if new_scaler >= old_scaler:
                    lr_scheduler.step()
                    
            else:
                loss.backward()
                optimizer.step()
                lr_scheduler.step()
                
            optimizer.zero_grad()
            
        # Update the total loss
        loss_item = loss.item()
        epoch_loss += loss_item
        
        # Update the progress bar
        progress_bar_dict = dict(loss=loss_item, avg_loss=epoch_loss/(batch_id+1))
        if is_training:
            progress_bar_dict.update(lr=lr_scheduler.get_last_lr()[0])
        progress_bar.set_postfix(progress_bar_dict)
        progress_bar.update()
        
        # If the loss is NaN or infinite, stop the training/evaluation process
        if math.isnan(loss_item) or math.isinf(loss_item):
            print(f"Loss is NaN or infinite at batch {batch_id}. Stopping {'training' if is_training else 'evaluation'}.")
            break
    # Cleanup and close the progress bar 
    progress_bar.close()
    
    # Return the average loss for this epoch
    return epoch_loss / (batch_id + 1)

In [141]:
def train_loop(model, 
               train_dataloader, 
               valid_dataloader, 
               optimizer,  
               lr_scheduler, 
               device, 
               epochs, 
               checkpoint_path, 
               use_scaler=False):
    """
    Main training loop.
    
    Args:
        model: A PyTorch model to train.
        train_dataloader: A PyTorch DataLoader providing the training data.
        valid_dataloader: A PyTorch DataLoader providing the validation data.
        optimizer: The optimizer to use for training the model.
        lr_scheduler: The learning rate scheduler.
        device: The device (CPU or GPU) to run the model on.
        epochs: The number of epochs to train for.
        checkpoint_path: The path where to save the best model checkpoint.
        use_scaler: Whether to scale graidents when using a CUDA device
        
        Returns:
        None
        """
    
    # Initialize a gradient scaler for mixed-precision training if the device is a CUDA GPU
    scaler = torch.cuda.amp.GradScaler() if device.type == 'cuda' and use_scaler else None
    best_loss = float('inf')  # Initialize the best validation loss
    
    
    # Loop over the epochs
    for epoch in tqdm(range(epochs), desc="Epochs"):
        # Run a training epoch and get the training loss
        train_loss = run_epoch(model, train_dataloader, optimizer, lr_scheduler, device, scaler, is_training=True)
        # Run an evaluation epoch and get the validation loss
        with torch.no_grad():
            valid_loss = run_epoch(model, valid_dataloader, None, None, device, scaler, is_training=False)


                    # If the validation loss is lower than the best validation loss seen so far, save the model checkpoint
        if valid_loss < best_loss:
            best_loss = valid_loss
            torch.save(model.state_dict(), checkpoint_path)

            # Save metadata about the training process
            training_metadata = {
                'epoch': epoch,
                'train_loss': train_loss,
                'valid_loss': valid_loss, 
                'learning_rate': lr_scheduler.get_last_lr()[0],
                'model_architecture': model.name
            }
            with open(Path(checkpoint_path.parent/'training_metadata.json'), 'w') as f:
                json.dump(training_metadata, f)
                
        # If the training or validation loss is NaN or infinite, stop the training process
        if any(math.isnan(loss) or math.isinf(loss) for loss in [train_loss, valid_loss]):
            print(f"Loss is NaN or infinite at epoch {epoch}. Stopping training.")
            break

    # If the device is a GPU, empty the cache
    if device.type != 'cpu':
        getattr(torch, device.type).empty_cache()

### 8.2 Set the Model Checkpoint Path

In [144]:
# Generate timestamp for the training session (Year-Month-Day_Hour_Minute_Second)
timestamp = datetime.datetime.now().strftime("%Y-%m-%d_%H-%M-%S")

# Create a directory to store the checkpoints if it does not already exist
checkpoint_dir = Path(project_dir/f"{timestamp}")

# Create the checkpoint directory if it does not already exist
checkpoint_dir.mkdir(parents=True, exist_ok=True)

# The model checkpoint path
checkpoint_path = checkpoint_dir/f"{model.name}.pth"

print(checkpoint_path)

\pytorch-buildings-maskrcnn\2023-10-14_15-20-25\maskrcnn_resnet50_fpn_v2.pth


### 8.3 Configure the Training Parameters

In [145]:
# Learning rate for the model
lr = 5e-4

# Number of training epochs
epochs = 2

# AdamW optimizer; includes weight decay for regularization
optimizer = torch.optim.AdamW(model.parameters(), lr=lr)

# Learning rate scheduler; adjusts the learning rate during training
lr_scheduler = torch.optim.lr_scheduler.OneCycleLR(optimizer, 
                                                   max_lr=lr, 
                                                   total_steps=epochs*len(train_dataloader))

## 9. Train!

In [146]:
train_loop(model=model, 
           train_dataloader=train_dataloader,
           valid_dataloader=valid_dataloader,
           optimizer=optimizer, 
           lr_scheduler=lr_scheduler, 
           device=torch.device(device), 
           epochs=epochs, 
           checkpoint_path=checkpoint_path,
           use_scaler=True)

Epochs:   0%|          | 0/2 [00:00<?, ?it/s]

Train:   0%|          | 0/9 [00:00<?, ?it/s]

RuntimeError: DataLoader worker (pid(s) 14244, 17600, 6096, 10912, 10576, 8732, 19980, 17388) exited unexpectedly