In [1]:
import json
import pandas as pd
import PIL
import torch
import torch.nn.functional as F
import torchvision

from PIL import Image
from torch.utils.data import Dataset, DataLoader
from torch.utils.tensorboard import SummaryWriter
from torchvision import transforms, tv_tensors
from torchvision.models.detection.faster_rcnn import FastRCNNPredictor

2024-07-30 11:38:15.270979: I tensorflow/core/platform/cpu_feature_guard.cc:182] This TensorFlow binary is optimized to use available CPU instructions in performance-critical operations.
To enable the following instructions: AVX2 FMA, in other operations, rebuild TensorFlow with the appropriate compiler flags.


In [2]:
device = torch.device('cuda') if torch.cuda.is_available() else torch.device('cpu')

## Analysing the dataset:

In [3]:
with open('Datasets/train/_annotations.coco.json') as file:
    my_dict = json.load(file)


In [4]:
print(my_dict.keys())
print(my_dict['annotations'][0:2])
print(my_dict['images'][0:2])
print(my_dict['categories'])

dict_keys(['info', 'licenses', 'categories', 'images', 'annotations'])
[{'id': 0, 'image_id': 0, 'category_id': 3, 'bbox': [259, 49, 4.8, 9.6], 'area': 46.08, 'segmentation': [[264, 48.8, 259.2, 48.8, 259.2, 58.4, 264, 58.4, 264, 48.8]], 'iscrowd': 0}, {'id': 1, 'image_id': 0, 'category_id': 3, 'bbox': [284, 630, 4.8, 8.8], 'area': 42.24, 'segmentation': [[288.8, 630.4, 284, 630.4, 284, 639.2, 288.8, 639.2, 288.8, 630.4]], 'iscrowd': 0}]
[{'id': 0, 'license': 1, 'file_name': 'P2491__1-0__1200___1764_png_jpg.rf.00342c6c14ae53b3bfadd7995643e1bc.jpg', 'height': 640, 'width': 640, 'date_captured': '2023-12-20T13:55:24+00:00'}, {'id': 1, 'license': 1, 'file_name': '4f833867-273e-4d73-8bc3-cb2d9ceb54ef_0_0_jpg.rf.000c42e196c096916dfe7c0744d06e12.jpg', 'height': 640, 'width': 640, 'date_captured': '2023-12-20T13:55:24+00:00'}]
[{'id': 1, 'name': 'Aircraft'}, {'id': 2, 'name': 'ship'}, {'id': 3, 'name': 'vehicle'}]


## Creating the dataset

In [5]:
class AerialViewDataset(Dataset):
    def __init__(self, root_folder) -> None:
        super().__init__()
        self.root_folder = root_folder

        with open(f'{self.root_folder}/_annotations.coco.json') as file:
            self.raw_dictionary = json.load(file)

        self.list_of_image_dictionaries = self.raw_dictionary['images']

        # Adds all the annotations to the dictionary containing a particular images' details under the key 'list_of_annotations'
        for dict in self.list_of_image_dictionaries:
            dict['list_of_annotations'] = []
            for annotation in self.raw_dictionary['annotations']:
                if annotation['image_id'] == dict['id']:
                    dict['list_of_annotations'].append(annotation)


    def __getitem__(self, index):
        """
        Returns a tuple containing 'Image' and 'Target'
        """
        image_path = f'{self.root_folder}' + self.raw_dictionary['images'][index]['file_name']
        transformer = transforms.PILToTensor()
        with Image.open(image_path) as pil_image:
            image = transformer(pil_image)
            image = image.float()
        target = {}
        target['area'] = []
        target['boxes'] = []
        target['image_id'] = torch.tensor(self.list_of_image_dictionaries[index]['id'])
        target['labels'] = []

        for annotation in self.list_of_image_dictionaries[index]['list_of_annotations']:
            target['area'].append(annotation['area'])
            target['boxes'].append(annotation['bbox'])
            target['labels'].append(annotation['category_id'])
        
        target['area'] = torch.Tensor(target['area']).float()

        # Convert the boxes attribute to tensors and then format it to xyxy from xywh
        target['boxes'] = tv_tensors.BoundingBoxes(target['boxes'], format='xywh', canvas_size=(640, 640))
        target['boxes'] = torchvision.ops.box_convert(target['boxes'],  in_fmt='xywh', out_fmt='xyxy')

        target['labels'] = torch.Tensor(target['labels']).long()
        
        target['area'].to(device)
        target['boxes'].to(device)
        target['image_id'].to(device)
        target['labels'].to(device)

        return (image, target)
    
    def __len__(self):
        return len(self.raw_dictionary['images'])

In [6]:
train_dataset = AerialViewDataset('Datasets/train/')
valid_dataset = AerialViewDataset('Datasets/valid/')
test_dataset = AerialViewDataset('Datasets/test/')

In [7]:
def collate_fn(batch):
    return tuple(zip(*batch))

train_loader = DataLoader(train_dataset, batch_size=4, shuffle=True, collate_fn=collate_fn)
valid_loader = DataLoader(valid_dataset, batch_size=4, shuffle=True, collate_fn=collate_fn)

In [8]:
test_dataset[1][1]

{'area': tensor([44.8000, 33.2800, 35.8400, 40.9600, 38.4000]),
 'boxes': tensor([[293.0000, 293.0000, 297.0000, 304.2000],
         [299.0000, 109.0000, 302.2000, 119.4000],
         [298.0000,  60.0000, 301.2000,  71.2000],
         [322.0000, 147.0000, 325.2000, 159.8000],
         [318.0000, 154.0000, 321.2000, 166.0000]]),
 'image_id': tensor(1),
 'labels': tensor([3, 3, 3, 3, 3])}

In [9]:
def show_bounding_boxes(image:Image, bounding_boxes:tv_tensors.BoundingBoxes):
    transformer = transforms.PILToTensor()
    image_tensor = transformer(image)
    annotated_image_tensor = torchvision.utils.draw_bounding_boxes(image_tensor, bounding_boxes, colors='green')
    transforms.functional.to_pil_image(annotated_image_tensor).show()
    
def show_bounding_boxes_from_image_id(image_id:int, root_path:str = 'Datasets/train/', list_of_images:list = my_dict['images'], list_of_annotations:list = my_dict['annotations']):
    raw_box_coords = []
    for annotation in list_of_annotations:
        if annotation['image_id'] == image_id:
            raw_box_coords.append(annotation['bbox'])
    
    tensored_box_coords = tv_tensors.BoundingBoxes(raw_box_coords, format='xywh', canvas_size=(640, 640))
    formatted_box_coords = torchvision.ops.box_convert(tensored_box_coords, in_fmt='xywh', out_fmt='xyxy')

    image_path = f'{root_path}' + list_of_images[image_id]['file_name']

    with Image.open(image_path) as image:
        show_bounding_boxes(image, formatted_box_coords)
    


In [10]:
show_bounding_boxes(transforms.functional.to_pil_image(test_dataset[161][0]), test_dataset[161][1]['boxes'])

## Defining the Model

In [11]:
model = torchvision.models.detection.fasterrcnn_resnet50_fpn(weights="DEFAULT")
dir(model)
#model.modules
num_classes = 4 

in_features = model.roi_heads.box_predictor.cls_score.in_features

model.roi_heads.box_predictor = FastRCNNPredictor(in_features, num_classes)

model.to(device)

FasterRCNN(
  (transform): GeneralizedRCNNTransform(
      Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225])
      Resize(min_size=(800,), max_size=1333, mode='bilinear')
  )
  (backbone): BackboneWithFPN(
    (body): IntermediateLayerGetter(
      (conv1): Conv2d(3, 64, kernel_size=(7, 7), stride=(2, 2), padding=(3, 3), bias=False)
      (bn1): FrozenBatchNorm2d(64, eps=0.0)
      (relu): ReLU(inplace=True)
      (maxpool): MaxPool2d(kernel_size=3, stride=2, padding=1, dilation=1, ceil_mode=False)
      (layer1): Sequential(
        (0): Bottleneck(
          (conv1): Conv2d(64, 64, kernel_size=(1, 1), stride=(1, 1), bias=False)
          (bn1): FrozenBatchNorm2d(64, eps=0.0)
          (conv2): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
          (bn2): FrozenBatchNorm2d(64, eps=0.0)
          (conv3): Conv2d(64, 256, kernel_size=(1, 1), stride=(1, 1), bias=False)
          (bn3): FrozenBatchNorm2d(256, eps=0.0)
          (relu): ReLU(

In [12]:
batch = test_dataset[0]
features = [test_dataset[0][0], test_dataset[1][0]]
targets = [test_dataset[0][1], test_dataset[1][1]]

features = [img.to(device) for img in features]
targets = [{k: v.to(device) for k, v in t.items()} for t in targets]

print(model(features, targets))
model.eval()
#print(model(features, targets))
model.train()


{'loss_classifier': tensor(107.7279, device='cuda:0', grad_fn=<NllLossBackward0>), 'loss_box_reg': tensor(4.8898, device='cuda:0', grad_fn=<DivBackward0>), 'loss_objectness': tensor(38.3661, device='cuda:0',
       grad_fn=<BinaryCrossEntropyWithLogitsBackward0>), 'loss_rpn_box_reg': tensor(104.8354, device='cuda:0', grad_fn=<DivBackward0>)}


FasterRCNN(
  (transform): GeneralizedRCNNTransform(
      Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225])
      Resize(min_size=(800,), max_size=1333, mode='bilinear')
  )
  (backbone): BackboneWithFPN(
    (body): IntermediateLayerGetter(
      (conv1): Conv2d(3, 64, kernel_size=(7, 7), stride=(2, 2), padding=(3, 3), bias=False)
      (bn1): FrozenBatchNorm2d(64, eps=0.0)
      (relu): ReLU(inplace=True)
      (maxpool): MaxPool2d(kernel_size=3, stride=2, padding=1, dilation=1, ceil_mode=False)
      (layer1): Sequential(
        (0): Bottleneck(
          (conv1): Conv2d(64, 64, kernel_size=(1, 1), stride=(1, 1), bias=False)
          (bn1): FrozenBatchNorm2d(64, eps=0.0)
          (conv2): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
          (bn2): FrozenBatchNorm2d(64, eps=0.0)
          (conv3): Conv2d(64, 256, kernel_size=(1, 1), stride=(1, 1), bias=False)
          (bn3): FrozenBatchNorm2d(256, eps=0.0)
          (relu): ReLU(

In [13]:
# The training loop. Takes in a model, the training and validation data loaders, the number of epochs and the initial learning rate
def train(model, train_loader, validation_loader, epochs = 10, learning_rate = 1, model_name:str = "My Model"):

    torch.cuda.empty_cache()

    # Set the optimiser to be an instance of the stochastic gradient descent class
    parameters = [p for p in model.parameters() if p.requires_grad]
    optimiser = torch.optim.SGD(parameters, lr=learning_rate)

    # Define a learning rate scheduler as an instance of the ReduceLROnPlateau class
    scheduler = torch.optim.lr_scheduler.ReduceLROnPlateau(optimiser, mode='min', patience=50, cooldown=7, eps=1e-20)

    # Writer will be used to track model performance with TensorBoard
    writer = SummaryWriter()

    # Keep track of the number of batches to plot model performace against
    batch_index = 0
    
    # Prints an validation score
    #print(f"Initial validation accuracy score{accuracy_score_from_valiadation(model, validation_loader)}")

    # Create a dictionary to store the best model parameters
    #best_model_parameters = {'Epoch':-1, 'Accuracy':0, 'Parameters':model.state_dict()}

    # Loop over the number of epochs
    for epoch in range(epochs):

        # Within each epoch, we pass through the entire training data in batches indexed by batch
        for batch in train_loader:
            # Loads features and labels into device for performance improvements
            features, labels = batch
            model.train()
            
            features = list(img.to(device) for img in features)

            print([img.device for img in features])

            labels = [{k: v.to(device) for k, v in t.items()} for t in labels]


            loss_dict = model(features, labels)

            # Calculate the loss via cross_entropy
            loss = loss_dict['loss_objectness']

            # Create the grad attributes
            loss.backward() 

            # Clip the loss value so it doesn't become Nan
            torch.nn.utils.clip_grad_norm_(parameters, 4)

            # Print the performance
            print(f"Epoch: {epoch}, batch index: {batch_index}, learning rate: {scheduler.get_last_lr()}, loss:{loss.item()}")

            # Perform one step of stochastic gradient descent
            optimiser.step()

            # Zero the gradients (Apparently set_to_none=True imporves performace)
            optimiser.zero_grad(set_to_none=True)

            # Feed the loss amount into the learning rate scheduler to decide the next learning rate
            scheduler.step(loss.item())

            # Write the performance to the TensorBoard plot
            writer.add_scalar('loss', loss.item(), batch_index)

            # Increment the batch index
            batch_index += 1
        
        # Print the validation loss
        """
        print('Calculating validation accuracy')
        accuracy = accuracy_score_from_valiadation(model, validation_loader)
        print(f"Epoch {epoch}, validation accuracy score{accuracy}")
        """
        # Check if the model has the best perfomrance and save the parameters to 'best_model.pt'
        """
        if accuracy > best_model_parameters['Accuracy']:
            best_model_parameters['Epoch'] = epoch
            best_model_parameters['Accuracy'] = loss.item()
            best_model_parameters['Parameters'] = model.state_dict()
            torch.save(model.state_dict(), f'model_evaluation/weights/{model_name}_best_model.pt')
        """
    print('Loading best model')
    
    #Update model parameters with the best model parameters:
    #model.load_state_dict(best_model_parameters['Parameters'])
    #print(f'The best model has validation accuracy {accuracy_score_from_valiadation(model, validation_loader)}')

def accuracy_score_from_valiadation(model, validation_loader):
    """
    Calculates the accuracy using the WHOLE of the validation dataset.
    """
    predictions = torch.zeros(0).to(device)
    labels = torch.zeros(0).to(device)

    for batch_index, batch in enumerate(validation_loader):
        features, target = batch
        target.to(device)
        if torch.is_tensor(features):
            features = features.to(device)
        model.to(device)
        target = target.to(device)
        predictions = torch.cat((predictions, model(features).max(dim = 1).indices))
        labels = torch.cat((labels, target))
    
    accuracy_score = torch.sum(predictions == labels) / len(predictions)

    return accuracy_score


In [14]:
train(model, epochs=1, learning_rate=0.01, train_loader=train_loader, validation_loader=valid_loader)

[device(type='cuda', index=0), device(type='cuda', index=0), device(type='cuda', index=0), device(type='cuda', index=0)]
Epoch: 0, batch index: 0, learning rate: [0.01], loss:147.78604125976562
[device(type='cuda', index=0), device(type='cuda', index=0), device(type='cuda', index=0), device(type='cuda', index=0)]
Epoch: 0, batch index: 1, learning rate: [0.01], loss:361.88671875
[device(type='cuda', index=0), device(type='cuda', index=0), device(type='cuda', index=0), device(type='cuda', index=0)]
Epoch: 0, batch index: 2, learning rate: [0.01], loss:83.92477416992188
[device(type='cuda', index=0), device(type='cuda', index=0), device(type='cuda', index=0), device(type='cuda', index=0)]
Epoch: 0, batch index: 3, learning rate: [0.01], loss:238.52716064453125
[device(type='cuda', index=0), device(type='cuda', index=0), device(type='cuda', index=0), device(type='cuda', index=0)]
Epoch: 0, batch index: 4, learning rate: [0.01], loss:34.35378646850586
[device(type='cuda', index=0), device(

OutOfMemoryError: CUDA out of memory. Tried to allocate 684.00 MiB. GPU 