In [None]:
from google.colab import drive
drive.mount('/content/drive')

In [None]:
!pip install awscli

In [None]:
import os
import shutil
from distutils.dir_util import copy_tree
import json

In [None]:
# Resolve the project root from the current working directory. If this cell is
# re-run while already inside Building_Detection, reuse it instead of nesting a
# second Building_Detection folder inside the first one.
path = os.getcwd()
if os.path.basename(path) != "Building_Detection":
  path = os.path.join(path, "Building_Detection")

train_path = os.path.join(path, "data", "train")
val_path = os.path.join(path, "data", "val")

raw_train_path = os.path.join(path, "data", "train_raw_dataset")
raw_val_path = os.path.join(path, "data", "val_raw_dataset")

train_annot_path = os.path.join(train_path,"annotations")
train_images_path = os.path.join(train_path, "images")

val_annot_path = os.path.join(val_path,"annotations")
val_images_path = os.path.join(val_path, "images")

# Create the whole directory tree; exist_ok makes the cell safe to run repeatedly.
for directory in (
  raw_train_path,
  raw_val_path,
  train_annot_path,
  train_images_path,
  val_annot_path,
  val_images_path,
):
  os.makedirs(directory, exist_ok=True)

# Leave the working directory at the project root, as the %cd sequence used to.
os.chdir(path)

checkpoint_drive_path = "/content/drive/MyDrive/model_checkpoint"

# The file name must not carry a trailing space, otherwise the file written to
# disk is literally named "best.pt " and cannot be found again as "best.pt".
best_model_path = os.path.join(checkpoint_drive_path, "best.pt")


#Train Dataset Download Code

In [None]:
!aws s3 cp s3://spacenet-dataset/Hosted-Datasets/fmow/fmow-rgb/train/stadium /content/Building_Detection/data/train_raw_dataset/ --recursive --no-sign-request

#Validation Dataset Download Code

In [None]:
!aws s3 ls s3://spacenet-dataset/Hosted-Datasets/fmow/fmow-rgb/ --no-sign-request

In [None]:
!aws s3 cp s3://spacenet-dataset/Hosted-Datasets/fmow/fmow-rgb/val/stadium /content/Building_Detection/data/val_raw_dataset/ --recursive --no-sign-request

In [None]:
def check_bbox(json_dict):
  """Tell whether every annotated box has its corners in ascending order.

  :param json_dict: parsed annotation holding a "bounding_boxes" list whose
                    items each carry a four-value 'box' entry
  :return: False as soon as one box has x_min > x_max or y_min > y_max,
           True when no such box exists (including an empty list)
  """
  def corners_in_order(coords):
    left, top, right, bottom = coords
    return not (left > right or top > bottom)

  return all(corners_in_order(entry['box']) for entry in json_dict["bounding_boxes"])

In [None]:
def move_raw_datasets(actual_folder, img_destiny, annot_destiny, amount):
  """Move annotation/image pairs from the raw per-sample folders into flat folders.

  Only complete pairs are moved: a `.json` annotation is relocated together with
  the `.jpg` that shares its stem. Annotations without an image, and pairs whose
  file name already exists at the destination (e.g. from an earlier run), are
  left where they are instead of being half-moved or raising.

  :param actual_folder: directory whose sub-directories hold the raw samples
  :param img_destiny: directory receiving the `.jpg` files
  :param annot_destiny: directory receiving the `.json` files
  :param amount: maximum number of sample sub-directories to process
  """
  # os.listdir order is arbitrary; sorting makes the chosen subset reproducible.
  # Stray files next to the sample folders are ignored.
  folders = [
    name for name in sorted(os.listdir(actual_folder))
    if os.path.isdir(os.path.join(actual_folder, name))
  ][0:amount]

  for directory in folders:
    source = os.path.join(actual_folder, directory)

    for file in sorted(os.listdir(source)):
      if not file.endswith(".json"):
        continue

      # splitext keeps dots inside the stem, unlike split('.')[0]
      image_name = os.path.splitext(file)[0] + ".jpg"
      annot_source = os.path.join(source, file)
      image_source = os.path.join(source, image_name)

      if not os.path.isfile(image_source):
        # No matching image: moving the annotation alone would orphan it.
        continue

      already_moved = (
        os.path.exists(os.path.join(annot_destiny, file))
        or os.path.exists(os.path.join(img_destiny, image_name))
      )
      if already_moved:
        # shutil.move refuses to overwrite inside a directory; skip the pair.
        continue

      shutil.move(annot_source, annot_destiny)
      shutil.move(image_source, img_destiny)



In [None]:
def delete_bad_data(images_dir, annot_dir):
  """Delete every sample whose annotation contains a zero-width or zero-height box.

  A sample is the `.json` annotation plus the `.jpg` sharing its stem. Each
  sample is queued at most once even when several of its boxes are degenerate,
  and files that are already gone are skipped rather than raising.

  :param images_dir: directory holding the `.jpg` images
  :param annot_dir: directory holding the `.json` annotations
  :return: number of files (annotations and images) actually removed
  """
  data_to_delete = []

  for file in sorted(os.listdir(annot_dir)):
    if not file.endswith(".json"):
      continue

    annot_file = os.path.join(annot_dir, file)
    with open(annot_file) as f:
      json_dict = json.load(f)

    for elem in json_dict["bounding_boxes"]:
      x_min, y_min, x_max, y_max = elem["box"]
      # 10e-8 is 1e-7: the two edges are treated as coincident below that gap
      if abs(x_min - x_max) < 10e-8 or abs(y_min - y_max) < 10e-8:
        image_name = os.path.splitext(file)[0] + ".jpg"
        data_to_delete.append(annot_file)
        data_to_delete.append(os.path.join(images_dir, image_name))
        # One bad box condemns the sample; stop so it is not queued twice.
        break

  number_of_deleted = 0
  for doomed in data_to_delete:
    if os.path.exists(doomed):
      os.remove(doomed)
      number_of_deleted += 1

  return number_of_deleted


In [None]:
# Flatten the raw download: take at most 200 training and 20 validation sample
# folders and move their files into the images/ and annotations/ directories.
move_raw_datasets(raw_train_path, train_images_path, train_annot_path, 200)
move_raw_datasets(raw_val_path, val_images_path, val_annot_path, 20)

In [None]:
# Drop samples whose annotation has a zero-width or zero-height box;
# each call returns the number of files it removed, printed per split.
print(delete_bad_data(train_images_path, train_annot_path))
print(delete_bad_data(val_images_path, val_annot_path))

In [None]:
import albumentations
from albumentations.pytorch.transforms import ToTensorV2
from PIL import Image
import matplotlib.pyplot as plt
import matplotlib.patches as patches
from torchvision import transforms as torchtrans


def resize_image(img_arr, bboxes, h, w):
    """
    Resize an image and its bounding boxes with an albumentations pipeline.

    :param img_arr: original image as a numpy array
    :param bboxes: bboxes where each row is 'x_min', 'y_min', 'x_max', 'y_max', "class_id";
                   rows whose corners are reversed are reordered in place before resizing
    :param h: resized height dimension of image
    :param w: resized width dimension of image
    :return: the result of the pipeline call; callers read its "image" and "bboxes" entries
    """
    # single-step pipeline: resize, with boxes interpreted as pascal_voc corners
    resizer = albumentations.Compose(
        [albumentations.Resize(height=h, width=w, always_apply=True)],
        bbox_params=albumentations.BboxParams(format='pascal_voc'),
    )

    # make sure min <= max on both axes before the boxes reach the pipeline
    for row in bboxes:
        if row[0] > row[2]:
            row[0], row[2] = row[2], row[0]
        if row[1] > row[3]:
            row[1], row[3] = row[3], row[1]

    return resizer(image=img_arr, bboxes=bboxes)


# Function to visualize bounding boxes in the image
def plot_img_bbox(img, target, classes):
  """Display an image with its boxes outlined in red and captioned "<class>:<score>".

  :param img: image handed to imshow; if that raises, its transpose is shown instead
  :param target: mapping with 'boxes' (x_min, y_min, x_max, y_max per box) and
                 'labels', plus optional 'scores'
  :param classes: sequence indexed by label to obtain the caption text
  """
  # Without explicit scores every box is captioned with 100
  if "scores" in target.keys():
    scores = target['scores']
  else:
    scores = [100] * len(target['boxes'])

  fig, axis = plt.subplots(1, 1)
  fig.set_size_inches(5, 5)
  try:
    axis.imshow(img)
  except Exception:
    axis.imshow(img.T)

  for box, label, score in zip(target['boxes'], target['labels'], scores):
    # Rectangle wants an anchor plus width/height, so convert from corner form
    x, y = box[0], box[1]
    width = box[2] - box[0]
    height = box[3] - box[1]
    print(x, y, width, height)
    outline = patches.Rectangle((x, y), width, height, linewidth=2, edgecolor='r', facecolor='none')
    axis.text(x, y, f"{classes[label]}:{score}")
    # Draw the bounding box on top of the image
    axis.add_patch(outline)
  plt.show()


def copy_raw_datasets(actual_folder, img_destiny, annot_destiny):
  """Copy every entry of a flat folder, routing it by file extension.

  :param actual_folder: directory whose entries are copied
  :param img_destiny: destination for everything that is not a `.json` file
  :param annot_destiny: destination for the `.json` files
  """
  for entry in os.listdir(actual_folder):
    destination = annot_destiny if entry.endswith(".json") else img_destiny
    shutil.copy(os.path.join(actual_folder, entry), destination)


# Send train=True for training transforms and False for val/test transforms
def get_transform(train):
  """Build the albumentations pipeline applied to each sample.

  :param train: True adds a random horizontal flip before tensor conversion;
                False only converts the image to a tensor
  :return: an albumentations.Compose that takes `image`, `bboxes` (pascal_voc
           corners) and `labels`
  """
  steps = []
  if train:
    # The probability must be passed by keyword. On albumentations 1.x the
    # first positional parameter of a transform is `always_apply`, so
    # HorizontalFlip(0.5) would flip every image instead of half of them.
    steps.append(albumentations.HorizontalFlip(p=0.5))

  # ToTensorV2 converts image to pytorch tensor without div by 255
  steps.append(ToTensorV2(p=1.0))

  return albumentations.Compose(
    steps,
    bbox_params={'format': 'pascal_voc', 'label_fields': ['labels']}
  )


# function to convert a torchtensor back to PIL image
def torch_to_pil(img):
  """Convert an image tensor to an RGB PIL image via torchvision's ToPILImage."""
  to_pil = torchtrans.ToPILImage()
  pil_image = to_pil(img)
  return pil_image.convert('RGB')

def collate_fn(batch):
    """Transpose a batch of samples into one tuple per field.

    A list of (image, target) pairs becomes (all images, all targets), so
    samples do not have to be stacked into a single tensor.
    """
    per_field = zip(*batch)
    return tuple(per_field)

In [None]:
import torch
import os
import cv2
import json
import numpy as np

# we create a Dataset class which has a __getitem__ function and a __len__ function
class BuildingImageDataset(torch.utils.data.Dataset):
  """Detection dataset pairing every `.jpg` in `images_dir` with the same-named
  `.json` annotation in `annot_dir`.

  Indexing returns `(image, target)`: the image resized to `height` x `width`
  and scaled by 1/255, and a target dict with "boxes" (N x 4, corner format),
  "labels", "area", "iscrowd" and "image_id".

  NOTE(review): each 'box' is read as [x_min, y_min, x_max, y_max]. The fMoW
  metadata format may store [x, y, width, height] instead - confirm against
  the dataset documentation.
  """

  def __init__(self, images_dir, annot_dir, width, height, transforms=None):
    """
    :param images_dir: directory containing the `.jpg` images
    :param annot_dir: directory containing the `.json` annotations
    :param width: width every image is resized to
    :param height: height every image is resized to
    :param transforms: optional callable invoked as
                       transforms(image=..., bboxes=..., labels=...) that
                       returns a mapping with 'image' and 'bboxes'
    """
    self.transforms = transforms
    self.images_dir = images_dir
    self.annot_dir = annot_dir
    self.height = height
    self.width = width

    # sorting the images for consistency
    # To get images, the extension of the filename is checked to be jpg
    self.imgs = [image for image in sorted(os.listdir(self.images_dir)) if image[-4:]=='.jpg']

    # classes: 0 index is reserved for background
    self.classes = ['background', 'stadium']

  def _read_boxes(self, annot_file_path):
    """Return [[xmin, ymin, xmax, ymax, label], ...] for one annotation file."""
    with open(annot_file_path) as f:
      json_dict = json.load(f)

    image_info = []
    for elem in json_dict["bounding_boxes"]:
      label = self.classes.index(elem["category"])
      box = elem['box']

      # clamp the corners to the image extent recorded in the annotation
      xmin = max(0, int(float(box[0])))
      ymin = max(0, int(float(box[1])))
      xmax = min(int(json_dict["img_width"]), int(float(box[2])))
      ymax = min(int(json_dict["img_height"]), int(float(box[3])))

      # reversed corners are put back in ascending order
      if xmin > xmax:
        xmin, xmax = xmax, xmin
      if ymin > ymax:
        ymin, ymax = ymax, ymin

      # After truncation and clamping a box can collapse to zero width or
      # height; such boxes carry no area, so they are dropped here.
      if xmin == xmax or ymin == ymax:
        continue

      image_info.append([xmin, ymin, xmax, ymax, label])

    return image_info

  def __getitem__(self, idx):
    img_name = self.imgs[idx]
    image_path = os.path.join(self.images_dir, img_name)

    # reading the images and converting them to correct size and color
    img = cv2.imread(image_path)
    if img is None:
      # cv2.imread signals failure by returning None instead of raising
      raise FileNotFoundError(f"Could not read image: {image_path}")
    img_rgb = cv2.cvtColor(img, cv2.COLOR_BGR2RGB).astype(np.float32)

    # annotation file
    annot_filename = img_name[:-4] + '.json'
    annot_file_path = os.path.join(self.annot_dir, annot_filename)

    # reshape keeps a two-dimensional (N, 5) layout even when N is 0
    image_info = np.array(self._read_boxes(annot_file_path)).reshape(-1, 5)

    # resize to the size requested at construction time
    transformed_dict = resize_image(img_rgb, image_info, self.height, self.width)

    # contains the image as array
    img_res = np.asarray(transformed_dict["image"])

    # dividing by 255
    img_res /= 255.0

    boxes = []
    labels = []

    for elem in transformed_dict["bboxes"]:
      boxes.append([elem[0], elem[1], elem[2], elem[3]])
      labels.append(elem[4])

    # convert boxes into a torch.Tensor; reshape so an empty sample is (0, 4)
    boxes = torch.as_tensor(boxes, dtype=torch.float32).reshape(-1, 4)

    # getting the areas of the boxes
    area = (boxes[:, 3] - boxes[:, 1]) * (boxes[:, 2] - boxes[:, 0])

    # suppose all instances are not crowd
    iscrowd = torch.zeros((boxes.shape[0],), dtype=torch.int64)

    labels = torch.as_tensor(labels, dtype=torch.int64)

    target = {}
    target["boxes"] = boxes
    target["labels"] = labels
    target["area"] = area
    target["iscrowd"] = iscrowd
    image_id = torch.tensor([idx])
    target["image_id"] = image_id

    if self.transforms:
      # hand plain Python lists to the augmentation pipeline rather than tensors
      sample = self.transforms(image = img_res,
                                bboxes = boxes.tolist(),
                                labels = labels.tolist())
      img_res = sample['image']
      target['boxes'] = torch.as_tensor(sample['bboxes'], dtype=torch.float32).reshape(-1, 4)

    return img_res, target

  def __len__(self):
    return len(self.imgs)

In [None]:
# Label names in index order; index 0 is the background class
classes = ['background', 'stadium']

# NOTE(review): both splits are built with get_transform(train=False), so the
# training split gets no flip augmentation - confirm this is intended.
train_dataset = BuildingImageDataset(
  images_dir=train_images_path,
  annot_dir=train_annot_path,
  width=480,
  height=480,
  transforms=get_transform(train=False),
)
val_dataset = BuildingImageDataset(
  images_dir=val_images_path,
  annot_dir=val_annot_path,
  width=480,
  height=480,
  transforms=get_transform(train=False),
)

# Settings shared by the training and validation loaders
loader_options = {
  "batch_size": 10,
  "num_workers": 0,
  "collate_fn": collate_fn,
}

# Training batches are shuffled; the printed value is the number of batches
train_loader = torch.utils.data.DataLoader(train_dataset, shuffle=True, **loader_options)
print(len(train_loader))

# Validation batches keep the dataset order
valid_loader = torch.utils.data.DataLoader(val_dataset, shuffle=False, **loader_options)
print(len(valid_loader))

num_classes = 2 # one class (class 0) is dedicated to the "background"

In [None]:
import torchvision
from torchvision.models.detection.faster_rcnn import FastRCNNPredictor

def get_object_detection_model(num_classes):
  """Return a pre-trained Faster R-CNN (ResNet-50 FPN) with a fresh box predictor.

  :param num_classes: number of output classes of the new predictor head
  :return: the model with `roi_heads.box_predictor` replaced
  """
  # start from torchvision's pre-trained detector
  detector = torchvision.models.detection.fasterrcnn_resnet50_fpn(pretrained=True)

  # the new head must accept the same feature size as the one it replaces
  head_in_features = detector.roi_heads.box_predictor.cls_score.in_features
  detector.roi_heads.box_predictor = FastRCNNPredictor(head_in_features, num_classes)

  return detector

In [None]:
!pip install torchmetrics

In [None]:
import torch
import torchvision
from torchvision.models.detection.faster_rcnn import FastRCNNPredictor
from torchvision.datasets import VOCDetection
from torchvision.transforms import transforms
from torch.utils.data import DataLoader
from torchmetrics.detection.mean_ap import MeanAveragePrecision
from pprint import pprint
import os
import time

from torch.utils.tensorboard import SummaryWriter

"""Let the training begin!"""
checkpoint_folder = "/content/drive/MyDrive/model_checkpoint"
# No trailing space in the file names: the files on disk are then literally
# "checkpoint.pt" and "best.pt" and can be found again under those names.
checkpoint_path = os.path.join(checkpoint_folder, "checkpoint.pt")
best_model_path = os.path.join(checkpoint_folder, "best.pt")


# Shared TensorBoard writer and mAP metric used by the training function
writer = SummaryWriter()
metric = MeanAveragePrecision()

def train(model, optimizer, train_loader, valid_loader, lr_scheduler):
    """Train a detection model for up to 20 epochs, validating and checkpointing each epoch.

    Uses the module-level `checkpoint_folder`, `checkpoint_path`,
    `best_model_path`, `writer` and `metric`. If a checkpoint file exists,
    training resumes from the epoch after the one it recorded.

    :param model: detection model that returns a dict of losses when called
                  with (images, targets) in train mode and predictions when
                  called with images in eval mode
    :param optimizer: optimizer over the model parameters
    :param train_loader: loader yielding (images, targets) batches for training
    :param valid_loader: loader yielding (images, targets) batches for validation
    :param lr_scheduler: learning-rate scheduler, stepped once per epoch
    """
    device = torch.device('cuda') if torch.cuda.is_available() else torch.device('cpu')
    model.to(device)
    num_epochs = 20
    start_epoch = 0
    best_mAP = -1

    # Make sure the folder exists before training starts, so the first save
    # cannot fail after a full epoch of work.
    os.makedirs(checkpoint_folder, exist_ok=True)

    # Test the exact path that torch.save writes to, so the lookup and the
    # save can never disagree about the file name.
    if os.path.exists(checkpoint_path):
        print('..............Checkpoint Loaded................')
        checkpoint = torch.load(checkpoint_path, map_location=device)
        model.load_state_dict(checkpoint['model_state_dict'])
        optimizer.load_state_dict(checkpoint['optimizer_state_dict'])
        # Older checkpoints do not carry scheduler state; keep the scheduler as passed in.
        if 'lr_scheduler_state_dict' in checkpoint:
            lr_scheduler.load_state_dict(checkpoint['lr_scheduler_state_dict'])
        start_epoch = checkpoint['epoch'] + 1
        best_mAP = checkpoint['best_mAP']

    for epoch in range(start_epoch, num_epochs):
        running_loss = 0.0

        print(f"........................Starting Epoch: {epoch}...........................")
        model.train()
        print('.............Training................')

        start_time = time.time()
        for batch_idx, (images, targets) in enumerate(train_loader):
            images = [image.to(device) for image in images]
            targets = [{k: v.to(device) for k, v in t.items()} for t in targets]
            loss_dict = model(images, targets)
            losses = sum(loss for loss in loss_dict.values())
            running_loss += losses.item()
            writer.add_scalar('Training Loss', losses.item(), epoch * len(train_loader) + batch_idx)
            optimizer.zero_grad()
            losses.backward()
            optimizer.step()

            if batch_idx % 10 == 4:
                elapsed_time = time.time() - start_time
                # running_loss sums one value per batch, so average over batches
                avg_loss = running_loss / (batch_idx + 1)
                print(f'Batch[{batch_idx+1}/{len(train_loader)}], Avg. Loss: {avg_loss:.4f}, Time: {elapsed_time:.2f}s')

        lr_scheduler.step()

        print('...............Validation.................')
        model.eval()
        # The metric object is shared across epochs; clear it so this epoch's
        # mAP is not mixed with predictions from earlier epochs.
        metric.reset()
        with torch.no_grad():
            for batch_idx, (images, targets) in enumerate(valid_loader):
                images = [image.to(device) for image in images]
                targets = [{k: v.to(device) for k, v in t.items()} for t in targets]
                predict = model(images)
                metric.update(predict, targets)

                if batch_idx % 10 == 4:
                    elapsed_time = time.time() - start_time
                    print(f'Batch[{batch_idx+1}/{len(valid_loader)}], Time: {elapsed_time:.2f}s')

        # Compute mAP once, over the whole validation set.
        results = metric.compute()
        pprint(results)
        epoch_mAP = results['map'].item()
        writer.add_scalar('Validation mAP', epoch_mAP, epoch)

        if epoch_mAP > best_mAP:
            best_mAP = epoch_mAP
            # Save the best model
            torch.save(model.state_dict(), best_model_path)

        # Save a checkpoint of the model, optimizer and scheduler
        checkpoint = {
            'epoch': epoch,
            'model_state_dict': model.state_dict(),
            'optimizer_state_dict': optimizer.state_dict(),
            'lr_scheduler_state_dict': lr_scheduler.state_dict(),
            'best_mAP': best_mAP
        }
        torch.save(checkpoint, checkpoint_path)
        print("..............Checkpoint Saved...............")
        # Print the mean batch loss and the validation mAP for the epoch
        epoch_loss = running_loss / max(len(train_loader), 1)
        print(f'Ending Epoch {epoch}/{num_epochs}, Epoch Loss: {epoch_loss:.4f}, Epoch mAP: {epoch_mAP}')

In [None]:
# Detector with its prediction head sized for our classes
model = get_object_detection_model(num_classes)

# Optimise only the parameters that still require gradients
params = list(filter(lambda p: p.requires_grad, model.parameters()))
optimizer = torch.optim.SGD(params, lr=0.005, momentum=0.9, weight_decay=0.0005)

# StepLR multiplies the learning rate by 0.1 after every 3 epochs
lr_scheduler = torch.optim.lr_scheduler.StepLR(optimizer, step_size=3, gamma=0.1)


train(model, optimizer, train_loader, valid_loader, lr_scheduler)