In [None]:
# Mount Google Drive at /content/drive; the dataset and saved-model paths used
# by the cells below all live under /content/drive/MyDrive.
from google.colab import drive
drive.mount('/content/drive')

Mounted at /content/drive


In [1]:
import os
import numpy as np
import matplotlib.pyplot as plt
import cv2
import xml.etree.ElementTree as ET
import pickle as cPickle
import argparse

import torchvision
from torchvision.models.detection import FasterRCNN
from torchvision.models.detection.faster_rcnn import FastRCNNPredictor
from torchvision.models.detection.rpn import AnchorGenerator
import torchvision.transforms as T

import torch.nn as nn
import torch

!pip install albumentations==0.4.6
import albumentations as A
from albumentations.pytorch.transforms import ToTensorV2

import sys
sys.path.insert(0,"/content/drive/MyDrive/Github/BirdDetection-FeaturesExtraction/dataset")



In [28]:
# Manage multiple versions of python with pip
# py -3.8 -m pip install package
#https://stackoverflow.com/questions/2812520/dealing-with-multiple-python-versions-and-pip
# Inspired by torchvision example: https://pytorch.org/tutorials/intermediate/torchvision_tutorial.html

class BirdDataset(torch.utils.data.Dataset):
    """Bird detection dataset: images paired with XML bounding-box annotations.

    Images are listed from ``<root_dir>/all_images`` and annotations from
    ``<root_dir>/all_labels``. Both listings are sorted by the integer value of
    the file stem, and sample ``idx`` pairs the idx-th entry of each listing.
    """

    def __init__(self, root_dir, transforms=None):
        """
        Args:
            root_dir (string): Directory containing the ``all_images`` and
                ``all_labels`` sub-directories.
            transforms (callable, optional): Called as
                ``transforms(image=..., bboxes=..., labels=...)``; must return a
                mapping with at least the ``'image'`` and ``'bboxes'`` keys.
        """
        self.root = root_dir
        self.transforms = transforms

        self.imgs = self._list_by_number(os.path.join(root_dir, "all_images"))   # image file names
        self.boxes = self._list_by_number(os.path.join(root_dir, "all_labels"))  # annotation file names (xml)

    @staticmethod
    def _list_by_number(directory):
        """Return the file names in ``directory`` sorted by their integer stem."""
        return sorted(os.listdir(directory), key=lambda name: int(os.path.splitext(name)[0]))

    @staticmethod
    def _box_area(boxes):
        """Return the area of every ``[xmin, ymin, xmax, ymax]`` row of ``boxes``."""
        return (boxes[:, 3] - boxes[:, 1]) * (boxes[:, 2] - boxes[:, 0])

    def __len__(self):
        """Number of samples, i.e. number of image files found."""
        return len(self.imgs)

    def __getitem__(self, idx):
        """Load and return the ``(image, target)`` pair at index ``idx``.

        Returns:
            tuple: ``(img, target)``. Without transforms ``img`` is a float32
            RGB array scaled to [0, 1]; with transforms it is whatever the
            transform returns under ``'image'``. ``target`` is a dict with:
              * ``boxes``    - float32 tensor of shape (N, 4), ``[xmin, ymin, xmax, ymax]``
              * ``labels``   - int64 tensor of N ones (single class: bird)
              * ``image_id`` - tensor holding ``idx + 1``
              * ``area``     - area of each box in ``boxes``

        Raises:
            FileNotFoundError: If the image file cannot be read.
        """
        img_path = os.path.join(self.root, "all_images", self.imgs[idx])
        box_path = os.path.join(self.root, "all_labels", self.boxes[idx])

        img = cv2.imread(img_path, cv2.IMREAD_COLOR)
        if img is None:
            # cv2.imread reports failure by returning None rather than raising.
            raise FileNotFoundError(f"Could not read image: {img_path}")
        img = cv2.cvtColor(img, cv2.COLOR_BGR2RGB).astype(np.float32)
        img /= 255.0

        # Collect one [xmin, ymin, xmax, ymax] row per annotated bird.
        annotation_root = ET.parse(box_path).getroot()
        boxes = [
            [float(item.find(tag).text) for tag in ("xmin", "ymin", "xmax", "ymax")]
            for item in annotation_root.findall(".//object/bndbox")
        ]
        # reshape(-1, 4) keeps the tensor 2-D even when the file has no objects,
        # so the column indexing used for the area does not fail.
        boxes = torch.as_tensor(boxes, dtype=torch.float32).reshape(-1, 4)

        if self.transforms is not None:
            sample = self.transforms(
                image=img,
                bboxes=boxes,
                labels=torch.ones((boxes.shape[0],), dtype=torch.int64),
            )
            img = sample['image']
            # The transform may rescale or drop boxes; everything derived from
            # the boxes (labels, area) is rebuilt from its output below.
            boxes = torch.as_tensor(sample['bboxes'], dtype=torch.float32).reshape(-1, 4)

        target = {
            "boxes": boxes,
            "labels": torch.ones((boxes.shape[0],), dtype=torch.int64),  # only one class: a bird
            "image_id": torch.tensor([idx + 1]),
            "area": self._box_area(boxes),
        }
        return img, target

In [3]:
def get_model(trained=True, save_path=None):
  """Build a Faster R-CNN (ResNet-50 FPN) detector with a two-class box head.

  Args:
    trained (bool): If True, load the weights stored at ``save_path`` into the
      model after the box predictor has been replaced.
    save_path (str, optional): Path of a state dict written with
      ``torch.save``. Required when ``trained`` is True.

  Returns:
    The model, moved to the CPU.

  Raises:
    ValueError: If ``trained`` is True but no ``save_path`` is given.
  """
  # Fail before the (expensive) model construction instead of printing a
  # message and crashing later inside torch.load.
  if trained and save_path is None:
    raise ValueError("No path to the saved model: pass save_path when trained=True")

  device = 'cpu'
  model = torchvision.models.detection.fasterrcnn_resnet50_fpn(pretrained=True)
  num_classes = 2  # 1 class (bird) + background

  # get number of input features for the classifier
  in_features = model.roi_heads.box_predictor.cls_score.in_features

  # replace the pre-trained head with a new one
  model.roi_heads.box_predictor = FastRCNNPredictor(in_features, num_classes)
  model.to(device)

  if trained:
    # map_location lets a checkpoint saved from a GPU be loaded on this CPU model.
    model.load_state_dict(torch.load(save_path, map_location=device))

  return model

In [53]:
#########
# UTILS #
#########
def collate_fn(batch):
  """Transpose a batch of samples into one tuple per sample field.

  Passed to the DataLoader so that a list of ``(image, target)`` pairs becomes
  ``(images, targets)`` without stacking the items into a single tensor.
  """
  per_field = zip(*batch)
  return tuple(per_field)

def get_transform(train):
  """Build the albumentations pipeline applied to every sample.

  The pipeline resizes the image to 224x224 (Lanczos interpolation) and
  converts it to a tensor. Bounding boxes are declared in 'pascal_voc' format
  with ``min_visibility`` 0.6 and their labels travel in the 'labels' field.

  Args:
    train (bool): Selects the training pipeline. All training-only
      augmentations are currently disabled, so both values of ``train`` yield
      the same pipeline.

  Returns:
    The composed transform.
  """
  transforms = []
  transforms.append(A.Resize(224, 224, interpolation = cv2.INTER_LANCZOS4))
  if train:
      # Training-only data augmentation goes here (all disabled for now).
      # transforms.append(A.RandomCrop(width=576, height=576))
      # transforms.append(A.Flip(0.5))
      # transforms.append(A.Normalize(mean=[0.598, 0.554, 0.508], std=[0.090, 0.081, 0.076]))
      pass
  transforms.append(ToTensorV2(p=1.0))
  return A.Compose(transforms, bbox_params={'format': 'pascal_voc', 'min_visibility': 0.6, 'label_fields': ['labels']})

def save_performance(score, filename):
  """Placeholder for writing the four scores to the text file ``filename``.

  Not implemented yet: both arguments are ignored, nothing is written and the
  function always returns 0.
  """
  status = 0
  return status

class Performance():
    """Accumulates the per-iteration losses and IoU values of a model.

    The four loss histories are kept in the order given by ``_LOSS_KEYS``.
    """

    # Keys read from the loss dictionary, in the order of the score lists.
    _LOSS_KEYS = ('loss_classifier', 'loss_box_reg', 'loss_objectness', 'loss_rpn_box_reg')

    def __init__(self, root_save, args):
        """
        Args:
            root_save (string): Path of the file written by ``save``.
            args: Run settings, stored unchanged on the instance.
        """
        self.root_save = root_save
        self.args = args
        self.train_score = [[],[],[],[]]       # one list per loss, training
        self.validation_score = [[],[],[],[]]  # one list per loss, validation
        self.training_iou = []
        self.test_iou = []

    def add_score(self, values, training):
        """Append one set of losses to the training or validation history.

        Args:
            values (dict): Must contain 'loss_classifier', 'loss_box_reg',
                'loss_objectness' and 'loss_rpn_box_reg'.
            training (bool): True to record into ``train_score``, False to
                record into ``validation_score``.
        """
        history = self.train_score if training else self.validation_score
        for per_loss, key in zip(history, self._LOSS_KEYS):
            value = values[key]
            # Tensors are detached so the stored history does not keep the
            # autograd graph of every iteration referenced.
            per_loss.append(value.detach() if hasattr(value, 'detach') else value)

    def add_accuracy(self, val, training):
        """Append ``val`` to ``training_iou`` (training=True) or ``test_iou``."""
        if training:
            self.training_iou.append(val)
        else:
            self.test_iou.append(val)

    def save(self, obj):
        """Pickle ``obj`` to ``self.root_save`` (binary file, not text)."""
        with open(self.root_save, 'wb') as outp:  # Overwrites any existing file.
            cPickle.dump(obj, outp, cPickle.HIGHEST_PROTOCOL)

    # Disabled draft of a loader, kept for reference:
    # def load(self):
    #     """Load the class from txt file"""
    #     file = open(self.root_save+'.txt','r')
    #     dataPickle = file.read()
    #     file.close()

    #     self.__dict__ = cPickle.loads(dataPickle)

In [51]:
# Locations on the mounted drive: dataset root, output folder and output file names.
ROOT_DIR_DATA = "/content/drive/MyDrive/Github/BirdDetection-FeaturesExtraction/dataset"
ROOT_DIR_SAVING = "/content/drive/MyDrive/Github/BirdDetection-FeaturesExtraction/savedmodel"
SAVEDMODEL_NAME = "/fasterrcnn_SGD0005_None_Batch8_Epoch2.pth"
SAVEDPERFORMANCE_NAME = "/fasterrcnn_SGD0005_Batch8_Epoch2.pth"

# Hyper-parameters
num_epochs, num_classes, num_coord = 1, 2, 4
num_workers, batch_size = 2, 8

# Two views over the same files: one built with the training transform, one
# with the evaluation transform.
ds = BirdDataset(ROOT_DIR_DATA, get_transform(train=True))
ds_test = BirdDataset(ROOT_DIR_DATA, get_transform(train=False))

# Seeded shuffle of the sample indices, so the split is the same on every run.
random_seed = 1 # or any of your favorite number
torch.manual_seed(random_seed)
indices = torch.randperm(len(ds)).tolist()
print(indices)

# The last 50 shuffled indices form the test set.
# NOTE(review): only the first 7 of the remaining indices are used for
# training - presumably a debugging shortcut; confirm before a real run.
dataset = torch.utils.data.Subset(ds, indices[:-50][:7])
dataset_test = torch.utils.data.Subset(ds_test, indices[-50:])

# Training and test loaders share the same options.
loader_options = dict(shuffle=True, collate_fn=collate_fn,
                      num_workers=num_workers, batch_size=batch_size)
data_loader_training = torch.utils.data.DataLoader(dataset, **loader_options)
data_loader_test = torch.utils.data.DataLoader(dataset_test, **loader_options)


[21, 130, 236, 536, 311, 18, 247, 371, 223, 513, 230, 95, 220, 159, 279, 498, 278, 419, 86, 307, 446, 554, 336, 597, 411, 253, 150, 618, 61, 33, 366, 152, 4, 557, 483, 266, 519, 355, 514, 464, 645, 475, 37, 605, 647, 416, 94, 302, 433, 377, 380, 51, 388, 212, 203, 463, 367, 238, 219, 305, 578, 275, 245, 308, 277, 167, 77, 168, 516, 649, 297, 109, 476, 177, 144, 83, 561, 395, 539, 369, 479, 113, 384, 293, 14, 496, 495, 494, 602, 176, 158, 169, 441, 590, 429, 434, 263, 5, 52, 78, 173, 469, 96, 328, 576, 505, 216, 55, 195, 357, 92, 206, 457, 188, 75, 407, 626, 531, 312, 89, 472, 567, 337, 372, 577, 226, 359, 8, 552, 389, 555, 122, 82, 244, 414, 537, 652, 224, 381, 126, 54, 228, 218, 526, 72, 7, 344, 386, 368, 123, 271, 365, 644, 334, 378, 620, 582, 426, 24, 199, 139, 296, 617, 530, 131, 56, 390, 298, 292, 601, 198, 157, 242, 232, 191, 125, 194, 329, 471, 129, 436, 600, 629, 453, 466, 452, 314, 63, 634, 137, 632, 499, 503, 40, 592, 585, 607, 541, 23, 508, 141, 375, 412, 331, 589, 290, 393

In [None]:
#################
# Visualisation #
#################
# Draw the ground-truth boxes on every image of one training batch, save each
# figure to the drive and display it.
device = "cpu"
images, targets = next(iter(data_loader_training))
images = list(image.to(device) for image in images)
targets = [{k: v.to(device) for k, v in t.items()} for t in targets]

for image, target in zip(images, targets):
    boxes = target['boxes'].cpu().numpy().astype(np.int32)
    image_id = target['image_id'].cpu().numpy().astype(np.int32)
    print(image_id)

    # CHW tensor -> HWC array. The dataset already delivers RGB, so no channel
    # swap is applied here (a BGR2RGB conversion would invert the colours shown
    # by imshow). cv2.rectangle needs a contiguous buffer, which the permuted
    # view is not, hence the explicit copy.
    sample = np.ascontiguousarray(image.permute(1, 2, 0).cpu().numpy())

    fig, ax = plt.subplots(1, 1, figsize=(16, 8))
    for box in boxes:
        cv2.rectangle(sample,
                    (int(box[0]), int(box[1])),
                    (int(box[2]), int(box[3])),
                    (1, 0, 0), 1)  # red in RGB; image values are in [0, 1]

    ax.imshow((sample * 255).astype(np.uint8))
    # NOTE(review): image_id is a 1-element array, so the file name contains
    # brackets (e.g. "[22]area.png"); kept as-is to not change output paths.
    plt.savefig("/content/drive/MyDrive/Github/BirdDetection-FeaturesExtraction/images/"+str(image_id)+"area" + ".png")
    plt.show()
    plt.close(fig)  # release the figure; one is created per image

In [24]:
#################################
# Training and Testing function #
#################################
def training(args):
  """Train the detector on the module-level ``dataset`` and save checkpoints.

  The model weights and the ``Performance`` record are written every 50
  iterations, at the end of every epoch and once more when training finishes.

  Args:
    args: Settings object; ``args.batch_size`` and ``args.epochs`` are read
      here, and the whole object is stored in the ``Performance`` record.
  """
  # Uses the module-level ``dataset`` subset with the batch size from ``args``.
  data_loader_training = torch.utils.data.DataLoader(
      dataset, shuffle=True, collate_fn=collate_fn,
      num_workers=num_workers, batch_size=args.batch_size)

  device = "cpu"
  model = get_model(trained=False)

  # NOTE(review): the optimizer settings are hard-coded; args.lr, args.momentum,
  # args.use_sgd and args.scheduler are not used - confirm whether they should be.
  params = [p for p in model.parameters() if p.requires_grad]
  optimizer = torch.optim.SGD(params, lr=0.005, momentum=0.9, weight_decay=0.0005)
  # optimizer = torch.optim.Adam(params, lr=0.001)
  # lr_scheduler = torch.optim.lr_scheduler.StepLR(optimizer, step_size=3, gamma=0.1)
  # lr_scheduler = torch.optim.lr_scheduler.ExponentialLR(optimizer, gamma=0.95)
  lr_scheduler = None

  checkpoint_path = ROOT_DIR_SAVING + SAVEDMODEL_NAME
  final_model_path = ROOT_DIR_SAVING + "/fasterrcnn_resnet50fpn_SGD0005_None_Batch8_Epoch2.pth"

  itr = 1  # global iteration counter across epochs
  perf = Performance(ROOT_DIR_SAVING + SAVEDPERFORMANCE_NAME, args)

  for epoch in range(args.epochs):
    epoch_loss = 0
    iteration = 0  # iterations within this epoch
    model.train()

    # Training
    for images, targets in data_loader_training:
      images = list(image.to(device) for image in images)
      targets = [{k: v.to(device) for k, v in t.items()} for t in targets]

      # The loop sums every value of the returned dict as the training loss.
      loss_dict = model(images, targets)
      perf.add_score(loss_dict, training=True)
      print("Output model/Loss :", loss_dict)

      losses = sum(loss for loss in loss_dict.values())
      loss_value = losses.item()

      epoch_loss += loss_value

      optimizer.zero_grad()
      losses.backward()
      optimizer.step()

      if itr % 50 == 0:
        print(f"Iteration #{itr} loss: {loss_value}")
        torch.save(model.state_dict(), checkpoint_path)
        perf.save(perf)
        print("SavedOnce")

      itr += 1
      iteration += 1

    # update the learning rate
    if lr_scheduler is not None:
      lr_scheduler.step()

    # An empty loader would otherwise raise ZeroDivisionError here.
    mean_loss = epoch_loss / iteration if iteration else float('nan')
    print(f"Epoch #{epoch} loss: {mean_loss}")
    torch.save(model.state_dict(), final_model_path)
    perf.save(perf)
    print("SaveAtEpoch")

  # Final save; also covers the case where no epoch was run.
  torch.save(model.state_dict(), final_model_path)
  perf.save(perf)
  print("Training is over.")
  print("The model is saved.")

def test(args):
  return 0

In [None]:
if __name__ == "__main__":
    # Settings
    parser = argparse.ArgumentParser(description='Point Cloud Part Segmentation')
    parser.add_argument('-f') #https://stackoverflow.com/questions/42249982/systemexit-2-error-when-calling-parse-args-within-ipython?noredirect=1&lq=1
    parser.add_argument('--eval', type=bool,  default=False, help='Evaluate the model')
    parser.add_argument('--model', type=str, default='fasterrcnn', metavar='N',
                        choices=['fasterrcnn'], help='Model to use')
    parser.add_argument('--batch_size', type=int, default=8, metavar='batch_size',
                        help='Size of batch)')
    parser.add_argument('--epochs', type=int, default=20, metavar='N',
                        help='Number of episode to train ')
    parser.add_argument('--use_sgd', type=bool, default=True,
                        help='Use SGD')
    parser.add_argument('--lr', type=float, default=0.001, metavar='LR',
                        help='learning rate (default: 0.001, 0.1 if using sgd)')
    parser.add_argument('--momentum', type=float, default=0.9, metavar='M',
                        help='SGD momentum (default: 0.9)')
    parser.add_argument('--scheduler', type=str, default='cos', metavar='N',
                        choices=['cos', 'step'],
                        help='Scheduler to use, [cos, step]')

    args = parser.parse_args()

    if args.eval:
      training(args)
    else:
      test(args)
    