In [3]:
# Mount Google Drive at /content/drive; the dataset and saved-model paths
# used later in this notebook all live under /content/drive/MyDrive.
from google.colab import drive
drive.mount('/content/drive')

Mounted at /content/drive


In [6]:
import os
import pickle as cPickle
import sys
import xml.etree.ElementTree as ET

import cv2
import matplotlib.pyplot as plt
import numpy as np
import torch
import torch.nn as nn
import torchvision
import torchvision.transforms as T
from torchvision.models.detection import FasterRCNN
from torchvision.models.detection.faster_rcnn import FastRCNNPredictor
from torchvision.models.detection.rpn import AnchorGenerator

!pip install albumentations==0.4.6
import albumentations as A
from albumentations.pytorch.transforms import ToTensorV2

sys.path.insert(0,"/content/drive/MyDrive/Github/BirdDetection-FeaturesExtraction/dataset")

In [None]:
# Manage multiple versions of python with pip
# py -3.8 -m pip install package
#https://stackoverflow.com/questions/2812520/dealing-with-multiple-python-versions-and-pip
# Inspired by torchvision example: https://pytorch.org/tutorials/intermediate/torchvision_tutorial.html

class BirdDataset(torch.utils.data.Dataset):
    """Bird detection dataset backed by image files and XML box annotations.

    Layout expected under ``root_dir``:
        all_images/  one image file per sample
        all_labels/  one XML annotation file per sample

    Images and annotations are paired by position after sorting both
    directory listings, so matching files must sort into the same order.
    """

    def __init__(self, root_dir, transforms=None):
        """
        Args:
            root_dir (string): Directory containing ``all_images`` and ``all_labels``.
            transforms (callable, optional): Called as
                ``transforms(image=..., bboxes=..., labels=...)``; must return a
                mapping with at least the keys ``'image'`` and ``'bboxes'``.
        """
        self.root = root_dir
        self.transforms = transforms

        self.imgs = sorted(os.listdir(os.path.join(root_dir, "all_images")))   # image file names
        self.boxes = sorted(os.listdir(os.path.join(root_dir, "all_labels")))  # annotation file names

    def __len__(self):
        """Return the number of samples (one per image file)."""
        return len(self.imgs)

    @staticmethod
    def _read_boxes(box_path):
        """Return ``[xmin, ymin, xmax, ymax]`` for every ``object/bndbox`` in an XML file."""
        root = ET.parse(box_path).getroot()
        return [
            [float(bndbox.find(tag).text) for tag in ("xmin", "ymin", "xmax", "ymax")]
            for bndbox in root.findall(".//object/bndbox")
        ]

    def __getitem__(self, idx):
        """Load and return the ``(image, target)`` pair at index ``idx``.

        Returns:
            img: RGB image scaled to [0, 1]; a float32 numpy array when no
                transforms are set, otherwise whatever the transforms return
                under ``'image'``.
            target (dict): ``boxes`` (N, 4) float32, ``labels`` (N,) int64 (all
                ones, the single "bird" class), ``image_id`` (1,), ``area`` (N,)
                and ``iscrowd`` (N,) int64 zeros. ``boxes`` and ``area``
                describe the boxes *after* the transforms were applied.

        Raises:
            OSError: if the image file cannot be read or decoded.
        """
        img_path = os.path.join(self.root, "all_images", self.imgs[idx])
        box_path = os.path.join(self.root, "all_labels", self.boxes[idx])

        img = cv2.imread(img_path, cv2.IMREAD_COLOR)
        if img is None:
            # cv2.imread reports failure by returning None instead of raising.
            raise OSError("Could not read image: {}".format(img_path))
        img = cv2.cvtColor(img, cv2.COLOR_BGR2RGB).astype(np.float32)
        img /= 255.0

        # reshape(-1, 4) keeps a well-formed (0, 4) tensor for images without birds.
        boxes = torch.as_tensor(self._read_boxes(box_path), dtype=torch.float32).reshape(-1, 4)

        if self.transforms is not None:
            sample = self.transforms(
                image=img,
                bboxes=boxes.tolist(),
                labels=[1] * boxes.shape[0],
            )
            img = sample['image']
            # The transforms may rescale or drop boxes; rebuild the tensor from their output.
            boxes = torch.as_tensor(sample['bboxes'], dtype=torch.float32).reshape(-1, 4)

        # Everything below is derived from the final boxes so that the number of
        # labels and the areas always agree with what the model receives.
        num_objs = boxes.shape[0]
        target = {
            "boxes": boxes,
            "labels": torch.ones((num_objs,), dtype=torch.int64),  # only one class: a bird
            "image_id": torch.tensor([idx]),
            "area": (boxes[:, 3] - boxes[:, 1]) * (boxes[:, 2] - boxes[:, 0]),
            "iscrowd": torch.zeros((num_objs,), dtype=torch.int64),
        }

        return img, target

In [None]:
def get_model(trained=True, save_path=None):
  """Build the Faster R-CNN (ResNet-50 FPN) bird detector on the CPU.

  The pre-trained torchvision detector is created and its box-predictor head
  is replaced with a two-class head (background + bird).

  Args:
      trained (bool): If True, load fine-tuned weights from ``save_path``.
      save_path (str, optional): Path to a saved ``state_dict``; required
          when ``trained`` is True.

  Returns:
      The model, placed on the CPU.

  Raises:
      ValueError: if ``trained`` is True but no ``save_path`` is given.
  """
  device = 'cpu'

  # Fail early (before building the network) with an explicit error instead of
  # letting torch.load(None) crash later with an unrelated message.
  if trained and save_path is None:
    raise ValueError("No path to the saved model")

  model = torchvision.models.detection.fasterrcnn_resnet50_fpn(pretrained=True)
  num_classes = 2  # 1 class (bird) + background

  # get number of input features for the classifier
  in_features = model.roi_heads.box_predictor.cls_score.in_features

  # replace the pre-trained head with a new one
  model.roi_heads.box_predictor = FastRCNNPredictor(in_features, num_classes)
  model.to(device)

  if trained:
    # map_location lets a checkpoint that was saved from a GPU load on the CPU.
    model.load_state_dict(torch.load(save_path, map_location=device))

  return model

In [None]:
# UTILS #
#########
def collate_fn(batch):
  """Regroup a batch of samples field by field, for use with DataLoader.

  A list of (image, target) samples becomes ((image, ...), (target, ...));
  the items are only regrouped, not stacked.
  """
  per_field = zip(*batch)
  return tuple(per_field)

def get_transform(train):
  """Build the albumentations pipeline for the training or the test set.

  Every image is resized to 224x224 and converted to a tensor. For the
  training set a random flip (probability 0.5) is added as data augmentation.

  Args:
      train (bool): True to include the training-only augmentation.

  Returns:
      An ``A.Compose`` pipeline configured for 'pascal_voc' boxes whose labels
      are passed in the ``labels`` field.
  """
  transforms = [A.Resize(224, 224)]
  if train:
      # The probability must be passed by keyword: the first positional
      # parameter of an albumentations transform is `always_apply`, so
      # `A.Flip(0.5)` would flip every single image.
      transforms.append(A.Flip(p=0.5))
  transforms.append(ToTensorV2(p=1.0))
  return A.Compose(transforms, bbox_params={'format': 'pascal_voc', 'min_visibility': 0.6, 'label_fields': ['labels']})


In [None]:
# Global variables
ROOT_DIR_DATA = "/content/drive/MyDrive/Thesis/dataset"
ROOT_DIR_SAVING = "/content/drive/MyDrive/Github/BirdDetection-FeaturesExtraction/savedmodel"
SAVEDMODEL_NAME = "/fasterrcnn_resnet50fpn_SGD0005_None_Batch8_Epoch2.pth"
SPLIT_SEED = 42        # fixes the train/test split across kernel restarts
NUM_TEST_SAMPLES = 50  # number of samples held out for the test set

# instantiate dataset objects: same files, with and without training augmentation
ds = BirdDataset(ROOT_DIR_DATA, get_transform(train=True))
ds_test = BirdDataset(ROOT_DIR_DATA, get_transform(train=False))

# set hyper-parameters
params = {'batch_size': 8, 'num_workers': 2}
num_epochs = 1
num_classes = 2
num_coord = 4

# split the dataset in train and test set
if len(ds) <= NUM_TEST_SAMPLES:
    raise ValueError(
        "Dataset has {} samples; more than {} are needed to keep a non-empty training set".format(
            len(ds), NUM_TEST_SAMPLES
        )
    )
# A dedicated, seeded generator makes the permutation (and so the split)
# identical on every run without touching the global RNG state.
split_generator = torch.Generator().manual_seed(SPLIT_SEED)
indices = torch.randperm(len(ds), generator=split_generator).tolist()
dataset = torch.utils.data.Subset(ds, indices[:-NUM_TEST_SAMPLES])
dataset_test = torch.utils.data.Subset(ds_test, indices[-NUM_TEST_SAMPLES:])

# define training and validation data loaders
data_loader_training = torch.utils.data.DataLoader(dataset, shuffle=True, collate_fn=collate_fn, **params)
# No shuffling for the test loader: the evaluation order stays fixed between runs.
data_loader_test = torch.utils.data.DataLoader(dataset_test, shuffle=False, collate_fn=collate_fn, **params)