In [7]:
# Import statements
import torch
import torchvision as tv
import transform as T

from torchvision.models.detection.faster_rcnn import FastRCNNPredictor as faster_rcnn_predictor
from torchvision.models.detection.rpn import AnchorGenerator as anchor_generator
from torchvision.models.detection.mask_rcnn import MaskRCNNPredictor as mask_rcnn_predictor
from torchvision.models.detection import FasterRCNN as faster_rcnn


import torch.utils.tensorboard
import os
import numpy as np
from PIL import Image 

In [3]:
# Dataset Sub Class
class PennFudanDataset(object):
    """Map-style dataset over a Penn-Fudan style directory of images and instance masks.

    ``root`` is expected to contain two sibling folders, ``PNGImages`` and
    ``PedMasks``. Files are paired purely by their position in the sorted
    directory listings, so both folders must hold the same number of files
    in matching order.

    Args:
        root: Directory that contains the ``PNGImages`` and ``PedMasks`` folders.
        transforms: Optional callable invoked as ``transforms(image, target)``
            that returns the transformed ``(image, target)`` pair, or ``None``.
    """

    # One shared spelling for each folder name: the listing in __init__ and the
    # path building in __getitem__ must agree on case-sensitive filesystems.
    IMAGE_DIR = "PNGImages"
    MASK_DIR = "PedMasks"

    def __init__(self, root, transforms):
        self.root = root
        self.transforms = transforms

        # Sorted so that images[i] and masks[i] refer to the same sample.
        self.images = sorted(os.listdir(os.path.join(root, self.IMAGE_DIR)))
        self.masks = sorted(os.listdir(os.path.join(root, self.MASK_DIR)))

    @staticmethod
    def _build_target(mask, idx):
        """Turn a 2-D instance-id mask array into a detection target dict.

        Args:
            mask: 2-D integer array in which every distinct non-zero value
                marks one object instance (0 is assumed to be background).
            idx: Index of the sample; stored as ``image_id``.

        Returns:
            dict with ``boxes`` (N, 4) float32 in ``[x_min, y_min, x_max, y_max]``
            order, ``labels`` (N,) int64 all ones, ``masks`` (N, H, W) uint8,
            ``image_id`` scalar tensor, ``area`` (N,) float32 and ``iscrowd``
            (N,) int64 all zeros.
        """
        object_ids = np.unique(mask)
        # Drop the background id explicitly instead of assuming it is the
        # first unique value (which fails when a mask has no background pixel).
        object_ids = object_ids[object_ids != 0]

        # Broadcast comparison -> one boolean (H, W) mask per instance.
        masks = mask == object_ids[:, None, None]

        boxes = []
        for instance_mask in masks:
            rows, cols = np.where(instance_mask)
            boxes.append(
                [int(cols.min()), int(rows.min()), int(cols.max()), int(rows.max())]
            )

        number_objs = len(object_ids)
        # reshape keeps the (N, 4) layout even when there are no objects.
        boxes = torch.as_tensor(boxes, dtype=torch.float32).reshape(-1, 4)

        return {
            "boxes": boxes,
            "labels": torch.ones((number_objs, ), dtype=torch.int64),
            "masks": torch.as_tensor(masks, dtype=torch.uint8),
            "image_id": torch.tensor(idx),
            # Computed on the tensor: the plain Python list cannot be
            # indexed with [:, k].
            "area": (boxes[:, 3] - boxes[:, 1]) * (boxes[:, 2] - boxes[:, 0]),
            "iscrowd": torch.zeros((number_objs, ), dtype=torch.int64)
        }

    def __getitem__(self, idx):
        """Load sample ``idx`` and return ``(image, target)``.

        The image is opened as RGB; the mask is read without any mode
        conversion so that its pixel values stay usable as instance ids.
        """
        image_path = os.path.join(self.root, self.IMAGE_DIR, self.images[idx])
        mask_path = os.path.join(self.root, self.MASK_DIR, self.masks[idx])

        # Context managers release the underlying file handles promptly.
        with Image.open(image_path) as raw_image:
            image = raw_image.convert("RGB")
        with Image.open(mask_path) as raw_mask:
            mask = np.array(raw_mask)

        target = self._build_target(mask, idx)

        if self.transforms is not None:
            image, target = self.transforms(image, target)

        return image, target

    def __len__(self):
        """Number of samples, i.e. the number of files found in the image folder."""
        return len(self.images)

In [4]:
# Bind the package under its full name: the first cell only imports it as
# `tv`, while the cells below refer to `torchvision.` directly.
import torchvision
# Bare trailing expression so the notebook displays the installed version.
torchvision.__version__


'0.5.0'

In [5]:
# Fine-tuning route: start from the pre-trained Faster R-CNN (ResNet-50 + FPN)
# and swap only the box-classification head for one sized to this task.
pre_trained_model = torchvision.models.detection.fasterrcnn_resnet50_fpn(
    pretrained=True,
)

# Number of output classes for the new head.
num_classes = 2

# The replacement head must accept the same feature width as the old one.
in_features = pre_trained_model.roi_heads.box_predictor.cls_score.in_features
pre_trained_model.roi_heads.box_predictor = faster_rcnn_predictor(
    in_features,
    num_classes,
)

Downloading: "https://download.pytorch.org/models/fasterrcnn_resnet50_fpn_coco-258fb6c6.pth" to C:\Users\onots/.cache\torch\checkpoints\fasterrcnn_resnet50_fpn_coco-258fb6c6.pth
100.0%


In [8]:
# Alternative route: assemble Faster R-CNN around a different backbone.

# Take a pre-trained classification network and keep only its
# feature-extraction layers.
back_bone = torchvision.models.mobilenet_v2(pretrained=True).features

# FasterRCNN needs to know how many channels the backbone emits;
# for mobilenet_v2 that is 1280, so it is attached here by hand.
back_bone.out_channels = 1280

# The RPN produces 5 * 3 anchors per spatial location: 5 sizes combined
# with 3 aspect ratios. Each argument is a Tuple[Tuple[...]] holding one
# inner tuple per feature map.
anchor_gen = anchor_generator(
    sizes=((32, 64, 128, 256, 512), ),
    aspect_ratios=((0.5, 1.0, 2.0), ),
)

# Select which feature maps feed the region-of-interest cropping and the
# size of each crop after rescaling. A backbone that returns a single
# Tensor is addressed as feature map [0]; more generally the backbone
# returns an OrderedDict[Tensor] and featmap_names picks entries from it.
# NOTE(review): newer torchvision releases appear to expect string names
# (['0']) here - confirm against the installed version.
roi_pooler = torchvision.ops.MultiScaleRoIAlign(
    featmap_names=[0],
    output_size=7,
    sampling_ratio=2,
)

# Combine backbone, anchor generator and RoI pooler into one detector.
pre_trained_model = faster_rcnn(
    back_bone,
    num_classes=2,
    rpn_anchor_generator=anchor_gen,
    box_roi_pool=roi_pooler,
)


In [9]:
# Instance-segmentation model for the PennFudan dataset

def get_model_instance_segmentation(num_classes):
    """Return a pre-trained Mask R-CNN whose heads are resized for ``num_classes``.

    Both the box predictor and the mask predictor of the pre-trained
    ``maskrcnn_resnet50_fpn`` model are replaced by fresh heads; the rest
    of the network is left as loaded.

    Args:
        num_classes: Number of classes the new heads should output.

    Returns:
        The modified Mask R-CNN model.
    """
    mask_hidden_channels = 256

    model = torchvision.models.detection.maskrcnn_resnet50_fpn(pretrained=True)
    heads = model.roi_heads

    # New box head, fed by the same number of features as the old one.
    box_in_features = heads.box_predictor.cls_score.in_features
    heads.box_predictor = faster_rcnn_predictor(box_in_features, num_classes)

    # New mask head, fed by the same number of channels as the old one.
    mask_in_channels = heads.mask_predictor.conv5_mask.in_channels
    heads.mask_predictor = mask_rcnn_predictor(
        mask_in_channels, mask_hidden_channels, num_classes
    )

    return model

In [15]:
from torchvision import transforms as T


class _ImageOnly(object):
    """Adapt an image-only transform to the ``(image, target)`` convention."""

    def __init__(self, transform):
        self.transform = transform

    def __call__(self, image, target):
        # The target is passed through untouched.
        return self.transform(image), target


class _RandomHorizontalFlip(object):
    """Flip a tensor image and its boxes/masks left-right with probability ``prob``."""

    def __init__(self, prob):
        self.prob = prob

    def __call__(self, image, target):
        if torch.rand(1).item() >= self.prob:
            return image, target

        width = image.shape[-1]
        image = image.flip(-1)

        # Work on a shallow copy so the caller's dict is not modified.
        target = dict(target)
        if "boxes" in target and target["boxes"].numel() > 0:
            boxes = target["boxes"]
            flipped = boxes.clone()
            # Mirroring swaps the roles of x_min and x_max.
            flipped[:, 0] = width - boxes[:, 2]
            flipped[:, 2] = width - boxes[:, 0]
            target["boxes"] = flipped
        if "masks" in target:
            target["masks"] = target["masks"].flip(-1)
        return image, target


class _Compose(object):
    """Chain ``(image, target)`` transforms; also usable on an image alone."""

    def __init__(self, transforms):
        self.transforms = transforms

    def __call__(self, image, target=None):
        # An image-only call keeps working and returns just the image.
        paired = target is not None
        if not paired:
            target = {}
        for transform in self.transforms:
            image, target = transform(image, target)
        return (image, target) if paired else image


def get_transform(train):
    """Build the preprocessing pipeline applied to every dataset sample.

    The stock ``torchvision.transforms`` classes accept only an image, but
    the dataset calls its transform as ``transforms(image, target)``. The
    pipeline returned here accepts that pair and keeps boxes and masks in
    sync with the image when it is flipped.

    Args:
        train: When true, a horizontal flip with probability 0.5 is appended
            after the tensor conversion.

    Returns:
        A callable. ``pipeline(image, target)`` returns ``(image, target)``;
        ``pipeline(image)`` returns the transformed image only.
    """
    transforms = [_ImageOnly(T.ToTensor())]

    if train:
        transforms.append(_RandomHorizontalFlip(0.5))
    return _Compose(transforms)

In [25]:
from engine import train_one_epoch, evaluate
import utils

# Smoke test: push one batch through the detector in training mode, then
# run it once in inference mode on random images.
model = torchvision.models.detection.fasterrcnn_resnet50_fpn(pretrained=True)

cwd = os.path.join(os.getcwd(), "datasets", "PennFudanPed")
dataset = PennFudanDataset(cwd, get_transform(train=True))
# NOTE(review): worker processes (num_workers > 0) may be unable to load a
# dataset class defined inside the notebook on some platforms - confirm.
data_loader = torch.utils.data.DataLoader(
    dataset,
    batch_size=2,
    shuffle=True,
    num_workers=4,
    collate_fn=utils.collate_fn,
)

# Training mode: the model takes images together with their targets and
# returns a dict of losses.
images, targets = next(iter(data_loader))
images = list(images)
targets = [dict(target) for target in targets]
output = model(images, targets)

# Inference mode: only images are passed in and the model returns predictions.
model.eval()
x = [torch.rand(3, 300, 400), torch.rand(3, 500, 400)]
predictions = model(x)

ModuleNotFoundError: No module named 'engine'

In [20]:
# Dataset directory, resolved against the notebook's working directory.
cwd = os.path.join(
    os.getcwd(),
    "datasets",
    "PennFudanPed",
)
print(cwd)

d:\onots\Sources\Personal\PyTorch\Object Detection\datasets\PennFudanPed
