In [1]:
import os
import torch
import torchvision
from torchvision import transforms
from engine import train_one_epoch, evaluate
import utils
from coco import CocoSubset
torch.manual_seed(1)
dsNm = 'SubCOCO_Dog'
root = 'E:/Resource/Dataset/COCO/{}'.format(dsNm)
annDir = os.path.join(root, 'annotations/instances_{}.json')
# coco = COCO(annDir.format('train2017'))

loading annotations into memory...
Done (t=0.32s)
creating index...
index created!
loading annotations into memory...
Done (t=0.01s)
creating index...
index created!
Amount of train images:
Dataset CocoSubset
    Number of datapoints: 4385
    Root location: E:/Resource/Dataset/COCO/SubCOCO_Dog\train2017
Amount of validation images:
Dataset CocoSubset
    Number of datapoints: 177
    Root location: E:/Resource/Dataset/COCO/SubCOCO_Dog\val2017
4385
<class 'coco.CocoSubset'>
tensor([[[0.6863, 0.6431, 0.6235,  ..., 0.9961, 0.9961, 0.9961],
         [0.6784, 0.6706, 0.6902,  ..., 0.9961, 0.9961, 0.9961],
         [0.6941, 0.6510, 0.6510,  ..., 0.9961, 0.9961, 0.9961],
         ...,
         [0.0863, 0.1333, 0.1059,  ..., 0.9137, 0.9725, 0.9686],
         [0.1608, 0.1529, 0.0863,  ..., 0.9490, 0.9490, 0.9451],
         [0.1294, 0.0980, 0.0980,  ..., 0.9922, 0.8902, 0.9490]],

        [[0.5451, 0.5020, 0.4745,  ..., 1.0000, 1.0000, 1.0000],
         [0.5373, 0.5294, 0.5412,  ..., 1.0000, 1.

## Creating Datasets

In [2]:
both_transform = {
    'train': transforms.Compose([
        transforms.RandomHorizontalFlip(0.5)
    ]),
    'val':transforms.Compose([])}


coco_train = CocoSubset(os.path.join(root, 'train2017'),
                        annDir.format('train2017'),
                        both_transform=both_transform['train'])

coco_val = CocoSubset(os.path.join(root, 'val2017'),
                      annDir.format('val2017'),
                      both_transform=both_transform['val'])

loading annotations into memory...
Done (t=0.27s)
creating index...
index created!
loading annotations into memory...
Done (t=0.01s)
creating index...
index created!


In [3]:
print(len(coco_train))
print(type(coco_train))
print(coco_train[0])

4385
<class 'coco.CocoSubset'>
(tensor([[[0.6863, 0.6431, 0.6235,  ..., 0.9961, 0.9961, 0.9961],
         [0.6784, 0.6706, 0.6902,  ..., 0.9961, 0.9961, 0.9961],
         [0.6941, 0.6510, 0.6510,  ..., 0.9961, 0.9961, 0.9961],
         ...,
         [0.0863, 0.1333, 0.1059,  ..., 0.9137, 0.9725, 0.9686],
         [0.1608, 0.1529, 0.0863,  ..., 0.9490, 0.9490, 0.9451],
         [0.1294, 0.0980, 0.0980,  ..., 0.9922, 0.8902, 0.9490]],

        [[0.5451, 0.5020, 0.4745,  ..., 1.0000, 1.0000, 1.0000],
         [0.5373, 0.5294, 0.5412,  ..., 1.0000, 1.0000, 1.0000],
         [0.5608, 0.5098, 0.5098,  ..., 1.0000, 1.0000, 1.0000],
         ...,
         [0.1059, 0.1647, 0.1373,  ..., 0.5725, 0.6235, 0.6196],
         [0.1922, 0.1882, 0.1255,  ..., 0.6000, 0.6000, 0.5961],
         [0.1647, 0.1373, 0.1412,  ..., 0.6431, 0.5373, 0.5961]],

        [[0.4980, 0.4549, 0.4314,  ..., 0.9765, 0.9765, 0.9765],
         [0.4902, 0.4824, 0.4980,  ..., 0.9765, 0.9765, 0.9765],
         [0.5137, 0.4627, 

In [4]:
data_loader_train = torch.utils.data.DataLoader(coco_train,
                                         batch_size=2,
                                         shuffle=True,
                                         num_workers=8,
                                         collate_fn=utils.collate_fn)
data_loader_val = torch.utils.data.DataLoader(coco_val,
                                              batch_size=1,
                                              shuffle=False,
                                              num_workers=8,
                                              collate_fn=utils.collate_fn)

In [5]:
data_loader_train

<torch.utils.data.dataloader.DataLoader at 0x236089c5ec8>

## Defining Model

In [6]:
from torchvision.models.detection.faster_rcnn import FastRCNNPredictor
from torchvision.models.detection.mask_rcnn import MaskRCNNPredictor


def get_instance_segmentation_model(num_classes):
    model = torchvision.models.detection.maskrcnn_resnet50_fpn(pretrained=True)
    in_features = model.roi_heads.box_predictor.cls_score.in_features
    model.roi_heads.box_predictor = FastRCNNPredictor(in_features, num_classes)
    
    in_features_mask = model.roi_heads.mask_predictor.conv5_mask.in_channels
    hidden_layer = 256
    model.roi_heads.mask_predictor = MaskRCNNPredictor(in_features_mask,
                                                      hidden_layer,
                                                      num_classes)
    return model

In [7]:
device = torch.device('cuda') if torch.cuda.is_available() else torch.device('cpu')
# device = torch.device('cpu')

num_classes = 5

model = get_instance_segmentation_model(num_classes)
model.to(device)

params = [p for p in model.parameters() if p.requires_grad]
optimizer = torch.optim.SGD(params, 
                            lr=0.005,
                            momentum=0.9,
                            weight_decay=0.0005)
lr_scheduler = torch.optim.lr_scheduler.StepLR(optimizer,
                                              step_size=3,
                                              gamma=0.1)

In [8]:
num_epochs = 1

for epoch in range(num_epochs):
    train_one_epoch(model, optimizer, data_loader_train, device, epoch, print_freq=10)
    lr_scheduler.step()
    evaluate(model, data_loader_val, device=device)

	nonzero()
Consider using one of the following signatures instead:
	nonzero(*, bool as_tuple) (Triggered internally at  ..\torch\csrc\utils\python_arg_parser.cpp:766.)
  keep = keep.nonzero().squeeze(1)


Epoch: [0]  [   0/2193]  eta: 19:28:41  lr: 0.000010  loss: 1.6075 (1.6075)  loss_classifier: 1.5875 (1.5875)  loss_box_reg: 0.0000 (0.0000)  loss_mask: 0.0000 (0.0000)  loss_objectness: 0.0152 (0.0152)  loss_rpn_box_reg: 0.0048 (0.0048)  time: 31.9751  data: 17.0712  max mem: 2359
Epoch: [0]  [  10/2193]  eta: 2:18:55  lr: 0.000060  loss: 1.6075 (1.5613)  loss_classifier: 1.5454 (1.4853)  loss_box_reg: 0.0000 (0.0000)  loss_mask: 0.0000 (0.0000)  loss_objectness: 0.0589 (0.0691)  loss_rpn_box_reg: 0.0048 (0.0069)  time: 3.8182  data: 1.5528  max mem: 2801


KeyboardInterrupt: 