In [1]:
import os

In [2]:
# Move the working directory up one level.
# NOTE: not idempotent - re-running this cell moves up another level.
os.chdir(os.pardir)

In [3]:
# Display the current working directory (IPython automagic) to confirm the chdir
pwd

'/Users/A.IVA/Documents/jupyter_notebooks/coursera_and_blogs/clouds'

### 1 Examples

##### 1.1 Example from MaskRCNN: Add a different backbone

In [None]:
# Imports live in this cell so the example also runs on a fresh kernel
# (the other cells that import these names come later in the notebook).
import torch
import torchvision
from torchvision.models.detection import MaskRCNN
from torchvision.models.detection.rpn import AnchorGenerator

# load a pre-trained model for classification and return only the features
backbone = torchvision.models.mobilenet_v2(pretrained=True).features

# MaskRCNN needs to know the number of output channels in a backbone. For mobilenet_v2, it's 1280 so we need to add it here
backbone.out_channels = 1280

# 5 anchor sizes x 3 aspect ratios; the outer tuples hold one entry per feature map,
# and this backbone returns a single feature map
anchor_generator = AnchorGenerator(sizes=((32, 64, 128, 256, 512),),
                                   aspect_ratios=((0.5, 1.0, 2.0),))

# NOTE(review): recent torchvision releases appear to key the single feature map
# with the string '0' instead of the int 0 - confirm against the installed version
roi_pooler = torchvision.ops.MultiScaleRoIAlign(featmap_names=[0],
                                                output_size=7,
                                                sampling_ratio=2)

mask_roi_pooler = torchvision.ops.MultiScaleRoIAlign(featmap_names=[0],
                                                     output_size=14,
                                                     sampling_ratio=2)

model = MaskRCNN(backbone,
                 num_classes=2,
                 rpn_anchor_generator=anchor_generator,
                 box_roi_pool=roi_pooler,
                 mask_roi_pool=mask_roi_pooler)
model.eval()

# Two random images of different sizes; the model takes a list of 3-D image tensors
x = [torch.rand(3, 300, 400), torch.rand(3, 500, 400)]

# Inference only: skip autograd bookkeeping so no graph is built or kept alive
with torch.no_grad():
    predictions = model(x)

##### 1.2 Finetuning a pretrained model (what we have been doing until now)

In [None]:
import torchvision
from torchvision.models.detection.faster_rcnn import FastRCNNPredictor

# user-defined number of classes for the new classifier head
num_classes = 2

# start from a detection model pre-trained on COCO
model = torchvision.models.detection.fasterrcnn_resnet50_fpn(pretrained=True)

# number of input features expected by the existing classifier
in_features = model.roi_heads.box_predictor.cls_score.in_features

# swap the pre-trained head for a fresh one sized for num_classes
model.roi_heads.box_predictor = FastRCNNPredictor(in_features, num_classes)


##### 1.3 Repeat 1.1 but with a model loaded from `pretrainedmodels`

See the [`pretrained-models.pytorch` README](https://github.com/Cadene/pretrained-models.pytorch#modelfeatures) for details on `model.features`.

In [28]:
import torch
import numpy as np
import pretrainedmodels

In [5]:
# Print the list of model names exposed by pretrainedmodels
print(pretrainedmodels.model_names)

['fbresnet152', 'bninception', 'resnext101_32x4d', 'resnext101_64x4d', 'inceptionv4', 'inceptionresnetv2', 'alexnet', 'densenet121', 'densenet169', 'densenet201', 'densenet161', 'resnet18', 'resnet34', 'resnet50', 'resnet101', 'resnet152', 'inceptionv3', 'squeezenet1_0', 'squeezenet1_1', 'vgg11', 'vgg11_bn', 'vgg13', 'vgg13_bn', 'vgg16', 'vgg16_bn', 'vgg19_bn', 'vgg19', 'nasnetamobile', 'nasnetalarge', 'dpn68', 'dpn68b', 'dpn92', 'dpn98', 'dpn131', 'dpn107', 'xception', 'senet154', 'se_resnet50', 'se_resnet101', 'se_resnet152', 'se_resnext50_32x4d', 'se_resnext101_32x4d', 'cafferesnet101', 'pnasnet5large', 'polynet']


In [29]:
# Look the constructor up by name on the package, then build the model
# requesting the 'imagenet' pretrained setting
architecture = 'resnet18'
model = getattr(pretrainedmodels, architecture)(num_classes=1000, pretrained='imagenet')

In [30]:
backbone = model.features

# Dummy all-zero batch (1 image, 3 channels, 448x448) used only to read off
# the shape of the feature map the backbone returns
tmp = torch.as_tensor(np.zeros(shape=(1,3,224*2,224*2)), dtype=torch.float32)

# Run the probe in eval mode and without autograd: if the model is still in
# training mode, a forward pass on this dummy batch would update its BatchNorm
# running statistics. The previous mode is restored afterwards so later cells
# see the model exactly as before.
was_training = model.training
model.eval()
try:
    with torch.no_grad():
        feature_map_size = backbone(tmp).size()
finally:
    model.train(was_training)

feature_map_size

torch.Size([1, 512, 14, 14])

In [39]:
# Check what kind of object `model.features` is; the recorded output is `method`,
# i.e. a bound method rather than an nn.Module
type(backbone)

method

In [45]:
class MyBackbone(torch.nn.Module):
    """Adapt a feature extractor so it can be used as a detection backbone.

    The wrapper stores ``out_channels`` next to the callable and forwards
    every call to it unchanged.

    It subclasses ``torch.nn.Module`` rather than ``object`` so that a parent
    model can register it as a submodule; ``eval()``/``train()``, ``to()``,
    ``parameters()`` and ``state_dict()`` then reach the backbone weights.

    Args:
        backbone: Callable mapping an input batch to a feature map. May be an
            ``nn.Module``, a bound method of an ``nn.Module`` (such as
            ``model.features``), or any other callable.
        out_channels: Number of channels in the feature map that ``backbone``
            returns.
    """

    def __init__(self, backbone, out_channels):
        super().__init__()
        # An nn.Module assigned here is registered as a submodule automatically;
        # any other callable is stored as an ordinary attribute.
        self.backbone = backbone
        self.out_channels = out_channels

        # A bound method keeps its weights on the module it is bound to, so
        # register that owner to make the weights visible through this wrapper.
        owner = getattr(backbone, "__self__", None)
        if isinstance(owner, torch.nn.Module):
            self.backbone_owner = owner

    def forward(self, input):
        """Return ``self.backbone(input)``."""
        return self.backbone(input)


# 512 matches the channel dimension of the backbone output recorded earlier
# (torch.Size([1, 512, 14, 14]))
my_backbone = MyBackbone(backbone=backbone, out_channels=512)

# Sanity check: the wrapper exposes the `out_channels` attribute
hasattr(my_backbone, "out_channels")

True

In [46]:
import torch
import torchvision
from torchvision.models.detection import MaskRCNN
from torchvision.models.detection.rpn import AnchorGenerator


# 5 anchor sizes x 3 aspect ratios; the outer tuples hold one entry per feature map,
# and the wrapped backbone returns a single feature map
anchor_generator = AnchorGenerator(sizes=((32, 64, 128, 256, 512),),
                                   aspect_ratios=((0.5, 1.0, 2.0),))

# NOTE(review): recent torchvision releases appear to key the single feature map
# with the string '0' instead of the int 0 - confirm against the installed version
roi_pooler = torchvision.ops.MultiScaleRoIAlign(featmap_names=[0],
                                                output_size=7,
                                                sampling_ratio=2)

mask_roi_pooler = torchvision.ops.MultiScaleRoIAlign(featmap_names=[0],
                                                     output_size=14,
                                                     sampling_ratio=2)

model = MaskRCNN(my_backbone,
                 num_classes=2,
                 rpn_anchor_generator=anchor_generator,
                 box_roi_pool=roi_pooler,
                 mask_roi_pool=mask_roi_pooler)
model.eval()

# Two random images of different sizes; the model takes a list of 3-D image tensors
x = [torch.rand(3, 300, 400), torch.rand(3, 500, 400)]

# Inference only: skip autograd bookkeeping so no graph is built or kept alive
with torch.no_grad():
    predictions = model(x)


In [53]:
# Summarise the model output: one entry per input image, each a dict of tensors
(
    len(predictions),                 # number of prediction entries
    type(predictions[0]),             # container type of a single entry
    predictions[0].keys(),            # fields available per entry
    predictions[0]['boxes'].size(),   # size of the 'boxes' tensor of the first entry
)

(2,
 dict,
 dict_keys(['boxes', 'labels', 'scores', 'masks']),
 torch.Size([100, 4]))