In [1]:
import torch

In [2]:
import torchvision
from torchvision.models.detection import FasterRCNN
from torchvision.models.detection.rpn import AnchorGenerator

# load a pre-trained model for classification and return
# only the features
backbone = torchvision.models.mobilenet_v2(pretrained=True).features
# FasterRCNN needs to know the number of
# output channels in a backbone. For mobilenet_v2, it's 1280
# so we need to add it here
backbone.out_channels = 1280

# let's make the RPN generate 5 x 3 anchors per spatial
# location, with 5 different sizes and 3 different aspect
# ratios. We have a Tuple[Tuple[int]] because each feature
# map could potentially have different sizes and
# aspect ratios
anchor_generator = AnchorGenerator(sizes=((32, 64, 128, 256, 512),),
                                   aspect_ratios=((0.5, 1.0, 2.0),))

# let's define what are the feature maps that we will
# use to perform the region of interest cropping, as well as
# the size of the crop after rescaling.
# if your backbone returns a Tensor, featmap_names is expected to
# be [0]. More generally, the backbone should return an
# OrderedDict[Tensor], and in featmap_names you can choose which
# feature maps to use.
roi_pooler = torchvision.ops.MultiScaleRoIAlign(featmap_names=['0'],
                                                output_size=7,
                                                sampling_ratio=2)

# put the pieces together inside a FasterRCNN model
model = FasterRCNN(backbone,
                   num_classes=2,
                   rpn_anchor_generator=anchor_generator,
                   box_roi_pool=roi_pooler)

In [3]:
roi_pooler

MultiScaleRoIAlign(featmap_names=['0'], output_size=(7, 7), sampling_ratio=2)

In [4]:
anchor_generator

AnchorGenerator()

In [6]:
model

FasterRCNN(
  (transform): GeneralizedRCNNTransform(
      Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225])
      Resize(min_size=(800,), max_size=1333, mode='bilinear')
  )
  (backbone): Sequential(
    (0): ConvNormActivation(
      (0): Conv2d(3, 32, kernel_size=(3, 3), stride=(2, 2), padding=(1, 1), bias=False)
      (1): BatchNorm2d(32, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (2): ReLU6(inplace=True)
    )
    (1): InvertedResidual(
      (conv): Sequential(
        (0): ConvNormActivation(
          (0): Conv2d(32, 32, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), groups=32, bias=False)
          (1): BatchNorm2d(32, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
          (2): ReLU6(inplace=True)
        )
        (1): Conv2d(32, 16, kernel_size=(1, 1), stride=(1, 1), bias=False)
        (2): BatchNorm2d(16, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      )
    )
    (2): InvertedResidual(


In [10]:
children_counter = 0
for n, c in model.named_children():
    print("Counter:", children_counter)
    print("Layer Name:", n)
    print("Layer Config:", c)
    children_counter += 1

Counter: 0
Layer Name: transform
Layer Config: GeneralizedRCNNTransform(
    Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225])
    Resize(min_size=(800,), max_size=1333, mode='bilinear')
)
Counter: 1
Layer Name: backbone
Layer Config: Sequential(
  (0): ConvNormActivation(
    (0): Conv2d(3, 32, kernel_size=(3, 3), stride=(2, 2), padding=(1, 1), bias=False)
    (1): BatchNorm2d(32, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    (2): ReLU6(inplace=True)
  )
  (1): InvertedResidual(
    (conv): Sequential(
      (0): ConvNormActivation(
        (0): Conv2d(32, 32, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), groups=32, bias=False)
        (1): BatchNorm2d(32, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
        (2): ReLU6(inplace=True)
      )
      (1): Conv2d(32, 16, kernel_size=(1, 1), stride=(1, 1), bias=False)
      (2): BatchNorm2d(16, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    )
  )
  (2): Invert

In [11]:
model._modules

OrderedDict([('transform',
              GeneralizedRCNNTransform(
                  Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225])
                  Resize(min_size=(800,), max_size=1333, mode='bilinear')
              )),
             ('backbone',
              Sequential(
                (0): ConvNormActivation(
                  (0): Conv2d(3, 32, kernel_size=(3, 3), stride=(2, 2), padding=(1, 1), bias=False)
                  (1): BatchNorm2d(32, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
                  (2): ReLU6(inplace=True)
                )
                (1): InvertedResidual(
                  (conv): Sequential(
                    (0): ConvNormActivation(
                      (0): Conv2d(32, 32, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), groups=32, bias=False)
                      (1): BatchNorm2d(32, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
                      (2): ReLU6(inplace=True)
         

In [12]:
model._modules.keys()

odict_keys(['transform', 'backbone', 'rpn', 'roi_heads'])

In [13]:
from copy import deepcopy

In [14]:
new_model = deepcopy(model)

In [16]:
new_model._modules.keys()

odict_keys(['transform', 'backbone', 'rpn', 'roi_heads'])

In [17]:
new_model

FasterRCNN(
  (transform): GeneralizedRCNNTransform(
      Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225])
      Resize(min_size=(800,), max_size=1333, mode='bilinear')
  )
  (backbone): Sequential(
    (0): ConvNormActivation(
      (0): Conv2d(3, 32, kernel_size=(3, 3), stride=(2, 2), padding=(1, 1), bias=False)
      (1): BatchNorm2d(32, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (2): ReLU6(inplace=True)
    )
    (1): InvertedResidual(
      (conv): Sequential(
        (0): ConvNormActivation(
          (0): Conv2d(32, 32, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), groups=32, bias=False)
          (1): BatchNorm2d(32, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
          (2): ReLU6(inplace=True)
        )
        (1): Conv2d(32, 16, kernel_size=(1, 1), stride=(1, 1), bias=False)
        (2): BatchNorm2d(16, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      )
    )
    (2): InvertedResidual(


In [19]:
new_model._modules.pop('roi_heads')

RoIHeads(
  (box_roi_pool): MultiScaleRoIAlign(featmap_names=['0'], output_size=(7, 7), sampling_ratio=2)
  (box_head): TwoMLPHead(
    (fc6): Linear(in_features=62720, out_features=1024, bias=True)
    (fc7): Linear(in_features=1024, out_features=1024, bias=True)
  )
  (box_predictor): FastRCNNPredictor(
    (cls_score): Linear(in_features=1024, out_features=2, bias=True)
    (bbox_pred): Linear(in_features=1024, out_features=8, bias=True)
  )
)

In [21]:
new_model._modules.keys()

odict_keys(['transform', 'backbone', 'rpn'])

In [22]:
new_model

FasterRCNN(
  (transform): GeneralizedRCNNTransform(
      Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225])
      Resize(min_size=(800,), max_size=1333, mode='bilinear')
  )
  (backbone): Sequential(
    (0): ConvNormActivation(
      (0): Conv2d(3, 32, kernel_size=(3, 3), stride=(2, 2), padding=(1, 1), bias=False)
      (1): BatchNorm2d(32, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (2): ReLU6(inplace=True)
    )
    (1): InvertedResidual(
      (conv): Sequential(
        (0): ConvNormActivation(
          (0): Conv2d(32, 32, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), groups=32, bias=False)
          (1): BatchNorm2d(32, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
          (2): ReLU6(inplace=True)
        )
        (1): Conv2d(32, 16, kernel_size=(1, 1), stride=(1, 1), bias=False)
        (2): BatchNorm2d(16, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      )
    )
    (2): InvertedResidual(


In [42]:
import torch
import matplotlib.pyplot as plt

BATCH_SIZE = 4 # increase / decrease according to GPU memeory
RESIZE_TO = 512 # resize the image for training and transforms
NUM_EPOCHS = 1 # number of epochs to train for

DEVICE = torch.device('cuda') if torch.cuda.is_available() else torch.device('cpu')

# training images and XML files directory
TRAIN_DIR = 'data/out_rgb'
# validation images and XML files directory
VALID_DIR = 'data/out_rgb'

# classes: 0 index is reserved for background
CLASSES = [
    '0', '1', '2', '3', '4'
]
NUM_CLASSES = 5

# whether to visualize images after crearing the data loaders
VISUALIZE_TRANSFORMED_IMAGES = False

# location to save model and plots
# OUT_DIR = '../outputs'
OUT_DIR = 'outputs'
SAVE_PLOTS_EPOCH = 2 # save loss plots after these many epochs
SAVE_MODEL_EPOCH = 2 # save model after these many epochs

In [43]:
import albumentations as A
import cv2
import numpy as np
from albumentations.pytorch import ToTensorV2
from config import DEVICE, CLASSES as classes
# this class keeps track of the training and validation loss values...
# ... and helps to get the average for each epoch as well
class Averager:
    def __init__(self):
        self.current_total = 0.0
        self.iterations = 0.0
        
    def send(self, value):
        self.current_total += value
        self.iterations += 1
    
    @property
    def value(self):
        if self.iterations == 0:
            return 0
        else:
            return 1.0 * self.current_total / self.iterations
    
    def reset(self):
        self.current_total = 0.0
        self.iterations = 0.0

In [44]:
def collate_fn(batch):
    """
    To handle the data loading as different images may have different number 
    of objects and to handle varying size tensors as well.
    """
    return tuple(zip(*batch))

In [45]:
# define the training tranforms
def get_train_transform():
    return A.Compose([
        A.Flip(0.5),
        A.RandomRotate90(0.5),
        A.MotionBlur(p=0.2),
        A.MedianBlur(blur_limit=3, p=0.1),
        A.Blur(blur_limit=3, p=0.1),
        ToTensorV2(p=1.0),
    ], bbox_params={
        'format': 'pascal_voc',
        'label_fields': ['labels']
    })

def get_train_transform_without_boxes():
    return A.Compose([
        A.Flip(0.5),
        A.RandomRotate90(0.5),
        A.MotionBlur(p=0.2),
        A.MedianBlur(blur_limit=3, p=0.1),
        A.Blur(blur_limit=3, p=0.1),
        ToTensorV2(p=1.0)
    ])

# define the validation transforms
def get_valid_transform():
    return A.Compose([
        ToTensorV2(p=1.0),
    ], bbox_params={
        'format': 'pascal_voc', 
        'label_fields': ['labels']
    })

In [46]:
def show_tranformed_image(train_loader):
    """
    This function shows the transformed images from the `train_loader`.
    Helps to check whether the tranformed images along with the corresponding
    labels are correct or not.
    Only runs if `VISUALIZE_TRANSFORMED_IMAGES = True` in config.py.
    """
    if len(train_loader) > 0:
        for i in range(1):
            images, targets = next(iter(train_loader))
            images = list(image.to(DEVICE) for image in images)
            targets = [{k: v.to(DEVICE) for k, v in t.items()} for t in targets]
            boxes = targets[i]['boxes'].cpu().numpy().astype(np.int32)
            sample = images[i].permute(1, 2, 0).cpu().numpy()
            for box in boxes:
                cv2.rectangle(sample,
                            (box[0], box[1]),
                            (box[2], box[3]),
                            (0, 0, 255), 2)
            cv2.imshow('Transformed image', sample)
            cv2.waitKey(0)
            cv2.destroyAllWindows()

In [62]:
import torch
import cv2
import numpy as np
import os
import glob as glob
from xml.etree import ElementTree as et
import json
# from config import CLASSES, RESIZE_TO, TRAIN_DIR, VALID_DIR, BATCH_SIZE

# from config import CLASSES, RESIZE_TO, BATCH_SIZE
# from torch.utils.data import Dataset, DataLoader
# from utils import collate_fn, get_train_transform, get_valid_transform

In [70]:
# the dataset class
class MicrocontrollerDataset(Dataset):
    def __init__(self, dir_path, width, height, classes, transforms=None, transforms_without_boxes=None):
        self.transforms_without_boxes = transforms_without_boxes
        self.transforms = transforms
        self.dir_path = dir_path
        self.height = height
        self.width = width
        self.classes = classes
        
        # get all the image paths in sorted order    
        self.image_paths = glob.glob(f"{self.dir_path}/*.png")
        self.all_images = [image_path.split('/')[-1].split('.')[0] for image_path in self.image_paths]
        self.all_images = sorted(self.all_images)

        # FileNames Dictionary
        self.filenames = dict()
        for sub_dir, dir_name, files in os.walk("data/out_bbox/"):
            for file in files:
                try:
                    with open("data/out_bbox/" + file, 'r') as fp:
                        file = file.split('.')[0]
                        self.filenames[file] = json.load(fp)
                except Exception as e:
                    pass
                
    def check_bounding_boxes(self, box_coordinates):
        box_coords = []
        for coordinate in box_coordinates:
            if coordinate < 1:
                box_coords.append(1)
            elif coordinate > self.width:
                box_coords.append(self.width)
            else:
                box_coords.append(coordinate)
        if box_coords[0] == box_coords[2]:
            if box_coords[2] != self.width:
                box_coords[2] += 5
            else:
                box_coords[0] -= 5
        if box_coords[1] == box_coords[3]:
            if box_coords[3] != self.width:
                box_coords[3] += 5
            else:
                box_coords[1] -= 5
        return box_coords
                    
    def __getitem__(self, idx):
        # capture the image name and the full image path
        image_name = self.all_images[idx]
        image_path = os.path.join(self.dir_path, image_name + '.png')
        # read the image
        # print(image_path)
        # image = cv2.imread(image_path, cv2.COLOR_BGR2RGB)
        image = cv2.imread(image_path)
        # plt.imshow(image)
        # convert BGR to RGB color format
        image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB).astype(np.float32)
        image_resized = cv2.resize(image, (self.width, self.height))
        image_resized /= 255.0
        # plt.imshow(image_resized)
        
        # get the height and width of the image
        image_width = image.shape[1]
        image_height = image.shape[0]
        
        image_annotation_dict = self.filenames[image_name]
        
        boxes = []
        if image_annotation_dict["bboxes"]:
            for box_coordinates in self.filenames[image_name]["bboxes"]:
                if box_coordinates:
                    xmin = box_coordinates[0][0]
                    ymin = box_coordinates[0][1]
                    xmax = box_coordinates[1][0]
                    ymax = box_coordinates[1][1]
                    
                    xmin_final = (xmin/image_width)*self.width
                    xmax_final = (xmax/image_width)*self.width
                    ymin_final = (ymin/image_height)*self.height
                    ymax_final = (ymax/image_height)*self.height
                    box_coordinates_loc = [xmin_final, ymin_final, xmax_final, ymax_final]
                    boxes.append(self.check_bounding_boxes(box_coordinates_loc))
                    # boxes.append([xmin_final, ymin_final, xmax_final, ymax_final])
        labels_orig = image_annotation_dict["vehicle_class"]
        labels = []
        for label in labels_orig:
            labels.append(label + 1)

        num_objs = len(boxes)
        
        if not boxes:
            # print("Came Inside")
            boxes = torch.zeros((0,4), dtype=torch.float32)
            labels = torch.zeros(0, dtype=torch.int64)
            area = torch.zeros(0, dtype=torch.float32)
        else:
            # bounding box to tensor
            boxes = torch.as_tensor(boxes, dtype=torch.float32)
            # print(boxes)
            # labels to tensor
            labels = torch.as_tensor(labels, dtype=torch.int64)
            # area of the bounding boxes
            area = (boxes[:, 3] - boxes[:, 1]) * (boxes[:, 2] - boxes[:, 0])
        
        # no crowd instances
        iscrowd = torch.zeros((num_objs,), dtype=torch.int64)
        # prepare the final `target` dictionary
        target = {}
        target["boxes"] = boxes
        target["labels"] = labels
        target["area"] = area
        target["iscrowd"] = iscrowd
        image_id = torch.tensor([idx])
        target["image_id"] = image_id
        # apply the image transforms
        # print(labels)
        # print(boxes)
        if self.transforms:
            # if num_objs == 0:
            #     print("Before Transformation:", target['boxes'])
            if num_objs != 0:
                try:
                    sample = self.transforms(image = image_resized,
                                             bboxes = boxes,
                                             labels = labels)
                    target_boxes = []
                    for box in sample['bboxes']:
                        target_box = []
                        for box_coordinate in box:
                            if box_coordinate < 1:
                                target_box.append(0)
                            elif box_coordinate > self.width:
                                target_box.append(self.width)
                            else:
                                target_box.append(box_coordinate)
                        if target_box[0] == target_box[2]:
                            if target_box[2] != self.width:
                                target_box[2] += 5
                            else:
                                target_box[0] -= 5
                        if target_box[3] == target_box[1]:
                            if target_box[3] != self.height:
                                target_box[3] += 5
                            else:
                                target_box[1] -= 5
                        target_boxes.append(target_box)
                    # target['boxes'] = torch.Tensor(sample['bboxes'])
                    target['boxes'] = torch.Tensor(target_boxes)
                except:
                    sample = dict()
                    sample['image'] = torch.tensor(image_resized).permute(2,0,1)
                    print("Boxes Exception:", boxes)
            else:
                sample = self.transforms_without_boxes(image = image_resized)
                target['boxes'] = torch.zeros((0,4), dtype=torch.float32)
                target['labels'] = torch.zeros(0, dtype=torch.int64)
                target['area'] = torch.zeros(0, dtype=torch.float32)
            image_resized = sample['image']
        return image_resized, target

    def __len__(self):
        return len(self.all_images)

In [71]:
# prepare the final datasets and data loaders
train_dataset = MicrocontrollerDataset(TRAIN_DIR, RESIZE_TO, RESIZE_TO, CLASSES, get_train_transform(), get_train_transform_without_boxes())
valid_dataset = MicrocontrollerDataset(VALID_DIR, RESIZE_TO, RESIZE_TO, CLASSES, get_valid_transform(), get_train_transform_without_boxes())
train_loader = DataLoader(
    train_dataset,
    batch_size=BATCH_SIZE,
    shuffle=True,
    num_workers=0,
    collate_fn=collate_fn
)
valid_loader = DataLoader(
    valid_dataset,
    batch_size=BATCH_SIZE,
    shuffle=False,
    num_workers=0,
    collate_fn=collate_fn
)
print(f"Number of training samples: {len(train_dataset)}")
print(f"Number of validation samples: {len(valid_dataset)}\n")

Number of training samples: 2779
Number of validation samples: 2779



In [72]:
len(train_dataset)

2779

In [73]:
train_dataset

<__main__.MicrocontrollerDataset at 0x7fa18611a370>

In [74]:
len(train_dataset.all_images)

2779

In [75]:
train_dataset[0]

(tensor([[[0.8693, 0.8697, 0.8705,  ..., 0.8030, 0.9779, 0.4893],
          [0.8668, 0.8671, 0.8688,  ..., 0.8091, 0.9786, 0.4908],
          [0.8638, 0.8667, 0.8676,  ..., 0.8103, 0.9812, 0.5111],
          ...,
          [0.6392, 0.6461, 0.6388,  ..., 0.5786, 0.5842, 0.5605],
          [0.6336, 0.6531, 0.6471,  ..., 0.5478, 0.6133, 0.5950],
          [0.6366, 0.6124, 0.6383,  ..., 0.5165, 0.5908, 0.5604]],
 
         [[0.8157, 0.8187, 0.8159,  ..., 0.6819, 0.7966, 0.3024],
          [0.8157, 0.8157, 0.8174,  ..., 0.6847, 0.7944, 0.3017],
          [0.8140, 0.8154, 0.8157,  ..., 0.6864, 0.7976, 0.3196],
          ...,
          [0.5348, 0.5534, 0.5544,  ..., 0.4898, 0.4959, 0.4800],
          [0.5386, 0.5528, 0.5436,  ..., 0.4663, 0.5327, 0.5157],
          [0.5410, 0.5121, 0.5372,  ..., 0.4424, 0.5031, 0.4947]],
 
         [[0.7684, 0.7686, 0.7688,  ..., 0.6101, 0.6117, 0.2274],
          [0.7658, 0.7678, 0.7704,  ..., 0.6125, 0.6091, 0.2313],
          [0.7653, 0.7665, 0.7686,  ...,

In [76]:
train_dataset[0][0].shape

torch.Size([3, 512, 512])

In [78]:
unsqueezed_image_tensor = train_dataset[0][0].unsqueeze(dim=0)

In [79]:
import torchvision
from torchvision.models.detection.faster_rcnn import FastRCNNPredictor
def create_model(num_classes):
    # load Faster RCNN pre-trained model
    model = torchvision.models.detection.fasterrcnn_resnet50_fpn(pretrained=True)
    # get the number of input features 
    in_features = model.roi_heads.box_predictor.cls_score.in_features
    # define a new head for the detector with required number of classes
    model.roi_heads.box_predictor = FastRCNNPredictor(in_features, num_classes) 
    return model

In [80]:
NUM_CLASSES

5

In [81]:
model = create_model(num_classes=NUM_CLASSES)

In [82]:
model

FasterRCNN(
  (transform): GeneralizedRCNNTransform(
      Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225])
      Resize(min_size=(800,), max_size=1333, mode='bilinear')
  )
  (backbone): BackboneWithFPN(
    (body): IntermediateLayerGetter(
      (conv1): Conv2d(3, 64, kernel_size=(7, 7), stride=(2, 2), padding=(3, 3), bias=False)
      (bn1): FrozenBatchNorm2d(64, eps=0.0)
      (relu): ReLU(inplace=True)
      (maxpool): MaxPool2d(kernel_size=3, stride=2, padding=1, dilation=1, ceil_mode=False)
      (layer1): Sequential(
        (0): Bottleneck(
          (conv1): Conv2d(64, 64, kernel_size=(1, 1), stride=(1, 1), bias=False)
          (bn1): FrozenBatchNorm2d(64, eps=0.0)
          (conv2): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
          (bn2): FrozenBatchNorm2d(64, eps=0.0)
          (conv3): Conv2d(64, 256, kernel_size=(1, 1), stride=(1, 1), bias=False)
          (bn3): FrozenBatchNorm2d(256, eps=0.0)
          (relu): ReLU(

In [83]:
new_model = deepcopy(model)

In [85]:
new_model._modules.keys()

odict_keys(['transform', 'backbone', 'rpn', 'roi_heads'])

In [86]:
new_model._modules.pop('roi_heads')

RoIHeads(
  (box_roi_pool): MultiScaleRoIAlign(featmap_names=['0', '1', '2', '3'], output_size=(7, 7), sampling_ratio=2)
  (box_head): TwoMLPHead(
    (fc6): Linear(in_features=12544, out_features=1024, bias=True)
    (fc7): Linear(in_features=1024, out_features=1024, bias=True)
  )
  (box_predictor): FastRCNNPredictor(
    (cls_score): Linear(in_features=1024, out_features=5, bias=True)
    (bbox_pred): Linear(in_features=1024, out_features=20, bias=True)
  )
)

In [95]:
new_model._modules.keys()

odict_keys(['transform', 'backbone', 'rpn'])

In [96]:
rpn_model = torch.nn.Sequential(new_model)

In [97]:
new_model.eval()
with torch.no_grad():
    image_output = rpn_model(unsqueezed_image_tensor)

AttributeError: 'FasterRCNN' object has no attribute 'roi_heads'

In [90]:
train_dataset[0][1]

{'boxes': tensor([[125.2959, 233.4059, 158.6772, 253.4091],
         [158.3572, 216.5084, 196.2177, 250.6612]]),
 'labels': tensor([2, 1]),
 'area': tensor([ 667.7323, 1293.0406]),
 'iscrowd': tensor([0, 0]),
 'image_id': tensor([0])}

In [121]:
model = create_model(num_classes=NUM_CLASSES)

In [122]:
class NewModel(torch.nn.Module):
    def __init__(self,output_layer = None):
        super().__init__()
        self.pretrained = model
        self.key_modules = self.pretrained._modules.keys()
        self.pretrained._modules.pop('roi_heads')
        self.net = torch.nn.Sequential(self.pretrained._modules)
        self.pretrained = None

    def forward(self,x):
        x = self.net(x)
        return x

In [123]:
new_model = NewModel()

In [124]:
new_model.eval()
with torch.no_grad():
    image_output = new_model(unsqueezed_image_tensor)

TypeError: conv2d() received an invalid combination of arguments - got (tuple, Parameter, NoneType, tuple, tuple, tuple, int), but expected one of:
 * (Tensor input, Tensor weight, Tensor bias, tuple of ints stride, tuple of ints padding, tuple of ints dilation, int groups)
      didn't match because some of the arguments have invalid types: (!tuple!, !Parameter!, !NoneType!, !tuple!, !tuple!, !tuple!, int)
 * (Tensor input, Tensor weight, Tensor bias, tuple of ints stride, str padding, tuple of ints dilation, int groups)
      didn't match because some of the arguments have invalid types: (!tuple!, !Parameter!, !NoneType!, !tuple!, !tuple!, !tuple!, int)


In [125]:
list(new_model.children())

[Sequential(
   (transform): GeneralizedRCNNTransform(
       Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225])
       Resize(min_size=(800,), max_size=1333, mode='bilinear')
   )
   (backbone): BackboneWithFPN(
     (body): IntermediateLayerGetter(
       (conv1): Conv2d(3, 64, kernel_size=(7, 7), stride=(2, 2), padding=(3, 3), bias=False)
       (bn1): FrozenBatchNorm2d(64, eps=0.0)
       (relu): ReLU(inplace=True)
       (maxpool): MaxPool2d(kernel_size=3, stride=2, padding=1, dilation=1, ceil_mode=False)
       (layer1): Sequential(
         (0): Bottleneck(
           (conv1): Conv2d(64, 64, kernel_size=(1, 1), stride=(1, 1), bias=False)
           (bn1): FrozenBatchNorm2d(64, eps=0.0)
           (conv2): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
           (bn2): FrozenBatchNorm2d(64, eps=0.0)
           (conv3): Conv2d(64, 256, kernel_size=(1, 1), stride=(1, 1), bias=False)
           (bn3): FrozenBatchNorm2d(256, eps=0.0)
    

In [126]:
unsqueezed_image_tensor.shape

torch.Size([1, 3, 512, 512])

In [140]:
image_tensor_list = list(train_dataset[0][0].unsqueeze(dim=0))

In [148]:
model = create_model(num_classes=NUM_CLASSES)

In [156]:
new_model.eval()
model.eval()
with torch.no_grad():
    # image_output = new_model(image_tensor_list)
    image_output = model(image_tensor_list)

In [141]:
len(image_tensor_list)

1

In [142]:
image_tensor_list[0].shape

torch.Size([3, 512, 512])

In [143]:
train_dataset[0][0].shape

torch.Size([3, 512, 512])

In [153]:
for c, n in new_model.named_parameters():
    print("Layer Name:", c)

Layer Name: net.backbone.body.conv1.weight
Layer Name: net.backbone.body.layer1.0.conv1.weight
Layer Name: net.backbone.body.layer1.0.conv2.weight
Layer Name: net.backbone.body.layer1.0.conv3.weight
Layer Name: net.backbone.body.layer1.0.downsample.0.weight
Layer Name: net.backbone.body.layer1.1.conv1.weight
Layer Name: net.backbone.body.layer1.1.conv2.weight
Layer Name: net.backbone.body.layer1.1.conv3.weight
Layer Name: net.backbone.body.layer1.2.conv1.weight
Layer Name: net.backbone.body.layer1.2.conv2.weight
Layer Name: net.backbone.body.layer1.2.conv3.weight
Layer Name: net.backbone.body.layer2.0.conv1.weight
Layer Name: net.backbone.body.layer2.0.conv2.weight
Layer Name: net.backbone.body.layer2.0.conv3.weight
Layer Name: net.backbone.body.layer2.0.downsample.0.weight
Layer Name: net.backbone.body.layer2.1.conv1.weight
Layer Name: net.backbone.body.layer2.1.conv2.weight
Layer Name: net.backbone.body.layer2.1.conv3.weight
Layer Name: net.backbone.body.layer2.2.conv1.weight
Layer N

In [161]:
for index, (c, n) in enumerate(model.named_parameters()):
    print("Index:", index, "Layer Name:", c)

Index: 0 Layer Name: backbone.body.conv1.weight
Index: 1 Layer Name: backbone.body.layer1.0.conv1.weight
Index: 2 Layer Name: backbone.body.layer1.0.conv2.weight
Index: 3 Layer Name: backbone.body.layer1.0.conv3.weight
Index: 4 Layer Name: backbone.body.layer1.0.downsample.0.weight
Index: 5 Layer Name: backbone.body.layer1.1.conv1.weight
Index: 6 Layer Name: backbone.body.layer1.1.conv2.weight
Index: 7 Layer Name: backbone.body.layer1.1.conv3.weight
Index: 8 Layer Name: backbone.body.layer1.2.conv1.weight
Index: 9 Layer Name: backbone.body.layer1.2.conv2.weight
Index: 10 Layer Name: backbone.body.layer1.2.conv3.weight
Index: 11 Layer Name: backbone.body.layer2.0.conv1.weight
Index: 12 Layer Name: backbone.body.layer2.0.conv2.weight
Index: 13 Layer Name: backbone.body.layer2.0.conv3.weight
Index: 14 Layer Name: backbone.body.layer2.0.downsample.0.weight
Index: 15 Layer Name: backbone.body.layer2.1.conv1.weight
Index: 16 Layer Name: backbone.body.layer2.1.conv2.weight
Index: 17 Layer Nam

In [177]:
from torch import nn
from collections import OrderedDict
model = create_model(num_classes=NUM_CLASSES)
model.layer_name
class NewModel(nn.Module):
    def __init__(self, output_layers, *args):
        super().__init__(*args)
        self.output_layers = output_layers
        #print(self.output_layers)
        self.selected_out = OrderedDict()
        #PRETRAINED MODEL
        self.pretrained = model
        self.fhooks = []

        for i,l in enumerate(list(self.pretrained._modules.keys())):
            if i in self.output_layers:
                self.fhooks.append(getattr(self.pretrained,l).register_forward_hook(self.forward_hook(l)))
    
    def forward_hook(self,layer_name):
        def hook(module, input, output):
            self.selected_out[layer_name] = output
        return hook

    def forward(self, x):
        out = self.pretrained(x)
        return out, self.selected_out

AttributeError: 'FasterRCNN' object has no attribute 'layer_name'

In [173]:
new_model = NewModel(output_layers = [77,78]).to('cuda:0')

In [175]:
new_model.eval()
model.eval()
with torch.no_grad():
    # image_output = new_model(image_tensor_list)
    image_output = new_model(image_tensor_list)

RuntimeError: Input type (torch.FloatTensor) and weight type (torch.cuda.FloatTensor) should be the same or input should be a MKLDNN tensor and weight is a dense tensor

In [176]:
new_model

NewModel(
  (pretrained): FasterRCNN(
    (transform): GeneralizedRCNNTransform(
        Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225])
        Resize(min_size=(800,), max_size=1333, mode='bilinear')
    )
    (backbone): BackboneWithFPN(
      (body): IntermediateLayerGetter(
        (conv1): Conv2d(3, 64, kernel_size=(7, 7), stride=(2, 2), padding=(3, 3), bias=False)
        (bn1): FrozenBatchNorm2d(64, eps=0.0)
        (relu): ReLU(inplace=True)
        (maxpool): MaxPool2d(kernel_size=3, stride=2, padding=1, dilation=1, ceil_mode=False)
        (layer1): Sequential(
          (0): Bottleneck(
            (conv1): Conv2d(64, 64, kernel_size=(1, 1), stride=(1, 1), bias=False)
            (bn1): FrozenBatchNorm2d(64, eps=0.0)
            (conv2): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
            (bn2): FrozenBatchNorm2d(64, eps=0.0)
            (conv3): Conv2d(64, 256, kernel_size=(1, 1), stride=(1, 1), bias=False)
            

In [178]:
from torchvision.models.resnet import model_urls

In [199]:
model = create_model(num_classes=NUM_CLASSES)
class ModifiedFasterRCNN:
    def __init__(self,output_layer,*args):
        self.output_layer = output_layer
        super().__init__(*args)
        self.pretrained = model
        self._layers = []
        for l in list(self.pretrained._modules.keys()):
            self._layers.append(l)
            if l == output_layer:
                break
        self.layers = OrderedDict(zip(self._layers,[getattr(self.pretrained,l) for l in self._layers]))

    def _forward_impl(self, x):
        for l in self._layers:
            x = self.layers[l](x)
        return x

    def forward(self, x):
        return self._forward_impl(x)

ValueError: backbone should contain an attribute out_channels specifying the number of output channels (assumed to be the same for all the levels)

In [219]:
model = create_model(num_classes=NUM_CLASSES)
class ModifiedFasterRCNN(FastRCNNPredictor):
    def __init__(self,output_layer,*args):
        self.output_layer = output_layer
        model = torchvision.models.detection.fasterrcnn_resnet50_fpn(pretrained=True)
        # get the number of input features 
        in_features = model.roi_heads.box_predictor.cls_score.in_features
        # define a new head for the detector with required number of classes
        # model.roi_heads.box_predictor = FastRCNNPredictor(in_features, num_classes) 
        model.roi_heads.box_predictor = super().__init__(in_features, *args)
        self = model
        self.output_layer = output_layer
        self._layers = []
        for l in list(self._modules.keys()):
            self._layers.append(l)
            if l == output_layer:
                break
        self.layers = OrderedDict(zip(self._layers,[getattr(self,l) for l in self._layers]))
        print(self.layers, self._layers)
    def _forward_impl(self, x):
        for l in self._layers:
            x = self.layers[l](x)

        return x

    def forward(self, x):
        return self._forward_impl(x)

In [223]:
model = create_model(num_classes=NUM_CLASSES)
class ModifiedFasterRCNN(FastRCNNPredictor):
    def __init__(self,output_layer,*args):
        model = torchvision.models.detection.fasterrcnn_resnet50_fpn(pretrained=True)
        # get the number of input features 
        in_features = model.roi_heads.box_predictor.cls_score.in_features
        # define a new head for the detector with required number of classes
        # model.roi_heads.box_predictor = FastRCNNPredictor(in_features, num_classes) 
        model.roi_heads.box_predictor = super().__init__(in_features, *args)
        self.model = model
        self.model.output_layer = output_layer
        self.model._layers = []
        for l in list(self.model._modules.keys()):
            self.model._layers.append(l)
            if l == output_layer:
                break
        self.model.layers = OrderedDict(zip(self.model._layers,[getattr(self.model,l) for l in self.model._layers]))
        print(self.model.layers, self.model._layers)
    def _forward_impl(self, x):
        for l in self.model._layers:
            x = self.model.layers[l](x)

        return x

    def forward(self, x):
        return self._forward_impl(x)

In [248]:
model = create_model(num_classes=NUM_CLASSES)
class ModifiedFasterRCNN(FastRCNNPredictor):
    def __init__(self,output_layer,*args):
        model = torchvision.models.detection.fasterrcnn_resnet50_fpn(pretrained=True)
        # get the number of input features 
        in_features = model.roi_heads.box_predictor.cls_score.in_features
        # define a new head for the detector with required number of classes
        # model.roi_heads.box_predictor = FastRCNNPredictor(in_features, num_classes) 
        model.roi_heads.box_predictor = super().__init__(in_features, *args)
        self.model = model
        self.model.output_layer = output_layer
        self._layers = []
        for l in list(self.model._modules.keys()):
            self._layers.append(l)
            if l == output_layer:
                break
        self.layers = OrderedDict(zip(self._layers,[getattr(self.model,l) for l in self._layers]))
        self.model._modules.pop('roi_heads')
        self._modules = self.model._modules
        print(self.layers, self._layers)
        
    def _forward_impl(self, x):
        for l in self._layers:
            x = self.layers[l](x)
        return x

    def forward(self, x):
        return self._forward_impl(x)

In [249]:
model

FasterRCNN(
  (transform): GeneralizedRCNNTransform(
      Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225])
      Resize(min_size=(800,), max_size=1333, mode='bilinear')
  )
  (backbone): BackboneWithFPN(
    (body): IntermediateLayerGetter(
      (conv1): Conv2d(3, 64, kernel_size=(7, 7), stride=(2, 2), padding=(3, 3), bias=False)
      (bn1): FrozenBatchNorm2d(64, eps=0.0)
      (relu): ReLU(inplace=True)
      (maxpool): MaxPool2d(kernel_size=3, stride=2, padding=1, dilation=1, ceil_mode=False)
      (layer1): Sequential(
        (0): Bottleneck(
          (conv1): Conv2d(64, 64, kernel_size=(1, 1), stride=(1, 1), bias=False)
          (bn1): FrozenBatchNorm2d(64, eps=0.0)
          (conv2): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
          (bn2): FrozenBatchNorm2d(64, eps=0.0)
          (conv3): Conv2d(64, 256, kernel_size=(1, 1), stride=(1, 1), bias=False)
          (bn3): FrozenBatchNorm2d(256, eps=0.0)
          (relu): ReLU(

In [250]:
model._modules

OrderedDict([('transform',
              GeneralizedRCNNTransform(
                  Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225])
                  Resize(min_size=(800,), max_size=1333, mode='bilinear')
              )),
             ('backbone',
              BackboneWithFPN(
                (body): IntermediateLayerGetter(
                  (conv1): Conv2d(3, 64, kernel_size=(7, 7), stride=(2, 2), padding=(3, 3), bias=False)
                  (bn1): FrozenBatchNorm2d(64, eps=0.0)
                  (relu): ReLU(inplace=True)
                  (maxpool): MaxPool2d(kernel_size=3, stride=2, padding=1, dilation=1, ceil_mode=False)
                  (layer1): Sequential(
                    (0): Bottleneck(
                      (conv1): Conv2d(64, 64, kernel_size=(1, 1), stride=(1, 1), bias=False)
                      (bn1): FrozenBatchNorm2d(64, eps=0.0)
                      (conv2): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
    

In [251]:
new_model = ModifiedFasterRCNN('rpn', NUM_CLASSES)

OrderedDict([('transform', GeneralizedRCNNTransform(
    Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225])
    Resize(min_size=(800,), max_size=1333, mode='bilinear')
)), ('backbone', BackboneWithFPN(
  (body): IntermediateLayerGetter(
    (conv1): Conv2d(3, 64, kernel_size=(7, 7), stride=(2, 2), padding=(3, 3), bias=False)
    (bn1): FrozenBatchNorm2d(64, eps=0.0)
    (relu): ReLU(inplace=True)
    (maxpool): MaxPool2d(kernel_size=3, stride=2, padding=1, dilation=1, ceil_mode=False)
    (layer1): Sequential(
      (0): Bottleneck(
        (conv1): Conv2d(64, 64, kernel_size=(1, 1), stride=(1, 1), bias=False)
        (bn1): FrozenBatchNorm2d(64, eps=0.0)
        (conv2): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
        (bn2): FrozenBatchNorm2d(64, eps=0.0)
        (conv3): Conv2d(64, 256, kernel_size=(1, 1), stride=(1, 1), bias=False)
        (bn3): FrozenBatchNorm2d(256, eps=0.0)
        (relu): ReLU(inplace=True)
        (downsample

In [252]:
new_model._layers

['transform', 'backbone', 'rpn']

In [253]:
new_model._modules

OrderedDict([('transform',
              GeneralizedRCNNTransform(
                  Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225])
                  Resize(min_size=(800,), max_size=1333, mode='bilinear')
              )),
             ('backbone',
              BackboneWithFPN(
                (body): IntermediateLayerGetter(
                  (conv1): Conv2d(3, 64, kernel_size=(7, 7), stride=(2, 2), padding=(3, 3), bias=False)
                  (bn1): FrozenBatchNorm2d(64, eps=0.0)
                  (relu): ReLU(inplace=True)
                  (maxpool): MaxPool2d(kernel_size=3, stride=2, padding=1, dilation=1, ceil_mode=False)
                  (layer1): Sequential(
                    (0): Bottleneck(
                      (conv1): Conv2d(64, 64, kernel_size=(1, 1), stride=(1, 1), bias=False)
                      (bn1): FrozenBatchNorm2d(64, eps=0.0)
                      (conv2): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
    

In [254]:
# image_tensor_list = list(train_dataset[0][0].unsqueeze(dim=0).float())
image_tensors = train_dataset[0][0].unsqueeze(dim=0).float()

In [255]:
# new_model(image_tensor_list)
new_model(image_tensors)

TypeError: conv2d() received an invalid combination of arguments - got (tuple, Parameter, NoneType, tuple, tuple, tuple, int), but expected one of:
 * (Tensor input, Tensor weight, Tensor bias, tuple of ints stride, tuple of ints padding, tuple of ints dilation, int groups)
      didn't match because some of the arguments have invalid types: (!tuple!, !Parameter!, !NoneType!, !tuple!, !tuple!, !tuple!, int)
 * (Tensor input, Tensor weight, Tensor bias, tuple of ints stride, str padding, tuple of ints dilation, int groups)
      didn't match because some of the arguments have invalid types: (!tuple!, !Parameter!, !NoneType!, !tuple!, !tuple!, !tuple!, int)


In [239]:
image_tensor_list

[tensor([[[0.8693, 0.8668, 0.8638,  ..., 0.6392, 0.6336, 0.6366],
          [0.8697, 0.8671, 0.8667,  ..., 0.6461, 0.6531, 0.6124],
          [0.8705, 0.8688, 0.8676,  ..., 0.6388, 0.6471, 0.6383],
          ...,
          [0.8030, 0.8091, 0.8103,  ..., 0.5786, 0.5478, 0.5165],
          [0.9779, 0.9786, 0.9812,  ..., 0.5842, 0.6133, 0.5908],
          [0.4893, 0.4908, 0.5111,  ..., 0.5605, 0.5950, 0.5604]],
 
         [[0.8157, 0.8157, 0.8140,  ..., 0.5348, 0.5386, 0.5410],
          [0.8187, 0.8157, 0.8154,  ..., 0.5534, 0.5528, 0.5121],
          [0.8159, 0.8174, 0.8157,  ..., 0.5544, 0.5436, 0.5372],
          ...,
          [0.6819, 0.6847, 0.6864,  ..., 0.4898, 0.4663, 0.4424],
          [0.7966, 0.7944, 0.7976,  ..., 0.4959, 0.5327, 0.5031],
          [0.3024, 0.3017, 0.3196,  ..., 0.4800, 0.5157, 0.4947]],
 
         [[0.7684, 0.7658, 0.7653,  ..., 0.4409, 0.4505, 0.4447],
          [0.7686, 0.7678, 0.7665,  ..., 0.4554, 0.4491, 0.4170],
          [0.7688, 0.7704, 0.7686,  ...,

In [256]:
model = create_model(num_classes=NUM_CLASSES)

In [None]:
activation = {}
def get_activation(name):
    def hook(model, input, output):
        activation[name] = output.detach()
    return hook

model.fc0.conv2.register_forward_hook(get_activation('fc0.conv2'))
model.fc1.conv2.register_forward_hook(get_activation('fc1.conv2'))

output = model(x)
print(activation['fc0.conv2'])
print(activation['fc0.conv1'])

In [257]:
model._modules

OrderedDict([('transform',
              GeneralizedRCNNTransform(
                  Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225])
                  Resize(min_size=(800,), max_size=1333, mode='bilinear')
              )),
             ('backbone',
              BackboneWithFPN(
                (body): IntermediateLayerGetter(
                  (conv1): Conv2d(3, 64, kernel_size=(7, 7), stride=(2, 2), padding=(3, 3), bias=False)
                  (bn1): FrozenBatchNorm2d(64, eps=0.0)
                  (relu): ReLU(inplace=True)
                  (maxpool): MaxPool2d(kernel_size=3, stride=2, padding=1, dilation=1, ceil_mode=False)
                  (layer1): Sequential(
                    (0): Bottleneck(
                      (conv1): Conv2d(64, 64, kernel_size=(1, 1), stride=(1, 1), bias=False)
                      (bn1): FrozenBatchNorm2d(64, eps=0.0)
                      (conv2): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
    

In [260]:
model.eval()
activation = {}
def get_activation(name):
    def hook(model, input, output):
        activation[name] = output.detach()
    return hook

# model.fc0.conv2.register_forward_hook(get_activation('fc0.conv2'))
# model.fc1.conv2.register_forward_hook(get_activation('fc1.conv2'))

model.rpn.head.cls_logits.register_forward_hook(get_activation('rpn.head.cls_logits'))
model.rpn.head.bbox_pred.register_forward_hook(get_activation('rpn.head.bbox_pred'))
model.roi_heads.box_roi_pool.register_forward_hook(get_activation('roi_heads.box_roi_pool'))

output = model(image_tensor_list)
print(activation['rpn.head.cls_logits'].shape)
print(activation['rpn.head.bbox_pred'].shape)
print(activation['roi_heads.box_roi_pool'].shape)

torch.Size([1, 3, 13, 13])
torch.Size([1, 12, 13, 13])
torch.Size([1000, 256, 7, 7])
