In [87]:
import torch
import importlib

In [88]:
import torchvision
from torchvision.models.detection import FasterRCNN
from torchvision.models.detection.rpn import AnchorGenerator

# load a pre-trained model for classification and return
# only the features
backbone = torchvision.models.mobilenet_v2(pretrained=True).features
# FasterRCNN needs to know the number of
# output channels in a backbone. For mobilenet_v2, it's 1280
# so we need to add it here
backbone.out_channels = 1280

# let's make the RPN generate 5 x 3 anchors per spatial
# location, with 5 different sizes and 3 different aspect
# ratios. We have a Tuple[Tuple[int]] because each feature
# map could potentially have different sizes and
# aspect ratios
anchor_generator = AnchorGenerator(sizes=((32, 64, 128, 256, 512),),
                                   aspect_ratios=((0.5, 1.0, 2.0),))

# let's define what are the feature maps that we will
# use to perform the region of interest cropping, as well as
# the size of the crop after rescaling.
# if your backbone returns a Tensor, featmap_names is expected to
# be [0]. More generally, the backbone should return an
# OrderedDict[Tensor], and in featmap_names you can choose which
# feature maps to use.
roi_pooler = torchvision.ops.MultiScaleRoIAlign(featmap_names=['0'],
                                                output_size=7,
                                                sampling_ratio=2)

# put the pieces together inside a FasterRCNN model
model = FasterRCNN(backbone,
                   num_classes=2,
                   rpn_anchor_generator=anchor_generator,
                   box_roi_pool=roi_pooler)

In [89]:
importlib.reload(torchvision)

<module 'torchvision' from '/home/karthikragunath/anaconda3/envs/semantic_scene_understanding/lib/python3.8/site-packages/torchvision/__init__.py'>

In [90]:
roi_pooler

MultiScaleRoIAlign(featmap_names=['0'], output_size=(7, 7), sampling_ratio=2)

In [91]:
anchor_generator

AnchorGenerator()

In [92]:
model

FasterRCNN(
  (transform): GeneralizedRCNNTransform(
      Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225])
      Resize(min_size=(800,), max_size=1333, mode='bilinear')
  )
  (backbone): Sequential(
    (0): ConvNormActivation(
      (0): Conv2d(3, 32, kernel_size=(3, 3), stride=(2, 2), padding=(1, 1), bias=False)
      (1): BatchNorm2d(32, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (2): ReLU6(inplace=True)
    )
    (1): InvertedResidual(
      (conv): Sequential(
        (0): ConvNormActivation(
          (0): Conv2d(32, 32, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), groups=32, bias=False)
          (1): BatchNorm2d(32, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
          (2): ReLU6(inplace=True)
        )
        (1): Conv2d(32, 16, kernel_size=(1, 1), stride=(1, 1), bias=False)
        (2): BatchNorm2d(16, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      )
    )
    (2): InvertedResidual(


In [93]:
children_counter = 0
for n, c in model.named_children():
    print("Counter:", children_counter)
    print("Layer Name:", n)
    print("Layer Config:", c)
    children_counter += 1

Counter: 0
Layer Name: transform
Layer Config: GeneralizedRCNNTransform(
    Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225])
    Resize(min_size=(800,), max_size=1333, mode='bilinear')
)
Counter: 1
Layer Name: backbone
Layer Config: Sequential(
  (0): ConvNormActivation(
    (0): Conv2d(3, 32, kernel_size=(3, 3), stride=(2, 2), padding=(1, 1), bias=False)
    (1): BatchNorm2d(32, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    (2): ReLU6(inplace=True)
  )
  (1): InvertedResidual(
    (conv): Sequential(
      (0): ConvNormActivation(
        (0): Conv2d(32, 32, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), groups=32, bias=False)
        (1): BatchNorm2d(32, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
        (2): ReLU6(inplace=True)
      )
      (1): Conv2d(32, 16, kernel_size=(1, 1), stride=(1, 1), bias=False)
      (2): BatchNorm2d(16, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    )
  )
  (2): Invert

In [94]:
model._modules

OrderedDict([('transform',
              GeneralizedRCNNTransform(
                  Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225])
                  Resize(min_size=(800,), max_size=1333, mode='bilinear')
              )),
             ('backbone',
              Sequential(
                (0): ConvNormActivation(
                  (0): Conv2d(3, 32, kernel_size=(3, 3), stride=(2, 2), padding=(1, 1), bias=False)
                  (1): BatchNorm2d(32, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
                  (2): ReLU6(inplace=True)
                )
                (1): InvertedResidual(
                  (conv): Sequential(
                    (0): ConvNormActivation(
                      (0): Conv2d(32, 32, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), groups=32, bias=False)
                      (1): BatchNorm2d(32, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
                      (2): ReLU6(inplace=True)
         

In [95]:
model._modules.keys()

odict_keys(['transform', 'backbone', 'rpn', 'roi_heads'])

In [96]:
from copy import deepcopy

In [97]:
new_model = deepcopy(model)

In [98]:
new_model._modules.keys()

odict_keys(['transform', 'backbone', 'rpn', 'roi_heads'])

In [99]:
new_model

FasterRCNN(
  (transform): GeneralizedRCNNTransform(
      Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225])
      Resize(min_size=(800,), max_size=1333, mode='bilinear')
  )
  (backbone): Sequential(
    (0): ConvNormActivation(
      (0): Conv2d(3, 32, kernel_size=(3, 3), stride=(2, 2), padding=(1, 1), bias=False)
      (1): BatchNorm2d(32, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (2): ReLU6(inplace=True)
    )
    (1): InvertedResidual(
      (conv): Sequential(
        (0): ConvNormActivation(
          (0): Conv2d(32, 32, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), groups=32, bias=False)
          (1): BatchNorm2d(32, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
          (2): ReLU6(inplace=True)
        )
        (1): Conv2d(32, 16, kernel_size=(1, 1), stride=(1, 1), bias=False)
        (2): BatchNorm2d(16, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      )
    )
    (2): InvertedResidual(


In [100]:
new_model._modules.pop('roi_heads')

RoIHeads(
  (box_roi_pool): MultiScaleRoIAlign(featmap_names=['0'], output_size=(7, 7), sampling_ratio=2)
  (box_head): TwoMLPHead(
    (fc6): Linear(in_features=62720, out_features=1024, bias=True)
    (fc7): Linear(in_features=1024, out_features=1024, bias=True)
  )
  (box_predictor): FastRCNNPredictor(
    (cls_score): Linear(in_features=1024, out_features=2, bias=True)
    (bbox_pred): Linear(in_features=1024, out_features=8, bias=True)
  )
)

In [101]:
new_model._modules.keys()

odict_keys(['transform', 'backbone', 'rpn'])

In [102]:
new_model

FasterRCNN(
  (transform): GeneralizedRCNNTransform(
      Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225])
      Resize(min_size=(800,), max_size=1333, mode='bilinear')
  )
  (backbone): Sequential(
    (0): ConvNormActivation(
      (0): Conv2d(3, 32, kernel_size=(3, 3), stride=(2, 2), padding=(1, 1), bias=False)
      (1): BatchNorm2d(32, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (2): ReLU6(inplace=True)
    )
    (1): InvertedResidual(
      (conv): Sequential(
        (0): ConvNormActivation(
          (0): Conv2d(32, 32, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), groups=32, bias=False)
          (1): BatchNorm2d(32, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
          (2): ReLU6(inplace=True)
        )
        (1): Conv2d(32, 16, kernel_size=(1, 1), stride=(1, 1), bias=False)
        (2): BatchNorm2d(16, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      )
    )
    (2): InvertedResidual(


In [103]:
import torch
import matplotlib.pyplot as plt

BATCH_SIZE = 4 # increase / decrease according to GPU memeory
RESIZE_TO = 512 # resize the image for training and transforms
NUM_EPOCHS = 1 # number of epochs to train for

DEVICE = torch.device('cuda') if torch.cuda.is_available() else torch.device('cpu')

# training images and XML files directory
TRAIN_DIR = 'data/out_rgb'
# validation images and XML files directory
VALID_DIR = 'data/out_rgb'

# classes: 0 index is reserved for background
CLASSES = [
    '0', '1', '2', '3', '4'
]
NUM_CLASSES = 5

# whether to visualize images after crearing the data loaders
VISUALIZE_TRANSFORMED_IMAGES = False

# location to save model and plots
# OUT_DIR = '../outputs'
OUT_DIR = 'outputs'
SAVE_PLOTS_EPOCH = 2 # save loss plots after these many epochs
SAVE_MODEL_EPOCH = 2 # save model after these many epochs

In [104]:
import albumentations as A
import cv2
import numpy as np
from albumentations.pytorch import ToTensorV2
from config import DEVICE, CLASSES as classes
# this class keeps track of the training and validation loss values...
# ... and helps to get the average for each epoch as well
class Averager:
    def __init__(self):
        self.current_total = 0.0
        self.iterations = 0.0
        
    def send(self, value):
        self.current_total += value
        self.iterations += 1
    
    @property
    def value(self):
        if self.iterations == 0:
            return 0
        else:
            return 1.0 * self.current_total / self.iterations
    
    def reset(self):
        self.current_total = 0.0
        self.iterations = 0.0

In [105]:
def collate_fn(batch):
    """
    To handle the data loading as different images may have different number 
    of objects and to handle varying size tensors as well.
    """
    return tuple(zip(*batch))

In [106]:
# define the training tranforms
def get_train_transform():
    return A.Compose([
        A.Flip(0.5),
        A.RandomRotate90(0.5),
        A.MotionBlur(p=0.2),
        A.MedianBlur(blur_limit=3, p=0.1),
        A.Blur(blur_limit=3, p=0.1),
        ToTensorV2(p=1.0),
    ], bbox_params={
        'format': 'pascal_voc',
        'label_fields': ['labels']
    })

def get_train_transform_without_boxes():
    return A.Compose([
        A.Flip(0.5),
        A.RandomRotate90(0.5),
        A.MotionBlur(p=0.2),
        A.MedianBlur(blur_limit=3, p=0.1),
        A.Blur(blur_limit=3, p=0.1),
        ToTensorV2(p=1.0)
    ])

# define the validation transforms
def get_valid_transform():
    return A.Compose([
        ToTensorV2(p=1.0),
    ], bbox_params={
        'format': 'pascal_voc', 
        'label_fields': ['labels']
    })

In [107]:
def show_tranformed_image(train_loader):
    """
    This function shows the transformed images from the `train_loader`.
    Helps to check whether the tranformed images along with the corresponding
    labels are correct or not.
    Only runs if `VISUALIZE_TRANSFORMED_IMAGES = True` in config.py.
    """
    if len(train_loader) > 0:
        for i in range(1):
            images, targets = next(iter(train_loader))
            images = list(image.to(DEVICE) for image in images)
            targets = [{k: v.to(DEVICE) for k, v in t.items()} for t in targets]
            boxes = targets[i]['boxes'].cpu().numpy().astype(np.int32)
            sample = images[i].permute(1, 2, 0).cpu().numpy()
            for box in boxes:
                cv2.rectangle(sample,
                            (box[0], box[1]),
                            (box[2], box[3]),
                            (0, 0, 255), 2)
            cv2.imshow('Transformed image', sample)
            cv2.waitKey(0)
            cv2.destroyAllWindows()

In [108]:
import torch
import cv2
import numpy as np
import os
import glob as glob
from xml.etree import ElementTree as et
import json
# from config import CLASSES, RESIZE_TO, TRAIN_DIR, VALID_DIR, BATCH_SIZE

# from config import CLASSES, RESIZE_TO, BATCH_SIZE
from torch.utils.data import Dataset, DataLoader
# from utils import collate_fn, get_train_transform, get_valid_transform

In [109]:
# the dataset class
class MicrocontrollerDataset(Dataset):
    def __init__(self, dir_path, width, height, classes, transforms=None, transforms_without_boxes=None):
        self.transforms_without_boxes = transforms_without_boxes
        self.transforms = transforms
        self.dir_path = dir_path
        self.height = height
        self.width = width
        self.classes = classes
        
        # get all the image paths in sorted order    
        self.image_paths = glob.glob(f"{self.dir_path}/*.png")
        self.all_images = [image_path.split('/')[-1].split('.')[0] for image_path in self.image_paths]
        self.all_images = sorted(self.all_images)

        # FileNames Dictionary
        self.filenames = dict()
        for sub_dir, dir_name, files in os.walk("data/out_bbox/"):
            for file in files:
                try:
                    with open("data/out_bbox/" + file, 'r') as fp:
                        file = file.split('.')[0]
                        self.filenames[file] = json.load(fp)
                except Exception as e:
                    pass
                
    def check_bounding_boxes(self, box_coordinates):
        box_coords = []
        for coordinate in box_coordinates:
            if coordinate < 1:
                box_coords.append(1)
            elif coordinate > self.width:
                box_coords.append(self.width)
            else:
                box_coords.append(coordinate)
        if box_coords[0] == box_coords[2]:
            if box_coords[2] != self.width:
                box_coords[2] += 5
            else:
                box_coords[0] -= 5
        if box_coords[1] == box_coords[3]:
            if box_coords[3] != self.width:
                box_coords[3] += 5
            else:
                box_coords[1] -= 5
        return box_coords
                    
    def __getitem__(self, idx):
        # capture the image name and the full image path
        image_name = self.all_images[idx]
        image_path = os.path.join(self.dir_path, image_name + '.png')
        # read the image
        # print(image_path)
        # image = cv2.imread(image_path, cv2.COLOR_BGR2RGB)
        image = cv2.imread(image_path)
        # plt.imshow(image)
        # convert BGR to RGB color format
        image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB).astype(np.float32)
        image_resized = cv2.resize(image, (self.width, self.height))
        image_resized /= 255.0
        # plt.imshow(image_resized)
        
        # get the height and width of the image
        image_width = image.shape[1]
        image_height = image.shape[0]
        
        image_annotation_dict = self.filenames[image_name]
        
        boxes = []
        if image_annotation_dict["bboxes"]:
            for box_coordinates in self.filenames[image_name]["bboxes"]:
                if box_coordinates:
                    xmin = box_coordinates[0][0]
                    ymin = box_coordinates[0][1]
                    xmax = box_coordinates[1][0]
                    ymax = box_coordinates[1][1]
                    
                    xmin_final = (xmin/image_width)*self.width
                    xmax_final = (xmax/image_width)*self.width
                    ymin_final = (ymin/image_height)*self.height
                    ymax_final = (ymax/image_height)*self.height
                    box_coordinates_loc = [xmin_final, ymin_final, xmax_final, ymax_final]
                    boxes.append(self.check_bounding_boxes(box_coordinates_loc))
                    # boxes.append([xmin_final, ymin_final, xmax_final, ymax_final])
        labels_orig = image_annotation_dict["vehicle_class"]
        labels = []
        for label in labels_orig:
            labels.append(label + 1)

        num_objs = len(boxes)
        
        if not boxes:
            # print("Came Inside")
            boxes = torch.zeros((0,4), dtype=torch.float32)
            labels = torch.zeros(0, dtype=torch.int64)
            area = torch.zeros(0, dtype=torch.float32)
        else:
            # bounding box to tensor
            boxes = torch.as_tensor(boxes, dtype=torch.float32)
            # print(boxes)
            # labels to tensor
            labels = torch.as_tensor(labels, dtype=torch.int64)
            # area of the bounding boxes
            area = (boxes[:, 3] - boxes[:, 1]) * (boxes[:, 2] - boxes[:, 0])
        
        # no crowd instances
        iscrowd = torch.zeros((num_objs,), dtype=torch.int64)
        # prepare the final `target` dictionary
        target = {}
        target["boxes"] = boxes
        target["labels"] = labels
        target["area"] = area
        target["iscrowd"] = iscrowd
        image_id = torch.tensor([idx])
        target["image_id"] = image_id
        # apply the image transforms
        # print(labels)
        # print(boxes)
        if self.transforms:
            # if num_objs == 0:
            #     print("Before Transformation:", target['boxes'])
            if num_objs != 0:
                try:
                    sample = self.transforms(image = image_resized,
                                             bboxes = boxes,
                                             labels = labels)
                    target_boxes = []
                    for box in sample['bboxes']:
                        target_box = []
                        for box_coordinate in box:
                            if box_coordinate < 1:
                                target_box.append(0)
                            elif box_coordinate > self.width:
                                target_box.append(self.width)
                            else:
                                target_box.append(box_coordinate)
                        if target_box[0] == target_box[2]:
                            if target_box[2] != self.width:
                                target_box[2] += 5
                            else:
                                target_box[0] -= 5
                        if target_box[3] == target_box[1]:
                            if target_box[3] != self.height:
                                target_box[3] += 5
                            else:
                                target_box[1] -= 5
                        target_boxes.append(target_box)
                    # target['boxes'] = torch.Tensor(sample['bboxes'])
                    target['boxes'] = torch.Tensor(target_boxes)
                except:
                    sample = dict()
                    sample['image'] = torch.tensor(image_resized).permute(2,0,1)
                    print("Boxes Exception:", boxes)
            else:
                sample = self.transforms_without_boxes(image = image_resized)
                target['boxes'] = torch.zeros((0,4), dtype=torch.float32)
                target['labels'] = torch.zeros(0, dtype=torch.int64)
                target['area'] = torch.zeros(0, dtype=torch.float32)
            image_resized = sample['image']
        return image_resized, target

    def __len__(self):
        return len(self.all_images)

In [110]:
# prepare the final datasets and data loaders
train_dataset = MicrocontrollerDataset(TRAIN_DIR, RESIZE_TO, RESIZE_TO, CLASSES, get_train_transform(), get_train_transform_without_boxes())
valid_dataset = MicrocontrollerDataset(VALID_DIR, RESIZE_TO, RESIZE_TO, CLASSES, get_valid_transform(), get_train_transform_without_boxes())
train_loader = DataLoader(
    train_dataset,
    batch_size=BATCH_SIZE,
    shuffle=True,
    num_workers=0,
    collate_fn=collate_fn
)
valid_loader = DataLoader(
    valid_dataset,
    batch_size=BATCH_SIZE,
    shuffle=False,
    num_workers=0,
    collate_fn=collate_fn
)
print(f"Number of training samples: {len(train_dataset)}")
print(f"Number of validation samples: {len(valid_dataset)}\n")

Number of training samples: 2779
Number of validation samples: 2779



In [111]:
len(train_dataset)

2779

In [112]:
train_dataset

<__main__.MicrocontrollerDataset at 0x7f6172494a60>

In [113]:
len(train_dataset.all_images)

2779

In [114]:
# train_dataset[0]

In [115]:
# train_dataset[0][0].shape

In [116]:
# unsqueezed_image_tensor = train_dataset[0][0].unsqueeze(dim=0)

In [117]:
import torchvision
from torchvision.models.detection.faster_rcnn import FastRCNNPredictor
def create_model(num_classes):
    # load Faster RCNN pre-trained model
    model = torchvision.models.detection.fasterrcnn_resnet50_fpn(pretrained=True)
    # get the number of input features 
    in_features = model.roi_heads.box_predictor.cls_score.in_features
    # define a new head for the detector with required number of classes
    model.roi_heads.box_predictor = FastRCNNPredictor(in_features, num_classes) 
    return model

In [118]:
NUM_CLASSES

5

In [119]:
model = create_model(num_classes=NUM_CLASSES)

In [120]:
# model

In [121]:
new_model = deepcopy(model)

In [122]:
new_model._modules.keys()

odict_keys(['transform', 'backbone', 'rpn', 'roi_heads'])

In [123]:
new_model._modules.pop('roi_heads')

RoIHeads(
  (box_roi_pool): MultiScaleRoIAlign(featmap_names=['0', '1', '2', '3'], output_size=(7, 7), sampling_ratio=2)
  (box_head): TwoMLPHead(
    (fc6): Linear(in_features=12544, out_features=1024, bias=True)
    (fc7): Linear(in_features=1024, out_features=1024, bias=True)
  )
  (box_predictor): FastRCNNPredictor(
    (cls_score): Linear(in_features=1024, out_features=5, bias=True)
    (bbox_pred): Linear(in_features=1024, out_features=20, bias=True)
  )
)

In [124]:
new_model._modules.keys()

odict_keys(['transform', 'backbone', 'rpn'])

In [125]:
rpn_model = torch.nn.Sequential(new_model)

In [126]:
new_model.eval()
with torch.no_grad():
    image_output = rpn_model(unsqueezed_image_tensor)

NameError: name 'unsqueezed_image_tensor' is not defined

In [127]:
train_dataset[0][1]

{'boxes': tensor([[125.2959, 233.4059, 158.6772, 253.4091],
         [158.3572, 216.5084, 196.2177, 250.6612]]),
 'labels': tensor([2, 1]),
 'area': tensor([ 667.7323, 1293.0406]),
 'iscrowd': tensor([0, 0]),
 'image_id': tensor([0])}

In [128]:
model = create_model(num_classes=NUM_CLASSES)

In [129]:
class NewModel(torch.nn.Module):
    def __init__(self,output_layer = None):
        super().__init__()
        self.pretrained = model
        self.key_modules = self.pretrained._modules.keys()
        self.pretrained._modules.pop('roi_heads')
        self.net = torch.nn.Sequential(self.pretrained._modules)
        self.pretrained = None

    def forward(self,x):
        x = self.net(x)
        return x

In [130]:
new_model = NewModel()

In [131]:
new_model.eval()
with torch.no_grad():
    image_output = new_model(unsqueezed_image_tensor)

NameError: name 'unsqueezed_image_tensor' is not defined

In [132]:
# list(new_model.children())

In [133]:
unsqueezed_image_tensor.shape

NameError: name 'unsqueezed_image_tensor' is not defined

In [134]:
image_tensor_list = list(train_dataset[0][0].unsqueeze(dim=0))

In [135]:
model = create_model(num_classes=NUM_CLASSES)

In [136]:
new_model.eval()
model.eval()
with torch.no_grad():
    # image_output = new_model(image_tensor_list)
    image_output = model(image_tensor_list)

************************************************** Proposals: tensor([[[ -23.8775,   -6.3713,    2.0351,    5.8798],
         [ -15.9855,  -14.8977,    4.9691,    3.8622],
         [ -10.5821,  -18.7462,    4.1850,    5.5739],
         ...,
         [ 420.3168,  594.6057, 1286.6627,  842.9067],
         [ 546.4363,  484.3118, 1007.7500,  968.0508],
         [ 628.6169,  415.1051,  871.7748, 1128.2837]]]) **************************************************
Detections: [{'boxes': tensor([[384.1220, 262.7993, 438.7559, 394.5708],
        [310.3770, 140.4965, 502.7046, 253.2227],
        [294.2506, 174.1677, 304.2976, 187.5080],
        [160.3324, 191.3154, 393.0289, 243.8857],
        [182.0654, 146.5913, 492.6638, 289.1804],
        [342.2827, 260.0227, 461.3143, 404.3558],
        [363.0435, 167.9625, 477.9081, 403.9319],
        [275.8237, 168.7108, 376.0471, 254.8865],
        [199.5250, 208.6568, 432.4667, 256.5092],
        [255.8939, 221.9605, 442.8710, 270.3924],
        [296.6137,

  return _VF.meshgrid(tensors, **kwargs)  # type: ignore[attr-defined]


In [137]:
len(image_tensor_list)

1

In [138]:
image_tensor_list[0].shape

torch.Size([3, 512, 512])

In [139]:
train_dataset[0][0].shape

torch.Size([3, 512, 512])

In [140]:
# for c, n in new_model.named_parameters():
#     print("Layer Name:", c)

In [141]:
# for index, (c, n) in enumerate(model.named_parameters()):
#     print("Index:", index, "Layer Name:", c)

In [142]:
from torch import nn
from collections import OrderedDict
model = create_model(num_classes=NUM_CLASSES)
model.layer_name
class NewModel(nn.Module):
    def __init__(self, output_layers, *args):
        super().__init__(*args)
        self.output_layers = output_layers
        #print(self.output_layers)
        self.selected_out = OrderedDict()
        #PRETRAINED MODEL
        self.pretrained = model
        self.fhooks = []

        for i,l in enumerate(list(self.pretrained._modules.keys())):
            if i in self.output_layers:
                self.fhooks.append(getattr(self.pretrained,l).register_forward_hook(self.forward_hook(l)))
    
    def forward_hook(self,layer_name):
        def hook(module, input, output):
            self.selected_out[layer_name] = output
        return hook

    def forward(self, x):
        out = self.pretrained(x)
        return out, self.selected_out

AttributeError: 'FasterRCNN' object has no attribute 'layer_name'

In [None]:
new_model = NewModel(output_layers = [77,78]).to('cuda:0')

In [143]:
new_model.eval()
model.eval()
with torch.no_grad():
    # image_output = new_model(image_tensor_list)
    image_output = new_model(image_tensor_list)

TypeError: conv2d() received an invalid combination of arguments - got (tuple, Parameter, NoneType, tuple, tuple, tuple, int), but expected one of:
 * (Tensor input, Tensor weight, Tensor bias, tuple of ints stride, tuple of ints padding, tuple of ints dilation, int groups)
      didn't match because some of the arguments have invalid types: (!tuple!, !Parameter!, !NoneType!, !tuple!, !tuple!, !tuple!, int)
 * (Tensor input, Tensor weight, Tensor bias, tuple of ints stride, str padding, tuple of ints dilation, int groups)
      didn't match because some of the arguments have invalid types: (!tuple!, !Parameter!, !NoneType!, !tuple!, !tuple!, !tuple!, int)


In [144]:
# new_model

In [145]:
from torchvision.models.resnet import model_urls

In [146]:
model = create_model(num_classes=NUM_CLASSES)
class ModifiedFasterRCNN:
    def __init__(self,output_layer,*args):
        self.output_layer = output_layer
        super().__init__(*args)
        self.pretrained = model
        self._layers = []
        for l in list(self.pretrained._modules.keys()):
            self._layers.append(l)
            if l == output_layer:
                break
        self.layers = OrderedDict(zip(self._layers,[getattr(self.pretrained,l) for l in self._layers]))

    def _forward_impl(self, x):
        for l in self._layers:
            x = self.layers[l](x)
        return x

    def forward(self, x):
        return self._forward_impl(x)

In [147]:
model = create_model(num_classes=NUM_CLASSES)
class ModifiedFasterRCNN(FastRCNNPredictor):
    def __init__(self,output_layer,*args):
        self.output_layer = output_layer
        model = torchvision.models.detection.fasterrcnn_resnet50_fpn(pretrained=True)
        # get the number of input features 
        in_features = model.roi_heads.box_predictor.cls_score.in_features
        # define a new head for the detector with required number of classes
        # model.roi_heads.box_predictor = FastRCNNPredictor(in_features, num_classes) 
        model.roi_heads.box_predictor = super().__init__(in_features, *args)
        self = model
        self.output_layer = output_layer
        self._layers = []
        for l in list(self._modules.keys()):
            self._layers.append(l)
            if l == output_layer:
                break
        self.layers = OrderedDict(zip(self._layers,[getattr(self,l) for l in self._layers]))
        print(self.layers, self._layers)
    def _forward_impl(self, x):
        for l in self._layers:
            x = self.layers[l](x)

        return x

    def forward(self, x):
        return self._forward_impl(x)

In [148]:
model = create_model(num_classes=NUM_CLASSES)
class ModifiedFasterRCNN(FastRCNNPredictor):
    def __init__(self,output_layer,*args):
        model = torchvision.models.detection.fasterrcnn_resnet50_fpn(pretrained=True)
        # get the number of input features 
        in_features = model.roi_heads.box_predictor.cls_score.in_features
        # define a new head for the detector with required number of classes
        # model.roi_heads.box_predictor = FastRCNNPredictor(in_features, num_classes) 
        model.roi_heads.box_predictor = super().__init__(in_features, *args)
        self.model = model
        self.model.output_layer = output_layer
        self.model._layers = []
        for l in list(self.model._modules.keys()):
            self.model._layers.append(l)
            if l == output_layer:
                break
        self.model.layers = OrderedDict(zip(self.model._layers,[getattr(self.model,l) for l in self.model._layers]))
        print(self.model.layers, self.model._layers)
    def _forward_impl(self, x):
        for l in self.model._layers:
            x = self.model.layers[l](x)

        return x

    def forward(self, x):
        return self._forward_impl(x)

In [149]:
model = create_model(num_classes=NUM_CLASSES)
class ModifiedFasterRCNN(FastRCNNPredictor):
    def __init__(self,output_layer,*args):
        model = torchvision.models.detection.fasterrcnn_resnet50_fpn(pretrained=True)
        # get the number of input features 
        in_features = model.roi_heads.box_predictor.cls_score.in_features
        # define a new head for the detector with required number of classes
        # model.roi_heads.box_predictor = FastRCNNPredictor(in_features, num_classes) 
        model.roi_heads.box_predictor = super().__init__(in_features, *args)
        self.model = model
        self.model.output_layer = output_layer
        self._layers = []
        for l in list(self.model._modules.keys()):
            self._layers.append(l)
            if l == output_layer:
                break
        self.layers = OrderedDict(zip(self._layers,[getattr(self.model,l) for l in self._layers]))
        self.model._modules.pop('roi_heads')
        self._modules = self.model._modules
        print(self.layers, self._layers)
        
    def _forward_impl(self, x):
        for l in self._layers:
            x = self.layers[l](x)
        return x

    def forward(self, x):
        return self._forward_impl(x)

In [150]:
# model

In [151]:
# model._modules

In [152]:
new_model = ModifiedFasterRCNN('rpn', NUM_CLASSES)

OrderedDict([('transform', GeneralizedRCNNTransform(
    Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225])
    Resize(min_size=(800,), max_size=1333, mode='bilinear')
)), ('backbone', BackboneWithFPN(
  (body): IntermediateLayerGetter(
    (conv1): Conv2d(3, 64, kernel_size=(7, 7), stride=(2, 2), padding=(3, 3), bias=False)
    (bn1): FrozenBatchNorm2d(64, eps=0.0)
    (relu): ReLU(inplace=True)
    (maxpool): MaxPool2d(kernel_size=3, stride=2, padding=1, dilation=1, ceil_mode=False)
    (layer1): Sequential(
      (0): Bottleneck(
        (conv1): Conv2d(64, 64, kernel_size=(1, 1), stride=(1, 1), bias=False)
        (bn1): FrozenBatchNorm2d(64, eps=0.0)
        (conv2): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
        (bn2): FrozenBatchNorm2d(64, eps=0.0)
        (conv3): Conv2d(64, 256, kernel_size=(1, 1), stride=(1, 1), bias=False)
        (bn3): FrozenBatchNorm2d(256, eps=0.0)
        (relu): ReLU(inplace=True)
        (downsample

In [153]:
new_model._layers

['transform', 'backbone', 'rpn']

In [154]:
# new_model._modules

In [155]:
image_tensor_list = list(train_dataset[0][0].unsqueeze(dim=0).float())
image_tensors = train_dataset[0][0].unsqueeze(dim=0).float()

In [156]:
# new_model(image_tensor_list)
new_model(image_tensors)

TypeError: conv2d() received an invalid combination of arguments - got (tuple, Parameter, NoneType, tuple, tuple, tuple, int), but expected one of:
 * (Tensor input, Tensor weight, Tensor bias, tuple of ints stride, tuple of ints padding, tuple of ints dilation, int groups)
      didn't match because some of the arguments have invalid types: (!tuple!, !Parameter!, !NoneType!, !tuple!, !tuple!, !tuple!, int)
 * (Tensor input, Tensor weight, Tensor bias, tuple of ints stride, str padding, tuple of ints dilation, int groups)
      didn't match because some of the arguments have invalid types: (!tuple!, !Parameter!, !NoneType!, !tuple!, !tuple!, !tuple!, int)


In [157]:
image_tensor_list

[tensor([[[0.5604, 0.5908, 0.5165,  ..., 0.6383, 0.6124, 0.6366],
          [0.5950, 0.6133, 0.5478,  ..., 0.6471, 0.6531, 0.6336],
          [0.5605, 0.5842, 0.5786,  ..., 0.6388, 0.6461, 0.6392],
          ...,
          [0.5111, 0.9812, 0.8103,  ..., 0.8676, 0.8667, 0.8638],
          [0.4908, 0.9786, 0.8091,  ..., 0.8688, 0.8671, 0.8668],
          [0.4893, 0.9779, 0.8030,  ..., 0.8705, 0.8697, 0.8693]],
 
         [[0.4947, 0.5031, 0.4424,  ..., 0.5372, 0.5121, 0.5410],
          [0.5157, 0.5327, 0.4663,  ..., 0.5436, 0.5528, 0.5386],
          [0.4800, 0.4959, 0.4898,  ..., 0.5544, 0.5534, 0.5348],
          ...,
          [0.3196, 0.7976, 0.6864,  ..., 0.8157, 0.8154, 0.8140],
          [0.3017, 0.7944, 0.6847,  ..., 0.8174, 0.8157, 0.8157],
          [0.3024, 0.7966, 0.6819,  ..., 0.8159, 0.8187, 0.8157]],
 
         [[0.4211, 0.4494, 0.3956,  ..., 0.4355, 0.4170, 0.4447],
          [0.4629, 0.4979, 0.4223,  ..., 0.4494, 0.4491, 0.4505],
          [0.4201, 0.4419, 0.4461,  ...,

In [158]:
model = create_model(num_classes=NUM_CLASSES)

In [159]:
device = torch.device('cuda') if torch.cuda.is_available() else torch.device('cpu')
model.load_state_dict(torch.load(
    'outputs/model10.pth', map_location=device
))

<All keys matched successfully>

In [160]:
model._modules

OrderedDict([('transform',
              GeneralizedRCNNTransform(
                  Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225])
                  Resize(min_size=(800,), max_size=1333, mode='bilinear')
              )),
             ('backbone',
              BackboneWithFPN(
                (body): IntermediateLayerGetter(
                  (conv1): Conv2d(3, 64, kernel_size=(7, 7), stride=(2, 2), padding=(3, 3), bias=False)
                  (bn1): FrozenBatchNorm2d(64, eps=0.0)
                  (relu): ReLU(inplace=True)
                  (maxpool): MaxPool2d(kernel_size=3, stride=2, padding=1, dilation=1, ceil_mode=False)
                  (layer1): Sequential(
                    (0): Bottleneck(
                      (conv1): Conv2d(64, 64, kernel_size=(1, 1), stride=(1, 1), bias=False)
                      (bn1): FrozenBatchNorm2d(64, eps=0.0)
                      (conv2): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
    

In [161]:
model.eval()
activation = {}
def get_activation(name):
    def hook(model, input, output):
        activation[name] = output.detach()
    return hook

# model.fc0.conv2.register_forward_hook(get_activation('fc0.conv2'))
# model.fc1.conv2.register_forward_hook(get_activation('fc1.conv2'))

model.rpn.head.cls_logits.register_forward_hook(get_activation('rpn.head.cls_logits'))
model.rpn.head.bbox_pred.register_forward_hook(get_activation('rpn.head.bbox_pred'))
model.roi_heads.box_roi_pool.register_forward_hook(get_activation('roi_heads.box_roi_pool'))
model.rpn.head.conv.register_forward_hook(get_activation('rpn.head.conv'))
model.backbone.body.conv1.register_forward_hook(get_activation('backbone.body.conv1'))
# model.transform.register_forward_hook(get_activation('transform'))

output = model(image_tensor_list)
print(activation['rpn.head.cls_logits'].shape)
print(activation['rpn.head.bbox_pred'].shape)
print(activation['roi_heads.box_roi_pool'].shape)
print(activation['rpn.head.conv'].shape)
print(activation['backbone.body.conv1'].shape)
# print(activation['transform'])

************************************************** Proposals: tensor([[[ -23.1437,   -6.2865,   10.3502,    7.2821],
         [ -16.2483,  -14.2198,    9.0961,    7.9199],
         [  -7.8263,  -21.6016,    5.0803,    7.6745],
         ...,
         [ 373.1503,  605.9153, 1253.1857,  863.8683],
         [ 521.6294,  479.1609,  981.7117,  971.8431],
         [ 628.8538,  416.1788,  873.7280, 1175.4242]]]) **************************************************
Detections: [{'boxes': tensor([[159.3287, 216.6688, 196.2327, 250.1567],
        [126.5542, 237.7568, 160.0732, 255.2524],
        [125.4650, 237.3560, 165.5432, 255.5345],
        [138.9002, 239.8211, 159.5252, 253.0626],
        [138.5529, 239.8377, 159.7203, 253.2575],
        [ 83.1316, 234.5290, 104.5298, 246.9166],
        [124.4000, 238.8559, 150.4163, 253.5836],
        [209.8943, 241.9961, 213.5949, 249.8562],
        [154.6138, 242.4431, 167.1671, 252.1942],
        [146.6598, 241.3135, 161.5026, 252.4121],
        [146.6101,

In [162]:
output

[{'boxes': tensor([[159.3287, 216.6688, 196.2327, 250.1567],
          [126.5542, 237.7568, 160.0732, 255.2524],
          [125.4650, 237.3560, 165.5432, 255.5345],
          [138.9002, 239.8211, 159.5252, 253.0626],
          [138.5529, 239.8377, 159.7203, 253.2575],
          [ 83.1316, 234.5290, 104.5298, 246.9166],
          [124.4000, 238.8559, 150.4163, 253.5836],
          [209.8943, 241.9961, 213.5949, 249.8562],
          [154.6138, 242.4431, 167.1671, 252.1942],
          [146.6598, 241.3135, 161.5026, 252.4121],
          [146.6101, 241.4081, 161.2811, 252.4326]], grad_fn=<StackBackward0>),
  'labels': tensor([1, 2, 1, 2, 1, 1, 1, 4, 1, 1, 2]),
  'scores': tensor([0.9962, 0.5975, 0.4174, 0.1774, 0.1714, 0.1523, 0.0825, 0.0795, 0.0614,
          0.0551, 0.0524], grad_fn=<IndexBackward0>)}]

In [163]:
box_preds_rpn = activation['rpn.head.bbox_pred'].squeeze(dim=0)
print("New Shape:", box_preds_rpn.shape)
for box_pred in box_preds_rpn:
    print("--------")
    print(box_pred[0:4])
    print(box_pred[4:8])
    print(box_pred[8:12])
    print("--------")

New Shape: torch.Size([12, 13, 13])
--------
tensor([[-1.4897e-01, -6.6571e-02,  4.4681e-02,  6.6937e-02,  5.7879e-02,
         -7.0222e-02, -3.8880e-02, -7.0869e-03, -1.1052e-02, -1.1668e-01,
         -1.0228e-01,  1.3930e-02,  1.5279e-01],
        [-2.1410e-01, -8.3659e-02,  1.1434e-01,  1.0026e-01,  1.0355e-01,
         -8.1492e-02, -5.0290e-02, -1.7003e-02, -3.4470e-03, -1.7020e-01,
         -1.2518e-01,  5.9340e-02,  2.3220e-01],
        [-1.8532e-01, -1.3795e-02,  1.0509e-01,  5.2513e-02,  2.9229e-02,
         -6.7880e-02, -5.9446e-02, -5.0074e-02, -2.3555e-02, -1.2508e-01,
         -9.1958e-02, -2.6917e-02,  1.9452e-01],
        [-1.7924e-01, -1.7145e-02,  1.0906e-01,  4.4855e-02,  2.3853e-03,
         -6.4233e-02, -5.3513e-02, -2.1893e-02, -1.6783e-04, -1.1155e-01,
         -1.0695e-01, -5.0230e-02,  1.6874e-01]])
tensor([[-0.1548, -0.0092,  0.0938,  0.0156, -0.0511, -0.0817, -0.0887, -0.0189,
         -0.0068, -0.0879, -0.1030, -0.0280,  0.1678],
        [-0.1397,  0.0223,  0.

In [164]:
import torch
a = torch.rand((1,13))
# rel_codes[:, 0::4]

In [165]:
a

tensor([[8.0153e-01, 5.7518e-01, 1.1439e-01, 3.7579e-01, 5.3011e-02, 5.4603e-01,
         6.2854e-01, 6.6203e-04, 6.8370e-02, 5.9962e-02, 5.2412e-01, 7.0817e-01,
         3.9294e-01]])

In [166]:
a[:, 0::4]

tensor([[0.8015, 0.0530, 0.0684, 0.3929]])

In [None]:
a[:, 1::4]

In [None]:
a[:, 2::4]

In [None]:
a[:, 3::4]