In [1]:
import torch
import torchvision
import matplotlib.pyplot as plt
from torchvision.models.detection.anchor_utils import AnchorGenerator

In [2]:
backbone = torchvision.models.efficientnet_v2_s(weights=torchvision.models.EfficientNet_V2_S_Weights.DEFAULT)

In [3]:
backbone = backbone.features
backbone.out_channels = 1280

In [4]:
backbone

Sequential(
  (0): Conv2dNormActivation(
    (0): Conv2d(3, 24, kernel_size=(3, 3), stride=(2, 2), padding=(1, 1), bias=False)
    (1): BatchNorm2d(24, eps=0.001, momentum=0.1, affine=True, track_running_stats=True)
    (2): SiLU(inplace=True)
  )
  (1): Sequential(
    (0): FusedMBConv(
      (block): Sequential(
        (0): Conv2dNormActivation(
          (0): Conv2d(24, 24, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
          (1): BatchNorm2d(24, eps=0.001, momentum=0.1, affine=True, track_running_stats=True)
          (2): SiLU(inplace=True)
        )
      )
      (stochastic_depth): StochasticDepth(p=0.0, mode=row)
    )
    (1): FusedMBConv(
      (block): Sequential(
        (0): Conv2dNormActivation(
          (0): Conv2d(24, 24, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
          (1): BatchNorm2d(24, eps=0.001, momentum=0.1, affine=True, track_running_stats=True)
          (2): SiLU(inplace=True)
        )
      )
      (stochastic_de

In [5]:
anchor_generator = AnchorGenerator(sizes=((64,128,256,512),),aspect_ratios=((0.5,1,2),))

In [6]:
backbone.out_channels = 1280

In [7]:
model = torchvision.models.detection.FasterRCNN(backbone=backbone,num_classes=20,rpn_anchor_generator=anchor_generator).cuda()
try:
    model.load_state_dict(torch.load('C:\\Users\\get2b\\Desktop\\Arav\\AI ML\\Neural Networks\\Pytorch implementation\\All Models Saved\\FasterRCNN-efficientNet.pth'))
except:
    print("No currently saved Models available")

In [8]:
dataset = torchvision.datasets.VOCDetection(root=r'C:\Users\get2b\Desktop\Arav\AI ML\DATA SETS\Neural Network Datasets\Object Detection',year="2007",image_set='trainval')

In [9]:
dataset[0]

(<PIL.Image.Image image mode=RGB size=500x375>,
 {'annotation': {'folder': 'VOC2007',
   'filename': '000005.jpg',
   'source': {'database': 'The VOC2007 Database',
    'annotation': 'PASCAL VOC2007',
    'image': 'flickr',
    'flickrid': '325991873'},
   'owner': {'flickrid': 'archintent louisville', 'name': '?'},
   'size': {'width': '500', 'height': '375', 'depth': '3'},
   'segmented': '0',
   'object': [{'name': 'chair',
     'pose': 'Rear',
     'truncated': '0',
     'difficult': '0',
     'bndbox': {'xmin': '263', 'ymin': '211', 'xmax': '324', 'ymax': '339'}},
    {'name': 'chair',
     'pose': 'Unspecified',
     'truncated': '0',
     'difficult': '0',
     'bndbox': {'xmin': '165', 'ymin': '264', 'xmax': '253', 'ymax': '372'}},
    {'name': 'chair',
     'pose': 'Unspecified',
     'truncated': '1',
     'difficult': '1',
     'bndbox': {'xmin': '5', 'ymin': '244', 'xmax': '67', 'ymax': '374'}},
    {'name': 'chair',
     'pose': 'Unspecified',
     'truncated': '0',
     '

In [10]:
trainset = []
valset = []
for i in range(0,4008):
    trainset.append(dataset[i])
for i in range(4008,len(dataset)):
    valset.append(dataset[i])

In [11]:
optimizer = torch.optim.SGD(params=model.parameters(),lr = 1e-3,momentum=0.9)
lr_scheduler = torch.optim.lr_scheduler.CyclicLR(optimizer=optimizer,base_lr=1e-3,max_lr=1e-2,
                                                 step_size_down=5,step_size_up=5,
                                                 mode='triangular2',cycle_momentum=False)

In [12]:
trainset[1]

(<PIL.Image.Image image mode=RGB size=500x333>,
 {'annotation': {'folder': 'VOC2007',
   'filename': '000007.jpg',
   'source': {'database': 'The VOC2007 Database',
    'annotation': 'PASCAL VOC2007',
    'image': 'flickr',
    'flickrid': '194179466'},
   'owner': {'flickrid': 'monsieurrompu', 'name': 'Thom Zemanek'},
   'size': {'width': '500', 'height': '333', 'depth': '3'},
   'segmented': '0',
   'object': [{'name': 'car',
     'pose': 'Unspecified',
     'truncated': '1',
     'difficult': '0',
     'bndbox': {'xmin': '141', 'ymin': '50', 'xmax': '500', 'ymax': '330'}}]}})

In [13]:
def numerical_labelling(x):
    if x=='aeroplane':
        index = 0
    elif x=='bicycle':
        index = 1
    elif x=="bird":
        index = 2
    elif x=="boat":
        index = 3
    elif x=="bottle":
        index = 4
    elif x=="bus":
        index = 5
    elif x=="car":
        index = 6
    elif x=="cat":
        index = 7
    elif x=="chair":
        index = 8
    elif x=="cow":
        index = 9
    elif x=="diningtable":
        index = 10
    elif x=="dog":
        index = 11
    elif x=="horse":
        index = 12
    elif x=='motorbike':
        index = 13
    elif x=="person":
        index = 14
    elif x=="pottedplant":
        index = 15
    elif x=="sheep":
        index = 16
    elif x=="sofa":
        index = 17
    elif x=="train":
        index = 18
    else:
        index = 19
    return index

In [14]:
trainset=[]
trainset_size=[]
valset=[]
valset_size=[]
target_train=[]
target_val=[]
for i in range(0,4008):
    trainset.append(torchvision.transforms.ToTensor()(dataset[i][0]))
    t_train=[]
    for j in dataset[i][1]['annotation']['object']:
        xmin = int(j['bndbox']['xmin'])
        xmax = int(j['bndbox']['xmax'])
        ymin = int(j['bndbox']['ymin'])
        ymax = int(j['bndbox']['ymax'])
        xc = (xmax-xmin)/2
        yc = (ymax-ymin)/2
        w = (xmax+xmin)/2
        h = (ymax+ymin)/2
        c = j['name']
        t_train.append({"boxes":torch.tensor([xc,yc,w,h]).unsqueeze(0),"labels":torch.tensor([numerical_labelling(c)],dtype=torch.int64)})
    target_train.append(t_train)
    trainset_size.append(dataset[i][0].size)

for i in range(4008,len(dataset)):
    valset.append(torchvision.transforms.ToTensor()(dataset[i][0]))
    t_val = []
    for j in dataset[i][1]['annotation']['object']:
        xmin = int(j['bndbox']['xmin'])
        xmax = int(j['bndbox']['xmax'])
        ymin = int(j['bndbox']['ymin'])
        ymax = int(j['bndbox']['ymax'])
        xc = (xmax-xmin)/2
        yc = (ymax-ymin)/2
        w = (xmax+xmin)/2
        h = (ymax+ymin)/2
        c = j['name']
        t_val.append({"boxes":torch.tensor([xc,yc,w,h]).unsqueeze(0),"labels":torch.tensor([numerical_labelling(c)],dtype=torch.int64)})
    target_val.append(t_val)
    valset_size.append(dataset[i][0].size)

In [15]:
i=0
for target in target_train[1]:
   print(target['boxes'])
   i+=1
   if i==5 : break

tensor([[179.5000, 140.0000, 320.5000, 190.0000]])


In [16]:
for target in target_train[0]:
    print(target)

{'boxes': tensor([[ 30.5000,  64.0000, 293.5000, 275.0000]]), 'labels': tensor([8])}
{'boxes': tensor([[ 44.,  54., 209., 318.]]), 'labels': tensor([8])}
{'boxes': tensor([[ 31.,  65.,  36., 309.]]), 'labels': tensor([8])}
{'boxes': tensor([[ 27.0000,  52.5000, 268.0000, 246.5000]]), 'labels': tensor([8])}
{'boxes': tensor([[ 17.5000,  17.0000, 294.5000, 203.0000]]), 'labels': tensor([8])}


In [17]:
import torchvision.models.detection._utils as det_utils
import torch.nn.functional as F
import torch.nn as nn
# Helper functions from fastai
def reduce_loss(loss, reduction='mean'):
    return loss.mean() if reduction=='mean' else loss.sum() if reduction=='sum' else loss


# Implementation from fastai https://github.com/fastai/fastai2/blob/master/fastai2/layers.py#L338
class LabelSmoothingCrossEntropy(nn.Module):
    def __init__(self, ε:float=0.1, reduction='mean'):
        super().__init__()
        self.ε,self.reduction = ε,reduction
    
    def forward(self, output, target):
        # number of classes
        c = output.size()[-1]
        log_preds = F.log_softmax(output, dim=-1)
        loss = reduce_loss(-log_preds.sum(dim=-1), self.reduction)
        nll = F.nll_loss(log_preds, target, reduction=self.reduction)
        # (1-ε)* H(q,p) + ε*H(u,p)
        return (1-self.ε)*nll + self.ε*(loss/c) 
custom_loss = LabelSmoothingCrossEntropy()
#torchvision.models.detection.roi_heads.fastrcnn_loss??
def custom_fastrcnn_loss(class_logits, box_regression, labels, regression_targets):
    # type: (Tensor, Tensor, List[Tensor], List[Tensor]) -> Tuple[Tensor, Tensor]
    """
    Computes the loss for Faster R-CNN.

    Arguments:
        class_logits (Tensor)
        box_regression (Tensor)
        labels (list[BoxList])
        regression_targets (Tensor)

    Returns:
        classification_loss (Tensor)
        box_loss (Tensor)
    """
    
    labels = torch.cat(labels, dim=0)
    regression_targets = torch.cat(regression_targets, dim=0)

    classification_loss = custom_loss(class_logits, labels) #ADDING THE CUSTOM LOSS HERE

    # get indices that correspond to the regression targets for
    # the corresponding ground truth labels, to be used with
    # advanced indexing
    sampled_pos_inds_subset = torch.where(labels > 0)[0]
    labels_pos = labels[sampled_pos_inds_subset]
    N, num_classes = class_logits.shape
    box_regression = box_regression.reshape(N, -1, 4)

    box_loss = det_utils.smooth_l1_loss(
        box_regression[sampled_pos_inds_subset, labels_pos],
        regression_targets[sampled_pos_inds_subset],
        beta=1 / 9,
        size_average=False,
    )
    box_loss = box_loss / labels.numel()

    return classification_loss, box_loss

In [18]:
b = torch.rand((5,4))
b

tensor([[0.2948, 0.7079, 0.6214, 0.2908],
        [0.7138, 0.9318, 0.0500, 0.0660],
        [0.6969, 0.3922, 0.6350, 0.6437],
        [0.3202, 0.0540, 0.7220, 0.9884],
        [0.5455, 0.4729, 0.1668, 0.6189]])

In [19]:
for images, target in zip(trainset,target_train):
    print(images)
    print(target)
    break

tensor([[[0.0392, 0.0510, 0.0392,  ..., 0.6392, 0.6353, 0.6314],
         [0.0118, 0.0275, 0.0431,  ..., 0.6392, 0.6392, 0.6392],
         [0.1765, 0.0941, 0.0588,  ..., 0.6471, 0.6471, 0.6471],
         ...,
         [0.1216, 0.1412, 0.1137,  ..., 0.2275, 0.2392, 0.2275],
         [0.1412, 0.1333, 0.1490,  ..., 0.2588, 0.2745, 0.2745],
         [0.1922, 0.2353, 0.2863,  ..., 0.2510, 0.2667, 0.2706]],

        [[0.0392, 0.0510, 0.0392,  ..., 0.7373, 0.7333, 0.7294],
         [0.0157, 0.0353, 0.0431,  ..., 0.7373, 0.7373, 0.7373],
         [0.1922, 0.1098, 0.0627,  ..., 0.7451, 0.7451, 0.7451],
         ...,
         [0.0471, 0.0667, 0.0392,  ..., 0.2824, 0.2824, 0.2784],
         [0.0353, 0.0275, 0.0314,  ..., 0.3137, 0.3255, 0.3255],
         [0.0667, 0.0863, 0.0980,  ..., 0.3098, 0.3255, 0.3294]],

        [[0.0392, 0.0510, 0.0392,  ..., 0.7647, 0.7608, 0.7569],
         [0.0235, 0.0314, 0.0510,  ..., 0.7647, 0.7647, 0.7647],
         [0.2039, 0.1137, 0.0784,  ..., 0.7725, 0.7725, 0.

In [24]:
def test():
    model.eval()
    total = 0.0
    accuracy = 0.0
    with torch.no_grad():
        
        for images ,target in zip(valset,target_val):
            
            inputs, labels = inputs.cuda(),labels.cuda()
            outputs = model(inputs)
            _, predicted = torch.max(outputs.data, 1)
            total+=labels.size(0)
            accuracy+=(predicted == labels).sum().item()
    
    print("{}/{}".format(accuracy,total))
    accuracy = float(100 * accuracy / total)
    
    return(accuracy)



def train(epoch):
    for ep in range(1,epoch+1):
        print("Epoch : ",ep)
        running_loss_classifier=0
        running_loss_box_reg = 0
        running_loss_objectness = 0
        running_loss_rpn_box_reg = 0
        j=int(0)
        for images, target in zip(trainset,target_train):
            j+=1
            images = images.cuda()
            for i in target:
                i['boxes'] = i['boxes'].cuda()
                i['labels'] = i['labels'].cuda()
            optimizer.zero_grad(set_to_none=False)
            outputs = model(images.unsqueeze(0),target)
            outputs['loss_classifier'].backward(retain_graph=True)
            outputs['loss_box_reg'].backward(retain_graph=True)
            outputs['loss_objectness'].backward(retain_graph=True)
            outputs['loss_rpn_box_reg'].backward()
            optimizer.step()
            running_loss_classifier+=outputs['loss_classifier'].item()
            running_loss_box_reg += outputs['loss_box_reg'].item()
            running_loss_objectness += outputs['loss_objectness'].item()
            running_loss_rpn_box_reg += outputs['loss_rpn_box_reg'].item()
            if (j+1)%100 == 0 :
                print("[{}/{}]".format(j+1,len(trainset)))
                print("Loss classifier of the training model is : %.4f"%(running_loss_classifier/1000))
                print("Loss box_reg of the training model is : %.4f"%(running_loss_box_reg/1000))
                print("Loss objectness of the training model is : %.4f"%(running_loss_objectness/1000))
                print("Loss rpn_box_reg of the training model is : %.4f"%(running_loss_rpn_box_reg/1000))
                running_loss_classifier,running_loss_box_reg,running_loss_objectness,running_loss_rpn_box_reg=0,0,0,0
        lr_scheduler.step()
        clear_output(wait=True)

In [25]:
train(25)

Epoch :  1
[100/4008]
Loss classifier of the training model is : 0.0625
Loss box_reg of the training model is : 0.0013
Loss objectness of the training model is : 0.0333
Loss rpn_box_reg of the training model is : 0.0052
[200/4008]
Loss classifier of the training model is : 0.0084
Loss box_reg of the training model is : 0.0016
Loss objectness of the training model is : 0.0213
Loss rpn_box_reg of the training model is : 0.0065
[300/4008]
Loss classifier of the training model is : 0.0078
Loss box_reg of the training model is : 0.0014
Loss objectness of the training model is : 0.0169
Loss rpn_box_reg of the training model is : 0.0048
[400/4008]
Loss classifier of the training model is : 0.0073
Loss box_reg of the training model is : 0.0019
Loss objectness of the training model is : 0.0146
Loss rpn_box_reg of the training model is : 0.0033
[500/4008]
Loss classifier of the training model is : 0.0070
Loss box_reg of the training model is : 0.0019
Loss objectness of the training model is : 0.

: 

: 

In [None]:
torch.save(model.state_dict(), 'C:\\Users\\get2b\\Desktop\\Arav\\AI ML\\Neural Networks\\Pytorch implementation\\All Models Saved\\FasterRCNN-efficientNet.pth')