In [None]:
import torch
import torchvision
import torch.nn as nn
from torchsummary import summary
import warnings
from torch.nn.modules.utils import _pair
import numpy as np

In [None]:
class conv(nn.Module):
    """3x3 convolution followed by batch normalisation and SiLU.

    Args:
        c: Number of input channels.
        s: Stride of the convolution. ``1`` keeps the channel count at ``c``;
            ``2`` doubles it to ``2*c``.

    Raises:
        ValueError: If ``s`` is neither 1 nor 2.
    """

    def __init__(self, c, s):

        super().__init__()

        # Reject unsupported strides up front. Before, ``self.seq`` was simply
        # never created for them and the mistake only surfaced as an
        # AttributeError on the first forward pass.
        if s not in (1, 2):
            raise ValueError(f"conv only supports stride 1 or 2, got {s!r}")

        out_channels = c if s == 1 else 2 * c
        self.seq = nn.Sequential(
            nn.Conv2d(in_channels=c, out_channels=out_channels, stride=s, padding=1, kernel_size=3),
            nn.BatchNorm2d(num_features=out_channels),
            nn.SiLU(),
        )

    def forward(self, x):
        """Apply convolution -> batch norm -> SiLU to ``x`` and return the result."""
        return self.seq(x)

In [None]:
class MPconv(nn.Module):
    """Two-branch stride-2 block that maps ``c`` channels to ``2*c`` channels.

    Branch A: 2x2 max-pool (stride 1) followed by a 1x1 stride-2 convolution.
    Branch B: 1x1 convolution followed by the 3x3 stride-2 ``conv`` block.
    Both branches emit ``2*c`` channels; they are concatenated (``4*c``) and
    reduced back to ``2*c`` by a final 1x1 convolution.

    Note: the two branches only produce the same spatial size when the input
    height and width are even.

    Args:
        c: Number of input channels.
    """

    def __init__(self,c):

        super().__init__()

        doubled = 2 * c

        self.maxpool = nn.MaxPool2d(kernel_size=2, stride=1)
        self.conv1 = nn.Sequential(
            nn.Conv2d(in_channels=c, out_channels=doubled, stride=2, kernel_size=1),
            nn.BatchNorm2d(num_features=doubled),
            nn.SiLU(),
        )
        self.conv2 = conv(c, 2)
        self.conv3 = nn.Sequential(
            nn.Conv2d(in_channels=c, out_channels=c, stride=1, kernel_size=1),
            nn.BatchNorm2d(num_features=c),
            nn.SiLU(),
        )
        self.conv4 = nn.Sequential(
            nn.Conv2d(in_channels=2 * doubled, out_channels=doubled, stride=1, kernel_size=1),
            nn.BatchNorm2d(num_features=doubled),
            nn.SiLU(),
        )

    def forward(self,x):
        """Run both branches on ``x``, concatenate along channels and fuse."""
        pooled_branch = self.conv1(self.maxpool(x))
        conv_branch = self.conv2(self.conv3(x))

        merged = torch.cat((pooled_branch, conv_branch), dim=1)

        return self.conv4(merged)

In [None]:
class REPconv(nn.Module):
    """Three parallel branches over the same input, summed element-wise.

    Branches:
        * ``conv1`` - 1x1 convolution + batch norm + SiLU
        * ``conv2`` - 3x3 ``conv`` block (stride 1)
        * ``bn``    - batch norm applied directly to the input

    The branches are never fused into a single convolution, so the sum of all
    three is returned in both training and evaluation mode. Previously
    evaluation mode returned only the 1x1 branch, which made inference
    outputs disagree with what the block computes during training.

    Args:
        c: Number of input (and output) channels.
    """

    def __init__(self,c):

        super().__init__()

        self.conv1 = nn.Sequential(nn.Conv2d(in_channels=c,out_channels=c,stride=1,kernel_size=1),nn.BatchNorm2d(num_features=c),nn.SiLU())
        self.conv2 = conv(c,1)
        self.bn = nn.BatchNorm2d(c)

    def forward(self,x):
        """Return ``conv1(x) + conv2(x) + bn(x)``."""
        x1 = self.conv1(x)
        x2 = self.conv2(x)
        x3 = self.bn(x)

        # Same summation order as the original training path: (x1 + x2) + x3.
        return torch.add(torch.add(x1, x2), x3)

In [None]:
class ELAN1(nn.Module):
    """Aggregation block that concatenates three feature maps of ``c`` channels.

    Two 1x1 convolutions read the input. The first feeds a chain of four 3x3
    ``conv`` blocks; the output of the second 1x1 convolution, the chain
    output after two blocks and the chain output after four blocks are
    concatenated (``3*c`` channels) and reduced to ``c`` channels by a final
    1x1 convolution.

    Args:
        c: Number of input (and output) channels.
    """

    def __init__(self,c):

        super().__init__()

        def pointwise(in_channels):
            # 1x1 convolution + batch norm + SiLU producing ``c`` channels.
            return nn.Sequential(
                nn.Conv2d(in_channels=in_channels, out_channels=c, stride=1, kernel_size=1),
                nn.BatchNorm2d(num_features=c),
                nn.SiLU(),
            )

        self.convELEN1a = pointwise(c)
        self.convELEN1b = pointwise(c)
        self.convELEN1c = conv(c, 1)
        self.convELEN1d = conv(c, 1)
        self.convELEN1e = conv(c, 1)
        self.convELEN1f = conv(c, 1)
        self.convELEN1g = pointwise(3 * c)

    def forward(self,x):
        """Return the fused ``c``-channel feature map for ``x``."""
        chain_input = self.convELEN1a(x)
        shortcut = self.convELEN1b(x)

        after_two = self.convELEN1d(self.convELEN1c(chain_input))
        after_four = self.convELEN1f(self.convELEN1e(after_two))

        stacked = torch.cat((shortcut, after_two, after_four), dim=1)

        return self.convELEN1g(stacked)

In [None]:
class ELAN2(nn.Module):
    """Aggregation block that concatenates five feature maps of ``c`` channels.

    Two 1x1 convolutions read the input. The first feeds a chain of four 3x3
    ``conv`` blocks; the output of the second 1x1 convolution and the output
    of every block in the chain are concatenated (``5*c`` channels) and
    reduced to ``c`` channels by a final 1x1 convolution.

    Args:
        c: Number of input (and output) channels.
    """

    def __init__(self,c):

        super().__init__()

        def pointwise(in_channels):
            # 1x1 convolution + batch norm + SiLU producing ``c`` channels.
            return nn.Sequential(
                nn.Conv2d(in_channels=in_channels, out_channels=c, stride=1, kernel_size=1),
                nn.BatchNorm2d(num_features=c),
                nn.SiLU(),
            )

        self.convELEN1a = pointwise(c)
        self.convELEN1b = pointwise(c)
        self.convELEN1c = conv(c, 1)
        self.convELEN1d = conv(c, 1)
        self.convELEN1e = conv(c, 1)
        self.convELEN1f = conv(c, 1)
        self.convELEN1g = pointwise(5 * c)

    def forward(self,x):
        """Return the fused ``c``-channel feature map for ``x``."""
        current = self.convELEN1a(x)
        collected = [self.convELEN1b(x)]

        # Walk the 3x3 chain, keeping every intermediate output.
        for stage in (self.convELEN1c, self.convELEN1d, self.convELEN1e, self.convELEN1f):
            current = stage(current)
            collected.append(current)

        stacked = torch.cat(collected, dim=1)

        return self.convELEN1g(stacked)

In [None]:
class SPPCSPC(nn.Module):
    """Spatial-pyramid-pooling block with a parallel 1x1 shortcut branch.

    Main branch: 1x1 conv -> 3x3 ``conv`` -> 1x1 conv, then three max-pools
    (kernel 5, 9 and 13) whose outputs are resized back to the pre-pool
    resolution, concatenated with the un-pooled map (``4*c`` channels),
    reduced to ``c`` by a 1x1 conv and passed through a 3x3 ``conv``.
    Shortcut branch: a single 1x1 conv on the input.
    The two branches are concatenated (``2*c``) and reduced to ``c`` channels.

    Args:
        c: Number of input (and output) channels.
    """

    def __init__(self,c):

        super().__init__()

        self.conv1a = nn.Sequential(nn.Conv2d(in_channels=c,out_channels=c,stride=1,kernel_size=1),nn.BatchNorm2d(num_features=c),nn.SiLU())
        self.conv2 = nn.Sequential(nn.Conv2d(in_channels=c,out_channels=c,stride=1,kernel_size=1),nn.BatchNorm2d(num_features=c),nn.SiLU())
        self.conv1b = conv(c,1)
        self.conv1c = nn.Sequential(nn.Conv2d(in_channels=c,out_channels=c,stride=1,kernel_size=1),nn.BatchNorm2d(num_features=c),nn.SiLU())
        # No stride is given, so MaxPool2d uses stride == kernel_size and the
        # pooled maps are smaller than the input; forward() resizes them back.
        # NOTE(review): reference SPP blocks usually pool with stride=1 so no
        # resizing is needed - confirm whether the downsampling is intended.
        self.maxpool1a = nn.MaxPool2d(kernel_size=5,padding=2)
        self.maxpool1b = nn.MaxPool2d(kernel_size=9,padding=4)
        self.maxpool1c = nn.MaxPool2d(kernel_size=13,padding=6)
        self.conv1d = nn.Sequential(nn.Conv2d(in_channels=4*c,out_channels=c,stride=1,kernel_size=1),nn.BatchNorm2d(num_features=c),nn.SiLU())
        self.conv1e = conv(c,1)
        self.conv1f = nn.Sequential(nn.Conv2d(in_channels=2*c,out_channels=c,stride=1,kernel_size=1),nn.BatchNorm2d(num_features=c),nn.SiLU())

    def forward(self,x):
        """Return the fused ``c``-channel feature map for ``x``."""
        x1a = self.conv1a(x)
        x2 = self.conv2(x)

        x1b = self.conv1b(x1a)
        x1c = self.conv1c(x1b)

        # Resize every pooled map to the exact (H, W) of the un-pooled map.
        # Interpolating to an explicit size (bilinear, align_corners=True -
        # the same kernel nn.UpsamplingBilinear2d uses) guarantees that the
        # concatenation below lines up. The previous float scale factor was
        # derived from the height only and could round to a different size.
        target_size = tuple(x1c.shape[2:])
        pyramid = [x1c]
        for pool in (self.maxpool1a, self.maxpool1b, self.maxpool1c):
            pooled = pool(x1c)
            pyramid.append(
                nn.functional.interpolate(pooled, size=target_size, mode="bilinear", align_corners=True)
            )
        x1c = torch.cat(pyramid, dim=1)

        x1d = self.conv1d(x1c)
        x1e = self.conv1e(x1d)
        x = torch.cat((x1e,x2),dim=1)

        x = self.conv1f(x)

        return x

In [None]:
class Backbone(nn.Module):
    """Feature extractor that returns three feature maps of increasing depth.

    A four-convolution stem (3 -> 32 -> 64 -> 64 -> 128 channels) is followed
    by alternating ``ELAN1`` and ``MPconv`` stages (each ``MPconv`` doubles
    the channel count) and a final ``SPPCSPC`` block.
    """

    def __init__(self):

        super().__init__()

        self.conv1 = nn.Sequential(
            nn.Conv2d(in_channels=3, out_channels=32, stride=1, kernel_size=3, padding=1),
            nn.BatchNorm2d(num_features=32),
            nn.SiLU(),
        )
        self.conv2 = conv(32, 2)        # 32 -> 64 channels
        self.conv3 = conv(64, 1)
        self.conv4 = conv(64, 2)        # 64 -> 128 channels
        self.ELAN1a = ELAN1(128)
        self.MPconv1 = MPconv(128)      # 128 -> 256 channels
        self.ELAN1b = ELAN1(256)
        self.MPconv2 = MPconv(256)      # 256 -> 512 channels
        self.ELAN1c = ELAN1(512)
        self.MPconv3 = MPconv(512)      # 512 -> 1024 channels
        self.ELAN1d = ELAN1(1024)
        self.SPPCSPC = SPPCSPC(1024)

    def forward(self,x):
        """Return ``(x1, x2, x3)``: the 256-, 512- and 1024-channel feature maps."""
        # Stem and first stage, applied strictly in sequence.
        for layer in (self.conv1, self.conv2, self.conv3, self.conv4, self.ELAN1a, self.MPconv1):
            x = layer(x)

        shallow = self.ELAN1b(x)
        middle = self.ELAN1c(self.MPconv2(shallow))
        deep = self.SPPCSPC(self.ELAN1d(self.MPconv3(middle)))

        return (shallow, middle, deep)

In [None]:
class head(nn.Module):
    """Backbone plus a top-down / bottom-up feature-fusion neck.

    ``forward`` returns three fused maps with 256, 512 and 1024 channels.

    Note: ``conv3`` and ``conv4`` are registered but not used by ``forward``;
    they are kept so existing attribute names and checkpoints stay valid.
    """

    def __init__(self):

        super().__init__()

        self.backbone = Backbone()
        self.conv1 = nn.Sequential(nn.Conv2d(in_channels=1024,out_channels=512,stride=1,kernel_size=1),nn.BatchNorm2d(num_features=512),nn.SiLU())
        self.conv2 = nn.Sequential(nn.Conv2d(in_channels=512,out_channels=512,stride=1,kernel_size=1),nn.BatchNorm2d(num_features=512),nn.SiLU())
        self.conv3 = nn.Sequential(nn.Conv2d(in_channels=256,out_channels=256,stride=1,kernel_size=1),nn.BatchNorm2d(num_features=256),nn.SiLU())
        self.upsample1 = nn.UpsamplingBilinear2d(scale_factor=2)
        self.convCat1 = nn.Sequential(nn.Conv2d(in_channels=1024,out_channels=512,stride=1,kernel_size=1),nn.BatchNorm2d(num_features=512),nn.SiLU())
        self.ELAN2a = ELAN2(512)
        self.conv4 = nn.Sequential(nn.Conv2d(in_channels=512,out_channels=256,stride=1,kernel_size=1),nn.BatchNorm2d(num_features=256),nn.SiLU())
        self.upsample2 = nn.UpsamplingBilinear2d(scale_factor=2)
        self.convCat2 = nn.Sequential(nn.Conv2d(in_channels=768,out_channels=256,stride=1,kernel_size=1),nn.BatchNorm2d(num_features=256),nn.SiLU())

        self.ELAN2b = ELAN2(256)
        self.MPconv1 = MPconv(256) # The output for this is 512 channels
        self.ELAN2c = ELAN2(512)
        self.MPconv2 = MPconv(512) # The output for this is 1024 channels
        self.ELAN2d = ELAN2(1024)

        # Channel-reduction convolutions for the bottom-up path. They used to
        # be instantiated inside forward(), which gave them fresh random
        # weights on every call and kept them out of parameters(),
        # state_dict() and .to(device). Registered last so the creation order
        # of the pre-existing layers is unchanged.
        self.convCat3 = nn.Conv2d(in_channels=1024,out_channels=512,kernel_size=1)
        self.convCat4 = nn.Conv2d(in_channels=2048,out_channels=1024,kernel_size=1)

    def forward(self,x):
        """Return ``(x2, x3, x4)`` with 256, 512 and 1024 channels respectively."""
        # y = (256-ch, 512-ch, 1024-ch) backbone feature maps.
        y = self.backbone(x)

        # Top-down step 1: deepest map -> 512 ch, upsample x2, fuse with y[1].
        x1 = self.conv1(y[2])
        x1 = self.upsample1(x1)
        x1 = torch.cat((x1,y[1]),dim=1)
        x1 = self.convCat1(x1)
        x1 = self.ELAN2a(x1)

        # Top-down step 2: upsample x2 again and fuse with y[0].
        x2 = self.conv2(x1)
        x2 = self.upsample2(x2)
        x2 = torch.cat((x2,y[0]),dim=1)
        x2 = self.convCat2(x2)
        x2 = self.ELAN2b(x2)

        # Bottom-up step 1: downsample x2 and fuse with x1.
        x3 = self.MPconv1(x2)
        x3 = torch.cat((x1,x3),dim=1)
        x3 = self.convCat3(x3)
        x3 = self.ELAN2c(x3)

        # Bottom-up step 2: downsample x3 and fuse with the deepest backbone map.
        x4 = self.MPconv2(x3)
        x4 = torch.cat((x4,y[2]),dim=1)
        x4 = self.convCat4(x4)
        x4 = self.ELAN2d(x4)

        return (x2,x3,x4)

In [None]:
class detection_head(nn.Module):
    """Per-scale 1x1 prediction layers on top of ``head``.

    For each of the three ``head`` outputs (256, 512 and 1024 channels) a
    separate 1x1 convolution predicts box regression (4 values per anchor),
    objectness (1 value per anchor) and class scores (``no_of_classes``
    values per anchor).

    Args:
        no_of_classes: Number of object classes.
        no_of_anchors: Number of anchors predicted per location.
    """

    def __init__(self,no_of_classes,no_of_anchors):

        super().__init__()

        self.head = head()

        def predictor(in_channels, values_per_anchor):
            # 1x1 convolution emitting ``values_per_anchor`` channels per anchor.
            return nn.Conv2d(
                in_channels=in_channels,
                out_channels=values_per_anchor * no_of_anchors,
                kernel_size=1,
                stride=1,
            )

        self.BBRegConv_20x20 = predictor(1024, 4)
        self.BBRegConv_40x40 = predictor(512, 4)
        self.BBRegConv_80x80 = predictor(256, 4)

        self.objectness_20x20 = predictor(1024, 1)
        self.objectness_40x40 = predictor(512, 1)
        self.objectness_80x80 = predictor(256, 1)

        self.classes_20x20 = predictor(1024, no_of_classes)
        self.classes_40x40 = predictor(512, no_of_classes)
        self.classes_80x80 = predictor(256, no_of_classes)

    def forward(self,x):
        """Return ``((reg, obj, cls) @20x20, (...) @40x40, (...) @80x80, head_output)``.

        The scale names refer to the feature-map sizes obtained for a 640x640
        input image.
        """
        features = self.head(x)
        # head returns maps with 256, 512 and 1024 channels, in that order.
        fine, middle, coarse = features

        reg1 = self.BBRegConv_20x20(coarse)
        reg2 = self.BBRegConv_40x40(middle)
        reg3 = self.BBRegConv_80x80(fine)

        obj1 = self.objectness_20x20(coarse)
        obj2 = self.objectness_40x40(middle)
        obj3 = self.objectness_80x80(fine)

        cls1 = self.classes_20x20(coarse)
        cls2 = self.classes_40x40(middle)
        cls3 = self.classes_80x80(fine)

        scale_20 = (reg1, obj1, cls1)
        scale_40 = (reg2, obj2, cls2)
        scale_80 = (reg3, obj3, cls3)

        return (scale_20, scale_40, scale_80, features)

In [None]:
# Smoke test: 20 classes, 3 anchors, a batch of four 640x640 RGB images.
# Displays the shape of the class prediction on the coarsest scale.
model = detection_head(no_of_classes=20, no_of_anchors=3)
a = torch.rand(4, 3, 640, 640)
model(a)[0][2].shape

torch.Size([4, 60, 20, 20])

In [None]:
def anchors(feat_map, img_size, anchor_sizes=((128,128),(128,256),(256,128))):
    """List every anchor that fits completely inside each feature map.

    Args:
        feat_map: Iterable of feature maps. Only ``shape[2]`` of each map is
            read, and it is used as the grid size along both axes.
        img_size: Side length of the input image in pixels; anchor sizes are
            divided by ``img_size / shape[2]`` to express them in grid cells.
        anchor_sizes: Anchor sizes in image pixels, one pair per anchor type.
            Defaults to the three sizes that used to be hard-coded.

    Returns:
        A list with one entry per feature map. Each entry is a tuple holding
        one list per anchor size (in the order of ``anchor_sizes``); every
        list contains ``(i, j, half_extent_0, half_extent_1)`` tuples for the
        grid positions ``(i, j)`` at which the anchor stays inside the grid.
    """
    useful_anchors = []
    for feat_maps in feat_map:
        grid = feat_maps.shape[2]
        # Convert pixel sizes to grid-cell units for this map.
        anchor_types = np.asarray(anchor_sizes)/(img_size/grid)

        per_type = []
        for k in anchor_types:
            half_i, half_j = k[0]/2, k[1]/2
            # Keep only centres whose box does not cross any grid border.
            per_type.append([
                (i, j, half_i, half_j)
                for i in range(grid)
                for j in range(grid)
                if i - half_i >= 0 and j - half_j >= 0
                and i + half_i <= grid and j + half_j <= grid
            ])
        useful_anchors.append(tuple(per_type))
    return useful_anchors

In [None]:
class IoU(nn.Module):
    """Splits raw predictions per anchor and builds the candidate anchor boxes.

    The IoU computation itself is not implemented yet: ``forward`` prepares
    its inputs and currently returns ``None``.

    Args:
        img_size: Side length of the input image in pixels.
        no_of_classes: Number of object classes.
        ground_truth: Ground-truth boxes (stored, not used yet).
        feat_map: Optional iterable of feature maps used to generate anchors.
            When omitted, the maps passed to ``forward`` are used instead, so
            the module can be built before any feature map exists.
    """

    def __init__(self,img_size,no_of_classes,ground_truth,feat_map=None) -> None:

        super().__init__()

        self.img_size = img_size
        self.no_of_classes = no_of_classes
        self.ground_truth = ground_truth
        self.feat_maps = feat_map

    def forward(self,x,feat_maps):
        """Slice the predictions into per-anchor chunks and generate anchors.

        Args:
            x: Either a single tensor (every slice is taken from it, as
                before) or a ``(reg, obj, cls)`` tuple/list in the order
                ``detection_head`` emits for one scale.
            feat_maps: Feature maps for anchor generation; only used when no
                maps were supplied to the constructor.
        """
        # A (reg, obj, cls) triple is sliced per tensor; indexing the tuple
        # itself with x[:, ...] would raise TypeError.
        if isinstance(x, (tuple, list)):
            reg_src, obj_src, cls_src = x
        else:
            reg_src = obj_src = cls_src = x

        obj = []
        reg = []
        cla = []    # cla is for class
        n = self.no_of_classes
        # range is from 0 to 3 since we have only 3 anchor boxes
        for i in range(0,3):
            obj.append(obj_src[:, i:i + 1, :, :])
            reg.append(reg_src[:, 4 * i:4 * i + 4, :, :])
            cla.append(cls_src[:, n * i:n * (i + 1), :, :])

        # Maps given at construction keep precedence (the original behaviour);
        # otherwise fall back to the ones handed to this call.
        maps = self.feat_maps if self.feat_maps is not None else feat_maps
        # The three sizes of the anchors are ((128,128),(128,256),(256,128))
        anchor_boxes = anchors(maps, self.img_size)
        # TODO: compute the IoU between `anchor_boxes` and `self.ground_truth`
        # and return it; obj/reg/cla/anchor_boxes are currently discarded.

In [None]:
class loss(nn.Module):
    """Runs the detection head and passes its coarsest-scale output to ``IoU``.

    No loss value is computed yet; ``forward`` returns whatever ``IoU``
    returns.

    Args:
        no_of_classes: Number of object classes.
        img_size: Side length of the input image in pixels.
        ground_truth: Ground-truth boxes forwarded to ``IoU``.
    """

    def __init__(self,no_of_classes,img_size, ground_truth) -> None:

        super().__init__()

        self.detection_head = detection_head(no_of_classes=no_of_classes,no_of_anchors=3)
        # IoU's constructor also has a `feat_map` parameter. Feature maps only
        # exist once the detection head has run, so none is bound here (the
        # call used to omit the argument entirely); they are passed to
        # IoU.forward instead.
        self.IoU = IoU(img_size,no_of_classes,ground_truth,None)

    def forward(self,x):
        """Return the result of ``IoU`` for the first (20x20) prediction scale."""
        x = self.detection_head(x)
        # x[0] = it has the 20x20 feature maps info
        # x[1] = it has the 40x40 feature maps info
        # x[2] = it has the 80x80 feature maps info
        # x[3] = the tuple of feature maps returned by the head
        # NOTE(review): x[0] is a (reg, obj, cls) tuple - confirm IoU.forward
        # accepts that form.
        iou1 = self.IoU(x[0],x[3])
        # The three sizes of the anchors are ((128,128),(128,256),(256,128))
        return iou1

In [None]:
# Count the square anchors that fit inside one 20x20 feature map.
# `anchors` iterates over its first argument, so the single map is wrapped in
# a list; [0] selects that map and the second [0] its square-anchor group
# (17 x 17 = 289 valid centre positions).
len(anchors([torch.rand((4,1024,20,20))],640)[0][0])


289

In [None]:
# Scratch check: slice a 60-channel tensor into three 4-channel chunks,
# one per anchor, and display the shape of the second chunk.
ground_truth = torch.Tensor([
    [2, 2, 5, 6],
    [3, 3, 7, 8],
])
x = torch.rand(4, 60, 20, 20)
reg = []
for i in range(3):
    reg.append(x[:, 4 * i:4 * (i + 1)])
reg[1].shape

torch.Size([4, 4, 20, 20])

In [None]:
# Total number of anchors (all three shapes) that fit inside one 20x20 map:
# 289 + 221 + 221 = 731. The first parameter of `anchors` is named
# `feat_map` (the keyword `feat_maps` raises TypeError) and must be an
# iterable of feature maps, hence the list.
sum(len(group) for group in anchors(feat_map=[torch.rand((4,1024,20,20))],img_size=640)[0])

731

In [None]:
# Express the three pixel-space anchor sizes in grid cells of a 20x20 map.
img_size = 640
feat_maps = torch.rand(4, 3, 20, 20)
anchor_types = np.array([(128, 128), (128, 256), (256, 128)]) / (img_size / feat_maps.shape[2])
anchor_types

array([[4., 4.],
       [4., 8.],
       [8., 4.]])

In [None]:
def anchor_type(size = (4,8,16),ratio = (2,1,0.5)):
    """Build every ``(size, size * ratio)`` pair, truncated to integers.

    Args:
        size: Base sizes.
        ratio: Multipliers applied to each base size for the second value.

    Returns:
        A list of ``(int(s), int(s * r))`` tuples, iterating sizes in the
        outer loop and ratios in the inner loop.
    """
    return [(int(side), int(side * factor)) for side in size for factor in ratio]
def anchorGen(feat_maps):
    """Enumerate the grid positions of every feature map.

    Args:
        feat_maps: Iterable of feature maps; ``shape[2]`` of each map is used
            as the grid size along both axes.

    Returns:
        One list per feature map containing all ``(i, j)`` index pairs with
        ``i`` varying slowest.
    """
    grids = []
    for feat_map in feat_maps:
        side = feat_map.shape[2]
        grids.append([(row, col) for row in range(side) for col in range(side)])
    return grids
def usable_anchors(feat_maps,size=(4,8,16),ratio=(2,1,0.5)):
    """List every anchor that fits completely inside a feature map.

    Args:
        feat_maps: A single feature map; ``shape[2]`` is used as the grid
            size along both axes.
        size: Base anchor sizes, forwarded to ``anchor_type``.
        ratio: Aspect multipliers, forwarded to ``anchor_type``.

    Returns:
        A list of ``(i, j, half_extent_0, half_extent_1)`` tuples, ordered by
        grid position first and anchor type second.
    """
    # Forward the caller's size/ratio: they used to be ignored because
    # anchor_type() was called without arguments.
    anchor_types = anchor_type(size, ratio)
    useful_anchors = []
    x = y = feat_maps.shape[2]
    for i in range(x):
        for j in range(y):
            for k in anchor_types:
                half_i, half_j = k[0]/2, k[1]/2
                # Keep the anchor only if it does not cross any grid border.
                if i - half_i >= 0 and j - half_j >= 0 and i + half_i <= x and j + half_j <= y:
                    useful_anchors.append((i, j, half_i, half_j))
    return useful_anchors
def convert(anchors_to_be_converted):
    """Convert centre/size boxes into corner form.

    Args:
        anchors_to_be_converted: Iterable of indexable boxes read as
            ``(centre_x, centre_y, width, height)``.

    Returns:
        A list of ``(xmax, ymax, xmin, ymin)`` tuples, one per input box.
    """
    converted_anchors = []
    for box in anchors_to_be_converted:
        centre_x, centre_y = box[0], box[1]
        half_w = box[2] / 2
        half_h = box[3] / 2
        converted_anchors.append(
            (centre_x + half_w, centre_y + half_h, centre_x - half_w, centre_y - half_h)
        )
    return converted_anchors
def iou_threshold(feat_maps, ground_truth, threshold = 0.49):
    """Select the anchors whose IoU with a ground-truth box exceeds a threshold.

    Args:
        feat_maps: Feature map handed to ``usable_anchors``.
        ground_truth: Iterable of boxes handed to ``convert``.
        threshold: Minimum IoU (exclusive) for an anchor to be kept.

    Returns:
        ``(f_anchors, scores)``: the selected anchors in
        ``(xmax, ymax, xmin, ymin)`` form and their IoU values. An anchor is
        listed once per ground-truth box it matches.
    """
    # NOTE(review): usable_anchors stores half extents (k/2) in positions 2
    # and 3, and convert halves positions 2 and 3 again - confirm that the
    # resulting quarter-size boxes are intended.
    candidate_boxes = convert(usable_anchors(feat_maps))
    target_boxes = convert(ground_truth)

    f_anchors = []
    scores = []
    for anchor_box in candidate_boxes:
        anchor_area = (anchor_box[0] - anchor_box[2]) * (anchor_box[1] - anchor_box[3])
        for target_box in target_boxes:
            overlap_w = min(anchor_box[0], target_box[0]) - max(anchor_box[2], target_box[2])
            overlap_h = min(anchor_box[1], target_box[1]) - max(anchor_box[3], target_box[3])
            area_intersection = overlap_w * overlap_h
            target_area = (target_box[0] - target_box[2]) * (target_box[1] - target_box[3])
            iou = area_intersection / (anchor_area + target_area - area_intersection + 1e-6)
            # A positive product with a non-positive width means both
            # overlaps are negative (no real intersection), so the width is
            # checked as well.
            if iou > threshold and iou < 1 and area_intersection > 0 and overlap_w > 0:
                f_anchors.append(anchor_box)
                scores.append(iou)
    return f_anchors, scores

In [None]:
# Each predicted label has four parts (dx, dy, dw, dh):
#   dx, dy - adjustment to the centre
#   dw     - adjustment to the width
#   dh     - adjustment to the height
ground_truth = torch.Tensor([
    [2, 2, 5, 6],
    [3, 3, 7, 8],
])
predicted = torch.Tensor([
    [2, 2, 4.5, 6],
    [2, 4, 5, 4],
    [3, 3, 6.5, 8],
    [3, 5, 6, 9],
])