In [None]:
import torch
import torch.nn as nn

class res_block(nn.Module):
  """Residual unit: a 1x1 conv stack followed by a 3x3 conv stack, plus identity skip.

  Each stack is Conv2d -> BatchNorm2d -> SiLU. The first maps `in_channels` to
  `out_channels`, the second maps back to `in_channels`, so the result can be
  added to the input.

  Args:
    in_channels: channels of the input (and of the output).
    out_channels: channels between the two conv stacks.
    stride, downsample, expansions: accepted but not used by this block.
  """

  def __init__(self, in_channels=32, out_channels=32, stride=1, downsample=None, expansions=1):
    super().__init__()
    self.conv1 = self._conv_bn_silu(in_channels, out_channels, kernel_size=1)
    self.conv2 = self._conv_bn_silu(out_channels, in_channels, kernel_size=3, padding=1)

  @staticmethod
  def _conv_bn_silu(c_in, c_out, kernel_size, padding=0):
    """Build one stride-1 Conv2d -> BatchNorm2d -> SiLU stack."""
    return nn.Sequential(
        nn.Conv2d(c_in, c_out, kernel_size=kernel_size, stride=1, padding=padding),
        nn.BatchNorm2d(c_out),
        nn.SiLU())

  def forward(self, x):
    """Return conv2(conv1(x)) + x."""
    out = self.conv2(self.conv1(x))
    # In-place add on the freshly computed activation; `x` itself is not modified.
    out += x
    return out




In [None]:
import torch
import torch.nn as nn

class BottleNeck2(nn.Module):
  """Two stacked conv stacks (1x1 then 3x3) with no skip connection.

  Both stacks are Conv2d -> BatchNorm2d -> SiLU and keep the channel count at
  `channels`, so input and output have the same shape.

  Args:
    channels: number of input/output channels. The default of 0 is only a
      placeholder; callers are expected to pass a positive value.
  """

  def __init__(self, channels=0):
    super().__init__()
    self.conv1 = self._conv_bn_silu(channels, kernel_size=1)
    self.conv2 = self._conv_bn_silu(channels, kernel_size=3, padding=1)

  @staticmethod
  def _conv_bn_silu(width, kernel_size, padding=0):
    """Build one stride-1, channel-preserving Conv2d -> BatchNorm2d -> SiLU stack."""
    return nn.Sequential(
        nn.Conv2d(width, width, kernel_size=kernel_size, stride=1, padding=padding),
        nn.BatchNorm2d(width),
        nn.SiLU())

  def forward(self, x):
    """Return conv2(conv1(x))."""
    return self.conv2(self.conv1(x))




In [None]:
from torch.nn.modules.activation import LeakyReLU
class cspdarknet(nn.Module):
  """Convolutional backbone producing three feature maps.

  Layout: a stride-2 stem (`conv0`) followed by four stages. Each stage is a
  stride-2 downsampling conv, two parallel 1x1 convs that halve the channels,
  a chain of `res_block`s on one of the two branches, a channel concat of both
  branches, and a 1x1 fusing conv. Every conv is followed by BatchNorm2d and SiLU.

  `forward` returns a dict with the outputs of the last three stages:
  'b2' (256 channels), 'b3' (512 channels) and 'b4' (1024 channels).
  """

  def __init__(self, ):
    super(cspdarknet, self).__init__()
    block = res_block
    cbs = self._conv_bn_silu

    # Stem: 3 -> 64 channels, halves the spatial size.
    self.conv0 = cbs(3, 64, kernel_size=3, stride=2, padding=1)

    # Stage 1 (128 channels out).
    self.downsample1 = cbs(64, 128, kernel_size=3, stride=2, padding=1)
    self.conv1 = cbs(128, 64)
    self.conv2 = cbs(128, 64)
    self.block1 = self._make_block(block, in_cha=64, out_cha=64, repeats=3, expansion=2)
    self.convb1 = cbs(128, 128)

    # Stage 2 (256 channels out).
    self.downsample2 = cbs(128, 256, kernel_size=3, stride=2, padding=1)
    self.conv3 = cbs(256, 128)
    self.conv4 = cbs(256, 128)
    self.block2 = self._make_block(block, in_cha=128, out_cha=128, repeats=6, expansion=4)
    self.convb2 = cbs(256, 256)

    # Stage 3 (512 channels out).
    self.downsample3 = cbs(256, 512, kernel_size=3, stride=2, padding=1)
    self.conv5 = cbs(512, 256)
    self.conv6 = cbs(512, 256)
    self.block3 = self._make_block(block, in_cha=256, out_cha=256, repeats=9, expansion=8)
    self.convb3 = cbs(512, 512)

    # Stage 4 (1024 channels out).
    self.downsample4 = cbs(512, 1024, kernel_size=3, stride=2, padding=1)
    self.conv7 = cbs(1024, 512)
    self.conv8 = cbs(1024, 512)
    self.block4 = self._make_block(block, in_cha=512, out_cha=512, repeats=3, expansion=16)
    self.convb4 = cbs(1024, 1024)

  @staticmethod
  def _conv_bn_silu(c_in, c_out, kernel_size=1, stride=1, padding=0):
    """Build one Conv2d -> BatchNorm2d -> SiLU stack."""
    return nn.Sequential(
        nn.Conv2d(c_in, c_out, kernel_size=kernel_size, stride=stride, padding=padding),
        nn.BatchNorm2d(c_out),
        nn.SiLU())

  def _make_block(self, block, in_cha=64, out_cha=64, repeats=0, expansion=0):
    """Chain `repeats` instances of `block` into one nn.Sequential.

    `expansion` is forwarded to the block as its `expansions` argument.
    """
    return nn.Sequential(*[
        block(in_channels=in_cha, out_channels=out_cha, expansions=expansion)
        for _ in range(repeats)])

  def _stage(self, x, shortcut_conv, main_conv, blocks, fuse_conv):
    """Run one split -> blocks -> concat -> fuse stage on an already-downsampled input."""
    shortcut = shortcut_conv(x)
    main = blocks(main_conv(x))
    # Residual-block branch goes first in the concat, the plain 1x1 branch second.
    return fuse_conv(torch.cat((main, shortcut), dim=1))

  def forward(self, x):
    """Return {'b4': ..., 'b3': ..., 'b2': ...}, the outputs of stages 4, 3 and 2."""
    stem = self.conv0(x)
    b1 = self._stage(self.downsample1(stem), self.conv1, self.conv2, self.block1, self.convb1)
    b2 = self._stage(self.downsample2(b1), self.conv3, self.conv4, self.block2, self.convb2)
    b3 = self._stage(self.downsample3(b2), self.conv5, self.conv6, self.block3, self.convb3)
    b4 = self._stage(self.downsample4(b3), self.conv7, self.conv8, self.block4, self.convb4)
    return {'b4':b4, 'b3':b3, 'b2':b2}



In [None]:
import torch.nn.functional as F
class Yolov5(nn.Module):
  """Detector: `cspdarknet` backbone, pooled neck, top-down/bottom-up fusion, three heads.

  Each head is a 1x1 conv with 15 output channels, reshaped to
  (batch, H, W, 3, 5): three anchor slots per cell, five values per slot.
  `forward` does not return predictions; it passes them to the module-level
  `output_preprocessing` function (defined in another cell) and returns the loss.

  Spatial sizes are taken from the feature maps themselves rather than being
  fixed to 13/26/52, so the network part works for inputs other than 416x416.
  NOTE(review): `output_preprocessing` still indexes anchors with 52/26/13
  grids, so the loss as written assumes 416x416 input.
  """

  def __init__(self, ):
    super(Yolov5, self).__init__()
    self.cspdarknet = cspdarknet()
    block = BottleNeck2
    cbs = self._conv_bn_silu

    # Pooling neck on the deepest backbone feature (1024 channels in).
    self.spp_conv1 = cbs(1024, 512)
    self.spp1 = self._make_spp(5)
    self.spp2 = self._make_spp(5)
    self.spp3 = self._make_spp(5)
    # Two conv stacks in a single Sequential (modules 0-5), as in the original layout.
    self.spp_conv2 = nn.Sequential(*cbs(2048, 1024), *cbs(1024, 512))

    # Top-down path, first fusion (deepest -> middle scale).
    self.conv1 = cbs(1024, 512)
    self.conv2 = cbs(1024, 512)
    self.block1 = self._make_bottleneck(block, channels=512, repeats=3)
    self.conv3 = cbs(1024, 512)
    self.conv4 = cbs(512, 256)

    # Top-down path, second fusion (middle -> shallowest scale).
    self.conv5 = cbs(512, 256)
    self.conv6 = cbs(512, 256)
    self.block2 = self._make_bottleneck(block, channels=256, repeats=3)
    self.conv7 = cbs(512, 256)

    # Bottom-up path, first fusion (shallowest -> middle scale).
    self.downsample_one = cbs(256, 256, kernel_size=3, stride=2, padding=1)
    self.conv8 = cbs(512, 256)
    self.conv9 = cbs(512, 256)
    self.block3 = self._make_bottleneck(block, channels=256, repeats=3)
    self.conv10 = cbs(512, 512)

    # Bottom-up path, second fusion (middle -> deepest scale).
    self.downsample_two = cbs(512, 512, kernel_size=3, stride=2, padding=1)
    self.conv11 = cbs(1024, 512)
    self.conv12 = cbs(1024, 512)
    self.block4 = self._make_bottleneck(block, channels=512, repeats=3)
    self.conv13 = cbs(1024, 1024)

    # Prediction heads for the deepest, middle and shallowest scale.
    self.output1 = self._final_layer( 1024)
    self.output2 = self._final_layer( 512)
    self.output3 = self._final_layer( 256)

  @staticmethod
  def _conv_bn_silu(c_in, c_out, kernel_size=1, stride=1, padding=0):
    """Build one Conv2d -> BatchNorm2d -> SiLU stack."""
    return nn.Sequential(
        nn.Conv2d(c_in, c_out, kernel_size=kernel_size, stride=stride, padding=padding),
        nn.BatchNorm2d(c_out),
        nn.SiLU())

  def _make_bottleneck(self, block, channels=0, repeats=0):
    """Chain `repeats` instances of `block(channels)` into one nn.Sequential."""
    return nn.Sequential(*[block(channels) for _ in range(repeats)])

  def _final_layer(self, input):
    """Return the 1x1 head conv mapping `input` channels to 15 (= 3 slots x 5 values)."""
    return nn.Conv2d(input, 15, kernel_size=1, stride=1)

  def _make_spp(self, size):
    """Return a stride-1 max-pool whose padding keeps the spatial size for odd `size`."""
    return nn.MaxPool2d(kernel_size=size, stride=1, padding=size//2)

  @staticmethod
  def _split(x, direct_conv, route_conv, blocks, fuse_conv):
    """Fuse `x` through a direct 1x1 branch and a 1x1 + bottleneck branch, then a 1x1 conv."""
    direct = direct_conv(x)
    routed = blocks(route_conv(x))
    return fuse_conv(torch.cat((direct, routed), dim=1))

  @staticmethod
  def _to_grid(head, feature):
    """Apply a head and reshape (N, 15, H, W) -> (N, H, W, 3, 5)."""
    out = head(feature).permute(0, 2, 3, 1)
    n, h, w, _ = out.shape
    return out.view(n, h, w, 3, 5)

  def forward(self, images, targets, anchors):
    """Run the network and compute the training loss.

    Args:
      images: batch of input images, (N, 3, H, W).
      targets: per-image ground truth, forwarded unchanged to `output_preprocessing`.
      anchors: anchor lookup, forwarded unchanged to `output_preprocessing`.

    Returns:
      (total_loss, loss_outputs) where `loss_outputs` is the dict returned by
      `output_preprocessing` and
      total_loss = 0.05 * offset_loss + obj_loss + 0.632 * noobj_loss.
    """
    ## feature extraction
    features = self.cspdarknet(images)

    ## pooling neck: three chained 5x5 max-pools, concatenated with their input
    x = self.spp_conv1(features['b4'])
    spp1 = self.spp1(x)
    spp2 = self.spp2(spp1)
    spp3 = self.spp3(spp2)
    b4 = self.spp_conv2(torch.cat((spp1, spp2, spp3, x), dim=1))

    ## top-down path: upsample to the size of the backbone map being fused with
    b3_backbone = features['b3']
    upsampled_one = F.interpolate(b4, size=b3_backbone.shape[-2:], mode='nearest')
    concat_one = torch.cat((b3_backbone, upsampled_one), dim=1)
    b3 = self._split(concat_one, self.conv1, self.conv2, self.block1, self.conv3)
    b3 = self.conv4(b3)

    b2_backbone = features['b2']
    upsampled_two = F.interpolate(b3, size=b2_backbone.shape[-2:], mode='nearest')
    concat_two = torch.cat((upsampled_two, b2_backbone), dim=1)
    b2 = self._split(concat_two, self.conv5, self.conv6, self.block2, self.conv7)

    ## bottom-up path
    concat_pan_one = torch.cat((self.downsample_one(b2), b3), dim=1)
    b3 = self._split(concat_pan_one, self.conv8, self.conv9, self.block3, self.conv10)

    concat_pan_two = torch.cat((self.downsample_two(b3), b4), dim=1)
    b4 = self._split(concat_pan_two, self.conv11, self.conv12, self.block4, self.conv13)

    ## heads, one per scale (shallowest / middle / deepest)
    output_s = self._to_grid(self.output3, b2)
    output_m = self._to_grid(self.output2, b3)
    output_b = self._to_grid(self.output1, b4)

    loss_outputs = output_preprocessing(output_s, output_m, output_b, targets, images, anchors)

    return (loss_outputs['offset_loss'] *0.05+ loss_outputs['obj_loss']+
            loss_outputs['noobj_loss']*0.632), loss_outputs






In [None]:
from tqdm import tqdm
from functools import partial
tqdm = partial(tqdm, position=0, leave=True)
import torch
import numpy as np
import time
from PIL import Image
import matplotlib.pyplot as plt


def train_epocs(model, optimizer, data_loader, devie, all_anchors ,epochs=30,training_state=False ):
    """Run `epochs` passes over `data_loader`, training or only evaluating the loss.

    Args:
        model: module called as `model(images, targets, anchors)`; it must return
            `(total_loss, loss_dict)` where `loss_dict` has the keys
            'offset_loss', 'noobj_loss' and 'obj_loss'.
        optimizer: optimizer stepped every 12th batch when `training_state` is True.
        data_loader: iterable of batches; `batch[0]` is the image tensor and
            `batch[1]` the targets.
        devie: device the images are moved to (parameter name kept, typo included,
            for backward compatibility).
        all_anchors: anchor lookup forwarded to the model.
        epochs: number of passes over the data.
        training_state: True to train (backward + optimizer steps); False to run
            the model in eval mode under `torch.no_grad()` without any backward pass.

    Returns:
        The same `model` object.
    """
    # Use the device that was passed in instead of relying on a global `device`.
    target_device = devie

    for epoch in range(epochs):
        if epoch==30:
           # NOTE(review): only reachable when epochs > 30; replaces the optimizer
           # with a low-learning-rate SGD for the remaining epochs.
           parameters = model.parameters()
           optimizer = torch.optim.SGD(parameters, lr = 0.000002,
                                       momentum=0.99)

        if training_state:
            model.train()
        else:
            model.eval()

        # Running sums are plain Python floats so that no autograd graph is kept
        # alive between iterations.
        total = 0
        sum_loss = 0.0
        sum_loss_classifier = 0.0   # never updated; printed for log-format compatibility
        sum_loss_offsets = 0.0
        sum_loss_noobj = 0.0
        sum_loss_obj = 0.0

        iteration_num = 0

        for i, data in enumerate(tqdm(data_loader)):
            images = data[0].to(target_device)
            targets = data[1]
            batch_length = len(images)

            if training_state:
                output = model(images, targets, all_anchors)
                output[0].backward()
                # Gradients accumulate across batches; step and reset every 12th batch.
                if i%12==0:
                    optimizer.step()
                    optimizer.zero_grad()
            else:
                # No backward pass here: tensors created under no_grad carry no graph.
                with torch.no_grad():
                    output = model(images, targets = targets, anchors = all_anchors)

            iteration_num += 1
            total += batch_length

            sum_loss += float(output[0])
            sum_loss_offsets += float(output[1]['offset_loss'])
            sum_loss_noobj += float(output[1]['noobj_loss'])
            sum_loss_obj += float(output[1]['obj_loss'])

            if iteration_num % 50 == 0:
                train_loss = sum_loss/total
                train_loss_classifier = sum_loss_classifier/total
                train_loss_offsets = sum_loss_offsets/total
                train_loss_noobj = sum_loss_noobj/total
                train_loss_obj = sum_loss_obj/total
                print("loss %.6f classifer %.6f regressor %.6f noobj_loss %.6f obj_loss %.6f "%
                      (train_loss, train_loss_classifier, train_loss_offsets, train_loss_noobj, train_loss_obj))
                total = 0.0
                sum_loss= 0.0
                sum_loss_classifier = 0.0
                sum_loss_offsets = 0.0
                sum_loss_noobj = 0.0
                sum_loss_obj = 0.0

    return model


In [None]:
# Use the GPU when one is available, otherwise fall back to the CPU.
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

# Build the detector on that device and attach an SGD optimizer with momentum and weight decay.
model = Yolov5().to(device)
optimizer = torch.optim.SGD(
    model.parameters(),
    lr=1e-2,
    momentum=0.937,
    weight_decay=0.0005,
)
print(device)



cuda


In [None]:
# Release cached, currently unused GPU memory held by PyTorch's CUDA allocator.
torch.cuda.empty_cache()

In [None]:
# Run 20 training epochs; `train_epocs` returns the model object it was given,
# so `train` refers to the same module as `model`.
train = train_epocs(model, optimizer, data_loader, device, all_anchors, epochs=20 ,training_state=True)





In [None]:

def _scale_losses(pred_rows, target_rows, obj_weight):
  """Return (objectness_loss, offset_loss) for the positive anchors of one scale.

  `pred_rows` are raw 5-value predictions (x, y, w, h, objectness logit) and
  `target_rows` the matching 4-value regression targets. Returns (0, 0) when
  the scale has no positives.
  """
  if not pred_rows:
    return 0, 0
  preds = torch.stack(pred_rows)
  targets = torch.stack(target_rows).to(preds.device)
  # binary_cross_entropy_with_logits applies the sigmoid itself: pass raw logits.
  obj_loss = F.binary_cross_entropy_with_logits(
      preds[:, 4], torch.ones_like(preds[:, 4])) * obj_weight
  xy_loss = F.mse_loss(torch.sigmoid(preds[:, :2]), targets[:, :2], reduction='sum')
  wh_loss = F.mse_loss(torch.exp(preds[:, 2:4]), targets[:, 2:4], reduction='sum')
  return obj_loss, (xy_loss + wh_loss) / targets.numel()


def output_preprocessing(prediction_s, prediction_m,
                         prediction_b, gt_boxes, images, anchors):
  """Match ground-truth boxes to anchors and compute the detection losses.

  Args:
    prediction_s, prediction_m, prediction_b: raw head outputs for the three
      scales, each (batch, H, W, 3, 5) with the objectness logit at index 4.
    gt_boxes: per image, a list of samples `[label, cells, box]` where `cells`
      holds one `(x_cell, y_cell)` pair per scale and `box` is
      (x_center, y_center, w, h).
    images: unused; kept for interface compatibility.
    anchors: dict with keys 'small', 'medium', 'big'; each value is a flat list
      of per-cell anchor triples indexed by `x_cell * grid + y_cell`
      (grid = 52, 26, 13).

  Returns:
    dict with 'offset_loss', 'obj_loss' and 'noobj_loss', each averaged over the
    number of images in the batch.

  Fixes relative to the previous version:
    * objectness logits are no longer passed through a sigmoid before
      `binary_cross_entropy_with_logits` (which would apply it twice);
    * no hard-coded 'cuda' device and no hard-coded batch size of 16;
    * prediction grids are laid out (row=y, col=x), so they are indexed
      `[y_cell, x_cell]` instead of `[x_cell, y_cell]`.
  """
  anchor_keys = ('small', 'medium', 'big')
  grid_sizes = (52, 26, 13)       # cells per side used when the anchor lists were built
  strides = (8, 16, 32)           # pixels per cell for each scale
  obj_weights = (4.0, 1.0, 0.4)   # per-scale weight of the positive objectness loss

  noobj_batch_loss = 0
  obj_batch_loss = 0
  offsets_batch_loss = 0
  num_samples = 0

  for pred_s, pred_m, pred_b, gt in zip(prediction_s, prediction_m, prediction_b, gt_boxes):
      num_samples += 1
      preds = (pred_s, pred_m, pred_b)

      # Per-scale marker tensors; only channel 4 is meaningful:
      #   1 -> negative (counts towards the no-object loss)
      #   0 -> positive (matched to a ground-truth box)
      #  -1 -> ignored (high IoU with a box but not chosen as its positive)
      noobj_masks = []
      for pred in preds:
          mask = torch.zeros_like(pred)
          mask[:, :, :, 4] = 1
          noobj_masks.append(mask)

      pos_preds = ([], [], [])
      pos_targets = ([], [], [])
      used_cell = []
      used_anchor_cell = []

      for idx, sample in enumerate(gt):
          grid_cells = sample[1]
          current_box = sample[2].unsqueeze(0)

          # The nine candidate anchors of this box: three per scale, taken from
          # the cell that contains the box centre at that scale.
          anchors_in_cell = torch.cat([
              torch.tensor(anchors[key][cell[0]*grid + cell[1]])
              for key, grid, cell in zip(anchor_keys, grid_sizes, grid_cells)])

          pos_anchor, anchor_cell_location, cell_location, ignore_anchors = anchors_assignment(
              current_box, anchors_in_cell, grid_cells, used_anchor_cell, used_cell, idx)

          if pos_anchor is not None:
              # Anchor index 0-8 -> (scale 0-2, slot 0-2 inside the cell).
              level, slot = divmod(int(pos_anchor), 3)
              x_cell, y_cell = grid_cells[level]
              noobj_masks[level][y_cell, x_cell, slot, 4] = 0

          for ignore_idx in ignore_anchors:
              ig_level, ig_slot = divmod(int(ignore_idx), 3)
              ig_x, ig_y = grid_cells[ig_level]
              # Only demote negatives; never overwrite an existing positive.
              if noobj_masks[ig_level][ig_y, ig_x, ig_slot, 4] == 1:
                  noobj_masks[ig_level][ig_y, ig_x, ig_slot, 4] = -1

          used_cell.append(cell_location)
          used_anchor_cell.append(anchor_cell_location)

          if pos_anchor is not None:
              assigned_anchor = anchors_in_cell[pos_anchor].unsqueeze(0)
              pos_preds[level].append(preds[level][y_cell, x_cell, slot])
              pos_targets[level].append(
                  target_calculation(assigned_anchor, current_box, strides[level]))

      # No-object loss over every anchor slot still marked as negative.
      noobj_logits = torch.cat([pred[mask == 1] for pred, mask in zip(preds, noobj_masks)])
      noobj_batch_loss += F.binary_cross_entropy_with_logits(
          noobj_logits, torch.zeros_like(noobj_logits))

      for level in range(3):
          obj_loss, offset_loss = _scale_losses(
              pos_preds[level], pos_targets[level], obj_weights[level])
          obj_batch_loss += obj_loss
          offsets_batch_loss += offset_loss

  denom = max(num_samples, 1)
  return {'offset_loss':offsets_batch_loss/denom, 'obj_loss':obj_batch_loss/denom , 'noobj_loss':noobj_batch_loss/denom}



In [None]:
def calc_iou(box, anchors):
  """Intersection-over-union between boxes and anchors.

  Args:
    box: (N, 4) tensor in centre format (x_center, y_center, w, h).
    anchors: (A, 4) tensor in corner format (x1, y1, x2, y2).

  Returns:
    (A, N) tensor; entry [a, n] is the IoU of anchor `a` with box `n`.
  """
  anchor_sides = anchors[:, 2:] - anchors[:, :2]
  anchors_area = anchor_sides[:, 0] * anchor_sides[:, 1]
  boxes_area = box[:, 2] * box[:, 3]

  # Convert the boxes to corners once.
  half_extent = box[:, 2:] / 2
  box_top_left = box[:, :2] - half_extent
  box_bottom_right = box[:, :2] + half_extent

  # Broadcast (A, 1, 2) against (N, 2) to get every anchor/box pair.
  overlap_top_left = torch.max(anchors[:, :2].unsqueeze(1), box_top_left)
  overlap_bottom_right = torch.min(anchors[:, 2:].unsqueeze(1), box_bottom_right)
  overlap_sides = (overlap_bottom_right - overlap_top_left).clamp(min=0)
  overlap_area = overlap_sides[:, :, 0] * overlap_sides[:, :, 1]

  union_area = anchors_area.unsqueeze(1) + boxes_area - overlap_area
  return overlap_area / union_area

def anchors_assignment(box, anchors, cells_location, used_anchor_location_cell, used_cell, idx):
    """Pick the positive anchor for one ground-truth box.

    Candidates are visited in order of decreasing IoU with `box`. The first one
    with IoU > 0.3 whose `[cell, anchor_index]` pair is not already listed in
    `used_anchor_location_cell` becomes the positive.

    Args:
        box: (1, 4) tensor in (x_center, y_center, w, h) format; only the first
            row is considered.
        anchors: (A, 4) corner-format anchors, three consecutive rows per scale.
        cells_location: one grid-cell entry per scale; entry `k` belongs to
            anchors `3k .. 3k+2`.
        used_anchor_location_cell: `[cell, anchor_index]` pairs (or None entries)
            already assigned to earlier boxes of the same image.
        used_cell, idx: unused; kept for interface compatibility.

    Returns:
        (pos_anchor, [cell, pos_anchor], cell, ignore_anchors). The first three
        are None when no free anchor exceeds the IoU threshold; previously the
        last already-used candidate was returned in that case. `ignore_anchors`
        holds the indices of all anchors with IoU >= 0.7.
    """
    ious = torch.transpose(calc_iou(box, anchors), 0, 1)

    values, indices = torch.sort(ious, descending=True)
    values, indices = values[0], indices[0]
    ignore_anchors = indices[torch.where(values >= 0.7)[0]]

    for value, anchor_idx in zip(values, indices):
        # Sorted descending: once one candidate fails the threshold, all later ones do too.
        if not value > 0.3:
            break
        cell = cells_location[int(anchor_idx) // 3]
        candidate = [cell, anchor_idx]
        if candidate in used_anchor_location_cell:
            continue
        return anchor_idx, candidate, cell, ignore_anchors

    return None, None, None, ignore_anchors


In [None]:
def target_calculation(assigned_anchors, assigned_bbox, scale):
    """Compute the regression target of a box relative to its assigned anchor.

    Args:
        assigned_anchors: sequence/tensor of corner-format anchors (x1, y1, x2, y2).
        assigned_bbox: sequence/tensor of boxes (x_center, y_center, w, h).
        scale: pixel size of one grid cell at the anchor's scale.

    Returns:
        float32 tensor (x, y, w, h): x and y are the offsets of the box centre
        inside its grid cell as a fraction of `scale`; w and h are the box size
        divided by the anchor size. The tensor is placed on CUDA when available,
        otherwise on the CPU (previously it was moved to 'cuda' unconditionally).

        As before, when several pairs are passed only the target of the last
        pair is returned.

    Raises:
        ValueError: if no (anchor, box) pair is supplied.
    """
    num_pairs = min(len(assigned_anchors), len(assigned_bbox))
    if num_pairs == 0:
        raise ValueError("target_calculation needs at least one (anchor, box) pair")

    anchor = assigned_anchors[num_pairs - 1]
    box = assigned_bbox[num_pairs - 1]

    # Fractional position of the centre inside the cell that contains it.
    x = (box[0] - ( int(box[0]/scale) * scale))/scale
    y = (box[1] - ( int(box[1]/scale) * scale))/scale

    anchor_w = anchor[2] - anchor[0]
    anchor_h = anchor[3] - anchor[1]

    w = box[2]/anchor_w
    h = box[3]/anchor_h

    target_device = 'cuda' if torch.cuda.is_available() else 'cpu'
    return torch.tensor([float(x), float(y), float(w), float(h)],
                        dtype=torch.float32).to(target_device)



In [None]:
from google.colab import drive
# Mount Google Drive at /content/gdrive so the dataset archives stored there can be read.
drive.mount('/content/gdrive')

Mounted at /content/gdrive


In [None]:
# Extract the annotation archive (wider_face_split.zip) from the mounted Drive.
!unzip /content/gdrive/MyDrive/wider_face_split.zip


Archive:  /content/gdrive/MyDrive/wider_face_split.zip
   creating: wider_face_split/
  inflating: wider_face_split/readme.txt  
  inflating: wider_face_split/wider_face_test.mat  
  inflating: wider_face_split/wider_face_test_filelist.txt  
  inflating: wider_face_split/wider_face_train.mat  
  inflating: wider_face_split/wider_face_train_bbx_gt.txt  
  inflating: wider_face_split/wider_face_val.mat  
  inflating: wider_face_split/wider_face_val_bbx_gt.txt  


In [None]:

# Extract the training image archive (WIDER_train.zip) from the mounted Drive.
!unzip /content/gdrive/MyDrive/WIDER_train.zip

In [None]:

from PIL import Image
import torchvision
import numpy as np
import cv2
import torch

class LoadDataSet(torch.utils.data.Dataset):
  """Dataset yielding a normalised 416x416 image and its ground-truth boxes.

  Args:
    annotation_path: per image, a list of boxes `[label, x1, y1, x2, y2]` in
      original-image pixels.
    images_path: per image, its path relative to `images_root`.
    transforms: stored as `self.transform` but not applied by this class.
    images_root: prefix prepended to every entry of `images_path`
      (defaults to the previously hard-coded Colab location).
  """

  def __init__(self, annotation_path, images_path, transforms,
               images_root='/content/WIDER_train/images/'):
    self.images_path_list = images_path
    self.annotation_path_list = annotation_path
    self.transform = transforms
    self.images_root = images_root
    self.size = len(self.images_path_list)
    print(self.size)

  def __getitem__(self, idx):
    """Return `(image, boxes, img_path)` for sample `idx`.

    `image` is a float32 (3, 416, 416) tensor, scaled to [0, 1] and then
    normalised with the mean/std below. `boxes` has one entry per ground-truth
    box: `[1, [(x, y) cell at stride 8, at stride 16, at stride 32],
    tensor(x_center, y_center, w, h)]`, with coordinates in the resized image.
    """
    # Built lazily so that constructing the dataset does not require torchvision.
    normalize = torchvision.transforms.Normalize(mean=[0.485,0.456,0.406],
                                                 std=[0.229,0.224,0.225])

    img_path = self.images_root + self.images_path_list[idx]
    gt_boxes = self.annotation_path_list[idx]

    # Close the file handle deterministically and force three channels, so
    # grayscale/palette/alpha images do not break the (H, W, 3) -> (3, H, W) permute.
    with Image.open(img_path) as pil_img:
      img = np.asarray(pil_img.convert('RGB'))
    img_h, img_w = img.shape[:2]

    image = cv2.resize(img/255., (416,416), interpolation=cv2.INTER_AREA)
    image = torch.as_tensor(image, dtype=torch.float32).permute(2,0,1)
    image = normalize(image)

    # Per-coordinate factors mapping [label, x1, y1, x2, y2] into the 416x416 frame.
    scalar = torch.tensor([1, 416/img_w, 416/img_h, 416/img_w, 416/img_h], dtype=torch.float32)
    strides = (8, 16, 32)

    boxes = []
    for box in gt_boxes:
        s_box = torch.tensor(box, dtype=torch.float32)*scalar

        x_ctr = (s_box[3] + s_box[1])/2
        y_ctr = (s_box[4] + s_box[2])/2
        w     = (s_box[3] - s_box[1])
        h     = (s_box[4] - s_box[2])

        # Grid cell containing the box centre at each of the three scales.
        cells = [(int(x_ctr/stride), int(y_ctr/stride)) for stride in strides]

        boxes.append([1, cells, torch.stack([x_ctr, y_ctr, w, h])])

    return image, boxes, img_path

  def __len__(self):
    """Number of images in the dataset."""
    return self.size

def collate_fn(batch):
  """Collate dataset samples `(image, boxes, path)` into one batch.

  Returns `(images, boxes, path)`: the images stacked along a new first
  dimension, the per-image box lists as a plain Python list, and the path of
  the last sample in the batch.
  """
  # NOTE(review): only the LAST sample's path is returned, not one per image —
  # confirm this is intended before relying on it.
  images = torch.stack([sample[0] for sample in batch], dim=0)
  boxes = [sample[1] for sample in batch]
  return images, boxes, batch[-1][2]





In [None]:
# Parse the training annotation file into two parallel lists:
#   names[k] - relative image path
#   lista[k] - boxes of that image, each [1, x1, y1, x2, y2]
# An image is dropped entirely when it has no accepted box or when any of its
# boxes is rejected by the filter below.
with open('/content/wider_face_split/wider_face_train_bbx_gt.txt') as file:
    annotation_lines = file.readlines()

lista=[]
inside_list=[]
names=[]
# every accepted box as [x1, y1, x2, y2]; collected for anchor generation
# (presumably the clustering that produced `new_centroids` - not shown here)
all_boxes = []
# set to 1 as soon as one box of the current image is rejected
triger=0

for idx, line in enumerate(annotation_lines):
    if line[-4:]=="jpg\n":
       # A new image starts: first settle the previous one.
       names.append(line[:-1])
       if (inside_list==[] or triger==1) and idx!=0:
          # Previous image is unusable: remove its name (second to last entry).
          names.pop(len(names)-2)
          inside_list = []
          triger = 0
          continue
       if idx!=0 and inside_list!=[]:
          lista.append(inside_list)
          inside_list = []
    # Lines longer than 15 characters are treated as box lines; short non-image
    # lines (e.g. the per-image count) are skipped.
    if line[-4:]!='jpg\n' and len(line)>15:
       num_list = line.strip()
       num_str = num_list.split()
       x1 = int(num_str[0])
       y1 = int(num_str[1])
       x2 = int(num_str[0]) + int(num_str[2])
       y2 = int(num_str[1]) + int(num_str[3])
       # Keep boxes with non-zero size, field 7 not '1' (presumably the "invalid"
       # flag of the annotation format - confirm against its readme) and both
       # sides strictly between 20 and 1200 pixels.
       if num_str[2]!='0' and num_str[3]!='0' and num_str[7]!='1' and (x2-x1)>20 and (x2-x1)<1200 and (y2-y1)>20 and (y2-y1)<1200:
          array_int = [1, x1, y1, x2, y2]
          all_boxes.append(array_int[1:])
          inside_list.append(array_int)
       else:
          triger=1

# Settle the last image after the loop (previously tied to the hard-coded line
# index 185183). It is kept only when it has boxes and none was rejected;
# otherwise its name is removed so that `names` and `lista` stay the same length.
if names:
    if inside_list!=[] and triger==0:
        lista.append(inside_list)
    else:
        names.pop()


In [None]:
#device = torch.device('cuda') if torch.cuda.is_available() else torch.device('cpu')
# Wrap the parsed annotations and image names in a dataset (no extra transforms).
dataset = LoadDataSet(
    annotation_path=lista,
    images_path=names,
    transforms=None,
)
# Shuffled batches of 16; the trailing incomplete batch is dropped, so every
# batch has exactly 16 images.
data_loader = torch.utils.data.DataLoader(
    dataset,
    batch_size=16,
    shuffle=True,
    collate_fn=collate_fn,
    drop_last=True,
)


9074


In [None]:
import torch
import numpy as np
# Nine anchor shapes as (width, height), ordered from smallest to largest.
# Stored below as corner-style rows [0, 0, width, height]; the anchor-generation
# cell uses rows 0-2 for the stride-8 grid, 3-5 for stride 16 and 6-8 for stride 32.
_anchor_shapes = [
    (13.2136, 21.0693),
    (18.2718, 33.5538),
    (28.8336, 45.1136),
    (36.9599, 76.1135),
    (55.4889, 52.9748),
    (57.9636, 119.5746),
    (96.7757, 97.8682),
    (118.6441, 183.7684),
    (199.0680, 252.7008),
]
new_centroids = torch.tensor(
    [[0.0, 0.0, width, height] for width, height in _anchor_shapes],
    dtype=torch.float32)

In [None]:


def _build_grid_anchors(grid_size, stride, shapes):
  """Return the per-cell anchors of one square grid as a flat list.

  Cells are visited with the first index `i` outermost, so cell (i, j) sits at
  position `i * grid_size + j`. Each cell holds one corner-format anchor
  [x1, y1, x2, y2] per row of `shapes`, centred on the cell centre
  (i * stride + stride/2, j * stride + stride/2). An anchor with any negative
  coordinate is replaced by [0.0, 0.0, 0.0, 0.0].
  """
  half_stride = stride // 2
  cells = []
  for i in range(grid_size):
    for j in range(grid_size):
      center_x = i*stride + half_stride
      center_y = j*stride + half_stride
      per_cell = []
      for box in shapes:
        half_w = int((box[2] - box[0])/2)
        half_h = int((box[3] - box[1])/2)
        anchor = [center_x - half_w, center_y - half_h,
                  center_x + half_w, center_y + half_h]
        if min(anchor) >= 0:
          per_cell.append(anchor)
        else:
          per_cell.append([0.0, 0.0, 0.0, 0.0])
      cells.append(per_cell)
  return cells


# Three anchor shapes per scale: rows 0-2 on the 52x52 grid (stride 8),
# rows 3-5 on the 26x26 grid (stride 16), rows 6-8 on the 13x13 grid (stride 32).
anchors_small = _build_grid_anchors(52, 8, new_centroids[:3])
anchors_medium = _build_grid_anchors(26, 16, new_centroids[3:6])
anchors_big = _build_grid_anchors(13, 32, new_centroids[6:])

all_anchors = {'small':anchors_small, 'medium':anchors_medium, 'big':anchors_big}

