In [1]:
import torch
import torch.nn as nn

# https://github.com/pjreddie/darknet/blob/master/cfg/yolov3.cfg
# https://github.com/aladdinpersson/Machine-Learning-Collection

""" 
Information about architecture config:
Tuple is structured by (filters, kernel_size, stride) 
Every conv is a same convolution. 
List is structured by "B" indicating a residual block followed by the number of repeats
"S" is for scale prediction block and computing the yolo loss
"U" is for upsampling the feature map and concatenating with a previous layer
"""

config = [
    (32, 3, 1),
    (64, 3, 2),
    ["B", 1],
    (128, 3, 2),
    ["B", 2],
    (256, 3, 2),
    ["B", 8],
    (512, 3, 2),
    ["B", 8],
    (1024, 3, 2),
    ["B", 4],
    (512, 1, 1),
    (1024, 3, 1),
    "S",
    (256, 1, 1),
    "U",
    (256, 1, 1),
    (512, 3, 1),
    "S",
    (128, 1, 1),
    "U",
    (128, 1, 1),
    (256, 3, 1),
    "S",
]

class CNNBlock(nn.Module):
    def __init__(self, in_channels, out_channels, bn_act = True, **kwargs):
        super().__init__()
        self.conv = nn.Conv2d(in_channels, out_channels, bias= not bn_act, **kwargs)
        self.bn = nn.BatchNorm2d(out_channels)
        self.leaky = nn.LeakyReLU(0.1)
        self.use_bn_act = bn_act

    def forward(self, x):
        if self.use_bn_act:
            return self.leaky(self.bn(self.conv(x)))
        else:
            return self.conv(x)


class ResidualBlock(nn.Module):
    def __init__(self, channels, use_residual=True, num_repeats=1):
        super().__init__()
        self.layers = nn.ModuleList()
        for _ in range(num_repeats):
            self.layers += [
                nn.Sequential(
                    CNNBlock(channels, channels//2, kernel_size = 1),
                    CNNBlock(channels//2, channels, kernel_size=3, padding=1),
                )
            ]
        self.use_residual = use_residual
        self.num_repeats = num_repeats

    def forward(self, x):
        for layer in self.layers:
            if self.use_residual:
                x = layer(x) + x
            else:
                x = layer(x)
        return x

class ScalePrediction(nn.Module):
    def __init__(self, in_channels, num_classes):
        super().__init__()
        self.pred = nn.Sequential(
            CNNBlock(in_channels, 2 * in_channels, kernel_size=3, padding=1),
            CNNBlock(2 * in_channels, 3 * (num_classes + 5), bn_act=False, kernel_size=1 ), #[po, x, y, w, h]
        )
        self.num_classes = num_classes

    def forward(self, x):
        return (
            self.pred(x)
            .reshape(x.shape[0], 3, self.num_classes + 5, x.shape[2], x.shape[3])
            .permute(0, 1, 3, 4, 2) # Example, Anchor,  
        )

    # N x 3 X 26 X 26 X (num_classes + 5)

class YOLOv3(nn.Module):
    def __init__(self, in_channels=3, num_classes=20):
        super().__init__()
        self.num_classes = num_classes
        self.in_channels = in_channels
        self.layers = self._create_conv_layers()


    def forward(self, x):
        outputs = []
        route_connections = []
        for layer in self.layers:
            if isinstance(layer, ScalePrediction):
                outputs.append(layer(x))
                continue

            x = layer(x)
            #print(x.shape)


            if isinstance(layer, ResidualBlock) and layer.num_repeats == 8:
                route_connections.append(x)

            elif isinstance(layer, nn.Upsample):
                x = torch.cat([x, route_connections[-1]], dim=1)

                route_connections.pop()

        return outputs

    def _create_conv_layers(self):
        layers = nn.ModuleList()
        in_channels = self.in_channels

        for module in config:
            """
            Tuple: CNN block
            List: Residual block
            String: Upsample / ScalePrediction """

            if isinstance(module, tuple):
                out_channels, kernel_size, stride = module

                layers.append(
                    CNNBlock(
                        in_channels,
                        out_channels,
                        kernel_size = kernel_size,
                        stride = stride,
                        padding = 1 if kernel_size == 3 else 0,
                   )
                )

                in_channels = out_channels
            elif isinstance(module, list):
                num_repeats = module[1]
                layers.append(ResidualBlock(in_channels, num_repeats=num_repeats,))

            elif isinstance(module, str):
                if module == "S":
                    layers += [ResidualBlock(in_channels, use_residual=False, num_repeats=1),
                               CNNBlock(in_channels, in_channels//2, kernel_size= 1),
                               ScalePrediction(in_channels//2, num_classes=self.num_classes)]

                    in_channels = in_channels // 2

                if module == "U":
                    layers.append(nn.Upsample(scale_factor = 2),)

                    in_channels = in_channels * 3

        return layers


num_classes = 20
IMAGE_SIZE = 416
model = YOLOv3(num_classes=num_classes)
x = torch.randn((2, 3, IMAGE_SIZE, IMAGE_SIZE))
out = model(x)
assert model(x)[0].shape == (2, 3, IMAGE_SIZE//32, IMAGE_SIZE//32, num_classes + 5)
assert model(x)[1].shape == (2, 3, IMAGE_SIZE//16, IMAGE_SIZE//16, num_classes + 5)
assert model(x)[2].shape == (2, 3, IMAGE_SIZE//8, IMAGE_SIZE//8, num_classes + 5)
print("Success!")



Success!


In [33]:
import pandas as pd
pd.read_csv("D:/Object detction/data/test.csv")

Unnamed: 0,000001.jpg,000001.txt
0,000002.jpg,000002.txt
1,000003.jpg,000003.txt
2,000004.jpg,000004.txt
3,000006.jpg,000006.txt
4,000008.jpg,000008.txt
...,...,...
4946,009956.jpg,009956.txt
4947,009957.jpg,009957.txt
4948,009960.jpg,009960.txt
4949,009962.jpg,009962.txt


In [3]:
import math
import torch
def intersection_over_union(boxes_preds, boxes_labels, box_format="midpoint",GIoU=False, UIoU= False, DIoU=False, CIoU=False, AIoU= False, ICIoU=False ,beta =1, eps=1e-7):
    # boxes_preds shape is (N, 4) where N is the number of bboxes
    # bboxes_labels shape is (N, 4)

    if box_format=="midpoint":
        box1_x1 = boxes_preds[..., 0:1] - boxes_preds[..., 2:3] / 2
        box1_y1 = boxes_preds[..., 1:2] - boxes_preds[..., 3:4] / 2
        box1_x2 = boxes_preds[..., 0:1] + boxes_preds[..., 2:3] / 2
        box1_y2 = boxes_preds[..., 1:2] + boxes_preds[..., 3:4] / 2
        box2_x1 = boxes_labels[..., 0:1] - boxes_labels[..., 2:3] / 2
        box2_y1 = boxes_labels[..., 1:2] - boxes_labels[..., 3:4] / 2
        box2_x2 = boxes_labels[..., 0:1] + boxes_labels[..., 2:3] / 2
        box2_y2 = boxes_labels[..., 1:2] + boxes_labels[..., 3:4] / 2

    elif box_format == "corners":
        box1_x1 = boxes_preds[..., 0:1]
        box1_y1 = boxes_preds[..., 1:2]
        box1_x2 = boxes_preds[..., 2:3]
        box1_y2 = boxes_preds[..., 3:4] # (N, 1)
        box2_x1 = boxes_labels[..., 0:1]
        box2_y1 = boxes_labels[..., 1:2]
        box2_x2 = boxes_labels[..., 2:3]
        box2_y2 = boxes_labels[..., 3:4]

    x1 = box1_x1.max(box2_x1)
    y1 = box1_y1.max(box2_y1)
    x2 = box1_x2.min(box2_x2)
    y2 = box1_y2.min(box2_y2)

    w1, h1 = box1_x2 - box1_x1, box1_y2 - box1_y1 + eps
    
    w2, h2 = box2_x2 - box2_x1, box2_y2 - box2_y1 + eps

     #.clamp(0) is for the case when they donot intersect
    intersection = (x2 - x1).clamp(0) * (y2 - y1).clamp(0)

    box1_area = abs((box1_x2 - box1_x1) * (box1_y1 - box1_y2))
    box2_area = abs((box2_x2 - box2_x1) * (box2_y1 - box2_y2))

    union = box1_area + box2_area - intersection

    iou = intersection/(union + 1e-6)

    if ICIoU or AIoU or CIoU or  DIoU or UIoU or GIoU :
        cw = box1_x2.max(box2_x2) - box1_x1.min(box2_x1)  # convex (smallest enclosing box) width
        ch = box1_y2.max(box2_y2) - box1_y1.min(box2_y1)  # convex height

        c2 = cw.pow(2) + ch.pow(2) + eps  # convex diagonal squared
            
        rho2 = (
                (box2_x1 + box2_x2 - box1_x1 - box1_x2).pow(2) + (box2_y1 + box2_y2 - box1_y1 - box1_y2).pow(2))/ 4  # center dist**2
        
        if CIoU or DIoU or ICIoU:  # Distance or Complete IoU https://arxiv.org/abs/1911.08287v1
            
            if CIoU:  # https://github.com/Zzh-tju/DIoU-SSD-pytorch/blob/master/utils/box/box_utils.py#L47
                v = (4 / math.pi**2) * ((w2 / h2).atan() - (w1 / h1).atan()).pow(2)
                
                with torch.no_grad():
                    alpha = v / (v - iou + (1 + eps))
                if AIoU:
                    A_Agt2 = (box1_x1 - box2_x1).pow(2) + (box1_y1 - box2_y1).pow(2)
                    B_Bgt2 = (box1_x2 - box2_x2).pow(2) + (box1_y2 - box2_y2).pow(2)
                    return iou - (rho2 / c2) + (v * alpha) + beta * ((A_Agt2 + B_Bgt2)/c2)   # AIoU
                
                return iou - (rho2 / c2) + (v * alpha)  # CIoU
            elif ICIoU:
                vv = (8 / math.pi**2) * (((w2 / w1).atan() - (math.pi/4)).pow(2)  - ((h2 / h1).atan() - (math.pi/4)).pow(2))
                
                with torch.no_grad():
                    alpha = vv / (vv - iou + (1 + eps))
                return iou - (rho2 / c2 + vv * alpha)  # ICIoU
            return iou - (rho2 / c2)  # DIoU
        c_area = cw * ch + eps  # convex area
        giou = iou - (c_area - union) / c_area  # GIoU https://arxiv.org/pdf/1902.09630.pdf
        if UIoU:
            normalized_distance = c2/rho2

            similarity = math.sqrt(min(box1_area, box1_area) / max(box1_area, box1_area)) if max(box1_area, box1_area) > 0 else 0
            if iou == 0:  # Non-overlapping case
                uiou = 0 + 0.5 * (1 - normalized_distance)
            elif iou > 0 and giou < 0.98:  # Partial overlap
                uiou = 0.5 + 0.48 * (1 + giou) / 2
            else:  # One box inside another
                uiou = 0.98 + 0.02 * ((1 / similarity ** 2) + (1 - normalized_distance)) / 2
        
            return uiou 
            
        return giou
    return iou  # IoU


In [4]:
box1 = torch.tensor([1,1,3,3])
box2 = torch.tensor([2,2,4,4])
iou = intersection_over_union(box1, box2, box_format="corners",)
iou

tensor([0.1429])

In [5]:
uiou = intersection_over_union(box1, box2, box_format="corners",UIoU=True)
uiou

tensor([0.7210])

In [6]:
def non_max_suppression(
        bboxes,
        iou_threshold,
        threshold,
        box_format="corners"
    ):
    """
       Does Non Max Suppression given bboxes

       Parameters:
           bboxes (list): list of lists containing all bboxes with each bboxes
           specified as [class_pred, prob_score, x1, y1, x2, y2]
           iou_threshold (float): threshold where predicted bboxes is correct
           threshold (float): threshold to remove predicted bboxes (independent of IoU)
           box_format (str): "midpoint" or "corners" used to specify bboxes

       Returns:
           list: bboxes after performing NMS given a specific IoU threshold
       """
    # predictions = [[class_name=1, probablity_bounding_box=0.9, X1, y1, x2, y2], [], []]

    assert type(bboxes) == list

    bboxes = [box for box in bboxes if box[1] > threshold]
    bboxes = sorted(bboxes, key=lambda x: x[1], reverse=True)
    bboxes_after_nms = []

    while bboxes:
        chosen_box = bboxes.pop(0)

        bboxes = [
            box
            for box in bboxes
            if box[0] != chosen_box[0]
            or intersection_over_union(torch.tensor(chosen_box[2:]),
                                       torch.tensor(box[2:]),
                                       box_format=box_format) < iou_threshold
            ]

        bboxes_after_nms.append(chosen_box)

    return bboxes_after_nms


In [7]:
import numpy as np
import os
import pandas as pd
import torch

from PIL import Image, ImageFile
from torch.utils.data import Dataset, DataLoader

# from utils import ( iou_width_height as iou,
#                     non_max_supression as nms)

In [8]:
def iou_width_height(boxes1, boxes2):
    """
    Parameters:
        boxes1 (tensor): width and height of the first bounding boxes
        boxes2 (tensor): width and height of the second bounding boxes
    Returns:
        tensor: Intersection over union of the corresponding boxes
    """
    intersection = torch.min(boxes1[..., 0], boxes2[..., 0]) * torch.min(
        boxes1[..., 1], boxes2[..., 1]
    )
    union = (
        boxes1[..., 0] * boxes1[..., 1] + boxes2[..., 0] * boxes2[..., 1] - intersection
    )
    return intersection / union

x = torch.tensor([1,2])
iou_width_height(x,x)

tensor(1.)

In [51]:
ImageFile.LOAD_TRUNCATED_IMAGES = True

class YOLODataset(Dataset):
    def __init__(
        self,
        csv_file,
        img_dir,
        label_dir,
        anchors,
        image_size=416,
        S=[13, 26, 52],
        C=20,
        transform=None,
    ):
        self.annotations = pd.read_csv(csv_file)
        self.img_dir = img_dir
        self.label_dir = label_dir
        self.image_size = image_size
        self.transform = transform
        self.S = S
        self.anchors = torch.tensor(anchors[0] + anchors[1] + anchors[2])  # for all 3 scales
        self.num_anchors = self.anchors.shape[0]
        self.num_anchors_per_scale = self.num_anchors // 3
        self.C = C
        self.ignore_iou_thresh = 0.5

    def __len__(self):
        return len(self.annotations)

    def __getitem__(self, index):
        label_path = os.path.join(self.label_dir, self.annotations.iloc[index, 1])
        bboxes = np.roll(np.loadtxt(fname=label_path, delimiter=" ", ndmin=2), 4, axis=1).tolist()
        img_path = os.path.join(self.img_dir, self.annotations.iloc[index, 0])
        image = np.array(Image.open(img_path).convert("RGB"))

        if self.transform:
            augmentations = self.transform(image=image, bboxes=bboxes)
            image = augmentations["image"]
            bboxes = augmentations["bboxes"]

        # Below assumes 3 scale predictions (as paper) and same num of anchors per scale
        targets = [torch.zeros((self.num_anchors // 3, S, S, 6)) for S in self.S]
        for box in bboxes:
            iou_anchors = iou_width_height(torch.tensor(box[2:4]), self.anchors)
            anchor_indices = iou_anchors.argsort(descending=True, dim=0)
            x, y, width, height, class_label = box
            has_anchor = [False] * 3  # each scale should have one anchor
            for anchor_idx in anchor_indices:
                scale_idx = anchor_idx // self.num_anchors_per_scale # which scale
                anchor_on_scale = anchor_idx % self.num_anchors_per_scale # which anchor
                S = self.S[scale_idx]
                i, j = int(S * y), int(S * x)  # which cell xz
                anchor_taken = targets[scale_idx][anchor_on_scale, i, j, 0]
                if not anchor_taken and not has_anchor[scale_idx]:
                    targets[scale_idx][anchor_on_scale, i, j, 0] = 1
                    x_cell, y_cell = S * x - j, S * y - i  # both between [0,1]
                    width_cell, height_cell = (
                        width * S,
                        height * S,
                    )  # can be greater than 1 since it's relative to cell
                    box_coordinates = torch.tensor(
                        [x_cell, y_cell, width_cell, height_cell]
                    )
                    targets[scale_idx][anchor_on_scale, i, j, 1:5] = box_coordinates
                    targets[scale_idx][anchor_on_scale, i, j, 5] = int(class_label)
                    has_anchor[scale_idx] = True

                elif not anchor_taken and iou_anchors[anchor_idx] > self.ignore_iou_thresh:
                    targets[scale_idx][anchor_on_scale, i, j, 0] = -1  # ignore prediction

        return image, tuple(targets)

In [52]:
def plot_image(image, boxes):
    """Plots predicted bounding boxes on the image"""
    cmap = plt.get_cmap("tab20b")
    class_labels = config.COCO_LABELS if config.DATASET=='COCO' else config.PASCAL_CLASSES
    colors = [cmap(i) for i in np.linspace(0, 1, len(class_labels))]
    im = np.array(image)
    height, width, _ = im.shape

    # Create figure and axes
    fig, ax = plt.subplots(1)
    # Display the image
    ax.imshow(im)

    # box[0] is x midpoint, box[2] is width
    # box[1] is y midpoint, box[3] is height

    # Create a Rectangle patch
    for box in boxes:
        assert len(box) == 6, "box should contain class pred, confidence, x, y, width, height"
        class_pred = box[0]
        box = box[2:]
        upper_left_x = box[0] - box[2] / 2
        upper_left_y = box[1] - box[3] / 2
        rect = patches.Rectangle(
            (upper_left_x * width, upper_left_y * height),
            box[2] * width,
            box[3] * height,
            linewidth=2,
            edgecolor=colors[int(class_pred)],
            facecolor="none",
        )
        # Add the patch to the Axes
        ax.add_patch(rect)
        plt.text(
            upper_left_x * width,
            upper_left_y * height,
            s=class_labels[int(class_pred)],
            color="white",
            verticalalignment="top",
            bbox={"color": colors[int(class_pred)], "pad": 0},
        )

    plt.show()

In [53]:
def cells_to_bboxes(predictions, anchors, S, is_preds=True):
    """
    Scales the predictions coming from the model to
    be relative to the entire image such that they for example later
    can be plotted or.
    INPUT:
    predictions: tensor of size (N, 3, S, S, num_classes+5)
    anchors: the anchors used for the predictions
    S: the number of cells the image is divided in on the width (and height)
    is_preds: whether the input is predictions or the true bounding boxes
    OUTPUT:
    converted_bboxes: the converted boxes of sizes (N, num_anchors, S, S, 1+5) with class index,
                      object score, bounding box coordinates
    """
    BATCH_SIZE = predictions.shape[0]
    num_anchors = len(anchors)
    box_predictions = predictions[..., 1:5]
    if is_preds:
        anchors = anchors.reshape(1, len(anchors), 1, 1, 2)
        box_predictions[..., 0:2] = torch.sigmoid(box_predictions[..., 0:2])
        box_predictions[..., 2:] = torch.exp(box_predictions[..., 2:]) * anchors
        scores = torch.sigmoid(predictions[..., 0:1])
        best_class = torch.argmax(predictions[..., 5:], dim=-1).unsqueeze(-1)
    else:
        scores = predictions[..., 0:1]
        best_class = predictions[..., 5:6]

    cell_indices = (
        torch.arange(S)
        .repeat(predictions.shape[0], 3, S, 1)
        .unsqueeze(-1)
        .to(predictions.device)
    )
    x = 1 / S * (box_predictions[..., 0:1] + cell_indices)
    y = 1 / S * (box_predictions[..., 1:2] + cell_indices.permute(0, 1, 3, 2, 4))
    w_h = 1 / S * box_predictions[..., 2:4]
    converted_bboxes = torch.cat((best_class, scores, x, y, w_h), dim=-1).reshape(BATCH_SIZE, num_anchors * S * S, 6)
    return converted_bboxes.tolist()

In [54]:

def test():
    anchors = ANCHORS

    transform = test_transforms

    dataset = YOLODataset(
        "D:/Object detction/data/train.csv",
        "D:/Object detction/data/images/",
        "D:/Object detction/data/labels/",
        S=[13, 26, 52],
        anchors=anchors,
        transform=transform,
    )
    S = [13, 26, 52]
    scaled_anchors = torch.tensor(anchors) / (
        1 / torch.tensor(S).unsqueeze(1).unsqueeze(1).repeat(1, 3, 2)
    )
    loader = DataLoader(dataset=dataset, batch_size=1, shuffle=True)
    for x, y in loader:
        boxes = []

        for i in range(y[0].shape[1]):
            anchor = scaled_anchors[i]
            print(anchor.shape)
            print(y[i].shape)
            boxes += cells_to_bboxes(
                y[i], is_preds=False, S=y[i].shape[2], anchors=anchor
            )[0]
        boxes = non_max_suppression(boxes, iou_threshold=1, threshold=0.7, box_format="midpoint")
        print(boxes)
        plot_image(x[0].permute(1, 2, 0).to("cpu"), boxes)


In [55]:
ANCHORS = [
    [(0.28, 0.22), (0.38, 0.48), (0.9, 0.78)],
    [(0.07, 0.15), (0.15, 0.11), (0.14, 0.29)],
    [(0.02, 0.03), (0.04, 0.07), (0.08, 0.06)],
]  # Note these have been rescaled to be between [0, 1]

In [56]:
import cv2
from albumentations.pytorch import ToTensorV2

In [57]:
import albumentations as A
test_transforms = A.Compose(
    [
        A.LongestMaxSize(max_size=IMAGE_SIZE),
        A.PadIfNeeded(
            min_height=IMAGE_SIZE, min_width=IMAGE_SIZE, border_mode=cv2.BORDER_CONSTANT
        ),
        A.Normalize(mean=[0, 0, 0], std=[1, 1, 1], max_pixel_value=255,),
        ToTensorV2(),
    ],
    bbox_params=A.BboxParams(format="yolo", min_visibility=0.4, label_fields=[]),
)

In [59]:
import matplotlib.pyplot as plt
test()

torch.Size([3, 2])
torch.Size([1, 3, 13, 13, 6])
torch.Size([3, 2])
torch.Size([1, 3, 26, 26, 6])
torch.Size([3, 2])
torch.Size([1, 3, 52, 52, 6])
[[3.0, 1.0, 0.4520000219345093, 0.42493072152137756, 0.14400000870227814, 0.12577413022518158], [2.0, 1.0, 0.9229999780654907, 0.5447155833244324, 0.05799997225403786, 0.05789604038000107], [2.0, 1.0, 0.9229999780654907, 0.5447155833244324, 0.05799997225403786, 0.05789604038000107], [3.0, 1.0, 0.4520000219345093, 0.42493072152137756, 0.14400000870227814, 0.12577413022518158], [3.0, 1.0, 0.4520000219345093, 0.42493072152137756, 0.14400000870227814, 0.12577413022518158], [2.0, 1.0, 0.9229999780654907, 0.5447155833244324, 0.05799997225403786, 0.05789604038000107]]


AttributeError: 'list' object has no attribute 'DATASET'

In [45]:
import os

label_file = 'D:/Object detction/data\labels/009620.txt'
print(os.path.exists(label_file))  # Should return True if the file exists


True


In [10]:
file_path = "example.txt"

# Open the file in write mode and write content
with open(file_path, "w") as file:
    file.write("1,2,3,2.5,4")

In [11]:
np.roll(np.loadtxt("example.txt",delimiter= ",",ndmin=2),4, axis=1).tolist()

[[2.0, 3.0, 2.5, 4.0, 1.0]]

In [12]:
os.path.join('C:/Desktop/Deep-Learning/', 'Pytorch')

'C:/Desktop/Deep-Learning/Pytorch'

In [13]:
os.path

<module 'ntpath' from 'C:\\Users\\Nihar\\anaconda3\\envs\\tf\\lib\\ntpath.py'>

In [14]:
x = np.random.rand(10).reshape(5,2)
x

array([[0.98512046, 0.56246035],
       [0.39358461, 0.07049141],
       [0.31577928, 0.71035451],
       [0.82516357, 0.98282931],
       [0.68615355, 0.68998152]])

In [15]:
np.roll(x, -1,axis=0)

array([[0.39358461, 0.07049141],
       [0.31577928, 0.71035451],
       [0.82516357, 0.98282931],
       [0.68615355, 0.68998152],
       [0.98512046, 0.56246035]])

In [16]:
np.array([1,2,3])

array([1, 2, 3])

In [18]:
S = np.array([13, 26, 52])
S

array([13, 26, 52])

In [21]:
targets[0].shape

torch.Size([1, 13, 13, 6])

In [22]:
targets[1].shape

torch.Size([1, 26, 26, 6])

In [23]:
targets[2].shape

torch.Size([1, 52, 52, 6])

In [24]:
anchor

array([[0.9627, 0.0426, 0.1572],
       [0.9534, 0.8682, 0.0138],
       [0.0743, 0.4189, 0.6775]])

In [28]:
anchors = [
    [(0.28, 0.22), (0.38, 0.48), (0.9, 0.78)],
    [(0.07, 0.15), (0.15, 0.11), (0.14, 0.29)],
    [(0.02, 0.03), (0.04, 0.07), (0.08, 0.06)],
] 
anchors

[[(0.28, 0.22), (0.38, 0.48), (0.9, 0.78)],
 [(0.07, 0.15), (0.15, 0.11), (0.14, 0.29)],
 [(0.02, 0.03), (0.04, 0.07), (0.08, 0.06)]]

In [26]:
box = [.25,.29,.9,.7]
box

[0.25, 0.29, 0.9, 0.7]

In [32]:
anchors = torch.tensor(anchors[0] + anchors[1] + anchors[2])
anchors

tensor([[0.2800, 0.2200],
        [0.3800, 0.4800],
        [0.9000, 0.7800],
        [0.0700, 0.1500],
        [0.1500, 0.1100],
        [0.1400, 0.2900],
        [0.0200, 0.0300],
        [0.0400, 0.0700],
        [0.0800, 0.0600]])

In [33]:
ious = iou_width_height(torch.tensor(box[2:4]), anchors)
ious

tensor([0.0978, 0.2895, 0.8974, 0.0167, 0.0262, 0.0644, 0.0010, 0.0044, 0.0076])

In [31]:
torch.tensor(box[2:4])

tensor([0.9000, 0.7000])

In [37]:
anchor_indices = ious.argsort(descending=True,dim=0)
anchor_indices

tensor([2, 1, 0, 5, 4, 3, 8, 7, 6])

In [40]:
num_anchors= anchors.shape[0]
num_anchors

9

In [41]:
num_anchors_per_scale = num_anchors // 3
num_anchors_per_scale

3

In [42]:
S = np.array([13, 26, 52])
targets = [torch.zeros((num_anchors // 3, s, s, 6 )) for s in S] 
targets

[tensor([[[[0., 0., 0., 0., 0., 0.],
           [0., 0., 0., 0., 0., 0.],
           [0., 0., 0., 0., 0., 0.],
           ...,
           [0., 0., 0., 0., 0., 0.],
           [0., 0., 0., 0., 0., 0.],
           [0., 0., 0., 0., 0., 0.]],
 
          [[0., 0., 0., 0., 0., 0.],
           [0., 0., 0., 0., 0., 0.],
           [0., 0., 0., 0., 0., 0.],
           ...,
           [0., 0., 0., 0., 0., 0.],
           [0., 0., 0., 0., 0., 0.],
           [0., 0., 0., 0., 0., 0.]],
 
          [[0., 0., 0., 0., 0., 0.],
           [0., 0., 0., 0., 0., 0.],
           [0., 0., 0., 0., 0., 0.],
           ...,
           [0., 0., 0., 0., 0., 0.],
           [0., 0., 0., 0., 0., 0.],
           [0., 0., 0., 0., 0., 0.]],
 
          ...,
 
          [[0., 0., 0., 0., 0., 0.],
           [0., 0., 0., 0., 0., 0.],
           [0., 0., 0., 0., 0., 0.],
           ...,
           [0., 0., 0., 0., 0., 0.],
           [0., 0., 0., 0., 0., 0.],
           [0., 0., 0., 0., 0., 0.]],
 
          [[0., 0.,

In [45]:
targets[0].shape, targets[1].shape, targets[2].shape

(torch.Size([3, 13, 13, 6]),
 torch.Size([3, 26, 26, 6]),
 torch.Size([3, 52, 52, 6]))

In [51]:
ious, anchor_indices, num_anchors_per_scale

(tensor([0.0978, 0.2895, 0.8974, 0.0167, 0.0262, 0.0644, 0.0010, 0.0044, 0.0076]),
 tensor([2, 1, 0, 5, 4, 3, 8, 7, 6]),
 3)

In [48]:
for anchor_idx in anchor_indices:
                scale_idx = anchor_idx // num_anchors_per_scale # 0, 1, 2
                anchor_on_scale = anchor_idx % num_anchors_per_scale
                print(scale_idx, anchor_on_scale)

tensor(0) tensor(2)
tensor(0) tensor(1)
tensor(0) tensor(0)
tensor(1) tensor(2)
tensor(1) tensor(1)
tensor(1) tensor(0)
tensor(2) tensor(2)
tensor(2) tensor(1)
tensor(2) tensor(0)


In [47]:
scale_idx

tensor(2)