# Defining paths

In [14]:
import sys
from pathlib import Path
import os
# Path of the project: the parent of the directory the notebook is run from.
PROJECT_PATH = Path(os.getcwd()).resolve().parent

# Make the project root importable. The membership check keeps sys.path free
# of duplicate entries when this cell is executed more than once.
if str(PROJECT_PATH) not in sys.path:
    sys.path.append(str(PROJECT_PATH))

# Main data dir
DATA_PATH = PROJECT_PATH / 'data'

# Test data path
TEST_PATH = DATA_PATH / 'test'
TEST_IMAGES_PATH = TEST_PATH / 'images'
COCO_TEST_PATH = TEST_PATH / 'coco_annotations' / 'annotations.json'

# Load DataLoader

In [15]:
from utils.data_utils.load_dataloader import load_dataloader

# Class index -> class name. Index 0 carries the 'N/A' placeholder name.
id2label = dict(enumerate(['N/A', 'Ball', 'Goalkeeper', 'Player', 'Referee']))

# Category records ({'id', 'name'}) for every entry except the placeholder.
new_categories = [
    {'id': class_id, 'name': class_name}
    for class_id, class_name in id2label.items()
    if class_name != 'N/A'
]

# Dataloader over the test split, yielding one unshuffled image per batch.
test_dl = load_dataloader(
    # Where the images and their COCO annotation file live.
    root_dir=TEST_IMAGES_PATH,
    coco_path=COCO_TEST_PATH,
    # Annotation handling: the loader log below shows the category names
    # being replaced by the ones in `new_categories`.
    custom_categories=new_categories,
    desire_bbox_format='coco',
    # NOTE(review): set_ratio=1 presumably keeps the whole split — confirm
    # against load_dataloader.
    set_ratio=1,
    # Iteration settings.
    batch_size=1,
    shuffle=False,
    pin_memory=True,
)

loading annotations into memory...
Done (t=0.08s)
creating index...
index created!
Changed [{'id': 1, 'name': 'ball'}, {'id': 2, 'name': 'goalkeeper'}, {'id': 3, 'name': 'player'}, {'id': 4, 'name': 'referee'}] categories to [{'id': 1, 'name': 'Ball'}, {'id': 2, 'name': 'Goalkeeper'}, {'id': 3, 'name': 'Player'}, {'id': 4, 'name': 'Referee'}] categories


# Load Processor

In [86]:
from transformers import DeformableDetrImageProcessor

# Hub identifier shared by the processor, the config and the model weights.
MODEL_ID = 'SenseTime/deformable-detr'

# Image processor loaded from the same checkpoint id.
PROCESSOR = DeformableDetrImageProcessor.from_pretrained(
    MODEL_ID,
    use_fast=True,
)

# Load Model

In [87]:
from transformers import DeformableDetrConfig, DeformableDetrForObjectDetection
import torch.nn as nn

# Start from the checkpoint's configuration and swap in this project's labels.
config = DeformableDetrConfig.from_pretrained(MODEL_ID)
config.id2label = id2label
config.label2id = {v: k for k, v in id2label.items()}
config.num_labels = len(id2label)

# ignore_mismatched_sizes=True lets loading proceed when checkpoint tensors do
# not match the shapes implied by the new label count.
model = DeformableDetrForObjectDetection.from_pretrained(MODEL_ID, config=config, ignore_mismatched_sizes=True)

# Re-initialise every classification head. `xavier_uniform_` is the in-place
# initialiser; the un-suffixed `xavier_uniform` is a deprecated alias.
for class_head in model.class_embed:
    nn.init.xavier_uniform_(class_head.weight)
    if class_head.bias is not None:
        nn.init.constant_(class_head.bias, 0)

Some weights of the model checkpoint at SenseTime/deformable-detr were not used when initializing DeformableDetrForObjectDetection: ['model.backbone.conv_encoder.model.layer1.0.downsample.1.num_batches_tracked', 'model.backbone.conv_encoder.model.layer2.0.downsample.1.num_batches_tracked', 'model.backbone.conv_encoder.model.layer3.0.downsample.1.num_batches_tracked', 'model.backbone.conv_encoder.model.layer4.0.downsample.1.num_batches_tracked']
- This IS expected if you are initializing DeformableDetrForObjectDetection from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing DeformableDetrForObjectDetection from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of DeformableDetrForObjectDetection wer

# Load a sample image and its targets

In [88]:
# Take the first batch from the test dataloader (built with batch_size=1 and
# shuffle=False).
image, targets = next(iter(test_dl))

In [89]:
# Shape of the first image tensor in the batch.
image[0].shape

torch.Size([3, 1080, 1920])

In [90]:
# Raw targets for the batch, exactly as the dataloader returned them.
targets

[{'image_id': 1,
  'annotations': [{'bbox': tensor([257., 388.,  35.,  67.]),
    'category_id': tensor(3),
    'area': tensor(2345.)},
   {'bbox': tensor([1627.,  566.,   39.,   79.]),
    'category_id': tensor(3),
    'area': tensor(3081.)},
   {'bbox': tensor([508., 359.,  26.,  66.]),
    'category_id': tensor(3),
    'area': tensor(1716.)},
   {'bbox': tensor([1497.,  690.,   35.,   91.]),
    'category_id': tensor(3),
    'area': tensor(3185.)},
   {'bbox': tensor([1134.,  419.,   32.,   69.]),
    'category_id': tensor(3),
    'area': tensor(2208.)},
   {'bbox': tensor([1436.,  386.,   32.,   64.]),
    'category_id': tensor(3),
    'area': tensor(2048.)},
   {'bbox': tensor([508., 560.,  26.,  84.]),
    'category_id': tensor(3),
    'area': tensor(2184.)},
   {'bbox': tensor([995., 285.,  21.,  54.]),
    'category_id': tensor(3),
    'area': tensor(1134.)},
   {'bbox': tensor([729., 278.,  30.,  47.]),
    'category_id': tensor(3),
    'area': tensor(1410.)},
   {'bbox': tens

# Check image and targets after Processor

In [91]:
# Run the sampled batch and its annotations through the image processor and
# request PyTorch tensors back.
inputs = PROCESSOR(
    images=image,
    annotations=targets,
    return_tensors='pt',
)

In [92]:
# Full processor output for the batch.
inputs

{'pixel_values': tensor([[[[ 2.2147,  2.2147,  2.2147,  ..., -0.6109, -0.5253, -0.5424],
          [ 2.2147,  2.2147,  2.2147,  ..., -0.6109, -0.6452, -0.6965],
          [ 2.2147,  2.2147,  2.2147,  ..., -0.6623, -0.9363, -1.0904],
          ...,
          [-0.1657, -0.1486, -0.1486,  ..., -0.3027, -0.3712, -0.5938],
          [-0.1828, -0.1657, -0.1486,  ..., -0.5596, -0.4568, -0.4397],
          [-0.2171, -0.1999, -0.1828,  ..., -0.9020, -0.7822, -0.5767]],

         [[ 2.3936,  2.3936,  2.3936,  ..., -0.1625, -0.0749, -0.0924],
          [ 2.3936,  2.3936,  2.3936,  ..., -0.1800, -0.2150, -0.2850],
          [ 2.3936,  2.3936,  2.3936,  ..., -0.2500, -0.5301, -0.6877],
          ...,
          [ 0.4853,  0.5028,  0.5028,  ...,  0.0301, -0.0399, -0.2675],
          [ 0.4678,  0.4853,  0.5028,  ..., -0.2325, -0.1275, -0.1099],
          [ 0.4328,  0.4503,  0.4678,  ..., -0.5826, -0.4601, -0.2500]],

         [[ 2.5703,  2.5703,  2.5703,  ...,  0.1651,  0.2522,  0.2348],
          [ 2

In [93]:
# Shape of the processed image batch.
inputs.pixel_values.shape

torch.Size([1, 3, 750, 1333])

In [94]:
# Labels the processor produced from the annotations.
inputs.labels

[{'size': tensor([ 750, 1333]), 'image_id': tensor([1]), 'class_labels': tensor([3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 4, 3, 3, 3, 3, 3, 3, 4, 1]), 'boxes': tensor([[0.1430, 0.3903, 0.0182, 0.0620],
         [0.8576, 0.5606, 0.0203, 0.0731],
         [0.2714, 0.3630, 0.0135, 0.0611],
         [0.7888, 0.6810, 0.0182, 0.0843],
         [0.5990, 0.4199, 0.0167, 0.0639],
         [0.7563, 0.3870, 0.0167, 0.0593],
         [0.2714, 0.5574, 0.0135, 0.0778],
         [0.5237, 0.2889, 0.0109, 0.0500],
         [0.3875, 0.2792, 0.0156, 0.0435],
         [0.7906, 0.4111, 0.0135, 0.0667],
         [0.4266, 0.3454, 0.0135, 0.0574],
         [0.4526, 0.3370, 0.0125, 0.0574],
         [0.7857, 0.3532, 0.0141, 0.0620],
         [0.3023, 0.2931, 0.0109, 0.0491],
         [0.5977, 0.3426, 0.0130, 0.0574],
         [0.5607, 0.3079, 0.0120, 0.0491],
         [0.4711, 0.2292, 0.0109, 0.0417],
         [0.7177, 0.3130, 0.0094, 0.0556],
         [0.7711, 0.3583, 0.0099, 0.0537],
         [0.7250, 0.303

In [95]:
import torch
# One list of 'bbox' tensors per image in the batch.
per_image_boxes = [
    [annotation['bbox'] for annotation in target['annotations']]
    for target in targets
]

# Only the first image's boxes are stacked into a single tensor.
targets_boxes = torch.stack(per_image_boxes[0])

In [96]:
# Stacked raw boxes of the first image, before any processing.
targets_boxes

tensor([[ 257.,  388.,   35.,   67.],
        [1627.,  566.,   39.,   79.],
        [ 508.,  359.,   26.,   66.],
        [1497.,  690.,   35.,   91.],
        [1134.,  419.,   32.,   69.],
        [1436.,  386.,   32.,   64.],
        [ 508.,  560.,   26.,   84.],
        [ 995.,  285.,   21.,   54.],
        [ 729.,  278.,   30.,   47.],
        [1505.,  408.,   26.,   72.],
        [ 806.,  342.,   26.,   62.],
        [ 857.,  333.,   24.,   62.],
        [1495.,  348.,   27.,   67.],
        [ 570.,  290.,   21.,   53.],
        [1135.,  339.,   25.,   62.],
        [1065.,  306.,   23.,   53.],
        [ 894.,  225.,   21.,   45.],
        [1369.,  308.,   18.,   60.],
        [1471.,  358.,   19.,   58.],
        [1384.,  297.,   16.,   61.],
        [1083.,  247.,   30.,   39.],
        [ 417.,  241.,   21.,   25.],
        [ 809.,  286.,   10.,   10.]])

In [97]:
# Boxes of the first image as emitted by the processor, for comparison with
# the raw boxes above.
inputs.labels[0].boxes

tensor([[0.1430, 0.3903, 0.0182, 0.0620],
        [0.8576, 0.5606, 0.0203, 0.0731],
        [0.2714, 0.3630, 0.0135, 0.0611],
        [0.7888, 0.6810, 0.0182, 0.0843],
        [0.5990, 0.4199, 0.0167, 0.0639],
        [0.7563, 0.3870, 0.0167, 0.0593],
        [0.2714, 0.5574, 0.0135, 0.0778],
        [0.5237, 0.2889, 0.0109, 0.0500],
        [0.3875, 0.2792, 0.0156, 0.0435],
        [0.7906, 0.4111, 0.0135, 0.0667],
        [0.4266, 0.3454, 0.0135, 0.0574],
        [0.4526, 0.3370, 0.0125, 0.0574],
        [0.7857, 0.3532, 0.0141, 0.0620],
        [0.3023, 0.2931, 0.0109, 0.0491],
        [0.5977, 0.3426, 0.0130, 0.0574],
        [0.5607, 0.3079, 0.0120, 0.0491],
        [0.4711, 0.2292, 0.0109, 0.0417],
        [0.7177, 0.3130, 0.0094, 0.0556],
        [0.7711, 0.3583, 0.0099, 0.0537],
        [0.7250, 0.3032, 0.0083, 0.0565],
        [0.5719, 0.2468, 0.0156, 0.0361],
        [0.2227, 0.2347, 0.0109, 0.0231],
        [0.4240, 0.2694, 0.0052, 0.0093]])

In [161]:
def resize_boxes(boxes, orig_size, new_size):
    """Convert ``(x, y, w, h)`` boxes to centre form and rescale them.

    Each row is read as ``(x, y, w, h)``. Half the width is added to ``x`` and
    half the height to ``y``, which turns a corner coordinate into a centre
    coordinate. The result is then scaled from ``orig_size`` to ``new_size``.

    Args:
        boxes: 2-D tensor with four columns. It is never modified; integer
            tensors are promoted to float so the fractional arithmetic works.
        orig_size: ``(height, width)`` the boxes currently refer to.
        new_size: ``(height, width)`` to rescale the boxes to.

    Returns:
        A new tensor of the same shape holding ``(cx, cy, w, h)`` in
        ``new_size`` pixel coordinates.
    """
    # Copy before touching anything: writing the centre coordinates into the
    # caller's tensor would shift the boxes again on every repeated call.
    boxes_resized = boxes.clone()
    if not boxes_resized.is_floating_point():
        boxes_resized = boxes_resized.float()

    # Corner -> centre.
    boxes_resized[:, 0] += boxes_resized[:, 2] / 2
    boxes_resized[:, 1] += boxes_resized[:, 3] / 2

    h0, w0 = orig_size
    h, w = new_size
    # x-like columns scale with the width ratio, y-like with the height ratio.
    boxes_resized[:, [0, 2]] *= w / w0
    boxes_resized[:, [1, 3]] *= h / h0
    return boxes_resized


In [162]:
from utils.box_ops import normalize_bboxes, denormalize_bboxes

# One list of 'bbox' tensors per image in the batch.
boxes_per_image = [
    [ann['bbox'] for ann in t['annotations']]
    for t in targets
]
# The batch holds a single image, so only its boxes are compared.
targets_boxes = torch.stack(boxes_per_image[0])

# Take both sizes from the data rather than hard-coding them: the original
# size from the raw image tensor, the processed size from the 'size' entry the
# processor stores in its labels.
h, w = image[0].shape[1:]
new_h, new_w = inputs.labels[0]['size'].tolist()

# Reproduce the processor's box transform by hand; the recorded outputs show
# norm_boxes matching inputs.labels[0].boxes.
resized_boxes = resize_boxes(targets_boxes, (h, w), (new_h, new_w))
norm_boxes = normalize_bboxes(resized_boxes, new_h, new_w)

In [163]:
# Manually resized and normalized boxes, to compare with the processor's.
norm_boxes

tensor([[0.1430, 0.3903, 0.0182, 0.0620],
        [0.8576, 0.5606, 0.0203, 0.0731],
        [0.2714, 0.3630, 0.0135, 0.0611],
        [0.7888, 0.6810, 0.0182, 0.0843],
        [0.5990, 0.4199, 0.0167, 0.0639],
        [0.7563, 0.3870, 0.0167, 0.0593],
        [0.2714, 0.5574, 0.0135, 0.0778],
        [0.5237, 0.2889, 0.0109, 0.0500],
        [0.3875, 0.2792, 0.0156, 0.0435],
        [0.7906, 0.4111, 0.0135, 0.0667],
        [0.4266, 0.3454, 0.0135, 0.0574],
        [0.4526, 0.3370, 0.0125, 0.0574],
        [0.7857, 0.3532, 0.0141, 0.0620],
        [0.3023, 0.2931, 0.0109, 0.0491],
        [0.5977, 0.3426, 0.0130, 0.0574],
        [0.5607, 0.3079, 0.0120, 0.0491],
        [0.4711, 0.2292, 0.0109, 0.0417],
        [0.7177, 0.3130, 0.0094, 0.0556],
        [0.7711, 0.3583, 0.0099, 0.0537],
        [0.7250, 0.3032, 0.0083, 0.0565],
        [0.5719, 0.2468, 0.0156, 0.0361],
        [0.2227, 0.2347, 0.0109, 0.0231],
        [0.4240, 0.2694, 0.0052, 0.0093]])