<a href="https://colab.research.google.com/github/Leon-web-net/Computer_Vision/blob/main/YOLO_pytorch.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

# Change Directory/albumentations/Config_list

In [1]:
import os

# Project folder under /content/drive; later cells import config, utils, etc. from here.
new_directory = '/content/drive/MyDrive/YOLOV3_Pytorch/'

# Switch the working directory, then echo it back as a sanity check.
os.chdir(new_directory)
print("New working directory:", os.getcwd())

New working directory: /content/drive/MyDrive/YOLOV3_Pytorch


In [2]:
!pip uninstall albumentations
!pip install albumentations

Found existing installation: albumentations 1.4.20
Uninstalling albumentations-1.4.20:
  Would remove:
    /usr/local/lib/python3.10/dist-packages/albumentations-1.4.20.dist-info/*
    /usr/local/lib/python3.10/dist-packages/albumentations/*
Proceed (Y/n)? y
  Successfully uninstalled albumentations-1.4.20
Collecting albumentations
  Downloading albumentations-1.4.24-py3-none-any.whl.metadata (37 kB)
Collecting albucore==0.0.23 (from albumentations)
  Downloading albucore-0.0.23-py3-none-any.whl.metadata (5.3 kB)
Collecting simsimd>=5.9.2 (from albucore==0.0.23->albumentations)
  Downloading simsimd-6.2.1-cp310-cp310-manylinux_2_28_x86_64.whl.metadata (66 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m66.0/66.0 kB[0m [31m5.2 MB/s[0m eta [36m0:00:00[0m
Downloading albumentations-1.4.24-py3-none-any.whl (274 kB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m274.9/274.9 kB[0m [31m18.4 MB/s[0m eta [36m0:00:00[0m
[?25hDownloading albucore-0.0.

# Albumentations Edit

In [3]:
import importlib.util
import shutil
from pathlib import Path


# Patched copy of albumentations' bbox_utils.py kept in the project folder.
src = "/content/drive/MyDrive/YOLOV3_Pytorch/bbox_utils.py"

# Locate the installed package rather than hard-coding an interpreter-specific
# ".../python3.10/dist-packages" path, so the patch lands in the right place
# whatever Python version the runtime uses. find_spec on a top-level name only
# locates the package; it does not import it.
_albumentations_spec = importlib.util.find_spec("albumentations")
if _albumentations_spec is None or _albumentations_spec.origin is None:
  raise ModuleNotFoundError(
      "albumentations is not installed; run the install cell before patching it"
  )
dst = str(Path(_albumentations_spec.origin).parent / "core" / "bbox_utils.py")

# Overwrite the library file with the patched version.
shutil.copy(src, dst)
print("File replaced successfully!")


File replaced successfully!


In [4]:
"""
Information about architecture config:
Tuple is structured by (filters, kernel_size, stride)
Every conv is a same convolution.
List is structured by "B" indicating a residual block followed by the number of repeats
"S" is for scale prediction block and computing the yolo loss
"U" is for upsampling the feature map and concatenating with a previous layer
"""

config_list = [
    (32, 3, 1),
    (64, 3, 2),
    ["B", 1],
    (128, 3, 2),
    ["B", 2],
    (256, 3, 2),
    ["B", 8],
    (512, 3, 2),
    ["B", 8],
    (1024, 3, 2),
    ["B", 4],  # To this point is Darknet-53
    (512, 1, 1),
    (1024, 3, 1),
    "S",
    (256, 1, 1),
    "U",
    (256, 1, 1),
    (512, 3, 1),
    "S",
    (128, 1, 1),
    "U",
    (128, 1, 1),
    (256, 3, 1),
    "S",
]

# Model

In [5]:
import torch
import torch.nn as nn


class CNNBlock(nn.Module):
  """Convolution optionally followed by batch norm and LeakyReLU(0.1).

  With ``bn_act=True`` the convolution is created without a bias and the
  forward pass is conv -> batch norm -> leaky ReLU. With ``bn_act=False`` the
  convolution keeps its bias and the forward pass returns its raw output.
  Any extra keyword arguments are passed straight to ``nn.Conv2d``.
  """

  def __init__(self, in_channels, out_channels, bn_act=True, **kwargs):
    super().__init__()
    self.use_bn_act = bn_act
    # A bias is only created when no batch norm follows the convolution.
    self.conv = nn.Conv2d(in_channels, out_channels, bias=not bn_act, **kwargs)
    # Both are always created, even when bn_act=False leaves them unused.
    self.bn = nn.BatchNorm2d(out_channels)
    self.leaky = nn.LeakyReLU(0.1)

  def forward(self, x):
    features = self.conv(x)
    if not self.use_bn_act:
      return features
    return self.leaky(self.bn(features))

class ResidualBlock(nn.Module):
  """Stack of ``num_repeats`` bottleneck units, each optionally with a skip connection.

  Every unit is a 1x1 CNNBlock that halves the channels followed by a 3x3
  CNNBlock (padding 1) that restores them.

  Args:
    channels: number of input (and output) channels.
    use_residual: when True, each unit's input is added to its output.
    num_repeats: how many units to stack.
  """

  def __init__(self, channels, use_residual=True, num_repeats=1):
    super().__init__()
    # Assigned once, outside the loop, so both attributes exist even when
    # num_repeats is 0 (callers read ``num_repeats`` on every instance).
    self.use_residual = use_residual
    self.num_repeats = num_repeats

    self.layers = nn.ModuleList()
    for _ in range(num_repeats):
      self.layers.append(
          nn.Sequential(
              CNNBlock(channels, channels//2, kernel_size=1),
              CNNBlock(channels//2, channels, kernel_size=3, padding=1),
          )
      )

  def forward(self, x):
    for layer in self.layers:
      x = layer(x) + x if self.use_residual else layer(x)
    return x

class ScalePrediction(nn.Module):
  """Prediction head for a single scale.

  A 3x3 CNNBlock doubles the channels, then a 1x1 CNNBlock without batch
  norm/activation emits ``3 * (5 + num_classes)`` channels, i.e. for each of
  the 3 anchors the values [po, x, y, w, h] plus ``num_classes`` more.
  The output is returned as ``(batch, 3, height, width, 5 + num_classes)``.
  """

  def __init__(self, in_channels, num_classes):
    super().__init__()
    self.num_classes = num_classes

    hidden_channels = 2*in_channels
    out_channels = 3*(5+num_classes)
    self.pred = nn.Sequential(
        CNNBlock(in_channels, hidden_channels, kernel_size=3, padding=1),
        CNNBlock(hidden_channels, out_channels, bn_act=False, kernel_size=1),
    )

  def forward(self,x):
    batch, height, width = x.shape[0], x.shape[2], x.shape[3]
    raw = self.pred(x)
    # Split the channel axis into (anchor, value) ...
    per_anchor = raw.reshape(batch, 3, self.num_classes+5, height, width)
    # ... then move the per-anchor values to the last axis.
    return per_anchor.permute(0,1,3,4,2)

class YOLOv3(nn.Module):
  """YOLOv3 network assembled from the module-level ``config_list``.

  ``forward`` returns a list holding the output of every ``ScalePrediction``
  layer, in the order those layers appear in the network.
  """

  def __init__(self, in_channels=3, num_classes=10):
    super().__init__()
    self.num_classes = num_classes
    self.in_channels = in_channels
    self.layers = self._create_conv_layers()

  def forward(self,x):
    predictions = []
    saved_routes = []  # stack of feature maps awaiting concatenation after an upsample

    for block in self.layers:
      if isinstance(block, ScalePrediction):
        # Prediction heads branch off: their output is collected and x is left untouched.
        predictions.append(block(x))
        continue

      x = block(x)

      if isinstance(block, ResidualBlock):
        # Only the 8-repeat residual stages are kept as route connections.
        if block.num_repeats == 8:
          saved_routes.append(x)
      elif isinstance(block, nn.Upsample):
        # Join the upsampled map with the most recently saved route along the channel axis.
        x = torch.cat([x, saved_routes.pop()], dim=1)

    return predictions

  def _create_conv_layers(self):
    """Translate ``config_list`` into an ``nn.ModuleList`` while tracking the channel count."""
    modules = nn.ModuleList()
    channels = self.in_channels

    for spec in config_list:
      if isinstance(spec, tuple):
        filters, kernel_size, stride = spec
        padding = 1 if kernel_size == 3 else 0
        modules.append(
            CNNBlock(channels, filters, kernel_size=kernel_size, stride=stride, padding=padding)
        )
        channels = filters

      elif isinstance(spec, list):
        # ["B", n] -> residual block with n repeats; the channel count is unchanged.
        modules.append(ResidualBlock(channels, num_repeats=spec[1]))

      elif isinstance(spec, str):
        if spec == "S":
          half = channels // 2
          modules.extend([
              ResidualBlock(channels, use_residual=False, num_repeats=1),
              CNNBlock(channels, half, kernel_size=1),
              ScalePrediction(half, num_classes=self.num_classes),
          ])
          # The main path continues from the 1x1 block, not from the prediction head.
          channels = half

        elif spec == "U":
          modules.append(nn.Upsample(scale_factor=2))
          # forward concatenates a saved route after this layer; with this
          # config that triples the channel count.
          channels = channels * 3

    return modules


if __name__ == "__main__":
  # Shape smoke test: the three outputs should have grids of
  # IMAGE_SIZE/32, IMAGE_SIZE/16 and IMAGE_SIZE/8 cells per side.
  num_classes = 10
  IMAGE_SIZE = 416
  model = YOLOv3(num_classes=num_classes)
  x = torch.randn((2,3,IMAGE_SIZE,IMAGE_SIZE))
  # One forward pass is enough for all three checks, and a shape check needs no gradients.
  with torch.no_grad():
    out = model(x)
  assert out[0].shape == (2,3,IMAGE_SIZE//32, IMAGE_SIZE//32, num_classes+5)
  assert out[1].shape == (2,3,IMAGE_SIZE//16, IMAGE_SIZE//16, num_classes+5)
  assert out[2].shape == (2,3,IMAGE_SIZE//8, IMAGE_SIZE//8, num_classes+5)
  print("Success!")


Success!


## Imports

In [6]:
import numpy as np
import os
import pandas as pd
import torch

from PIL import Image, ImageFile
from torch.utils.data import Dataset, DataLoader
from tqdm import tqdm
from utils import iou_width_height, intersection_over_union, non_max_suppression


# Dataset Loader

In [7]:
# Let PIL load image files that are truncated instead of raising an error.
ImageFile.LOAD_TRUNCATED_IMAGES = True

# NOTE(review): the YOLODataset class that follows is entirely commented out.
# A `dataset` module is imported later in the notebook; presumably the live
# implementation is there. Confirm, then delete this copy.

# class YOLODataset(Dataset):
#   def __init__(self,
#                csv_file,
#                img_dir,label_dir,
#                anchors,
#                S= [13,26,52],
#                C=20,
#                transform=None):
#     self.annotations = pd.read_csv(csv_file)
#     self.img_dir = img_dir
#     self.label_dir = label_dir
#     self.transform = transform
#     self.S = S
#     self.anchors = torch.tensor(anchors[0] + anchors[1] + anchors[2])
#     self.num_anchors = self.anchors.shape[0]
#     self.num_anchors_per_scale = self.num_anchors // 3
#     self.C = C
#     self.ignore_iou_thresh = 0.5

#   def __len__(self):
#     return len(self.annotations)

#   def __getitem__(self, index):
#     label_path = os.path.join(self.label_dir, self.annotations.iloc[index,1])
#     # [x y w h class]
#     bboxes = np.roll(np.loadtxt(fname=label_path, delimiter=" ", ndmin=2), 4, axis=1).tolist()
#     img_path = os.path.join(self.img_dir, self.annotations.iloc[index,0])
#     image = np.array(Image.open(img_path).convert("RGB"))

#     # Convert bounding boxes from [x_center, y_center, width, height] -> [x_min, y_min, x_max, y_max]
#     # bboxes = [
#     #     [
#     #         b[0] - b[2] / 2,  # x_min
#     #         b[1] - b[3] / 2,  # y_min
#     #         b[0] + b[2] / 2,  # x_max
#     #         b[1] + b[3] / 2,  # y_max
#     #         b[4],             # class
#     #     ]
#     #     for b in bboxes
#     # ]

#     if self.transform:
#       print(f"self transform: {self.transform}, this should be none")
#       augmentations = self.transform(image= image, bboxes=bboxes)
#       image = augmentations["image"]
#       bboxes = augmentations["bboxes"]

#     # P_O (probability of object) =>  [p_o, x, y, w, h, c]
#     targets = [torch.zeros((self.num_anchors // 3, S, S, 6)) for S in self.S]
#     boxes = []

#     for box in bboxes:
#       iou_anchors = iou_width_height(torch.tensor(box[2:4]), self.anchors)
#       anchor_indices = iou_anchors.argsort(descending=True, dim=0)
#       x, y, width, height, class_label = box
#       has_anchor = [False,False, False]

#       for anchor_idx in anchor_indices:
#         # 0,1,2 which scale
#         scale_idx = anchor_idx // self.num_anchors_per_scale
#         # 0,1,2 which anchor on scale
#         anchor_on_scale = anchor_idx % self.num_anchors_per_scale
#         S = self.S[scale_idx] # how many cell in scale
#         i,j = int(S*y), int(S*x) # x =0.5, S=13
#         anchor_taken = targets[scale_idx][anchor_on_scale, i, j, 0]
#         if not anchor_taken and not has_anchor[scale_idx]:
#           targets[scale_idx][anchor_on_scale, i, j, 0] = 1
#           x_cell, y_cell = S * x - j, S * y - i
#           width_cell, height_cell = (
#               width * S,
#               height * S,
#           )

#           box_coordinates = torch.tensor(
#               [x_cell, y_cell, width_cell, height_cell]
#           )
#           targets[scale_idx][anchor_on_scale, i, j, 1:5] = box_coordinates
#           targets[scale_idx][anchor_on_scale, i, j, 5] = int(class_label)
#           has_anchor[scale_idx] = True

#         elif not anchor_taken and iou_anchors[anchor_idx] > self.ignore_iou_thresh:
#           targets[scale_idx][anchor_on_scale, i, j, 0] = -1 # ignore prediction

#     return image, tuple(targets)





# Loss Function

In [8]:
import torch
import torch.nn as nn

from utils import intersection_over_union

class YoloLoss(nn.Module):
  """YOLOv3 loss for a single prediction scale.

  The result is a weighted sum of four terms (no-object, object, box
  coordinate and class loss); the weights are the ``lambda_*`` attributes.
  """

  def __init__(self):
    super().__init__()
    self.mse = nn.MSELoss()
    self.bce = nn.BCEWithLogitsLoss()
    self.entropy = nn.CrossEntropyLoss()
    self.sigmoid = nn.Sigmoid()

    # Constants
    self.lambda_class = 1
    self.lambda_noobj = 10
    self.lambda_obj = 1
    self.lambda_box = 10


  def forward(self, predictions, target, anchors):
    """Compute the loss for one scale without modifying ``predictions`` or ``target``.

    Args:
      predictions: raw network output; its last axis is read as
        [objectness, x, y, w, h, class scores...].
      target: tensor whose last axis is read as
        [objectness, x, y, w, h, class index]. Objectness 1 marks an object,
        0 marks no object; any other value is excluded from both the object
        and the no-object terms.
      anchors: the anchor (w, h) pairs for this scale; reshaped to (1, 3, 1, 1, 2).

    Returns:
      Scalar tensor: the weighted sum of the four loss terms.
    """
    obj = target[...,0] == 1
    noobj = target[...,0] == 0

    # No object loss
    no_object_loss = self.bce(
        predictions[...,0:1][noobj], target[...,0:1][noobj],
    )

    # Object loss: the objectness target is the detached IoU between the
    # decoded predicted box and the target box.
    anchors = anchors.reshape(1,3,1,1,2)
    xy_pred = self.sigmoid(predictions[...,1:3])  # x, y squashed into [0, 1]
    box_preds = torch.cat([xy_pred, torch.exp(predictions[...,3:5])*anchors], dim=-1)
    ious = intersection_over_union(box_preds[obj], target[...,1:5][obj]).detach()
    object_loss = self.bce(predictions[...,0:1][obj], ious*target[...,0:1][obj])

    # Box coordinate loss. x, y are compared after the sigmoid; w, h are
    # compared in log space relative to the anchors. New tensors are built
    # instead of writing into `predictions` / `target`, so the caller's
    # tensors are left untouched and can be reused safely.
    box_pred_encoded = torch.cat([xy_pred, predictions[...,3:5]], dim=-1)
    wh_target = torch.log(1e-16 + target[...,3:5] / anchors)  # epsilon avoids log(0)
    box_target_encoded = torch.cat([target[...,1:3], wh_target], dim=-1)
    box_loss = self.mse(box_pred_encoded[obj], box_target_encoded[obj])

    # Class loss
    class_loss = self.entropy(
        predictions[..., 5:][obj], target[..., 5][obj].long(),
    )

    return (
        self.lambda_box*box_loss
        +self.lambda_obj*object_loss
        +self.lambda_noobj*no_object_loss
        +self.lambda_class*class_loss
    )


# Train

In [20]:

import config
import torch
import torch.optim as optim
import tqdm

from utils import (
    mean_average_precision,
    cells_to_bboxes,
    get_evaluation_bboxes,
    save_checkpoint,
    load_checkpoint,
    check_class_accuracy,
    get_loaders,
    plot_couple_examples,
)

# Let cuDNN benchmark its convolution algorithms and pick the fastest one.
torch.backends.cudnn.benchmark = True

In [10]:
from PIL import Image

# Inspect the native resolution of one dataset image. The context manager
# closes the underlying file as soon as the size has been read.
with Image.open("/content/drive/MyDrive/datasets/YOLOV3_nuim/images/00000001.jpg") as image:
  width, height = image.size

print(f"Width: {width}, Height: {height}")

Width: 1600, Height: 900


In [22]:
# Re-import the local helper modules so edits to their .py files are picked up
# without restarting the kernel. The statement order is significant.
import importlib, Test_module, utils,dataset,config
importlib.reload(Test_module)
importlib.reload(utils)
from Test_module import test_fn  # re-bind test_fn to the freshly reloaded module
# NOTE(review): config is reloaded after Test_module; if Test_module reads config
# values at import time it may still hold the old ones. Confirm the intended order.
importlib.reload(config)
test_fn()

Hi config working? batch size


In [23]:


def train_fn(train_loader,model,optimizer,loss_fn,scaler, scaled_anchors):
  """Train ``model`` for one pass over ``train_loader`` using mixed precision.

  Args:
    train_loader: iterable yielding ``(x, y)`` where ``y`` holds three target
      tensors, one per prediction scale.
    model: network whose output is indexed 0..2 to match ``y``.
    optimizer: optimizer stepped through ``scaler``.
    loss_fn: callable ``loss_fn(prediction, target, anchors)`` for one scale.
    scaler: gradient scaler used to scale the loss and step the optimizer.
    scaled_anchors: indexable giving the anchors for each of the three scales.
  """
  loop = tqdm.tqdm(train_loader, leave=True)
  losses = []
  for x, y in loop:
    x = x.to(config.DEVICE)
    y0,y1,y2 = (
        y[0].to(config.DEVICE),
        y[1].to(config.DEVICE),
        y[2].to(config.DEVICE),
    )

    # torch.cuda.amp.autocast() is deprecated in favour of torch.amp.autocast("cuda").
    with torch.amp.autocast("cuda"):
      out = model(x)
      # The total loss is the sum of the per-scale losses.
      loss = (
          loss_fn(out[0], y0, scaled_anchors[0])
          + loss_fn(out[1], y1, scaled_anchors[1])
          + loss_fn(out[2], y2, scaled_anchors[2])
      )

    losses.append(loss.item())
    optimizer.zero_grad()
    scaler.scale(loss).backward()
    scaler.step(optimizer)
    scaler.update()

    # Show the running mean loss on the progress bar.
    mean_loss = sum(losses)/len(losses)
    loop.set_postfix(loss=mean_loss)

def main():
  """Build the model, optimizer and data loaders, then train for ``config.NUM_EPOCHS`` epochs.

  Each epoch runs ``train_fn`` once and, when ``config.SAVE_MODEL`` is set,
  saves a checkpoint. On every 5th epoch (epoch 0 excluded) class accuracy and
  mAP are evaluated on the test loader.
  """
  model = YOLOv3(num_classes=config.NUM_CLASSES).to(config.DEVICE)
  optimizer = optim.Adam(
      model.parameters(), lr=config.LEARNING_RATE, weight_decay=config.WEIGHT_DECAY
  )

  loss_fn = YoloLoss()
  # torch.cuda.amp.GradScaler() is deprecated in favour of torch.amp.GradScaler("cuda").
  scaler = torch.amp.GradScaler("cuda")

  # get_loaders returns three loaders; the third is not needed in this function.
  train_loader, test_loader, _ = get_loaders(
      train_csv_path=config.DATASET + "/train.csv",
      test_csv_path=config.DATASET + "/test.csv",
  )

  if config.LOAD_MODEL:
    load_checkpoint(
        config.CHECKPOINT_FILE, model, optimizer, config.LEARNING_RATE
    )

  # Multiply each scale's anchors by that scale's grid size from config.S.
  # assumes config.ANCHORS has shape (3, 3, 2) to match the repeat - TODO confirm
  scaled_anchors = (
      torch.tensor(config.ANCHORS)
      * torch.tensor(config.S).unsqueeze(1).unsqueeze(1).repeat(1,3,2)
  ).to(config.DEVICE)

  for epoch in range(config.NUM_EPOCHS):
    train_fn(train_loader, model, optimizer, loss_fn, scaler, scaled_anchors)

    if epoch % 5 == 0 and epoch> 0:
      print(f"Epoch: {epoch}, checking class accuracy")
      check_class_accuracy(model, test_loader, threshold=config.CONF_THRESHOLD)
      print("Fiinished Checking class accuracy")
      pred_boxes, true_boxes = get_evaluation_bboxes(
          test_loader,
          model,
          iou_threshold=config.NMS_IOU_THRESH,
          anchors=config.ANCHORS,
          threshold=config.CONF_THRESHOLD,
      )
      mapval = mean_average_precision(
          pred_boxes,
          true_boxes,
          iou_threshold=config.MAP_IOU_THRESH,
          box_format="midpoint",
          num_classes=config.NUM_CLASSES,
      )
      print(f"MAP: {mapval.item()}")
      # Presumably the evaluation helpers put the model in eval mode; switch back.
      model.train()

    if config.SAVE_MODEL:
      # NOTE(review): loading uses config.CHECKPOINT_FILE while saving uses this
      # literal file name; confirm the two are meant to refer to the same file.
      save_checkpoint(model, optimizer, filename="checkpoint.pth.tar")

# Start training when this cell is executed.
if __name__ == "__main__":
  main()

  scaler = torch.cuda.amp.GradScaler()
  bboxes = np.roll(np.loadtxt(fname=label_path, delimiter=" ", ndmin=2), 4, axis=1).tolist()
  bboxes = np.roll(np.loadtxt(fname=label_path, delimiter=" ", ndmin=2), 4, axis=1).tolist()
  with torch.cuda.amp.autocast(): # float 16 pytorch
100%|██████████| 3/3 [00:05<00:00,  1.78s/it, loss=66.2]


=> Saving checkpoint


  bboxes = np.roll(np.loadtxt(fname=label_path, delimiter=" ", ndmin=2), 4, axis=1).tolist()
  bboxes = np.roll(np.loadtxt(fname=label_path, delimiter=" ", ndmin=2), 4, axis=1).tolist()
100%|██████████| 3/3 [00:02<00:00,  1.46it/s, loss=65.8]


=> Saving checkpoint


  bboxes = np.roll(np.loadtxt(fname=label_path, delimiter=" ", ndmin=2), 4, axis=1).tolist()
  bboxes = np.roll(np.loadtxt(fname=label_path, delimiter=" ", ndmin=2), 4, axis=1).tolist()
100%|██████████| 3/3 [00:01<00:00,  1.59it/s, loss=62.4]


=> Saving checkpoint


  bboxes = np.roll(np.loadtxt(fname=label_path, delimiter=" ", ndmin=2), 4, axis=1).tolist()
  bboxes = np.roll(np.loadtxt(fname=label_path, delimiter=" ", ndmin=2), 4, axis=1).tolist()
100%|██████████| 3/3 [00:02<00:00,  1.20it/s, loss=49]


=> Saving checkpoint


  bboxes = np.roll(np.loadtxt(fname=label_path, delimiter=" ", ndmin=2), 4, axis=1).tolist()
  bboxes = np.roll(np.loadtxt(fname=label_path, delimiter=" ", ndmin=2), 4, axis=1).tolist()
100%|██████████| 3/3 [00:01<00:00,  1.53it/s, loss=44.5]


=> Saving checkpoint


  bboxes = np.roll(np.loadtxt(fname=label_path, delimiter=" ", ndmin=2), 4, axis=1).tolist()
  bboxes = np.roll(np.loadtxt(fname=label_path, delimiter=" ", ndmin=2), 4, axis=1).tolist()
100%|██████████| 3/3 [00:02<00:00,  1.46it/s, loss=34.7]


Epoch: 5, checking class accuracy


100%|██████████| 1/1 [00:00<00:00,  1.53it/s]


Class accuracy is: 12.121212%
No obj accuracy is: 41.200058%
Obj accuracy is: 50.649349%
Fiinished Checking class accuracy


  0%|          | 0/1 [00:00<?, ?it/s]

Batch: 0 loaded
Processing batch index 0
Number of bounding boxes before NMS: 10647
Box format: midpoint

 Before loop, No. Boxes: 6281

No. Boxes: 6280
No. Boxes: 6279
No. Boxes: 6278
No. Boxes: 6277
No. Boxes: 6274
No. Boxes: 6271
No. Boxes: 6270
No. Boxes: 6269
No. Boxes: 6268
No. Boxes: 6267
No. Boxes: 6265
No. Boxes: 6262
No. Boxes: 6261
No. Boxes: 6259
No. Boxes: 6258
No. Boxes: 6257
No. Boxes: 6254
No. Boxes: 6253
No. Boxes: 6251
No. Boxes: 6248
No. Boxes: 6245
No. Boxes: 6243
No. Boxes: 6241
No. Boxes: 6238
No. Boxes: 6236
No. Boxes: 6234
No. Boxes: 6232
No. Boxes: 6230
No. Boxes: 6228
No. Boxes: 6225
No. Boxes: 6222
No. Boxes: 6220
No. Boxes: 6217
No. Boxes: 6214
No. Boxes: 6211
No. Boxes: 6209
No. Boxes: 6207
No. Boxes: 6205
No. Boxes: 6204
No. Boxes: 6202
No. Boxes: 6200
No. Boxes: 6198
No. Boxes: 6195
No. Boxes: 6193
No. Boxes: 6192
No. Boxes: 6191
No. Boxes: 6189
No. Boxes: 6187
No. Boxes: 6186
No. Boxes: 6183
No. Boxes: 6181
No. Boxes: 6179
No. Boxes: 6178
No. Boxes: 6176

  0%|          | 0/1 [01:32<?, ?it/s]

No. Boxes: 5601





KeyboardInterrupt: 

In [14]:
# Reload the local dataset and config modules so edits to their .py files take effect.
importlib.reload(dataset)
importlib.reload(config)

<module 'config' from '/content/drive/MyDrive/YOLOV3_Pytorch/config.py'>

# Test

In [11]:
# Dataset layout: <dataset_dir>/train.csv, <dataset_dir>/images/, <dataset_dir>/labels/
dataset_dir = '/content/drive/MyDrive/datasets/YOLOV3_nuim'
csv_file = dataset_dir + "/train.csv"
img_dir =  dataset_dir + '/images/'
label_dir = dataset_dir + "/labels/"
annotations = pd.read_csv(csv_file)

# Pick one sample: column 0 of the CSV names the image file, column 1 the label file.
index = 14
image_name, label_name = annotations.iloc[index, 0], annotations.iloc[index, 1]
img_path = os.path.join(img_dir, image_name)
label_path = os.path.join(label_dir, label_name)

# Rolling each row by 4 positions moves its first column to the end.
raw_labels = np.loadtxt(fname=label_path, delimiter=" ", ndmin=2)
bboxes = np.roll(raw_labels, 4, axis=1).tolist()

img_path

'/content/drive/MyDrive/datasets/YOLOV3_nuim/images/00000031.jpg'

In [12]:
# Scratch cell: sets up the notebook-level names (targets, anchors, S, ...) that
# the anchor-assignment code further down reads as globals.
# Rows are [x y w h class] after rolling the first column to the end.
bboxes = np.roll(np.loadtxt(fname=label_path, delimiter=" ", ndmin=2), 4, axis=1).tolist()
img_path = os.path.join(img_dir, annotations.iloc[index, 0])
image = np.array(Image.open(img_path).convert("RGB"))
image.shape  # no visible effect: only a cell's last expression is displayed
# One zero tensor per grid size in config.S, shaped (3, S, S, 6).
targets = [torch.zeros((3, S, S, 6)) for S in config.S]
anchors = config.ANCHORS
# Concatenate the three per-scale anchor lists into one tensor.
anchors = torch.tensor(anchors[0] + anchors[1] + anchors[2])
boxes = []  # not used in this cell
box = bboxes[0]

# Compare the box's (w, h) with every anchor and sort anchor indices by descending IoU.
iou_anchors = iou_width_height(torch.tensor(box[2:4]), anchors)
anchor_indices = iou_anchors.argsort(descending=True, dim=0)
x, y, width, height, class_label = box
has_anchor = [False,False, False]

num_anchors = anchors.shape[0]
num_anchors_per_scale = num_anchors // 3
S = config.S  # the comprehension variable S above does not leak, so bind the list here

# Displayed as the cell output.
x,y,width,height

(0.73344, 0.57056, 0.15687, 0.20333)

In [17]:
import importlib
from albumentations.core import bbox_utils

# Force a reload so the bbox_utils.py copied into the installed package earlier
# in the notebook is the version in use, even if the module was already imported.
importlib.reload(bbox_utils)

<module 'albumentations.core.bbox_utils' from '/usr/local/lib/python3.10/dist-packages/albumentations/core/bbox_utils.py'>

In [18]:
import albumentations as A
from albumentations.pytorch import ToTensorV2

# Each bbox row is [x_center, y_center, width, height, class] (first column rolled to the end).
image = np.array(Image.open(img_path).convert("RGB"))
bboxes = np.roll(np.loadtxt(fname=label_path, delimiter=" ", ndmin=2), 4, axis=1).tolist()

# Transformation pipeline: resize to 416x416, scale pixels by 1/255, convert to a tensor.
transform = A.Compose(
    [
        A.Resize(height=416, width=416),
        A.Normalize(mean=[0, 0, 0], std=[1, 1, 1], max_pixel_value=255),
        ToTensorV2(),
    ],
    # label_fields is empty: the class travels as the 5th element of each bbox row.
    bbox_params=A.BboxParams(format="yolo", min_visibility=0.4, label_fields=[]),

)

# Apply transformations
transformed = transform(image=image, bboxes=bboxes)
bboxes_0 = transformed["bboxes"]
# Convert the transformed boxes to a NumPy array so the columns can be clipped at once
bboxes_clip = np.array(bboxes_0)
# Clip x_center, y_center, width, height to [np.finfo(np.float16).tiny, 1.0]
# NOTE(review): confirm float16 (rather than float32) is the intended lower bound.
bboxes_clip[:, :4] = np.clip(bboxes_clip[:, :4], np.finfo(np.float16).tiny, 1.0)

print("Original BBoxes:", bboxes)
print("Transformed BBoxes:", transformed["bboxes"])
print("Clipped: ", bboxes_clip)

Original BBoxes: [[0.73344, 0.57056, 0.15687, 0.20333, 4.0], [0.48219, 0.46056, 0.01188, 0.01667, 4.0], [0.24969, 0.49111, 0.08188, 0.04889, 4.0], [0.44188, 0.47778, 0.0275, 0.04444, 4.0], [0.555, 0.47667, 0.02, 0.03111, 4.0], [0.45469, 0.47, 0.01438, 0.02222, 4.0], [0.02937, 0.70389, 0.05875, 0.29, 4.0], [0.84562, 0.49833, 0.14125, 0.09222, 9.0], [0.91969, 0.61167, 0.16062, 0.37222, 4.0], [0.44844, 0.45667, 0.01562, 0.02, 4.0], [0.53406, 0.45667, 0.09813, 0.05556, 9.0], [0.52625, 0.47167, 0.02125, 0.03222, 4.0]]
Transformed BBoxes: [[0.7334399819374084, 0.5705599784851074, 0.1568700075149536, 0.20333001017570496, 4.0], [0.48219001293182373, 0.46055999398231506, 0.011879980564117432, 0.01666998863220215, 4.0], [0.2496899962425232, 0.49111002683639526, 0.08187998831272125, 0.04889002442359924, 4.0], [0.4418799877166748, 0.4777800142765045, 0.0274999737739563, 0.04443997144699097, 4.0], [0.5550000071525574, 0.4766699969768524, 0.019999980926513672, 0.031109988689422607, 4.0], [0.45469000

In [None]:
import os
import numpy as np
import torch


# NOTE(review): these example values replace the `anchors` and
# `num_anchors_per_scale` globals derived from config.ANCHORS in an earlier cell.
# With only 3 anchors and 3 anchors per scale, `anchor_idx // num_anchors_per_scale`
# is always 0, so only the first scale is ever used. Confirm this is intended.
num_anchors_per_scale = 3  # Adjust based on your model
anchors = torch.tensor([[1.0, 2.0], [2.0, 3.0], [3.0, 4.0]])  # Example anchor dimensions


def validate_box_coordinates(label_dir, annotations):
    """Assign every labelled box to a grid cell/anchor and print the encoded coordinates.

    Walks all rows of ``annotations``, loads the label file (column 1) and the
    image (column 0) for each row, and writes the boxes into the notebook-level
    ``targets`` tensors. Also reads the notebook-level ``anchors``,
    ``num_anchors_per_scale``, ``S`` and ``img_dir``.

    Args:
        label_dir: directory containing the label text files.
        annotations: DataFrame whose columns 0 and 1 hold the image and label file names.

    Returns:
        ``(image, tuple(targets))`` where ``image`` is the last image loaded, or
        ``None`` when ``annotations`` has no rows.
    """
    image = None  # stays None when there are no annotation rows
    for index in range(len(annotations)):
        # Load bounding boxes and the image for the current row
        label_path = os.path.join(label_dir, annotations.iloc[index, 1])
        bboxes = np.roll(np.loadtxt(fname=label_path, delimiter=" ", ndmin=2), 4, axis=1).tolist()
        img_path = os.path.join(img_dir, annotations.iloc[index, 0])
        image = np.array(Image.open(img_path).convert("RGB"))

        for box in bboxes:
            x, y, width, height, class_id = box[:5]
            iou_anchors = iou_width_height(torch.tensor(box[2:4]), anchors)
            anchor_indices = iou_anchors.argsort(descending=True, dim=0)
            has_anchor = [False, False, False]

            for anchor_idx in anchor_indices:
                # Which scale the anchor belongs to, and its position within that scale
                scale_idx = anchor_idx // num_anchors_per_scale
                anchor_on_scale = anchor_idx % num_anchors_per_scale
                S_0 = S[scale_idx]
                # Grid cell (row i, column j) containing the box centre
                i, j = int(S_0 * y), int(S_0 * x)
                anchor_taken = targets[scale_idx][anchor_on_scale, i, j, 0]
                if not anchor_taken and not has_anchor[scale_idx]:
                    targets[scale_idx][anchor_on_scale, i, j, 0] = 1
                    # Centre relative to the cell; width/height in cell units
                    x_cell, y_cell = S_0 * x - j, S_0 * y - i
                    width_cell, height_cell = width * S_0, height * S_0

                    box_coordinates = torch.tensor([x_cell, y_cell, width_cell, height_cell])

                    targets[scale_idx][anchor_on_scale, i, j, 1:5] = box_coordinates
                    # Use this box's own class, not a `class_label` left over in
                    # the notebook namespace from another cell.
                    targets[scale_idx][anchor_on_scale, i, j, 5] = int(class_id)
                    has_anchor[scale_idx] = True
                    print(targets[scale_idx][anchor_on_scale, i, j, 1:5])
                elif not anchor_taken and iou_anchors[anchor_idx] > 0.5:
                    targets[scale_idx][anchor_on_scale, i, j, 0] = -1  # ignore prediction

    return image, tuple(targets)

# Run the validation over every row of `annotations`; the result is
# (last image loaded, tuple of target tensors).
hello = validate_box_coordinates(label_dir, annotations)
