# Sainfoin Seed Classification FasterRCNN Model


### Module Imports

In [1]:
!pip install torchmetrics
import os
import cv2
from glob import glob
import pandas as pd
import torch
import torchvision
import torchmetrics
from random import randint
from torchmetrics.classification import MulticlassAccuracy, MulticlassJaccardIndex
from torchmetrics.detection import IntersectionOverUnion, MeanAveragePrecision
from torchvision.models.detection.faster_rcnn import FastRCNNPredictor
from torchvision.transforms import ToTensor
from torchvision.ops import nms
from torch.utils.tensorboard import SummaryWriter
from torch.utils.data import Dataset, DataLoader
from tqdm import tqdm
import numpy as np
import pandas as pd
import albumentations as A
from albumentations.pytorch import ToTensorV2
from google.colab import drive
from torchvision.models.detection.rpn import AnchorGenerator



Collecting torchmetrics
  Downloading torchmetrics-1.1.1-py3-none-any.whl (763 kB)
[?25l     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m0.0/763.4 kB[0m [31m?[0m eta [36m-:--:--[0m[2K     [91m━━━━[0m[91m╸[0m[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m92.2/763.4 kB[0m [31m2.9 MB/s[0m eta [36m0:00:01[0m[2K     [91m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m[91m╸[0m [32m757.8/763.4 kB[0m [31m12.0 MB/s[0m eta [36m0:00:01[0m[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m763.4/763.4 kB[0m [31m10.2 MB/s[0m eta [36m0:00:00[0m
Collecting lightning-utilities>=0.8.0 (from torchmetrics)
  Downloading lightning_utilities-0.9.0-py3-none-any.whl (23 kB)
Installing collected packages: lightning-utilities, torchmetrics
Successfully installed lightning-utilities-0.9.0 torchmetrics-1.1.1


### Google Drive Authorization and Directory Setup

In [2]:
drive.mount('/content/drive')
BASE_DIR = 'drive/MyDrive/github_repos/sainfoin_seed_classification'
%cd -q $BASE_DIR

Mounted at /content/drive


In [3]:
!ls ./data/test

013b9.JPG  13a10.jpg  14eeb.JPG  17c5a.JPG  9e66b.jpg  9e8f3.JPG
014c3.JPG  13f6c.JPG  15b57.JPG  19ac5.JPG  9e83d.jpg  9fbf6.jpg
10cdd.JPG  14dfc.jpg  17b4d.JPG  9e3e0.jpg  9e890.JPG  9fc38.JPG


In [None]:
# annotations = pd.read_csv()

### Configurations


In [4]:
TRAIN_DIR = './data/train'
VAL_DIR = './data/val'
ANNOT_DIR = './data/annotations'
OUT_DIR = './model_chkpt'
LOG_DIR = './logs'
DEVICE = torch.device('cuda') if torch.cuda.is_available() else torch.device('cpu')
CORES = torch.multiprocessing.cpu_count()
CORES = 4

CLASSES = ['background', 'split', 'seed', 'pod']
CLASSES = {
    '0': 'background',
    '1': 'split',
    '2': 'seed',
    '3': 'pod'
    }
N_CLASSES = 4
RESIZE_TO = 3000
BATCH_SIZE = 2

N_EPOCHS = 50

MODEL_NAME = 'all_anchors'

In [5]:
DEVICE

device(type='cuda')

### Pretrained Faster RCNN with new classifier head

In [6]:
anchor_sizes = ((8,), (16,), (32,), (64,), (128,),)

aspect_ratios = ((0.5, 1.0, 2.0),) * len(anchor_sizes)
anchor_generator = AnchorGenerator(
    anchor_sizes,
    aspect_ratios
)

model = torchvision.models.detection.fasterrcnn_resnet50_fpn_v2(weights='DEFAULT')

# get number of input features for the classifier
in_features = model.roi_heads.box_predictor.cls_score.in_features
# replace the pre-trained head with a new one
model.roi_heads.box_predictor = FastRCNNPredictor(in_features, N_CLASSES)
model.roi_heads.detections_per_img = 500

model.rpn.anchor_generator = anchor_generator

# model.roi_heads.

Downloading: "https://download.pytorch.org/models/fasterrcnn_resnet50_fpn_v2_coco-dd69338a.pth" to /root/.cache/torch/hub/checkpoints/fasterrcnn_resnet50_fpn_v2_coco-dd69338a.pth
100%|██████████| 167M/167M [00:01<00:00, 94.0MB/s]


In [None]:
backbone = torchvision.models.resnet50(weights='DEFAULT')
backbone.out_channels = 1000
anchor_sizes = ((8,), (16,), (32,), (64,), (128,),)
aspect_ratios = ((0.5, 1.0, 2.0),) * len(anchor_sizes)

anchor_generator = AnchorGenerator(
    anchor_sizes,
    aspect_ratios
)

roi_pooler = torchvision.ops.MultiScaleRoIAlign(featmap_names=['0'],
                                                output_size=7,
                                                sampling_ratio=2)

model = torchvision.models.detection.FasterRCNN(
    backbone,
    num_classes=N_CLASSES,
    rpn_anchor_generator = anchor_generator,
    box_detection_per_img=500,
    box_roi_pool=roi_pooler
)

in_features = model.roi_heads.box_predictor.cls_score.in_features
model.roi_heads.box_predictor = FastRCNNPredictor(in_features, N_CLASSES)
# model.rpn.anchor_generator = anchor_generator


In [None]:
import torchvision
from torchvision.models.detection import FasterRCNN
from torchvision.models.detection.rpn import AnchorGenerator

# load a pre-trained model for classification and return
# only the features
backbone = torchvision.models.mobilenet_v2(weights="DEFAULT").features
# FasterRCNN needs to know the number of
# output channels in a backbone. For mobilenet_v2, it's 1280
# so we need to add it here
backbone.out_channels = 1280

# let's make the RPN generate 5 x 3 anchors per spatial
# location, with 5 different sizes and 3 different aspect
# ratios. We have a Tuple[Tuple[int]] because each feature
# map could potentially have different sizes and
# aspect ratios
anchor_generator = AnchorGenerator(sizes=((32, 64, 128, 256, 512),),
                                   aspect_ratios=((0.5, 1.0, 2.0),))

# let's define what are the feature maps that we will
# use to perform the region of interest cropping, as well as
# the size of the crop after rescaling.
# if your backbone returns a Tensor, featmap_names is expected to
# be [0]. More generally, the backbone should return an
# OrderedDict[Tensor], and in featmap_names you can choose which
# feature maps to use.
roi_pooler = torchvision.ops.MultiScaleRoIAlign(featmap_names=['0'],
                                                output_size=7,
                                                sampling_ratio=2)

# put the pieces together inside a FasterRCNN model
model = FasterRCNN(backbone,
                   num_classes=2,
                   rpn_anchor_generator=anchor_generator,
                   box_roi_pool=roi_pooler)

In [7]:
print(model.rpn.anchor_generator)

AnchorGenerator()


### Optimizers and Learning Rate Schedulers

In [8]:
optimizer = torch.optim.SGD(model.parameters(), lr=.05, momentum=0.8)
scheduler = torch.optim.lr_scheduler.ExponentialLR(optimizer, gamma=0.9)

In [9]:
list(CLASSES.keys())[list(CLASSES.values()).index('pod')]

'3'

### Utility Functions


In [10]:
def train_transforms():
  """
  """
  return A.Compose([
      A.RandomBrightnessContrast(p=0.2),
      # A.CenterCrop(width=1000, height=1000, min_visibility=.8, p = .5),
      # A.RandomSizedBBoxSafeCrop(height=1000, width=1000, erosion_rate=0, interpolation=1, p=1),
      A.Flip(p=0.5),
      A.RandomRotate90(p=0.5),
      A.MotionBlur(p=0.2),
      A.Blur(p=0.2, blur_limit=3),
      ToTensorV2(p=1.0)
  ], bbox_params={
      'format': 'pascal_voc',
      'label_fields': ['labels']
  })

def val_transforms():
  """
  """
  return A.Compose([
      ToTensorV2(p=1.0),
      # A.RandomSizedBBoxSafeCrop(height=1000, width=1000, erosion_rate=0, interpolation=1, p=1)
  ], bbox_params={
      'format': 'pascal_voc',
      'label_fields': ['labels']
  })


def collate_fn(batch):
    """
    To handle the data loading as different images may have a different number of objects
    and to handle varying size tensors as well
    :param batch:
    :return:
    """

    return tuple(zip(*batch))

### Datasets and DataLoaders

In [11]:
class SeedDataset(Dataset):
  """
  Subclass of torch.utils.data.Dataset for the sainfoin classification problem
  """

  def __init__(self, image_dir, annot_dir, resize_dims, classes, transforms):
    """
    """
    self.labels = None
    self.dir_path = image_dir
    self.resize_dims = resize_dims
    self.classes = classes
    self.transforms = transforms
    self.img_names = None
    self.img_paths = None

    self.img_paths = np.array([i for i in glob(f"{self.dir_path}/*") if i.lower().endswith('.jpg')])
    self.img_names = np.array([img_path.split('/')[-1] for img_path in self.img_paths])

    sort_index = np.argsort(self.img_names)
    self.img_paths = self.img_paths[sort_index]
    self.img_names = self.img_names[sort_index]

    self.annot_paths = os.path.join(annot_dir, 'annotations_export.csv')
    self.annotations = pd.read_csv(self.annot_paths).drop(columns='Unnamed: 0')


  def __getitem__(self, idx):
    """
    """
    img_path = self.img_paths[idx]
    img_name = self.img_names[idx]
    img = cv2.imread(img_path)
    img = cv2.cvtColor(img, cv2.COLOR_BGR2RGB).astype(np.float32)
    img_shape = img.shape[:2]
    # img_resized = cv2.resize(img, self.resize_dims)
    img_resized = img
    img_resized /= 255.0


    if img_name in self.annotations.img_id.unique():
      boxes = []
      labels = []
      tmp_df = self.annotations.copy().loc[self.annotations.img_id==img_name]
      targets = {
          'img_name': img_name,
          }
      for row in tmp_df.values:
        bbox = list(row[5:])
        #scale and resize all bounding boxes according to self.resize_dims
        # bbox = np.array(bbox) / np.array(img_shape * 2)[::-1] * np.array(self.resize_dims*2)
        bbox = np.array(bbox)
        # print(bbox)
        boxes.append(bbox)
        label = list(CLASSES.keys())[list(CLASSES.values()).index(str(row[4]))]
        labels.append(label)


      boxes = np.array(boxes)
      labels = np.array(labels).astype(np.int64)
      targets['boxes'] = torch.as_tensor(boxes, dtype=torch.float32)
      targets['labels'] = torch.as_tensor(labels, dtype=torch.int64)
    else:
      print(f"image {img_name} not annotated)")
      return None, None

    if self.transforms:
      sample = self.transforms(
          image=img_resized,
          bboxes=targets['boxes'],
          labels=labels)
      img_resized = sample['image']
      targets['boxes'] = torch.Tensor(sample['bboxes'])
    else:
      img_resized = ToTensor(img_resized)

    return img_resized, targets

  def __len__(self):
    """
    """
    return len(self.img_names)


# Instantiate Datasets and Dataloaders

In [1]:
train_data = SeedDataset(image_dir=TRAIN_DIR, annot_dir=ANNOT_DIR, resize_dims=(RESIZE_TO, RESIZE_TO), classes=CLASSES, transforms=train_transforms())
val_data = SeedDataset(image_dir=VAL_DIR, annot_dir=ANNOT_DIR, resize_dims=(RESIZE_TO, RESIZE_TO), classes=CLASSES, transforms=val_transforms())

train_loader = DataLoader(
    train_data,
    batch_size=BATCH_SIZE,
    shuffle=True,
    num_workers=CORES,
    collate_fn=collate_fn,
)

val_loader = DataLoader(
    val_data,
    batch_size=BATCH_SIZE,
    shuffle=False,
    num_workers=CORES,
    collate_fn=collate_fn
)

NameError: ignored

In [None]:
# Sanity check to make sure the datasets are working properly

print(len(train_loader))
print(len(val_loader))

for _ in range(5):
  idx = randint(0, train_data.__len__()-1)
  img, res = train_data.__getitem__(idx)
  print(img, res)

for _ in range(5):
  idx = randint(0, val_data.__len__()-1)
  img, res = val_data.__getitem__(idx)
  print(img, res)

30
8
tensor([[[0.0000, 0.0000, 0.0000,  ..., 0.0000, 0.0000, 0.0000],
         [0.0000, 0.0000, 0.0000,  ..., 0.0000, 0.0000, 0.0000],
         [0.0000, 0.0000, 0.0000,  ..., 0.0000, 0.0000, 0.0000],
         ...,
         [0.0000, 0.0000, 0.0000,  ..., 0.0000, 0.0000, 0.0000],
         [0.0000, 0.0000, 0.0000,  ..., 0.0000, 0.0000, 0.0000],
         [0.0000, 0.0000, 0.0000,  ..., 0.0000, 0.0000, 0.0000]],

        [[0.0118, 0.0118, 0.0118,  ..., 0.0157, 0.0157, 0.0157],
         [0.0118, 0.0118, 0.0118,  ..., 0.0157, 0.0157, 0.0157],
         [0.0118, 0.0118, 0.0118,  ..., 0.0157, 0.0157, 0.0157],
         ...,
         [0.0275, 0.0275, 0.0275,  ..., 0.0235, 0.0235, 0.0275],
         [0.0275, 0.0275, 0.0275,  ..., 0.0235, 0.0235, 0.0275],
         [0.0275, 0.0275, 0.0275,  ..., 0.0235, 0.0235, 0.0275]],

        [[0.2275, 0.2275, 0.2275,  ..., 0.2353, 0.2353, 0.2353],
         [0.2275, 0.2275, 0.2275,  ..., 0.2353, 0.2353, 0.2353],
         [0.2275, 0.2275, 0.2275,  ..., 0.2353, 0.235

### Train and Validate Loops

In [None]:
def train(model, optimizer, data_loader):
  prog_bar = tqdm(data_loader, total=len(data_loader))
  loss_list = []
  # print(len(prog_bar))
  # print('Training\n')
  for i, data in enumerate(prog_bar):
    optimizer.zero_grad()
    images, targets = data

    images = [image.to(DEVICE) for image in images]
    targets = [{k: v.to(DEVICE) for k, v in t.items() if k in ['boxes', 'labels']} for t in targets]

    loss_dict = model(images, targets)

    # print(loss_dict)

    losses = sum(loss for loss in loss_dict.values())

    # print(losses)
    loss_value = losses.item()
    loss_list.append(loss_value)
    losses.backward()
    optimizer.step()
    prog_bar.set_description(desc=f"Train Loss: {loss_value: .4f}")

  return loss_list



def validate(model, data_loader):
  # print("Validating\n")

  prog_bar = tqdm(data_loader, total=len(data_loader))
  loss_list = []

  for i, data in enumerate(prog_bar):
    images, targets = data
    images = list(image.to(DEVICE) for image in images)
    targets = [{k: v.to(DEVICE) for k, v in t.items() if k in ['boxes', 'labels']} for t in targets]

    with torch.no_grad():
      loss_dict = model(images, targets)

    losses = sum(loss for loss in loss_dict.values())
    loss_value = losses.item()
    loss_list.append(loss_value)
    prog_bar.set_description(desc=f"Val Loss: {loss_value:.4f}")

  return loss_list

### Main Model Loop

In [None]:
def train_model(model, n_epochs):
  # send model to the device
  model.to(DEVICE)
  train_loss_list = []
  val_loss_list = []
  lr_list = []
  for epoch in range(n_epochs):
    print(f"Epoch: {epoch}\nLR: {scheduler.get_last_lr()}")
    lr_list.append(scheduler.get_last_lr())
    train_losses = train(model=model, optimizer=optimizer, data_loader=train_loader)
    val_losses = validate(model=model, data_loader=val_loader)
    avg_train_loss = np.mean(np.array(train_losses))
    avg_val_loss = np.mean(np.array(val_losses))
    print(avg_train_loss, avg_val_loss)
    train_loss_list.append(avg_train_loss)
    val_loss_list.append(avg_val_loss)
    scheduler.step()
    if epoch==(n_epochs-1):
      torch.save(model.state_dict(), f"{OUT_DIR}/model{epoch}_{MODEL_NAME}.pth")
    print()


  return train_loss_list, val_loss_list, lr_list




In [None]:
results = train_model(model=model, n_epochs=N_EPOCHS)

Epoch: 0
LR: [0.05]


  0%|          | 0/30 [00:08<?, ?it/s]


RuntimeError: ignored

In [None]:
# import matplotlib.pyplot as plt
# figure1, train_ax = plt.subplots()
# figure2, valid_ax = plt.subplots()

# train_ax.set_xlabel('epochs')
# train_ax.set_ylabel('loss')
# train_ax.plot(results[0], color='red', label='train')
# train_ax.plot(results[1], color='blue', label='val')
# train_ax.legend()


### Model Metrics

Calculate multiclassification accuracy, multiclass Jaccard index, Intersection over Union, and Mean Average Precision for the Training and Validation samples

In [None]:
# mca = MulticlassAccuracy(num_classes=N_CLASSES, average=weighted)
# mji = MulticlassJaccardIndex()

# iou = IntersectionOverUnion()
# map = MeanAveragePrecision()

In [13]:
model.load_state_dict(torch.load(
    './model_chkpt/model49_all_anchors.pth', map_location=DEVICE
))

model.to(DEVICE)
model.eval()
# val_data[]

FasterRCNN(
  (transform): GeneralizedRCNNTransform(
      Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225])
      Resize(min_size=(800,), max_size=1333, mode='bilinear')
  )
  (backbone): BackboneWithFPN(
    (body): IntermediateLayerGetter(
      (conv1): Conv2d(3, 64, kernel_size=(7, 7), stride=(2, 2), padding=(3, 3), bias=False)
      (bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (relu): ReLU(inplace=True)
      (maxpool): MaxPool2d(kernel_size=3, stride=2, padding=1, dilation=1, ceil_mode=False)
      (layer1): Sequential(
        (0): Bottleneck(
          (conv1): Conv2d(64, 64, kernel_size=(1, 1), stride=(1, 1), bias=False)
          (bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
          (conv2): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
          (bn2): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
       

### Test Image Inference

In [14]:

filenames = glob('./data/test/*')

filenames = [i for i in filenames if i.lower().endswith('.jpg')]
print(filenames)
for file in filenames:
  print(file)
  img = cv2.imread(file)
  orig = img.copy()
  img = cv2.cvtColor(orig, cv2.COLOR_BGR2RGB).astype(np.float32)
  img /= 255.0
  img = np.transpose(img, (2, 0, 1)).astype(np.float32)
  img = torch.tensor(img, dtype=torch.float).cuda()
  img = torch.unsqueeze(img, 0)
  with torch.no_grad():
          outputs = model(img)
  outputs = [{k: v.to('cpu') for k, v in t.items()} for t in outputs]
  print(len(outputs[0]['boxes']))
  nms_idx = nms(boxes=outputs[0]['boxes'], scores=outputs[0]['scores'], iou_threshold=.4)
  print(nms_idx)
  outputs[0]['boxes'] = outputs[0]['boxes'][nms_idx]
  outputs[0]['labels'] = outputs[0]['labels'][nms_idx]
  outputs[0]['scores'] = outputs[0]['scores'][nms_idx]
  print(len(outputs[0]['boxes']))

  # outputs[0]['boxes'] = outputs[0]['boxes']
  # outputs[0]['labels'] =
  # outputs[0]['scores']
  # print(out)
  if len(outputs[0]['boxes']) != 0:
          boxes = outputs[0]['boxes'].data.numpy().astype(np.int32)
          scores = outputs[0]['scores'].data.numpy()
          # boxes = boxes[scores >= detection_threshold].astype(np.int32)
          # scores = scores[scores >= detection_threshold]
          draw_boxes = boxes.copy()
          # get all the predicited class names
          pred_classes = [CLASSES[f'{i}'] for i in outputs[0]['labels'].cpu().numpy()]
          for j, box in enumerate(draw_boxes):
              cv2.rectangle(orig,
                            (int(box[0]), int(box[1])),
                            (int(box[2]), int(box[3])),
                            (255, 255, 0), 1)
              # print(np.round(scores[j], 2))
              cv2.putText(orig, pred_classes[j] + f" {np.round(scores[j], 4): .4f}",
                          (int(box[0]), int(box[1] - 5)),
                          cv2.FONT_HERSHEY_SIMPLEX, .3, (0, 150, 255),
                          1, lineType=cv2.LINE_AA)
              # cv2.putText(orig, pred_classes[j],
              #             (int(box[0]), int(box[1] - 5)),
              #             cv2.FONT_HERSHEY_SIMPLEX, .3, (150, 150, 150),
              #             1, lineType=cv2.LINE_AA)
          # cv2.namedWindow('Prediction', cv2.WINDOW_NORMAL)
          # cv2.imshow('Prediction', orig_image)
          # cv2.waitKey(0)
          cv2.imwrite(f"{file.split('/')[-1]}.jpg", orig, )

['./data/test/14dfc.jpg', './data/test/9e83d.jpg', './data/test/9e3e0.jpg', './data/test/9fbf6.jpg', './data/test/13a10.jpg', './data/test/9e66b.jpg', './data/test/10cdd.JPG', './data/test/9fc38.JPG', './data/test/14eeb.JPG', './data/test/014c3.JPG', './data/test/15b57.JPG', './data/test/17b4d.JPG', './data/test/17c5a.JPG', './data/test/9e890.JPG', './data/test/13f6c.JPG', './data/test/013b9.JPG', './data/test/19ac5.JPG', './data/test/9e8f3.JPG']
./data/test/14dfc.jpg
77
tensor([ 0,  1,  2,  3,  4,  5,  6,  7,  8,  9, 10, 11, 12, 13, 14, 15, 16, 17,
        18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35,
        36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48, 49, 50, 51, 52, 53,
        54, 55, 56, 57, 58, 59, 60, 61, 62, 63, 64, 71, 76])
67
./data/test/9e83d.jpg
182
tensor([  0,   1,   2,   3,   4,   5,   6,   7,   8,   9,  10,  11,  12,  13,
         14,  15,  16,  17,  18,  19,  20,  21,  22,  23,  24,  25,  26,  27,
         28,  29,  30,  31,  32,  33, 

KeyboardInterrupt: ignored

### Blue Image Inference and Results

In [None]:
img_dir = './data/blue_images/'
output_dir = './data/blue_images/output/'

filenames = glob(img_dir+'*')

filenames = [i for i in filenames if i.lower().endswith('.jpg')]

res = []
for file in filenames:
  print(file)
  img = cv2.imread(file)
  orig = img.copy()
  img = cv2.cvtColor(orig, cv2.COLOR_BGR2RGB).astype(np.float32)
  img /= 255.0
  img = np.transpose(img, (2, 0, 1)).astype(np.float32)
  img = torch.tensor(img, dtype=torch.float).cuda()
  img = torch.unsqueeze(img, 0)
  with torch.no_grad():
          outputs = model(img)
  outputs = [{k: v.to('cpu') for k, v in t.items()} for t in outputs]
  nms_idx = nms(boxes=outputs[0]['boxes'], scores=outputs[0]['scores'], iou_threshold=.4)
  outputs[0]['boxes'] = outputs[0]['boxes'][nms_idx]
  outputs[0]['labels'] = outputs[0]['labels'][nms_idx]
  outputs[0]['scores'] = outputs[0]['scores'][nms_idx]

  if len(outputs[0]['boxes']) != 0:
          boxes = outputs[0]['boxes'].data.numpy().astype(np.int32)
          scores = outputs[0]['scores'].data.numpy()
          draw_boxes = boxes.copy()
          # get all the predicited class names
          pred_classes = [CLASSES[f'{i}'] for i in outputs[0]['labels'].cpu().numpy()]
          for j, box in enumerate(draw_boxes):
              img_dict = {'img_id': file,
                          'class': pred_classes[j],
                          'score': scores[j],
                          'xmin':box[0],
                          'ymin': box[1],
                          'xmax': box[2],
                          'ymax': box[3]}
              res.append(img_dict)
              cv2.rectangle(orig,
                            (int(box[0]), int(box[1])),
                            (int(box[2]), int(box[3])),
                            (255, 255, 0), 1)
              cv2.putText(orig, pred_classes[j] + f" {np.round(scores[j], 4): .4f}",
                          (int(box[0]), int(box[1] - 5)),
                          cv2.FONT_HERSHEY_SIMPLEX, .3, (0, 150, 255),
                          1, lineType=cv2.LINE_AA)

          cv2.imwrite(f"{os.path.join(output_dir, file.split('/')[-1][:-4])}.jpg", orig, )

res_df = pd.DataFrame.from_dict(res)
res_df.to_csv('results.csv')

./data/blue_images/0a72b.jpg
./data/blue_images/1e90e.jpg
./data/blue_images/1cd49.jpg
./data/blue_images/1b70a.jpg
./data/blue_images/1dd66.JPG
./data/blue_images/1c4f6.jpg
./data/blue_images/1c2c2.JPG
./data/blue_images/1d9e6.jpg
./data/blue_images/1d68b.jpg
./data/blue_images/0ffee.jpg
./data/blue_images/1cd45.jpg
./data/blue_images/0fcde.jpg
./data/blue_images/0fd35.JPG
./data/blue_images/0d67c.jpg
./data/blue_images/0f6c9.JPG
./data/blue_images/0efcd.JPG
./data/blue_images/0dbc7.JPG
./data/blue_images/0ad81.jpg
./data/blue_images/0d1ce.JPG
./data/blue_images/0b5e8.JPG
./data/blue_images/0be16.jpg
./data/blue_images/0b434.jpg
./data/blue_images/5fea6.jpg
./data/blue_images/6a943.JPG
./data/blue_images/5fcd1.JPG
./data/blue_images/5e0c0.JPG
./data/blue_images/5d5ca.JPG
./data/blue_images/5f09f.jpg
./data/blue_images/5ceaa.jpg
./data/blue_images/5c4fd.jpg
./data/blue_images/4ecc7.JPG
./data/blue_images/4fc87.jpg
./data/blue_images/4e765.jpg
./data/blue_images/4e4c4.JPG
./data/blue_im