# Convert to .py

jupyter nbconvert --to script Helmet_DETR.ipynb

In [1]:
from pathlib import Path
import os
import numpy as np

import torch, torchvision
print(torch.__version__, torch.cuda.is_available())
torch.set_grad_enabled(False)

2.5.1+cu124 True


<torch.autograd.grad_mode.set_grad_enabled at 0x71af8cf93190>

In [2]:
%matplotlib inline

import matplotlib.pyplot as plt
import pylab
pylab.rcParams['figure.figsize'] = (10.0, 8.0)

In [6]:
first_class_index = 1
assert(first_class_index in [0, 1])

if first_class_index == 0:

  # There is one class, balloon, with ID n°0.

  num_classes = 3

  finetuned_classes = [
      'helmet',
      'head',
      'person'
  ]

  # The `no_object` class will be automatically reserved by DETR with ID equal
  # to `num_classes`, so ID n°1 here.

else:

  # There is one class, balloon, with ID n°1.
  #
  # However, DETR assumes that indexing starts with 0, as in computer science,
  # so there is a dummy class with ID n°0.
  # Caveat: this dummy class is not the `no_object` class reserved by DETR.

  num_classes = 2

  finetuned_classes = [
      'N/A',  'helmet', 'head'
  ]

  # The `no_object` class will be automatically reserved by DETR with ID equal
  # to `num_classes`, so ID n°2 here.

print('First class index: {}'.format(first_class_index))
print('Parameter num_classes: {}'.format(num_classes))
print('Fine-tuned classes: {}'.format(finetuned_classes))

First class index: 1
Parameter num_classes: 2
Fine-tuned classes: ['N/A', 'helmet', 'head']


In [4]:
# %%

# %cd /content/
!rm -rf detr
!git clone https://github.com/woctezuma/detr.git

%cd detr/

!git checkout finetune

Cloning into 'detr'...
remote: Enumerating objects: 265, done.[K
remote: Total 265 (delta 0), reused 0 (delta 0), pack-reused 265 (from 1)[K
Receiving objects: 100% (265/265), 325.44 KiB | 3.87 MiB/s, done.
Resolving deltas: 100% (127/127), done.
/home/epanagiotou/AdvancedNLP/DETR_reimplementation/detr
Already on 'finetune'
Your branch is up to date with 'origin/finetune'.


In [7]:
# Get pretrained weights
checkpoint = torch.hub.load_state_dict_from_url(
            url='https://dl.fbaipublicfiles.com/detr/detr-r50-e632da11.pth',
            map_location='cpu',
            check_hash=True)

# Remove class weights
del checkpoint["model"]["class_embed.weight"]
del checkpoint["model"]["class_embed.bias"]

# Save
torch.save(checkpoint,
           '/home/epanagiotou/AdvancedNLP/DETR_reimplementation/detr-r50_no-class-heads.pth')

In [None]:
# !python main.py \
#   --dataset_file "custom" \
#   --coco_path "/home/lefki/AdvancedNLP/DETR_reimplementation/processed_dataset" \
#   --output_dir "outputs" \
#   --resume "/home/lefki/AdvancedNLP/DETR_reimplementation/detrdetr-r50_no-class-head.pth" \
#   --num_classes $num_classes \
#   --epochs 2

!python main.py \
  --dataset_file "custom" \
  --coco_path "/home/epanagiotou/AdvancedNLP/DETR_reimplementation/processed_dataset" \
  --output_dir "outputs" \
  --resume "/home/epanagiotou/AdvancedNLP/DETR_reimplementation/detr-r50_no-class-heads.pth" \
  --num_classes $num_classes \
  --epochs 400 \
  --batch_size 8 \
  --lr_drop 150 \
  --lr 1e-6


Not using distributed mode
git:
  sha: e891373a8cab93d3719afb0acff28de94c6dc6a3, status: clean, branch: finetune

Namespace(lr=0.01, lr_backbone=1e-05, batch_size=8, weight_decay=0.0001, epochs=400, lr_drop=150, clip_max_norm=0.1, frozen_weights=None, backbone='resnet50', dilation=False, position_embedding='sine', enc_layers=6, dec_layers=6, dim_feedforward=2048, hidden_dim=256, dropout=0.1, nheads=8, num_queries=100, pre_norm=False, masks=False, aux_loss=True, set_cost_class=1, set_cost_bbox=5, set_cost_giou=2, mask_loss_coef=1, dice_loss_coef=1, bbox_loss_coef=5, giou_loss_coef=2, eos_coef=0.1, num_classes=2, dataset_file='custom', coco_path='/home/epanagiotou/AdvancedNLP/DETR_reimplementation/processed_dataset', coco_panoptic_path=None, remove_difficult=False, output_dir='outputs', device='cuda', seed=42, resume='/home/epanagiotou/AdvancedNLP/DETR_reimplementation/detr-r50_no-class-heads.pth', start_epoch=0, eval=False, num_workers=2, world_size=1, dist_url='env://', distributed=Fal

In [5]:
# %%

model = torch.hub.load('facebookresearch/detr',
                       'detr_resnet50',
                       pretrained=False,
                       num_classes=num_classes)

checkpoint = torch.load('/home/lefki/ECE766/helmet_detr/output/checkpoint.pth',
                        map_location='cpu')

model.load_state_dict(checkpoint['model'],
                      strict=False)

model.eval();

Downloading: "https://github.com/facebookresearch/detr/zipball/main" to /home/epanagiotou/.cache/torch/hub/main.zip
  checkpoint = torch.load('/home/lefki/ECE766/helmet_detr/output/checkpoint.pth',


FileNotFoundError: [Errno 2] No such file or directory: '/home/lefki/ECE766/helmet_detr/output/checkpoint.pth'

In [7]:
# %%

# colors for visualization
COLORS = [[0.000, 0.447, 0.741], [0.850, 0.325, 0.098], [0.929, 0.694, 0.125],
          [0.494, 0.184, 0.556], [0.466, 0.674, 0.188], [0.301, 0.745, 0.933]]

In [8]:
# %%

def plot_finetuned_results(pil_img, prob=None, boxes=None):
    plt.figure(figsize=(16,10))
    plt.imshow(pil_img)
    ax = plt.gca()
    colors = COLORS * 100
    if prob is not None and boxes is not None:
      for p, (xmin, ymin, xmax, ymax), c in zip(prob, boxes.tolist(), colors):
          ax.add_patch(plt.Rectangle((xmin, ymin), xmax - xmin, ymax - ymin,
                                    fill=False, color=c, linewidth=3))
          cl = p.argmax()
          text = f'{finetuned_classes[cl]}: {p[cl]:0.2f}'
          ax.text(xmin, ymin, text, fontsize=15,
                  bbox=dict(facecolor='yellow', alpha=0.5))
    plt.axis('off')
    plt.show()

In [9]:
# %%

import torchvision.transforms as T

# standard PyTorch mean-std input image normalization
transform = T.Compose([
    T.Resize(800),
    T.ToTensor(),
    T.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225])
])

# for output bounding box post-processing
def box_cxcywh_to_xyxy(x):
    x_c, y_c, w, h = x.unbind(1)
    b = [(x_c - 0.5 * w), (y_c - 0.5 * h),
         (x_c + 0.5 * w), (y_c + 0.5 * h)]
    return torch.stack(b, dim=1)

def rescale_bboxes(out_bbox, size):
    img_w, img_h = size
    b = box_cxcywh_to_xyxy(out_bbox)
    b = b * torch.tensor([img_w, img_h, img_w, img_h], dtype=torch.float32)
    return b

In [10]:
# %%

def filter_bboxes_from_outputs(outputs,
                               threshold=0.7):
  
  # keep only predictions with confidence above threshold
  probas = outputs['pred_logits'].softmax(-1)[0, :, :-1]
  keep = probas.max(-1).values > threshold

  probas_to_keep = probas[keep]

  # convert boxes from [0; 1] to image scales
  bboxes_scaled = rescale_bboxes(outputs['pred_boxes'][0, keep], im.size)
  
  return probas_to_keep, bboxes_scaled

In [11]:
# %%

def run_worflow(my_image, my_model):
  # mean-std normalize the input image (batch-size: 1)
  img = transform(my_image).unsqueeze(0)

  # propagate through the model
  outputs = my_model(img)

  for threshold in [0.8]:
    
    probas_to_keep, bboxes_scaled = filter_bboxes_from_outputs(outputs,
                                                              threshold=threshold)

    plot_finetuned_results(my_image,
                           probas_to_keep, 
                           bboxes_scaled)

In [None]:
# Visualize predictions

from PIL import Image
img_name = '/home/lefki/ECE766/helmet_detr/test_images/hard_hat_workers32.png'
# img_name = '/home/lefki/ECE766/helmet_detr/test_images/hard_hat_workers84.png'
# img_name = '/home/lefki/ECE766/helmet_detr/test_images/hard_hat_workers499.png'
# img_name = '/home/lefki/ECE766/helmet_detr/test_images/hard_hat_workers2459.png'
# img_name = '/home/lefki/ECE766/helmet_detr/test_images/hard_hat_workers4839.png'
im = Image.open(img_name)

run_worflow(im,
            model)