In [1]:
import sys
sys.path.append('..')
from utils.pytorch_helper import *
from torch.amp import GradScaler
from torch.utils.data import DataLoader
from transformers import DetrImageProcessor, DetrForObjectDetection
# with help of https://github.com/NielsRogge/Transformers-Tutorials/blob/master/DETR/Fine_tuning_DetrForObjectDetection_on_custom_dataset_(balloon).ipynb

In [None]:
# --- Dataset locations (relative to this notebook) ---
images_dir = "../../data/merged/images/train"
annotations_dir = "../../data/merged/labels/train"
validation_images_dir = "../../data/merged/images/val"
validation_annotation_dir = "../../data/merged/labels/val"

# Image processor that belongs to the pretrained DETR checkpoint.
# NOTE(review): the training output further down warns "trying to rescale
# already rescaled images" -- presumably the dataset already yields pixel
# values in [0, 1]; confirm and consider `do_rescale=False` for those inputs.
processor = DetrImageProcessor.from_pretrained("facebook/detr-resnet-50")

# --- Training split: only a tiny fraction is used while testing the pipeline ---
train_dataset = LicensePlateDataset(images_dir, annotations_dir, processor=processor)
train_subset = get_subset(train_dataset, fraction=0.003)
train_loader = DataLoader(
    dataset=train_subset,
    batch_size=8,
    shuffle=True,
    collate_fn=collate_fn_detr,
)

# --- Validation split: built without the processor and batched one image at a time ---
val_dataset = LicensePlateDataset(validation_images_dir, validation_annotation_dir)
val_subset = get_subset(val_dataset, fraction=0.1)
val_loader = DataLoader(
    dataset=val_subset,
    batch_size=1,
    shuffle=False,
    collate_fn=collate_fn,
)

In [3]:
# Start from the pretrained DETR weights. The classification head is
# re-initialised because its shape differs from the checkpoint, which is why
# `ignore_mismatched_sizes=True` is required (see the warning printed below).
model = DetrForObjectDetection.from_pretrained(
    "facebook/detr-resnet-50",
    revision="no_timm",
    num_labels=2,
    ignore_mismatched_sizes=True,
)
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
model.to(device)
print(f"Load Detr Model to {device}")

# GradScaler scales the loss so that small gradients do not underflow during
# mixed-precision training (the precision reduction itself comes from autocast,
# not from the scaler). It is only enabled when we actually run on CUDA; on CPU
# it becomes a transparent no-op instead of emitting a "CUDA is not available" warning.
scaler = GradScaler("cuda", enabled=device.type == "cuda")

# Optimise only the parameters that require gradients.
params = [p for p in model.parameters() if p.requires_grad]
optimizer = torch.optim.SGD(params, lr=0.002, momentum=0.9, weight_decay=0.0001)
# NOTE(review): T_max is fixed at 10 and is independent of `num_epochs`
# configured in a later cell -- confirm this is the intended schedule length.
lr_scheduler = torch.optim.lr_scheduler.CosineAnnealingLR(optimizer, T_max=10)

Some weights of DetrForObjectDetection were not initialized from the model checkpoint at facebook/detr-resnet-50 and are newly initialized because the shapes did not match:
- class_labels_classifier.weight: found shape torch.Size([92, 256]) in the checkpoint and torch.Size([3, 256]) in the model instantiated
- class_labels_classifier.bias: found shape torch.Size([92]) in the checkpoint and torch.Size([3]) in the model instantiated
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Load Detr Model to cuda


In [4]:
# Bookkeeping for the training loop: best validation scores seen so far,
# early-stopping counter/patience and the epoch range to run.
best_mAP50 = 0.0
best_mAP50_95 = 0.0
stopping_counter = 0
patience = 1
num_epochs = 3
start_epoch = 0

# Set to True to resume from the checkpoint on disk instead of starting fresh.
continue_training = False
if continue_training:
    # Reference validation results of the Faster R-CNN baselines
    # (presumably carried over from the Faster R-CNN notebooks; they are not
    # results of this DETR run):
    # Faster RCNN Resnet50:
    # Epoch 0 Validation: mAP50 = 0.9146, mAP50-95 = 0.6440
    # Epoch 1 Validation: mAP50 = 0.9233, mAP50-95 = 0.6387
    # -> Epoch 2 Validation: mAP50 = 0.9268, mAP50-95 = 0.6613
    # Faster RCNN Mobilenet V3:
    # Epoch 0 Validation: mAP50 = 0.8868, mAP50-95 = 0.6464
    # Epoch 1 Validation: mAP50 = 0.8908, mAP50-95 = 0.6532
    # -> Epoch 2 Validation: mAP50 = 0.8997, mAP50-95 = 0.6668
    # Epoch 3 Validation: mAP50 = 0.8878, mAP50-95 = 0.6806

    # map_location makes the checkpoint loadable on whatever device is active
    # (e.g. a checkpoint saved on GPU and resumed on a CPU-only machine).
    checkpoint = torch.load("../../models/best_detr.pth", map_location=device)  # , weights_only=False
    model.load_state_dict(checkpoint["model_state_dict"])
    optimizer.load_state_dict(checkpoint["optimizer_state_dict"])
    lr_scheduler.load_state_dict(checkpoint["lr_scheduler_state_dict"])
    # Resume with the epoch after the one stored in the checkpoint
    # (epoch 0 when the checkpoint carries no epoch entry).
    start_epoch = checkpoint.get("epoch", -1) + 1
    best_mAP50 = checkpoint.get("mAP50", 0)
    best_mAP50_95 = checkpoint.get("mAP50-95", 0)
    print(f"Checkpoint loaded! Start epoch = {start_epoch}")

In [None]:
@torch.inference_mode()
def detr_predict(model, img, conf_threshold=0.001, **kwargs):
    """Run DETR on a single image and return the detections above a threshold.

    Args:
        model: DETR detection model; it is switched to eval mode.
        img: Image in any format accepted by the processor.
        conf_threshold: Detections whose confidence is not strictly greater
            than this value are dropped.
        **kwargs: Must contain ``processor``, the image processor used to
            build the model inputs.

    Returns:
        Tuple ``(confidences, boxes)`` of plain Python lists with one entry
        per kept detection. Boxes are returned exactly as the model emits them
        in ``outputs.pred_boxes``.
        NOTE(review): per the Hugging Face DETR docs these are normalised
        (center_x, center_y, width, height) -- confirm the consumer expects
        that format, otherwise convert them (e.g. via
        ``processor.post_process_object_detection``).

    Raises:
        KeyError: If no ``processor`` keyword argument is supplied.
    """
    model.eval()
    processor = kwargs["processor"]

    # Use the device the model actually lives on rather than a notebook-level
    # global, so the function also works when called in isolation.
    model_device = next(model.parameters()).device

    encoding = processor(img, return_tensors="pt").to(model_device)
    outputs = model(**encoding)

    # Drop the last class column (DETR's "no object" class per the HF docs)
    # and keep the best remaining class probability of every query.
    class_probs = outputs.logits.softmax(-1)[..., :-1]
    confidences = class_probs.max(-1)[0].cpu().numpy()
    boxes = outputs.pred_boxes.cpu().numpy()

    # The boolean mask spans the leading (batch, query) axes, so indexing
    # yields flat per-detection arrays.
    keep = confidences > conf_threshold
    confidences = confidences[keep]
    boxes = boxes[keep]

    return confidences.tolist(), boxes.tolist()


@torch.inference_mode()
def evaluate_detr(model, data_loader, device, processor):
    """Evaluate a DETR model on every sample of ``data_loader``.

    Args:
        model: DETR detection model; it is switched to eval mode.
        data_loader: Iterable yielding ``(images, targets)`` batches where each
            target is a mapping with a ``"boxes"`` tensor.
        device: Device the images are moved to before prediction.
        processor: Image processor forwarded to ``detr_predict``.

    Returns:
        The metric summary produced by
        ``ObjectDetectionEvaluator.get_metric_summary``.
    """
    model.eval()

    # Collect the whole split first; the evaluator works on full lists.
    all_images = []
    all_bboxes = []
    for images, targets in data_loader:
        all_images.extend(img.to(device) for img in images)
        all_bboxes.extend(target["boxes"].cpu().numpy().tolist() for target in targets)

    # Use the DETR prediction function: it is the one that consumes the
    # `processor` keyword forwarded by the evaluator. The Faster R-CNN
    # predictor used here before does not match this model.
    evaluator = ObjectDetectionEvaluator(model, all_images, all_bboxes, detr_predict, processor=processor)
    metric_summary = evaluator.get_metric_summary(verbose=False)

    return metric_summary

In [6]:
# Smoke test: push one test image through the processor and the model.
image_path = "../../data/merged/images/test/img_dataset_l26543.jpg"
image = cv2.imread(image_path)
# cv2.imread signals a missing/unreadable file by returning None instead of
# raising, which would otherwise surface as an obscure cvtColor error.
if image is None:
    raise FileNotFoundError(f"Could not read image: {image_path}")
# OpenCV loads BGR; convert to RGB before handing the image to the processor.
image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)

encoding = processor(image, return_tensors="pt").to(device)
# No gradients are needed for this check; avoid keeping an autograd graph alive.
with torch.no_grad():
    outputs = model(**encoding)

In [None]:
# Train, validate after every epoch, keep the best checkpoint and stop early
# once validation stops improving for `patience` consecutive epochs.
# The checkpoint path matches the one read by the resume cell.
best_checkpoint_path = "../../models/best_detr.pth"

for epoch in range(start_epoch, num_epochs):
    train_loss = train_one_epoch(model, optimizer, train_loader, device, epoch, scaler, processor)
    print(f"Epoch {epoch}: Train Loss = {train_loss:.4f}")

    lr_scheduler.step()

    # Validate with the DETR-specific evaluation defined in this notebook.
    metrics = evaluate_detr(model, val_loader, device, processor)
    mAP50 = metrics['mAP50']
    mAP50_95 = metrics['mAP50-95']
    print(f"Epoch {epoch} Validation: mAP50 = {mAP50:.4f}, mAP50-95 = {mAP50_95:.4f}")

    # An improvement in either metric counts as progress.
    if mAP50 > best_mAP50 or mAP50_95 > best_mAP50_95:
        best_mAP50 = max(mAP50, best_mAP50)
        best_mAP50_95 = max(mAP50_95, best_mAP50_95)
        stopping_counter = 0
        torch.save({
            "epoch": epoch,
            "model_state_dict": model.state_dict(),
            "optimizer_state_dict": optimizer.state_dict(),
            "lr_scheduler_state_dict": lr_scheduler.state_dict(),
            "mAP50": best_mAP50,
            "mAP50-95": best_mAP50_95
        }, best_checkpoint_path)
        print(f"Model saved at epoch {epoch}.")
    else:
        stopping_counter += 1

    if stopping_counter >= patience:
        print("Early stopping triggered. Stop training.")
        break

Processing batches:   0%|          | 0/97 [00:00<?, ?it/s]It looks like you are trying to rescale already rescaled images. If the input images have pixel values between 0 and 1, set `do_rescale=False` to avoid rescaling them again.
The `max_size` parameter is deprecated and will be removed in v4.26. Please specify in `size['longest_edge'] instead`.
Processing batches:  22%|██▏       | 21/97 [00:41<02:31,  2.00s/it]


KeyboardInterrupt: 