# Define configs


In [None]:
import copy
import glob
import json
import os
import random
import time
import warnings
from pathlib import Path

import numpy as np
import torch
import torchvision.transforms.functional as F
from PIL import Image
from pycocotools.coco import COCO
from pycocotools.cocoeval import COCOeval
from torch.optim import AdamW
from torch.utils.data import Dataset, DataLoader, Subset, random_split
from tqdm.notebook import tqdm
from transformers import DetrImageProcessor,DetrForObjectDetection,TrainingArguments,Trainer,TrainerCallback

# Silence all Python warnings for the rest of the notebook.
warnings.filterwarnings('ignore')

# --- Configuration ---
IMG_PATH = Path("/kaggle/input/dac204/images/images")
MODEL_CHECKPOINT = "facebook/detr-resnet-50"
IMAGE_EXTENSION = ".jpg"

# Class names in id order: the position in this list is the class id.
_CLASS_NAMES = [
    "N/A",
    "biker",
    "car",
    "pedestrian",
    "trafficlight",
    "trafficlight-green",
    "trafficlight-greenleft",
    "trafficlight-red",
    "trafficlight-redleft",
    "trafficlight-yellow",
    "trafficlight-yellowleft",
    "truck",
    "arret",
]
CLASSES_ID_TO_LABELS = dict(enumerate(_CLASS_NAMES))
LABEL_TO_ID = {label: class_id for class_id, label in CLASSES_ID_TO_LABELS.items()}

# Prefer the GPU when one is visible to torch.
if torch.cuda.is_available():
    DEVICE = torch.device("cuda")
else:
    DEVICE = torch.device("cpu")
print(f"Using device: {DEVICE}")


# Fail fast when the image directory is not available, before any model work starts.
if not IMG_PATH.exists():
    raise FileNotFoundError(f"Image directory not found: {IMG_PATH}")

# Setup summary, one line per fact.
for summary_line in (
    "Step 1: Imports and Setup complete.",
    f"Image Path: {IMG_PATH}",
    f"Number of classes: {len(CLASSES_ID_TO_LABELS)}",
):
    print(summary_line)


2025-05-19 17:33:07.527248: E external/local_xla/xla/stream_executor/cuda/cuda_fft.cc:477] Unable to register cuFFT factory: Attempting to register factory for plugin cuFFT when one has already been registered
E0000 00:00:1747675987.763281      19 cuda_dnn.cc:8310] Unable to register cuDNN factory: Attempting to register factory for plugin cuDNN when one has already been registered
E0000 00:00:1747675987.833216      19 cuda_blas.cc:1418] Unable to register cuBLAS factory: Attempting to register factory for plugin cuBLAS when one has already been registered


Using device: cuda
Step 1: Imports and Setup complete.
Image Path: /kaggle/input/dac204/images/images
Number of classes: 13


# Load Model

In [3]:
# Load the pretrained DETR checkpoint configured for this notebook's label set.
# Model and processor both read MODEL_CHECKPOINT so they can never drift apart.
model = DetrForObjectDetection.from_pretrained(
    MODEL_CHECKPOINT,
    num_labels=len(CLASSES_ID_TO_LABELS),
    id2label=CLASSES_ID_TO_LABELS,
    label2id=LABEL_TO_ID,
    # The checkpoint's classification head has a different number of outputs;
    # allow it to be re-initialised instead of raising a size-mismatch error.
    ignore_mismatched_sizes=True
).to(DEVICE)
processor = DetrImageProcessor.from_pretrained(MODEL_CHECKPOINT)
print("Model and Processor loaded.")

config.json:   0%|          | 0.00/4.59k [00:00<?, ?B/s]

model.safetensors:   0%|          | 0.00/167M [00:00<?, ?B/s]

model.safetensors:   0%|          | 0.00/102M [00:00<?, ?B/s]

Some weights of the model checkpoint at facebook/detr-resnet-50 were not used when initializing DetrForObjectDetection: ['model.backbone.conv_encoder.model.layer1.0.downsample.1.num_batches_tracked', 'model.backbone.conv_encoder.model.layer2.0.downsample.1.num_batches_tracked', 'model.backbone.conv_encoder.model.layer3.0.downsample.1.num_batches_tracked', 'model.backbone.conv_encoder.model.layer4.0.downsample.1.num_batches_tracked']
- This IS expected if you are initializing DetrForObjectDetection from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing DetrForObjectDetection from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of DetrForObjectDetection were not initialized from the model checkpoin

preprocessor_config.json:   0%|          | 0.00/290 [00:00<?, ?B/s]

Model and Processor loaded.


In [4]:
# Show the id → label table stored on the model config, one class per line.
for class_id, class_name in model.config.id2label.items():
    print("{:2d} → {}".format(class_id, class_name))

 0 → N/A
 1 → biker
 2 → car
 3 → pedestrian
 4 → trafficlight
 5 → trafficlight-green
 6 → trafficlight-greenleft
 7 → trafficlight-red
 8 → trafficlight-redleft
 9 → trafficlight-yellow
10 → trafficlight-yellowleft
11 → truck
12 → arret


# Data loading

In [5]:

def load_data_paths(img_dir, ann_path):
    """Collect the on-disk path of every image listed in a COCO annotation file.

    Args:
        img_dir: Directory that contains the image files.
        ann_path: Path to the COCO-format annotation JSON.

    Returns:
        (img_paths, coco): the paths (under `img_dir`) of the listed images that
        exist on disk, in the order returned by `coco.loadImgs`, and the loaded
        `COCO` object so annotations can be queried later.
    """
    print(f"Scanning for images in {img_dir} and annotations {ann_path}")

    image_root = Path(img_dir)
    coco = COCO(ann_path)

    matched_paths = []
    n_missing = 0
    for record in tqdm(coco.loadImgs(coco.getImgIds())):
        candidate = image_root / record['file_name']
        if not candidate.exists():
            # Listed in the annotation file but absent from the image directory.
            n_missing += 1
            continue
        matched_paths.append(candidate)

    print(f"Found {len(matched_paths)} correctly matched image/annotation pairs.")
    if n_missing > 0:
        print(f"Warning: {n_missing} image files listed in the annotation are missing in the image directory.")

    return matched_paths, coco

In [6]:
# Build the image-path list and the COCO annotation index from the dataset files.
ANNOTATION_PATH = '/kaggle/input/dac204/output_coco.json'
img_paths, coco = load_data_paths(IMG_PATH, ANNOTATION_PATH)

Scanning for images in /kaggle/input/dac204/images/images and annotations /kaggle/input/dac204/output_coco.json
loading annotations into memory...
Done (t=2.38s)
creating index...
index created!


  0%|          | 0/31676 [00:00<?, ?it/s]

Found 31676 correctly matched image/annotation pairs.


In [7]:
# Sanity check: number of image paths returned by load_data_paths.
len(img_paths)

31676

In [8]:
# Define a dataset Class which loads images and annotations 

class COCODatasetForDETR(Dataset):
    """Dataset that pairs each image with its COCO annotations.

    Args:
        img_paths: list of image paths (`pathlib.Path`); the file name of each
            path is used to look up the COCO image id.
        coco: COCO object from pycocotools.coco containing annotation data in
            coco format.

    Each item is a dict with keys:
        "image":       the image converted to RGB,
        "image_id":    the COCO image id,
        "annotations": {"annotations": [annotation dicts], "image_id": image id}.
    """

    def __init__(self, img_paths, coco):
        self.img_paths  = img_paths
        self.coco       = coco
        # file name -> COCO image id, used to find the annotations of a path.
        self.file_to_id = {img['file_name']: img['id']
                           for img in coco.dataset['images']}

    def __len__(self):
        return len(self.img_paths)

    @staticmethod
    def _load_image(path):
        """Open `path` and return an RGB copy, closing the file handle afterwards."""
        with Image.open(path) as img:
            return img.convert("RGB")

    def __getitem__(self, idx):
        img_path = self.img_paths[idx]
        try:
            image = self._load_image(img_path)
        except Exception as e:
            print(f"Error loading image {img_path}: {e}. Falling back to the first image.")
            # Switch the path as well as the pixels: the id and annotations
            # looked up below must describe the image that is actually returned,
            # otherwise the model is trained on boxes from a different picture.
            img_path = self.img_paths[0]
            image = self._load_image(img_path)
        image_id = self.file_to_id[img_path.name]

        anns = self.coco.loadAnns(self.coco.getAnnIds(imgIds=image_id))

        # Fill in the fields that may be absent from the annotation file:
        # area defaults to bbox width * height, iscrowd defaults to 0.
        for ann in anns:
            ann.setdefault("area", ann["bbox"][2] * ann["bbox"][3])
            ann.setdefault("iscrowd", 0)

        return {
            "image":       image,
            "image_id":    image_id,
            "annotations": {
                "annotations": anns,
                "image_id":    image_id
            }
        }


In [9]:
# 75% of the images (rounded down) go to the training split.
train_len = int(0.75 * len(img_paths))
train_len


23757

In [10]:
# 15% of the images (rounded down) go to the test split.
test_len = int(0.15 * len(img_paths))

In [None]:
# Train / test / eval split: the first train_len shuffled paths are the training
# set, the next test_len the test set, and whatever remains the eval set.

# Shuffle with a fixed seed. A copy is shuffled rather than img_paths itself, so
# re-running this cell always reproduces the same split instead of re-shuffling
# an already shuffled list.
SEED=40
random.seed(SEED)
shuffled_paths = list(img_paths)
random.shuffle(shuffled_paths)

train_paths = shuffled_paths[:train_len]
test_paths = shuffled_paths[train_len:train_len+test_len]
eval_paths = shuffled_paths[train_len+test_len:]

# Every image must land in exactly one split.
assert len(train_paths) + len(test_paths) + len(eval_paths) == len(img_paths)
print((len(img_paths),len(train_paths),len(test_paths),len(eval_paths)))

(31676, 23757, 4751, 3167)

In [None]:
# Wrap each split in a dataset; all three share the same COCO annotation index.
train_dataset, test_dataset, eval_dataset = (
    COCODatasetForDETR(split_paths, coco)
    for split_paths in (train_paths, test_paths, eval_paths)
)
print(tuple(len(split) for split in (train_dataset, test_dataset, eval_dataset)))

(23757, 4751, 3167)

In [13]:
# Make sure the model lives on DEVICE. The bare print() is the cell's last
# statement, so the value returned by .to() is not echoed as cell output.
model.to(DEVICE)
print()




In [None]:

# Count annotations per category over the training images, then derive
# inverse-frequency weights: weight = (count of the most frequent class) / count,
# so the most frequent class gets 1.0 and rarer classes get larger values.
train_img_ids = [train_dataset.file_to_id[p.name] for p in train_dataset.img_paths]
ann_ids = coco.getAnnIds(imgIds=train_img_ids)
anns = coco.loadAnns(ann_ids)

category_counts = {}
for ann in anns:
    category_counts.setdefault(ann['category_id'], 0)
    category_counts[ann['category_id']] += 1

# Stays empty when the training split has no annotations at all.
class_weights = {}
if category_counts:
    max_count = max(category_counts.values())
    for cat_id, count in category_counts.items():
        class_weights[cat_id] = max_count / count

In [15]:
# Inspect the number of training annotations per category id.
category_counts


{4: 12061,
 9: 2930,
 2: 72319,
 3: 15162,
 1: 6894,
 6: 2408,
 8: 1478,
 11: 4834,
 5: 6030,
 7: 10583,
 12: 1372,
 10: 1613}

In [16]:
# Inspect the inverse-frequency weight per category id.
class_weights

{4: 5.996103142359671,
 9: 24.68225255972696,
 2: 1.0,
 3: 4.769753330695159,
 1: 10.490136350449667,
 6: 30.0328073089701,
 8: 48.93031123139377,
 11: 14.960488208522962,
 5: 11.993200663349917,
 7: 6.833506567135973,
 12: 52.71064139941691,
 10: 44.83508989460633}

In [17]:

# Per-class weight vector with one extra trailing slot for "no object".
# NOTE(review): nothing visible in this notebook reads `model.empty_weight`.
# Confirm that the installed transformers DETR loss really consumes a
# model-level attribute of this name; if it builds its own weights from the
# config, these class weights have no effect on training — TODO verify.
model.empty_weight = torch.ones(model.config.num_labels + 1, device=DEVICE)
for weighted_id in (c for c in range(1, model.config.num_labels) if c in class_weights):
    model.empty_weight[weighted_id] = class_weights[weighted_id]
model.empty_weight[-1] = 0.1

# Report the weight of every real class, then the trailing "no object" slot.
for class_id, class_weight in enumerate(model.empty_weight[:-1].tolist()):
    print(f"{model.config.id2label[class_id]}: {class_weight:.4f}")
print(f"No object: {model.empty_weight[-1].item():.4f}")


N/A: 1.0000
biker: 10.4901
car: 1.0000
pedestrian: 4.7698
trafficlight: 5.9961
trafficlight-green: 11.9932
trafficlight-greenleft: 30.0328
trafficlight-red: 6.8335
trafficlight-redleft: 48.9303
trafficlight-yellow: 24.6823
trafficlight-yellowleft: 44.8351
truck: 14.9605
arret: 52.7106
No object: 0.1000


# Training


In [None]:
def detr_data_collator(features, processor):
    """
    Batch a list of dataset items for DETR by delegating to the processor.

    Args:
        features: list of dicts, each with an 'image' and an 'annotations' entry.
        processor: callable invoked as
            processor(images=..., annotations=..., return_tensors="pt").

    Returns:
        Whatever the processor returns for the batch.
    """
    images = []
    targets = []
    for feature in features:
        images.append(feature['image'])
        # Passed through untouched: the nested annotation layout is whatever
        # the dataset produced.
        targets.append(feature['annotations'])

    return processor(images=images, annotations=targets, return_tensors="pt")

print("DETR Data Collator function defined.")


DETR Data Collator function defined.


In [19]:
# Training configuration for the initial run.

output_dir = '/kaggle/working/detr_training_output_hf_epoch_no_map'
os.makedirs(output_dir, exist_ok=True)

# Log, evaluate and checkpoint on the same per-epoch schedule.
epoch_schedule = dict(
    logging_strategy="epoch",
    eval_strategy="epoch",
    save_strategy="epoch",
)

# Keep a limited number of checkpoints and reload the best one when training
# ends; greater_is_better=False means a lower metric value is better.
checkpoint_policy = dict(
    save_total_limit=3,
    load_best_model_at_end=True,
    greater_is_better=False,
)

training_args = TrainingArguments(
    output_dir=output_dir,
    num_train_epochs=9,
    per_device_train_batch_size=4,
    per_device_eval_batch_size=8,
    learning_rate=1e-4,
    weight_decay=1e-4,
    remove_unused_columns=False,
    report_to="tensorboard",
    fp16=torch.cuda.is_available(),
    **epoch_schedule,
    **checkpoint_policy,
)

# Clarify default behavior: with load_best_model_at_end the summary reports eval_loss.
best_metric_label = 'eval_loss' if training_args.load_best_model_at_end else 'N/A (using last model)'
for summary_line in (
    "TrainingArguments defined (without custom metrics):",
    f"- Output Directory: {training_args.output_dir}",
    f"- Strategy (Log/Eval/Save): {training_args.eval_strategy}",
    f"- Metric for Best Model: {best_metric_label}",
    f"- Using FP16: {training_args.fp16}",
):
    print(summary_line)



TrainingArguments defined (without custom metrics):
- Output Directory: /kaggle/working/detr_training_output_hf_epoch_no_map
- Strategy (Log/Eval/Save): IntervalStrategy.EPOCH
- Metric for Best Model: eval_loss
- Using FP16: True


In [20]:
# Freeze the backbone so that only the remaining parameters are trained.
print("Freezing the Backbone")
model.model.backbone.requires_grad_(False)
print("Backbone frozen.")

# Parameter budget: total element count vs. the part that still receives gradients.
total_params = sum(p.numel() for p in model.parameters())
trainable_params = sum(p.numel() for p in model.parameters() if p.requires_grad)

print(f"\nTotal parameters: {total_params}")
print(f"Trainable parameters: {trainable_params}")
print(f"Percentage Trainable: {100 * trainable_params / total_params:.2f}%")





Freezing the Backbone
Backbone frozen.

Total parameters: 41504722
Trainable parameters: 18049810
Percentage Trainable: 43.49%


In [None]:
# Helpers that compute COCO mAP on the validation dataset after every epoch

def calculate_map_from_dataset(model, processor, dataset, coco_gt, device):
    """
    Compute COCO mAP (@.5:.95) for a DETR model over `dataset`,
    showing a tqdm bar and safely mapping HF labels → COCO category_ids.

    Args:
        model: detection model. It is switched to eval mode for the run and its
            previous train/eval mode is restored before returning.
        processor: used to preprocess each image and for
            `post_process_object_detection`.
        dataset: iterable of dicts with an "image" (object exposing `.size` as
            (width, height)) and an "image_id".
        coco_gt: pycocotools COCO object holding the ground truth.
        device: device on which inference runs.

    Returns:
        (map_50_95, coco_eval): `coco_eval.stats[0]` and the COCOeval object.
        When there is not a single prediction to score (e.g. an empty dataset),
        returns (0.0, None) instead of letting `loadRes` fail on an empty list.
    """
    # Remember the caller's mode so evaluation has no lasting side effect on it.
    was_training = getattr(model, "training", False)
    model.eval()
    model.to(device)

    coco_cats = coco_gt.loadCats(coco_gt.getCatIds())
    name2coco = {cat["name"]: cat["id"] for cat in coco_cats}

    coco_results = []
    # Every image that was run through the model, including any image for which
    # no prediction could be kept: its ground truth must still count as missed.
    evaluated_img_ids = set()

    try:
        with torch.no_grad():
            for item in tqdm(dataset, desc="Evaluating", leave=False):
                image, image_id = item["image"], item["image_id"]
                evaluated_img_ids.add(image_id)
                w, h = image.size

                inputs = processor(images=image, return_tensors="pt").to(device)
                outputs = model(**inputs)

                # No score filtering here (threshold=0.0); every returned
                # detection is handed to COCOeval together with its score.
                target_sizes = torch.tensor([[h, w]], device=device)
                results = processor.post_process_object_detection(
                    outputs, target_sizes=target_sizes, threshold=0.0
                )[0]

                for score, label, box in zip(results["scores"], results["labels"], results["boxes"]):
                    label_name  = model.config.id2label[int(label)]
                    coco_cat_id = name2coco.get(label_name)
                    if coco_cat_id is None:
                        # Label has no same-named COCO category; nothing to score.
                        continue

                    # Corner format (xmin, ymin, xmax, ymax) → COCO (x, y, width, height).
                    xmin, ymin, xmax, ymax = box.tolist()
                    width, height = xmax - xmin, ymax - ymin
                    coco_results.append({
                        "image_id":    image_id,
                        "category_id": coco_cat_id,
                        "bbox":        [round(xmin,3), round(ymin,3),
                                        round(width,3), round(height,3)],
                        "score":       round(float(score), 5),
                    })
    finally:
        model.train(was_training)

    if not coco_results:
        print("Warning: no predictions to evaluate; reporting mAP = 0.0")
        return 0.0, None

    # load predictions & run COCOeval
    coco_dt   = coco_gt.loadRes(coco_results)
    coco_eval = COCOeval(coco_gt, coco_dt, iouType="bbox")
    coco_eval.params.imgIds = sorted(evaluated_img_ids)

    coco_eval.evaluate()
    coco_eval.accumulate()
    coco_eval.summarize()

    return coco_eval.stats[0], coco_eval  # stats[0] is mAP @ .5:.95

def compute_metrics_fn(eval_pred):
    """Trainer `compute_metrics` hook that reports COCO mAP on the eval split.

    `eval_pred` is not used: the metric is recomputed by running the
    notebook-level `model` over `eval_dataset` with `calculate_map_from_dataset`.

    Returns:
        dict with the single key "mAP@.5:.95".
    """
    map_and_evaluator = calculate_map_from_dataset(model, processor, eval_dataset, coco, DEVICE)
    return {"mAP@.5:.95": map_and_evaluator[0]}

# Callback that saves the model's state dict at the end of every epoch
class SavePTHCallback(TrainerCallback):
    """Write the model's state dict to `<output_dir>/pytorch_model_epoch_<N>.pth` after each epoch."""

    def on_epoch_end(self, args, state, control, **kwargs):
        # state.epoch is converted to an integer label for the file name. Round
        # rather than truncate, so a value that lands just below a whole epoch
        # (e.g. 29.999...) is not labelled as the previous epoch.
        epoch = int(round(state.epoch))
        trained_model = kwargs["model"]
        output_dir = args.output_dir
        os.makedirs(output_dir, exist_ok=True)
        path = os.path.join(output_dir, f"pytorch_model_epoch_{epoch}.pth")
        # save only the state dict
        torch.save(trained_model.state_dict(), path)
        print(f"\n→ Saved .pth checkpoint to: {path}")


In [22]:
# Set up the Trainer with the custom collator and the mAP compute_metrics hook.
def _collate_with_processor(features):
    """Bind the notebook-level processor to the DETR collator for the Trainer."""
    return detr_data_collator(features, processor)

trainer = Trainer(
    model=model,
    args=training_args,
    train_dataset=train_dataset,
    eval_dataset=eval_dataset,
    data_collator=_collate_with_processor,  # Use custom collator
    compute_metrics=compute_metrics_fn,
    tokenizer=processor,
    callbacks=[SavePTHCallback]
)

# Summary of the run that is about to start.
for summary_line in (
    "\nHugging Face Trainer instantiated.",
    f"Training will run for {trainer.args.num_train_epochs} epochs.",
    f"Evaluation (loss calculation) will be performed every epoch on {len(eval_dataset)} samples.",
    f"Checkpoints will be saved every epoch in {trainer.args.output_dir}.",
    "The best model will be determined based on eval_loss.",
):
    print(summary_line)


Hugging Face Trainer instantiated.
Training will run for 9 epochs.
Evaluation (loss calculation) will be performed every epoch on 100 samples.
Checkpoints will be saved every epoch in /kaggle/working/detr_training_output_hf_epoch_no_map.
The best model will be determined based on eval_loss.


In [23]:
# print(" Starting Training ")
# train_result = trainer.train()
# print("Training Finished")

# trainer.log_metrics("train", train_result.metrics)
# trainer.save_metrics("train", train_result.metrics)
# trainer.save_state() 

# torch.save(model.state_dict(), "/kaggle/working/model_weights_9e.pth")

In [None]:
# trainer.save_state() 

# Continue Training

In [27]:
# Check which device is active before continuing training.
DEVICE

device(type='cuda')

In [29]:
# Redefine training_args with more epochs for the continued run.

output_dir = './detr_training_output_hf_epoch_no_map'
os.makedirs(output_dir, exist_ok=True)

resume_config = {
    "output_dir": output_dir,
    "num_train_epochs": 38,
    "per_device_train_batch_size": 8,
    "per_device_eval_batch_size": 8,
    "learning_rate": 1e-4,
    "weight_decay": 1e-4,
    # --- Logging, Saving & Evaluation Strategies ---
    "logging_strategy": "epoch",
    "eval_strategy": "epoch",
    "save_strategy": "epoch",
    # --- Checkpoint Management ---
    "save_total_limit": 3,
    "load_best_model_at_end": True,
    "greater_is_better": False,
    "remove_unused_columns": False,
    "report_to": "tensorboard",
    "fp16": torch.cuda.is_available(),
}
training_args = TrainingArguments(**resume_config)



In [30]:
# Freeze the backbone again before the continued run.
print("Freezing the Backbone")
for backbone_param in model.model.backbone.parameters():
    backbone_param.requires_grad = False
print("Backbone frozen.")

# (element count, still trainable?) for every model parameter.
param_sizes = [(p.numel(), p.requires_grad) for p in model.parameters()]
total_params = sum(size for size, _ in param_sizes)
trainable_params = sum(size for size, is_trainable in param_sizes if is_trainable)

print(f"\nTotal parameters: {total_params}")
print(f"Trainable parameters: {trainable_params}")
print(f"Percentage Trainable: {100 * trainable_params / total_params:.2f}%")

Freezing the Backbone
Backbone frozen.

Total parameters: 41504722
Trainable parameters: 18049810
Percentage Trainable: 43.49%


In [31]:
# Rebuild the Trainer so that it picks up the new training_args.
trainer_kwargs = dict(
    model=model,
    args=training_args,
    train_dataset=train_dataset,
    eval_dataset=eval_dataset,
    # Bind the notebook-level processor to the DETR collator.
    data_collator=lambda features: detr_data_collator(features, processor),
    compute_metrics=compute_metrics_fn,
    tokenizer=processor,
    callbacks=[SavePTHCallback],
)
trainer = Trainer(**trainer_kwargs)


In [None]:
# # Workaround for the torch.load `weights_only` error that can occur when resuming the Trainer from a checkpoint:
# # the patch below forces weights_only=False on every torch.load call that passes that argument.


# _original_torch_load = torch.load
# # Define a wrapper that forces weights_only=False
# def patched_torch_load(*args, **kwargs):
#     if 'weights_only' in kwargs:
#         kwargs['weights_only'] = False
#     return _original_torch_load(*args, **kwargs)
# # Apply the patch
# torch.load = patched_torch_load

In [None]:
# Resume training from 29 epochs to 38 epochs - time 11.5 hrs

# Checkpoint directory that the Trainer resumes from.
checkpointpath="/kaggle/input/checkpoint-for-detrmodel-29e/checkpoint29e"

print(" Resuming Training ")

# Continue the run from the saved checkpoint instead of starting from scratch.
trainer.train(resume_from_checkpoint=checkpointpath)

print("Training Finished")

 Resuming Training 


	per_device_train_batch_size: 8 (from args) != 4 (from trainer_state.json)


Epoch,Training Loss,Validation Loss,Map@.5:.95
30,1.0539,1.129132,0.1894
31,1.044,1.156059,0.187258
32,1.0302,1.132247,0.208304
33,1.0224,1.125498,0.202899
34,1.0019,1.104761,0.198934
35,0.9908,1.113269,0.210011
36,0.9783,1.091424,0.219676
37,0.9701,1.102236,0.212079
38,0.96,1.088532,0.213035


Error loading image /kaggle/input/dac204/images/images/28901.jpg: image file is truncated (4 bytes not processed). Falling back to the first image.

→ Saved .pth checkpoint to: ./detr_training_output_hf_epoch_no_map/pytorch_model_epoch_30.pth


Evaluating:   0%|          | 0/100 [00:00<?, ?it/s]

Loading and preparing results...
DONE (t=0.01s)
creating index...
index created!
Running per image evaluation...
Evaluate annotation type *bbox*
DONE (t=0.59s).
Accumulating evaluation results...
DONE (t=0.13s).
 Average Precision  (AP) @[ IoU=0.50:0.95 | area=   all | maxDets=100 ] = 0.189
 Average Precision  (AP) @[ IoU=0.50      | area=   all | maxDets=100 ] = 0.349
 Average Precision  (AP) @[ IoU=0.75      | area=   all | maxDets=100 ] = 0.182
 Average Precision  (AP) @[ IoU=0.50:0.95 | area= small | maxDets=100 ] = 0.049
 Average Precision  (AP) @[ IoU=0.50:0.95 | area=medium | maxDets=100 ] = 0.302
 Average Precision  (AP) @[ IoU=0.50:0.95 | area= large | maxDets=100 ] = 0.607
 Average Recall     (AR) @[ IoU=0.50:0.95 | area=   all | maxDets=  1 ] = 0.112
 Average Recall     (AR) @[ IoU=0.50:0.95 | area=   all | maxDets= 10 ] = 0.262
 Average Recall     (AR) @[ IoU=0.50:0.95 | area=   all | maxDets=100 ] = 0.284
 Average Recall     (AR) @[ IoU=0.50:0.95 | area= small | maxDets=10

Evaluating:   0%|          | 0/100 [00:00<?, ?it/s]

Loading and preparing results...
DONE (t=0.01s)
creating index...
index created!
Running per image evaluation...
Evaluate annotation type *bbox*
DONE (t=0.55s).
Accumulating evaluation results...
DONE (t=0.12s).
 Average Precision  (AP) @[ IoU=0.50:0.95 | area=   all | maxDets=100 ] = 0.187
 Average Precision  (AP) @[ IoU=0.50      | area=   all | maxDets=100 ] = 0.368
 Average Precision  (AP) @[ IoU=0.75      | area=   all | maxDets=100 ] = 0.175
 Average Precision  (AP) @[ IoU=0.50:0.95 | area= small | maxDets=100 ] = 0.045
 Average Precision  (AP) @[ IoU=0.50:0.95 | area=medium | maxDets=100 ] = 0.285
 Average Precision  (AP) @[ IoU=0.50:0.95 | area= large | maxDets=100 ] = 0.602
 Average Recall     (AR) @[ IoU=0.50:0.95 | area=   all | maxDets=  1 ] = 0.119
 Average Recall     (AR) @[ IoU=0.50:0.95 | area=   all | maxDets= 10 ] = 0.258
 Average Recall     (AR) @[ IoU=0.50:0.95 | area=   all | maxDets=100 ] = 0.273
 Average Recall     (AR) @[ IoU=0.50:0.95 | area= small | maxDets=10

Evaluating:   0%|          | 0/100 [00:00<?, ?it/s]

Loading and preparing results...
DONE (t=0.01s)
creating index...
index created!
Running per image evaluation...
Evaluate annotation type *bbox*
DONE (t=0.54s).
Accumulating evaluation results...
DONE (t=0.12s).
 Average Precision  (AP) @[ IoU=0.50:0.95 | area=   all | maxDets=100 ] = 0.208
 Average Precision  (AP) @[ IoU=0.50      | area=   all | maxDets=100 ] = 0.390
 Average Precision  (AP) @[ IoU=0.75      | area=   all | maxDets=100 ] = 0.184
 Average Precision  (AP) @[ IoU=0.50:0.95 | area= small | maxDets=100 ] = 0.052
 Average Precision  (AP) @[ IoU=0.50:0.95 | area=medium | maxDets=100 ] = 0.309
 Average Precision  (AP) @[ IoU=0.50:0.95 | area= large | maxDets=100 ] = 0.600
 Average Recall     (AR) @[ IoU=0.50:0.95 | area=   all | maxDets=  1 ] = 0.146
 Average Recall     (AR) @[ IoU=0.50:0.95 | area=   all | maxDets= 10 ] = 0.282
 Average Recall     (AR) @[ IoU=0.50:0.95 | area=   all | maxDets=100 ] = 0.298
 Average Recall     (AR) @[ IoU=0.50:0.95 | area= small | maxDets=10

Evaluating:   0%|          | 0/100 [00:00<?, ?it/s]

Loading and preparing results...
DONE (t=0.01s)
creating index...
index created!
Running per image evaluation...
Evaluate annotation type *bbox*
DONE (t=0.53s).
Accumulating evaluation results...
DONE (t=0.12s).
 Average Precision  (AP) @[ IoU=0.50:0.95 | area=   all | maxDets=100 ] = 0.203
 Average Precision  (AP) @[ IoU=0.50      | area=   all | maxDets=100 ] = 0.378
 Average Precision  (AP) @[ IoU=0.75      | area=   all | maxDets=100 ] = 0.177
 Average Precision  (AP) @[ IoU=0.50:0.95 | area= small | maxDets=100 ] = 0.045
 Average Precision  (AP) @[ IoU=0.50:0.95 | area=medium | maxDets=100 ] = 0.333
 Average Precision  (AP) @[ IoU=0.50:0.95 | area= large | maxDets=100 ] = 0.582
 Average Recall     (AR) @[ IoU=0.50:0.95 | area=   all | maxDets=  1 ] = 0.140
 Average Recall     (AR) @[ IoU=0.50:0.95 | area=   all | maxDets= 10 ] = 0.288
 Average Recall     (AR) @[ IoU=0.50:0.95 | area=   all | maxDets=100 ] = 0.302
 Average Recall     (AR) @[ IoU=0.50:0.95 | area= small | maxDets=10

Evaluating:   0%|          | 0/100 [00:00<?, ?it/s]

Loading and preparing results...
DONE (t=0.01s)
creating index...
index created!
Running per image evaluation...
Evaluate annotation type *bbox*
DONE (t=0.55s).
Accumulating evaluation results...
DONE (t=0.13s).
 Average Precision  (AP) @[ IoU=0.50:0.95 | area=   all | maxDets=100 ] = 0.199
 Average Precision  (AP) @[ IoU=0.50      | area=   all | maxDets=100 ] = 0.371
 Average Precision  (AP) @[ IoU=0.75      | area=   all | maxDets=100 ] = 0.176
 Average Precision  (AP) @[ IoU=0.50:0.95 | area= small | maxDets=100 ] = 0.051
 Average Precision  (AP) @[ IoU=0.50:0.95 | area=medium | maxDets=100 ] = 0.300
 Average Precision  (AP) @[ IoU=0.50:0.95 | area= large | maxDets=100 ] = 0.605
 Average Recall     (AR) @[ IoU=0.50:0.95 | area=   all | maxDets=  1 ] = 0.128
 Average Recall     (AR) @[ IoU=0.50:0.95 | area=   all | maxDets= 10 ] = 0.279
 Average Recall     (AR) @[ IoU=0.50:0.95 | area=   all | maxDets=100 ] = 0.291
 Average Recall     (AR) @[ IoU=0.50:0.95 | area= small | maxDets=10

Evaluating:   0%|          | 0/100 [00:00<?, ?it/s]

Loading and preparing results...
DONE (t=0.01s)
creating index...
index created!
Running per image evaluation...
Evaluate annotation type *bbox*
DONE (t=0.51s).
Accumulating evaluation results...
DONE (t=0.12s).
 Average Precision  (AP) @[ IoU=0.50:0.95 | area=   all | maxDets=100 ] = 0.210
 Average Precision  (AP) @[ IoU=0.50      | area=   all | maxDets=100 ] = 0.394
 Average Precision  (AP) @[ IoU=0.75      | area=   all | maxDets=100 ] = 0.185
 Average Precision  (AP) @[ IoU=0.50:0.95 | area= small | maxDets=100 ] = 0.053
 Average Precision  (AP) @[ IoU=0.50:0.95 | area=medium | maxDets=100 ] = 0.306
 Average Precision  (AP) @[ IoU=0.50:0.95 | area= large | maxDets=100 ] = 0.613
 Average Recall     (AR) @[ IoU=0.50:0.95 | area=   all | maxDets=  1 ] = 0.133
 Average Recall     (AR) @[ IoU=0.50:0.95 | area=   all | maxDets= 10 ] = 0.278
 Average Recall     (AR) @[ IoU=0.50:0.95 | area=   all | maxDets=100 ] = 0.295
 Average Recall     (AR) @[ IoU=0.50:0.95 | area= small | maxDets=10

Evaluating:   0%|          | 0/100 [00:00<?, ?it/s]

Loading and preparing results...
DONE (t=0.01s)
creating index...
index created!
Running per image evaluation...
Evaluate annotation type *bbox*
DONE (t=0.52s).
Accumulating evaluation results...
DONE (t=0.12s).
 Average Precision  (AP) @[ IoU=0.50:0.95 | area=   all | maxDets=100 ] = 0.220
 Average Precision  (AP) @[ IoU=0.50      | area=   all | maxDets=100 ] = 0.394
 Average Precision  (AP) @[ IoU=0.75      | area=   all | maxDets=100 ] = 0.206
 Average Precision  (AP) @[ IoU=0.50:0.95 | area= small | maxDets=100 ] = 0.059
 Average Precision  (AP) @[ IoU=0.50:0.95 | area=medium | maxDets=100 ] = 0.323
 Average Precision  (AP) @[ IoU=0.50:0.95 | area= large | maxDets=100 ] = 0.620
 Average Recall     (AR) @[ IoU=0.50:0.95 | area=   all | maxDets=  1 ] = 0.136
 Average Recall     (AR) @[ IoU=0.50:0.95 | area=   all | maxDets= 10 ] = 0.287
 Average Recall     (AR) @[ IoU=0.50:0.95 | area=   all | maxDets=100 ] = 0.304
 Average Recall     (AR) @[ IoU=0.50:0.95 | area= small | maxDets=10

Evaluating:   0%|          | 0/100 [00:00<?, ?it/s]

Loading and preparing results...
DONE (t=0.01s)
creating index...
index created!
Running per image evaluation...
Evaluate annotation type *bbox*
DONE (t=0.62s).
Accumulating evaluation results...
DONE (t=0.12s).
 Average Precision  (AP) @[ IoU=0.50:0.95 | area=   all | maxDets=100 ] = 0.212
 Average Precision  (AP) @[ IoU=0.50      | area=   all | maxDets=100 ] = 0.384
 Average Precision  (AP) @[ IoU=0.75      | area=   all | maxDets=100 ] = 0.202
 Average Precision  (AP) @[ IoU=0.50:0.95 | area= small | maxDets=100 ] = 0.056
 Average Precision  (AP) @[ IoU=0.50:0.95 | area=medium | maxDets=100 ] = 0.322
 Average Precision  (AP) @[ IoU=0.50:0.95 | area= large | maxDets=100 ] = 0.595
 Average Recall     (AR) @[ IoU=0.50:0.95 | area=   all | maxDets=  1 ] = 0.143
 Average Recall     (AR) @[ IoU=0.50:0.95 | area=   all | maxDets= 10 ] = 0.282
 Average Recall     (AR) @[ IoU=0.50:0.95 | area=   all | maxDets=100 ] = 0.295
 Average Recall     (AR) @[ IoU=0.50:0.95 | area= small | maxDets=10

Evaluating:   0%|          | 0/100 [00:00<?, ?it/s]

Loading and preparing results...
DONE (t=0.01s)
creating index...
index created!
Running per image evaluation...
Evaluate annotation type *bbox*
DONE (t=0.58s).
Accumulating evaluation results...
DONE (t=0.13s).
 Average Precision  (AP) @[ IoU=0.50:0.95 | area=   all | maxDets=100 ] = 0.213
 Average Precision  (AP) @[ IoU=0.50      | area=   all | maxDets=100 ] = 0.381
 Average Precision  (AP) @[ IoU=0.75      | area=   all | maxDets=100 ] = 0.221
 Average Precision  (AP) @[ IoU=0.50:0.95 | area= small | maxDets=100 ] = 0.058
 Average Precision  (AP) @[ IoU=0.50:0.95 | area=medium | maxDets=100 ] = 0.317
 Average Precision  (AP) @[ IoU=0.50:0.95 | area= large | maxDets=100 ] = 0.608
 Average Recall     (AR) @[ IoU=0.50:0.95 | area=   all | maxDets=  1 ] = 0.133
 Average Recall     (AR) @[ IoU=0.50:0.95 | area=   all | maxDets= 10 ] = 0.279
 Average Recall     (AR) @[ IoU=0.50:0.95 | area=   all | maxDets=100 ] = 0.297
 Average Recall     (AR) @[ IoU=0.50:0.95 | area= small | maxDets=10

Could not locate the best model at ./detr_training_output_hf_epoch_no_map/checkpoint-89100/pytorch_model.bin, if you are running a distributed training on multiple nodes, you should activate `--save_on_each_node`.


Training Finished


In [35]:
# Save only the model's state dict (weights) to the Kaggle working directory.
torch.save(model.state_dict(), "/kaggle/working/model_weights_38e.pth")

In [36]:
# Save the model through the Trainer into output_dir.
trainer.save_model(output_dir)