In [25]:
import torch
from transformers import DetrConfig, DetrForObjectDetection
import matplotlib.pyplot as plt
import pytorch_lightning as pl
import json
from transformers import DetrFeatureExtractor
import torchvision
import os
from torch.utils.data import DataLoader
from pytorch_lightning import Trainer
from detr.datasets import get_coco_api_from_dataset
from detr.datasets.coco_eval import CocoEvaluator
from tqdm.notebook import tqdm
from pathlib import Path


# Data

In [2]:
#Make dir for data
%mkdir content

'C:\MinGW\bin\mingwstartup.bat' is not recognized as an internal or external command,
operable program or batch file.


In [3]:
#Download data from roboflow of download manual from kaggle
from roboflow import Roboflow
%cd content
rf = Roboflow(api_key="ruQpTRXWHhPyIXpWAIh5")
project = rf.workspace().project("carla-izloa")
dataset = project.version(19).download("coco")


c:\Users\student\CARLA_0.9.13\PythonAPI\Research-Project-CARLA\train\detr-pytorch\content
loading Roboflow workspace...
loading Roboflow project...
Downloading Dataset Version Zip in CARLA-19 to coco: 100% [53436436 / 53436436] bytes


Extracting Dataset Version Zip to CARLA-19 in coco:: 100%|██████████| 1866/1866 [00:01<00:00, 1183.84it/s]


In [4]:
#Return to main dir
%cd ..


c:\Users\student\CARLA_0.9.13\PythonAPI\Research-Project-CARLA\train\detr-pytorch


# Classes

In [12]:
class CocoDetection(torchvision.datasets.CocoDetection):
    def __init__(self, img_folder, feature_extractor, train=True):
        ann_file = os.path.join(img_folder, "_annotations.coco.json")
        super(CocoDetection, self).__init__(img_folder, ann_file)
        self.feature_extractor = feature_extractor

    def __getitem__(self, idx):
        img, target = super(CocoDetection, self).__getitem__(idx)
        # preprocess image and target
        image_id = self.ids[idx]
        target = {'image_id': image_id, 'annotations': target}
        encoding = self.feature_extractor(
            images=img, annotations=target, return_tensors="pt")
        # remove batch dimension
        pixel_values = encoding["pixel_values"].squeeze()
        target = encoding["labels"][0]  # remove batch dimension

        return pixel_values, target


In [22]:
# DETR MODEL, turned into lightning module
class DetrLightning(pl.LightningModule):

    def __init__(self, lr, lr_backbone, weight_decay):
        super().__init__()
        labels = self.load_labels()
        detr_config = DetrConfig(num_queries=20)#change num_queries, default on coco is 100, I use 20 since my data has only 10 classes.
        self.model = DetrForObjectDetection(detr_config).from_pretrained("facebook/detr-resnet-50",
                                                            num_labels=len(
                                                                labels),
                                                            ignore_mismatched_sizes=True)
        self.model.config.num_queries = 20
        self.lr = lr
        self.lr_backbone = lr_backbone
        self.weight_decay = weight_decay

    def load_labels(self):
        with open("../../detrCustom/labels.json", "r") as outfile:
            labels = json.load(outfile)
        return labels

    def forward(self, pixel_values, pixel_mask):
        outputs = self.model(pixel_values=pixel_values, pixel_mask=pixel_mask)
        return outputs

    def calculate_loss(self, batch):
        #Calculate loss for certain batch
       pixel_values = batch["pixel_values"]
       pixel_mask = batch["pixel_mask"]
       labels = [{k: v.to(self.device) for k, v in t.items()} for t in batch["labels"]]

       outputs = self.model(pixel_values=pixel_values, pixel_mask=pixel_mask, labels=labels)

       loss = outputs.loss
       loss_dict = outputs.loss_dict

       return loss, loss_dict

    def training_step(self, batch, batch_idx):
        loss, loss_dict = self.calculate_loss(batch)
        # Logging
        self.log("training_loss", loss)
        for k,v in loss_dict.items():
          self.log("train_" + k, v.item())

        return loss

    def validation_step(self, batch, batch_idx):
        loss, loss_dict = self.calculate_loss(batch)
        # Logging
        self.log("validation_loss", loss)
        for k,v in loss_dict.items():
          self.log("validation_" + k, v.item())

    def configure_optimizers(self):
        param_dicts = [
            {"params": [p for n, p in self.named_parameters(
            ) if "backbone" not in n and p.requires_grad]},
            {
                "params": [p for n, p in self.named_parameters() if "backbone" in n and p.requires_grad],
                "lr": self.lr_backbone,
            },
        ]
        optimizer = torch.optim.AdamW(param_dicts, lr=self.lr,
                                      weight_decay=self.weight_decay)

        return optimizer

    def train_dataloader(self):
        return train_dataloader

    def val_dataloader(self):
        return val_dataloader


# Prep

In [14]:
feature_extractor = DetrFeatureExtractor.from_pretrained(
    "facebook/detr-resnet-50")

train_dataset = CocoDetection(
    img_folder='./content/CARLA-19/train', feature_extractor=feature_extractor)
val_dataset = CocoDetection(img_folder='./content/CARLA-19/valid',
                            feature_extractor=feature_extractor, train=False)


loading annotations into memory...
Done (t=0.02s)
creating index...
index created!
loading annotations into memory...
Done (t=0.00s)
creating index...
index created!


In [15]:
# Get labels
cats = train_dataset.coco.cats
labels = {k: v['name'] for k,v in cats.items()}

In [16]:
# safe to use in predictions
with open(f"../../detrCustom/labels.json", "w") as outfile:
    json.dump(labels, outfile)


In [17]:
# Create dataloaders and batch with padded images to biggest size
def create_batch(batch):
    pixel_values = [item[0] for item in batch]
    encoding = feature_extractor.pad_and_create_pixel_mask(
        pixel_values, return_tensors="pt")
    labels = [item[1] for item in batch]
    batch = {}
    batch['pixel_values'] = encoding['pixel_values']
    batch['pixel_mask'] = encoding['pixel_mask']
    batch['labels'] = labels
    return batch

train_dataloader = DataLoader(
   train_dataset, collate_fn=create_batch, batch_size=2, shuffle=True)
val_dataloader = DataLoader(
    val_dataset, collate_fn=create_batch, batch_size=2)
batch = next(iter(train_dataloader))


In [23]:
# Create model - based on our pytorch lightning class
model = DetrLightning(lr=1e-4, lr_backbone=1e-5, weight_decay=1e-4)


Some weights of DetrForObjectDetection were not initialized from the model checkpoint at facebook/detr-resnet-50 and are newly initialized because the shapes did not match:
- class_labels_classifier.weight: found shape torch.Size([92, 256]) in the checkpoint and torch.Size([14, 256]) in the model instantiated
- class_labels_classifier.bias: found shape torch.Size([92]) in the checkpoint and torch.Size([14]) in the model instantiated
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


In [26]:
%load_ext tensorboard
%tensorboard - -logdir lightning_logs/

plt.show()


ERROR: Failed to launch TensorBoard (exited with 2).
Contents of stderr:
TensorFlow installation not found - running with reduced feature set.
usage: tensorboard [-h] [--helpfull] [--logdir PATH] [--logdir_spec PATH_SPEC]
                   [--host ADDR] [--bind_all] [--port PORT]
                   [--reuse_port BOOL] [--load_fast {false,auto,true}]
                   [--extra_data_server_flags EXTRA_DATA_SERVER_FLAGS]
                   [--grpc_creds_type {local,ssl,ssl_dev}]
                   [--grpc_data_provider PORT] [--purge_orphaned_data BOOL]
                   [--db URI] [--db_import] [--inspect] [--version_tb]
                   [--tag TAG] [--event_file PATH] [--path_prefix PATH]
                   [--window_title TEXT] [--max_reload_threads COUNT]
                   [--reload_interval SECONDS] [--reload_task TYPE]
                   [--reload_multifile BOOL]
                   [--reload_multifile_inactive_secs SECONDS]
                   [--generic_data TYPE]
            

In [24]:
# Use pytorch lightning trainer to start training
trainer = Trainer(gpus=1, max_epochs=1, gradient_clip_val=0.1)
trainer.fit(model)


GPU available: True, used: True
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]

  | Name  | Type                   | Params
-------------------------------------------------
0 | model | DetrForObjectDetection | 41.5 M
-------------------------------------------------
41.3 M    Trainable params
222 K     Non-trainable params
41.5 M    Total params
166.050   Total estimated model params size (MB)


Validation sanity check:   0%|          | 0/2 [00:00<?, ?it/s]

  f"The dataloader, {name}, does not have many workers which may be a bottleneck."


Validation sanity check:  50%|█████     | 1/2 [00:02<00:02,  2.35s/it]

  "Trying to infer the `batch_size` from an ambiguous collection. The batch size we"


                                                                      

  f"The dataloader, {name}, does not have many workers which may be a bottleneck."


Epoch 0: 100%|██████████| 930/930 [08:41<00:00,  1.78it/s, loss=1.5, v_num=1] 


# Eval

In [None]:
git clone https://github.com/facebookresearch/detr.git

In [27]:
# Get groundtruths of the val dataset
gt_val = get_coco_api_from_dataset(val_dataset)

In [29]:
# Init cocoevaluator - this comes from the detr github
iou_types = ['bbox']
coco_evaluator = CocoEvaluator(gt_val, iou_types)

In [None]:
#Use cpu for eval, set to 'cuda' if you want to use GPU
device = torch.device("cpu")
model.to(device)
model.eval()


In [31]:
for idx, batch in enumerate(tqdm(val_dataloader)):
    # get the inputs
    pixel_values = batch["pixel_values"].to(device)
    pixel_mask = batch["pixel_mask"].to(device)
    # these are in DETR format, resized + normalized
    labels = [{k: v.to(device) for k, v in t.items()} for t in batch["labels"]]

    # forward pass
    outputs = model.model(pixel_values=pixel_values, pixel_mask=pixel_mask)

    orig_target_sizes = torch.stack(
        [target["orig_size"] for target in labels], dim=0)
    # make outputs coco format to run evaluation
    results = feature_extractor.post_process(outputs, orig_target_sizes)
    res = {target['image_id'].item(): output for target,
           output in zip(labels, results)}
    coco_evaluator.update(res)


  0%|          | 0/132 [00:00<?, ?it/s]

                                                              

In [32]:
# Summarize and show results
coco_evaluator.synchronize_between_processes()
coco_evaluator.accumulate()
coco_evaluator.summarize()


Accumulating evaluation results...
DONE (t=0.35s).
IoU metric: bbox
 Average Precision  (AP) @[ IoU=0.50:0.95 | area=   all | maxDets=100 ] = 0.042
 Average Precision  (AP) @[ IoU=0.50      | area=   all | maxDets=100 ] = 0.083
 Average Precision  (AP) @[ IoU=0.75      | area=   all | maxDets=100 ] = 0.036
 Average Precision  (AP) @[ IoU=0.50:0.95 | area= small | maxDets=100 ] = 0.008
 Average Precision  (AP) @[ IoU=0.50:0.95 | area=medium | maxDets=100 ] = 0.065
 Average Precision  (AP) @[ IoU=0.50:0.95 | area= large | maxDets=100 ] = 0.382
 Average Recall     (AR) @[ IoU=0.50:0.95 | area=   all | maxDets=  1 ] = 0.076
 Average Recall     (AR) @[ IoU=0.50:0.95 | area=   all | maxDets= 10 ] = 0.143
 Average Recall     (AR) @[ IoU=0.50:0.95 | area=   all | maxDets=100 ] = 0.153
 Average Recall     (AR) @[ IoU=0.50:0.95 | area= small | maxDets=100 ] = 0.061
 Average Recall     (AR) @[ IoU=0.50:0.95 | area=medium | maxDets=100 ] = 0.246
 Average Recall     (AR) @[ IoU=0.50:0.95 | area= la

Model can now be found in the lightning log. Check latest version and you can find the model in "checkpoint" folder.

In [38]:
import shutil


In [None]:
#Move model to models folder,change X to the right numbers found in the logs folder.
shutil.move(
    'lightning_logs/version_1/checkpoints/epoch=0-step=797.ckpt', '../../models')
