# DETR

## Version 0.4

Experiment with standard DETR and export of predictions

In [None]:
import os
import sys
import requests
import datetime
import pandas as pd
import numpy as np
import torch, torchvision, torchaudio
import pytorch_lightning as pl
import wandb
import json
import copy
import shutil
import matplotlib.pyplot as plt
import time


from torchmetrics.detection import MeanAveragePrecision
from coco_eval import CocoEvaluator
from tqdm.notebook import tqdm
from torch.utils.data import DataLoader
from pytorch_lightning.loggers import WandbLogger
from transformers import AutoImageProcessor, DetrForObjectDetection
from transformers import DetrConfig, DetrImageProcessor
from pytorch_lightning import Trainer
from PIL import Image, ImageDraw

# Training

## Loads Data

In [None]:


class CocoDetection(torchvision.datasets.CocoDetection):
    """COCO dataset whose samples are pre-processed for DETR.

    Args:
        img_folder: Directory that holds both the images and the COCO
            annotation file.
        processor: Image processor called with ``images=``, ``annotations=``
            and ``return_tensors="pt"``; it must return ``"pixel_values"``
            and ``"labels"`` entries.
        image_name: File name of the COCO annotation JSON inside
            ``img_folder`` (despite the parameter name, this is not an image).
    """

    def __init__(self, img_folder, processor, image_name):
        ann_file = os.path.join(img_folder, image_name)
        super().__init__(img_folder, ann_file)
        self.processor = processor

    def __getitem__(self, idx):
        """Return ``(pixel_values, target)`` for the sample at ``idx``."""
        # Read the PIL image and its COCO-format annotations.
        # Data augmentation could be added here before the processor runs.
        img, target = super().__getitem__(idx)

        # The processor expects the annotations wrapped together with the image id.
        image_id = self.ids[idx]
        target = {'image_id': image_id, 'annotations': target}
        encoding = self.processor(images=img, annotations=target, return_tensors="pt")

        # Drop only the leading batch dimension. A bare squeeze() would also
        # remove any other dimension that happens to have size 1.
        pixel_values = encoding["pixel_values"].squeeze(0)
        target = encoding["labels"][0]  # remove batch dimension

        return pixel_values, target

# Image processor shared by the datasets and by collate_fn below.
processor = DetrImageProcessor.from_pretrained("facebook/detr-resnet-50")

# Use the GPU when one is available, otherwise fall back to the CPU.
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

# Create the train and validation datasets.
# Minimal dataset (disabled; swap with the "maximal" lines below to use it):
# train_dataset = CocoDetection(img_folder='../../../old/BAA/Data/train', processor=processor, image_name="coco_train.json")
# val_dataset = CocoDetection(img_folder='../../../old/BAA/Data/val', processor=processor, image_name="coco_val.json")

# Maximal dataset (active):
train_dataset = CocoDetection(img_folder='../../../old/BAA/Data/train_max', processor=processor, image_name="coco_train_max.json")
val_dataset = CocoDetection(img_folder='../../../old/BAA/Data/val_max', processor=processor, image_name="coco_val_max.json")

print("Number of training examples:", len(train_dataset))
print("Number of validation examples:", len(val_dataset))

# Map COCO category id -> category name, taken from the training annotations.
cats = train_dataset.coco.cats
id2label = {k: v['name'] for k,v in cats.items()}

def collate_fn(batch):
    """Collate ``(pixel_values, target)`` samples into one batch dict.

    The images are padded through the module-level ``processor``; the
    per-image targets are passed along unchanged as a list.

    Returns:
        Dict with the keys ``pixel_values``, ``pixel_mask`` and ``labels``.
    """
    images = []
    for sample in batch:
        images.append(sample[0])
    padded = processor.pad(images, return_tensors="pt")

    targets = []
    for sample in batch:
        targets.append(sample[1])

    return {
        'pixel_values': padded['pixel_values'],
        'pixel_mask': padded['pixel_mask'],
        'labels': targets,
    }

# Both loaders use batches of two images; only the training loader shuffles.
train_dataloader = DataLoader(train_dataset, collate_fn=collate_fn, batch_size=2, shuffle=True)
val_dataloader = DataLoader(val_dataset, collate_fn=collate_fn, batch_size=2)
# Pull one training batch up front; it is reused later for a forward-pass shape check.
batch = next(iter(train_dataloader))



## define DETR

In [None]:



class Detr(pl.LightningModule):
    """DETR detector as a LightningModule with per-epoch mAP logging.

    Every training/validation step stores the post-processed predictions and
    the matching targets; at the end of each epoch they are fed into a fresh
    ``MeanAveragePrecision`` metric and the results are sent to wandb.

    Args:
        lr: Learning rate for all non-backbone parameters.
        lr_backbone: Learning rate for parameters whose name contains
            ``"backbone"``.
        weight_decay: Weight decay passed to AdamW.
    """

    # Number of leading entries of the metric result that get logged. This
    # mirrors the original ``counter < 15`` cut-off.
    # NOTE(review): presumably chosen to skip trailing non-scalar entries
    # (``.item()`` would fail on them) — confirm against the torchmetrics version.
    _MAX_LOGGED_METRICS = 14

    def __init__(self, lr, lr_backbone, weight_decay):
        super().__init__()
        # Replace the COCO classification head with a custom head.
        # The "no_timm" revision avoids relying on the timm library for the
        # convolutional backbone.
        self.model = DetrForObjectDetection.from_pretrained(
            "facebook/detr-resnet-50",
            revision="no_timm",
            num_labels=len(id2label),
            ignore_mismatched_sizes=True,
        )
        # see https://github.com/PyTorchLightning/pytorch-lightning/pull/1896
        self.processor = DetrImageProcessor.from_pretrained("facebook/detr-resnet-50")
        self.cmetric = MeanAveragePrecision(iou_type="bbox")
        self.val_epoch_count = 0  # 0 until the first validation run has passed
        self.save_hyperparameters()
        self.lr = lr
        self.lr_backbone = lr_backbone
        self.weight_decay = weight_decay
        # Per-epoch buffers of predictions / targets for the mAP computation.
        self.training_step_pred = []
        self.training_step_targ = []
        self.validation_step_pred = []
        self.validation_step_targ = []

    def forward(self, pixel_values, pixel_mask):
        """Run the wrapped DETR model without labels and return its outputs."""
        return self.model(pixel_values=pixel_values, pixel_mask=pixel_mask)

    def common_step(self, batch, batch_idx, t_v):
        """Compute the loss for one batch and buffer predictions for mAP.

        Args:
            batch: Dict with ``pixel_values``, ``pixel_mask`` and ``labels``.
            batch_idx: Index of the batch (unused).
            t_v: ``True`` for a training step, ``False`` for validation.

        Returns:
            Tuple ``(loss, loss_dict)`` taken from the model outputs.
        """
        pixel_values = batch["pixel_values"]
        pixel_mask = batch["pixel_mask"]
        labels = [{k: v.to(self.device) for k, v in t.items()} for t in batch["labels"]]
        outputs = self.model(pixel_values=pixel_values, pixel_mask=pixel_mask, labels=labels)

        # Reuse the outputs of this forward pass instead of running the model
        # a second time just to collect predictions.
        if t_v:
            self.data_prep_train(pixel_values, pixel_mask, labels, outputs=outputs)
        else:
            self.data_prep_val(pixel_values, pixel_mask, labels, outputs=outputs)

        return outputs.loss, outputs.loss_dict

    def training_step(self, batch, batch_idx):
        """Run one training step and log the loss and its components."""
        loss, loss_dict = self.common_step(batch, batch_idx, True)
        self.log("training_loss", loss)
        for k, v in loss_dict.items():
            self.log("train_" + k, v.item())
        return loss

    def on_train_epoch_end(self) -> None:
        """Log mAP over the buffered training predictions, then reset them."""
        print("onTrainEpoch")
        self._log_epoch_map(
            self.training_step_pred,
            self.training_step_targ,
            wandb_prefix="train_epoch_",
            print_prefix="train_",
        )
        self.training_step_pred = []
        self.training_step_targ = []

    def validation_step(self, batch, batch_idx):
        """Run one validation step and log the loss and its components."""
        loss, loss_dict = self.common_step(batch, batch_idx, False)
        self.log("validation_loss", loss)
        for k, v in loss_dict.items():
            self.log("validation_" + k, v.item())
        return loss

    def on_validation_epoch_end(self) -> None:
        """Log mAP over the buffered validation predictions, then reset them.

        The very first validation run is not logged (``val_epoch_count`` is
        still 0 then). Its buffered batches are discarded as well so they do
        not leak into the first logged epoch.
        """
        if self.val_epoch_count == 1:
            print("onValEpoch")
            # Use the *validation* buffers here; the training buffers would
            # just reproduce the training metric.
            self._log_epoch_map(
                self.validation_step_pred,
                self.validation_step_targ,
                wandb_prefix="validation_epoch_",
                print_prefix="validation_epoch_",
            )
        else:
            self.val_epoch_count = 1
        self.validation_step_pred = []
        self.validation_step_targ = []

    def _log_epoch_map(self, predictions, targets, wandb_prefix, print_prefix):
        """Compute mAP for one epoch, log it to wandb and print the timing."""
        started = time.time()
        self.cmetric = MeanAveragePrecision(iou_type="bbox")
        self.cmetric.update(predictions, targets)
        result = self.cmetric.compute()
        for key, value in list(result.items())[:self._MAX_LOGGED_METRICS]:
            wandb.log({wandb_prefix + key: value.item()})
            print({print_prefix + key: value.item()})
        print(round(time.time() - started, 4))

    def configure_optimizers(self):
        """Build AdamW with a separate learning rate for the backbone."""
        param_dicts = [
            {"params": [p for n, p in self.named_parameters() if "backbone" not in n and p.requires_grad]},
            {
                "params": [p for n, p in self.named_parameters() if "backbone" in n and p.requires_grad],
                "lr": self.lr_backbone,
            },
        ]
        return torch.optim.AdamW(param_dicts, lr=self.lr, weight_decay=self.weight_decay)

    def train_dataloader(self):
        """Return the module-level training dataloader."""
        return train_dataloader

    def val_dataloader(self):
        """Return the module-level validation dataloader."""
        return val_dataloader

    # EVALUATION

    def convert_to_xywh(self, boxes):
        """Convert ``(xmin, ymin, xmax, ymax)`` rows to ``(x, y, w, h)`` rows."""
        xmin, ymin, xmax, ymax = boxes.unbind(1)
        return torch.stack((xmin, ymin, xmax - xmin, ymax - ymin), dim=1)

    def prepare_for_coco_detection(self, predictions):
        """Flatten ``{image_id: prediction}`` into a list of COCO result dicts."""
        coco_results = []
        for original_id, prediction in predictions.items():
            if len(prediction) == 0:
                continue

            boxes = self.convert_to_xywh(prediction["boxes"]).tolist()
            scores = prediction["scores"].tolist()
            labels = prediction["labels"].tolist()

            coco_results.extend(
                [
                    {
                        "image_id": original_id,
                        "category_id": labels[k],
                        "bbox": box,
                        "score": scores[k],
                    }
                    for k, box in enumerate(boxes)
                ]
            )
        return coco_results

    @staticmethod
    def _target_boxes_to_absolute_xyxy(boxes, orig_size):
        """Convert target boxes to the format of the post-processed predictions.

        Assumes ``boxes`` are normalized ``(cx, cy, w, h)`` rows and
        ``orig_size`` is ``(height, width)``, which is what the Hugging Face
        DETR image processor documents for its labels. The post-processed
        predictions are absolute ``(xmin, ymin, xmax, ymax)``, so the targets
        have to be brought into the same format before computing mAP.
        """
        cx, cy, w, h = boxes.unbind(-1)
        corners = torch.stack(
            (cx - 0.5 * w, cy - 0.5 * h, cx + 0.5 * w, cy + 0.5 * h), dim=-1
        )
        img_h, img_w = orig_size.unbind(0)
        scale = torch.stack((img_w, img_h, img_w, img_h)).to(corners)
        return corners * scale

    def _collect_predictions(self, pixel_values, pixel_mask, labels, outputs,
                             pred_store, targ_store):
        """Post-process one batch and append predictions/targets to the stores."""
        with torch.no_grad():
            if outputs is None:
                outputs = self.model(pixel_values=pixel_values, pixel_mask=pixel_mask)
            orig_target_sizes = torch.stack([target["orig_size"] for target in labels], dim=0)
            results = self.processor.post_process_object_detection(
                outputs, target_sizes=orig_target_sizes, threshold=0
            )
            pred_store.extend(results)
            for target in labels:
                targ_store.append({
                    "boxes": self._target_boxes_to_absolute_xyxy(
                        target["boxes"], target["orig_size"]
                    ),
                    "labels": target["class_labels"],
                })

    def data_prep_train(self, pixel_values, pixel_mask, labels, outputs=None):
        """Buffer predictions and targets of one training batch.

        Args:
            pixel_values: Batched image tensor.
            pixel_mask: Padding mask belonging to ``pixel_values``.
            labels: List of per-image target dicts.
            outputs: Optional model outputs for this batch. When omitted, a
                forward pass is run here.
        """
        self._collect_predictions(
            pixel_values, pixel_mask, labels, outputs,
            self.training_step_pred, self.training_step_targ,
        )

    def data_prep_val(self, pixel_values, pixel_mask, labels, outputs=None):
        """Buffer predictions and targets of one validation batch.

        Args:
            pixel_values: Batched image tensor.
            pixel_mask: Padding mask belonging to ``pixel_values``.
            labels: List of per-image target dicts.
            outputs: Optional model outputs for this batch. When omitted, a
                forward pass is run here.
        """
        self._collect_predictions(
            pixel_values, pixel_mask, labels, outputs,
            self.validation_step_pred, self.validation_step_targ,
        )


## Training

In [None]:
# Training




# Log in to Weights & Biases. To re-login: wandb.login(key="YOUR KEY", relogin=True)
wandb.login()

date = datetime.datetime.now()
run_name = f"Run at the {date}"

# Optimizer hyperparameters.
lr = 1e-4
lr_backbone = 1e-5
weight_decay = 1e-4

project = "BAA_Book_Damage_Detection"

wandb.init(
    project=project,
    name=run_name,
    config={"learning_rate_transformer": lr,
            "learning_rate_backbone": lr_backbone,
            "weight_decay": weight_decay,
            "architecture": "DETR",
            "dataset": "test_minimal_set",
            "train_set_size": len(train_dataset)}
    )

model = Detr(lr=lr, lr_backbone=lr_backbone, weight_decay=weight_decay)

model.to(device)

# Shape check on the batch fetched earlier. No gradients are needed here, and
# without no_grad the autograd graph would stay alive through `outputs` for
# the whole training run.
with torch.no_grad():
    outputs = model(pixel_values=batch['pixel_values'].to(device), pixel_mask=batch['pixel_mask'].to(device))
print(outputs.logits.shape)

# Training stops after this many optimizer steps.
max_steps_var = 460

wandb_logger = WandbLogger("Optimize_Logging", "../models/", project=project, log_model=True, checkpoint_name=f"DETR_{max_steps_var}_Steps")
trainer = Trainer(max_steps=max_steps_var, gradient_clip_val=0.1, logger=wandb_logger)
# The dataloaders come from Detr.train_dataloader / Detr.val_dataloader.
trainer.fit(model)

wandb.finish()

## load checkpoint

In [None]:
# load the checkpoint

# Start a wandb run and download the model artifact through it.
run = wandb.init()
artifact = run.use_artifact('damaged-books-detection-ml/BAA_Book_Damage_Detection/optimized_logs:v1', type='model')
artifact_dir = artifact.download()

# Restore the Detr module from the checkpoint file inside the downloaded artifact.
model = Detr.load_from_checkpoint(artifact_dir + "/model.ckpt")



In [None]:
# Switch the model to evaluation mode and move it to the available device.
model.eval()
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
model.to(device)

## Evaluation

In [None]:
# Evaluation

def convert_to_xywh(boxes):
    """Turn rows of ``(xmin, ymin, xmax, ymax)`` into rows of ``(x, y, w, h)``.

    Args:
        boxes: 2-D tensor with exactly four columns.

    Returns:
        Tensor of the same shape holding the top-left corner, width and height.
    """
    left, top, right, bottom = boxes.unbind(1)
    width = right - left
    height = bottom - top
    return torch.stack((left, top, width, height), dim=1)

def prepare_for_coco_detection(predictions):
    """Flatten per-image predictions into a list of COCO result entries.

    Args:
        predictions: Mapping of image id to a prediction dict with the keys
            ``boxes``, ``scores`` and ``labels``. Empty predictions are skipped.

    Returns:
        List of dicts with ``image_id``, ``category_id``, ``bbox`` (converted
        with ``convert_to_xywh``) and ``score``.
    """
    flattened = []
    for original_id, prediction in predictions.items():
        if len(prediction) == 0:
            continue

        xywh_boxes = convert_to_xywh(prediction["boxes"]).tolist()
        score_list = prediction["scores"].tolist()
        label_list = prediction["labels"].tolist()

        for k, box in enumerate(xywh_boxes):
            entry = {
                "image_id": original_id,
                "category_id": label_list[k],
                "bbox": box,
                "score": score_list[k],
            }
            flattened.append(entry)
    return flattened




# Initialize the evaluator with the validation ground truth (gt).
evaluator = CocoEvaluator(coco_gt=val_dataset.coco, iou_types=["bbox"])

print("Running evaluation...")
for idx, batch in enumerate(tqdm(val_dataloader)):
    # Move the inputs of this batch to the device.
    pixel_values = batch["pixel_values"].to(device)
    pixel_mask = batch["pixel_mask"].to(device)
    labels = [{k: v.to(device) for k, v in t.items()} for t in batch["labels"]] # these are in DETR format, resized + normalized

    # Forward pass without gradient tracking.
    with torch.no_grad():
      outputs = model(pixel_values=pixel_values, pixel_mask=pixel_mask)
      

    # Turn the raw outputs into a list of dictionaries (one per image in the batch).
    # threshold=0 keeps every prediction regardless of score.
    orig_target_sizes = torch.stack([target["orig_size"] for target in labels], dim=0)
    results = processor.post_process_object_detection(outputs, target_sizes=orig_target_sizes, threshold=0)

    # Feed the predictions to the evaluator. prepare_for_coco_detection turns
    # the {image_id: prediction} mapping into a flat list of dictionaries with
    # image_id, category_id, bbox and score keys.
    predictions = {target['image_id'].item(): output for target, output in zip(labels, results)}
    predictions = prepare_for_coco_detection(predictions)
    evaluator.update(predictions)

# Finalize and print the evaluation summary.
evaluator.synchronize_between_processes()
evaluator.accumulate()
evaluator.summarize()
print(evaluator)

## Visualization

In [None]:
# Visualize
# Colors cycled through for the predicted boxes.
COLORS = [[0.000, 0.447, 0.741], [0.850, 0.325, 0.098], [0.929, 0.694, 0.125],
          [0.494, 0.184, 0.556], [0.466, 0.674, 0.188], [0.301, 0.745, 0.933]]


def plot_results(pil_img, scores, labels, boxes, anno):
    """Draw predicted and annotated boxes on an image and print class counts.

    Args:
        pil_img: Image to draw on.
        scores: Tensor of prediction scores.
        labels: Tensor of predicted label ids (keys of ``id2label``).
        boxes: Tensor of predicted boxes as ``(xmin, ymin, xmax, ymax)``.
        anno: Iterable of COCO annotation dicts with ``bbox`` as
            ``(x, y, w, h)`` and a ``category_id``.
    """
    plt.figure(figsize=(16, 10))
    plt.imshow(pil_img)
    ax = plt.gca()
    colors = COLORS * 100

    # Predicted boxes, one color per box; count the predictions per class.
    types = {}
    for score, label, (xmin, ymin, xmax, ymax), c in zip(scores.tolist(), labels.tolist(), boxes.tolist(), colors):
        ax.add_patch(plt.Rectangle((xmin, ymin), xmax - xmin, ymax - ymin,
                                   fill=False, color=c, linewidth=3))
        name = id2label[label]
        types[name] = types.get(name, 0) + 1
        # fontsize=0 / alpha=0.0 keep the caption invisible, as in the original.
        ax.text(xmin, ymin, f'{name}: {score:0.2f}', fontsize=0,
                bbox=dict(facecolor='yellow', alpha=0.0))
    print(types)

    # Annotated (ground-truth) boxes in black; count the annotations per class.
    typesAnno = {}
    color = [0.000, 0.000, 0.000]
    # TODO Resize bbox
    for annot in anno:
        (x, y, w, h) = annot["bbox"]
        ax.add_patch(plt.Rectangle((x, y), w, h,
                                   fill=False, color=color, linewidth=3))
        text = f'{id2label[annot["category_id"]]}'
        typesAnno[text] = typesAnno.get(text, 0) + 1
        ax.text(x, y, text, fontsize=0,
                bbox=dict(facecolor='yellow', alpha=0.0))
    print(typesAnno)
    plt.axis('off')
    plt.show()


# Show at most the first six validation images.
for z in range(min(6, len(val_dataset))):
    pixel_values, target = val_dataset[z]
    pixel_values = pixel_values.unsqueeze(0).to(device)

    with torch.no_grad():
        # Forward pass to get class logits and bounding boxes.
        outputs = model(pixel_values=pixel_values, pixel_mask=None)

    # Load the original image and its annotations by image id. The image is
    # read from the dataset's own root so the path cannot drift from the one
    # the dataset was built with.
    image_id = target['image_id'].item()
    image = val_dataset.coco.loadImgs(image_id)[0]
    anno = val_dataset.coco.imgToAnns[image_id]
    image = Image.open(os.path.join(val_dataset.root, image['file_name']))

    # Post-process the model outputs to the original image size.
    width, height = image.size
    postprocessed_outputs = processor.post_process_object_detection(outputs,
                                                                    target_sizes=[(height, width)],
                                                                    threshold=0.5)
    results = postprocessed_outputs[0]
    print(results)
    plot_results(image, results['scores'], results['labels'], results['boxes'], anno)


## Export predictions of new images

In [None]:
# Take the first validation sample, add a batch dimension and move it to the device.
pixel_values, target = val_dataset[0]
pixel_values = pixel_values.unsqueeze(0).to(device)

# Show the resulting tensor shape.
print(pixel_values.shape)

In [None]:
def writeJson(jsonObject, filename):
    """Write an already serialized JSON string to ``filename``.

    ``.json`` is appended to ``filename`` unless the text after its last dot
    is exactly ``json``.

    Args:
        jsonObject: The JSON text to write.
        filename: Target path, with or without the ``.json`` extension.
    """
    extension = filename.rsplit(".", 1)[-1]
    target_path = filename if extension == "json" else f"{filename}.json"
    with open(target_path, "w") as handle:
        handle.write(jsonObject)

device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

path_new_images = "../new_images/"
path_predictions = "../predicted_images"
input_file = "coco_new.json"
pred_coco_filename = "coco_predictions.json"
# Only detections the model is at least 50% confident about are exported.
threshold_postprocessing = 0.5

# Image files currently waiting in the input folder. The export loop below is
# driven by the COCO file instead, so that every image is matched to its own
# file by name rather than by directory listing order.
list_paths = [os.path.join(path_new_images, filename) for filename in os.listdir(path_new_images) if
                            filename.lower().endswith(('.png', '.jpg', '.jpeg', '.gif', '.bmp'))]


processor = DetrImageProcessor.from_pretrained("facebook/detr-resnet-50")

new_dataset = CocoDetection(img_folder=path_new_images, processor=processor, image_name=input_file)

print("Number of new images:", len(new_dataset))

# Make sure the output folder exists before anything is written or moved into it.
os.makedirs(path_predictions, exist_ok=True)
pred_path = os.path.join(path_predictions, pred_coco_filename)

with open(os.path.join(path_new_images, input_file), "r") as f:
    coco_tmp = json.load(f)

if os.path.exists(pred_path):
    # Extend the existing predictions file with image entries it does not know yet.
    with open(pred_path, "r") as f:
        coco_predict = json.load(f)

    known_image_ids = {img["id"] for img in coco_predict["images"]}
    for x in coco_tmp["images"]:
        if x["id"] not in known_image_ids:
            coco_predict["images"].append(x)
            known_image_ids.add(x["id"])
else:
    # Deep copy: a shallow copy would share the "images"/"annotations" lists,
    # so edits to one document would silently show up in the other.
    coco_predict = copy.deepcopy(coco_tmp)

# Continue annotation ids after the highest id already in use.
id_count = max((a["id"] for a in coco_predict["annotations"]), default=-1) + 1

# Each image must be processed exactly once, otherwise its predictions are duplicated.
for z in range(len(new_dataset)):
    pixel_values, target = new_dataset[z]
    pixel_values = pixel_values.unsqueeze(0).to(device)
    image_id = target["image_id"].item()

    with torch.no_grad():
        # Forward pass to get class logits and bounding boxes.
        outputs = model(pixel_values=pixel_values, pixel_mask=None)

    # Scale the boxes to the ORIGINAL image size, given as (height, width).
    # target["size"] describes the resized network input and would give boxes
    # that do not match the exported image.
    height, width = target["orig_size"].tolist()
    p_postprocessed_outputs = processor.post_process_object_detection(outputs,
                                                                    target_sizes=[(height, width)],
                                                                    threshold=threshold_postprocessing)
    prediction = p_postprocessed_outputs[0]

    for label, box in zip(prediction["labels"].tolist(), prediction["boxes"].tolist()):
        # The post-processed boxes are [xmin, ymin, xmax, ymax]; COCO
        # annotations store [x, y, width, height].
        xmin, ymin, xmax, ymax = box
        box_width = xmax - xmin
        box_height = ymax - ymin
        dict_new_pred = {"id": id_count,
                         "image_id": image_id,
                         "category_id": int(label),
                         "segmentation": [],
                         "bbox": [xmin, ymin, box_width, box_height],
                         "ignore": 0,
                         "iscrowd": 0,
                         "area": box_width * box_height}
        coco_predict["annotations"].append(dict_new_pred)
        id_count += 1

    # Drop the processed image from the input document. Build a new list
    # instead of deleting while indexing, which raises IndexError.
    coco_tmp["images"] = [img for img in coco_tmp["images"] if img["id"] != image_id]

    # Move exactly the file that belongs to this image to the predictions folder.
    file_name = new_dataset.coco.loadImgs(image_id)[0]["file_name"]
    shutil.move(os.path.join(path_new_images, file_name),
                os.path.join(path_predictions, os.path.basename(file_name)))


# Write back the remaining input images and the accumulated predictions.
writeJson(json.dumps(coco_tmp, indent=4), os.path.join(path_new_images, input_file))
writeJson(json.dumps(coco_predict, indent=4), pred_path)
    

## Timestamps Testing

In [None]:
import time

In [None]:
# Scratch benchmark: time how long squaring 9,999,999 integers takes.
t1 = time.time()
for x in range(9999999):
    x**2
t2 = time.time()

# Elapsed wall-clock seconds, rounded to four decimals.
print(round(t2 - t1, 4))