In [1]:
import os
import gc
import cv2
import torch
import time
import math
import random
import argparse
import statistics
import torchvision
import numpy as np
import matplotlib.pyplot as plt
from models.gradcam import YOLOV5GradCAM
from models.yolo_v5_object_detector import YOLOV5TorchObjectDetector
from deep_utils import Box, split_extension

In [2]:
# Run on the GPU when CUDA is available, otherwise fall back to the CPU.
if torch.cuda.is_available():
    device = torch.device("cuda")
else:
    device = torch.device("cpu")

In [3]:
# Load the reference image. cv2.imread returns None (it does not raise) when the
# file is missing or unreadable, so fail early with a clear message instead of an
# AttributeError on `.shape` below.
img = cv2.imread("images/dog.jpg")
if img is None:
    raise FileNotFoundError("Could not read image: images/dog.jpg")
# First two dimensions of the raw image array; reused later as the detector's img_size.
input_size = img.shape[:2]
print('[INFO] Loading the model')
# NOTE(review): this detector is pinned to the CPU with a fixed (640, 480) size,
# independent of `device` and `input_size` — confirm it is still needed.
model_main = YOLOV5TorchObjectDetector("yolov5n.pt", torch.device('cpu'), img_size=(640, 480))
torch_img = model_main.preprocessing(img)

Fusing layers... 
Model Summary: 213 layers, 1867405 parameters, 0 gradients


[INFO] Loading the model
[INFO] Model is loaded
[INFO] fetching names from coco file


  return _VF.meshgrid(tensors, **kwargs)  # type: ignore[attr-defined]


In [22]:
def empty_patch(size, device, random=True):
    """Create a trainable patch tensor of the given size on ``device``.

    Args:
        size: Shape of the patch, passed straight to ``torch.rand`` /
            ``torch.zeros``.
        device: Device the patch values are moved to.
        random: When truthy, initialise with uniform random samples from
            ``torch.rand``; otherwise start from all zeros.

    Returns:
        A ``torch.nn.Parameter`` wrapping the patch with ``requires_grad=True``.
    """
    # Note: the ``random`` flag shadows the ``random`` module inside this function.
    initialiser = torch.rand if random else torch.zeros
    values = initialiser(size).to(device)
    return torch.nn.Parameter(data=values, requires_grad=True)

def apply_patch(image_batch, patch, location=(0,0), rotation=0, scale=(64,64)):
    """Paste a resized, rotated copy of ``patch`` onto every image in ``image_batch``.

    The patch is resized to ``scale``, padded onto a canvas large enough to hold
    it at any rotation, rotated, and then padded again to the image size so it
    can be blended with the images through a matching mask.

    Args:
        image_batch: Image tensor indexed as (batch, channel, height, width).
        patch: Patch tensor; presumably (channel, height, width) — it is resized
            and then repeated along a new leading batch dimension.
        location: ``(x, y)`` offset of the rotated patch canvas from the image's
            left/top edge. It is pulled back when the canvas would extend past
            the right/bottom edge.
        rotation: Angle forwarded to ``torchvision.transforms.functional.rotate``.
        scale: Target size forwarded to ``torchvision.transforms.Resize``.

    Returns:
        A tensor shaped like ``image_batch`` where masked pixels come from the
        patch and all other pixels come from the original images.
    """
    batch_size = len(image_batch)

    patch = torchvision.transforms.Resize(size=scale)(patch)

    patch_batch = get_batched(patch, batch_size)
    # Build the mask on the patch's own device instead of relying on a
    # notebook-level ``device`` global that may not match (or even exist).
    mask_batch = square_mask(patch_batch.shape).to(patch_batch.device)

    # Diagonal of the resized patch, i.e. the diameter of its circumscribed
    # circle. Padding each side up to this length keeps the corners on the
    # canvas for any rotation angle.
    patch_diagonal = math.sqrt(scale[0] ** 2 + scale[1] ** 2)

    pad1 = math.ceil((patch_diagonal - patch_batch.shape[3]) / 2)
    pad2 = math.ceil((patch_diagonal - patch_batch.shape[2]) / 2)

    patch_batch = torch.nn.functional.pad(patch_batch, (pad1, pad1, pad2, pad2))
    mask_batch = torch.nn.functional.pad(mask_batch, (pad1, pad1, pad2, pad2))

    # Rotate patch and mask together so the mask keeps tracking the patch pixels.
    patch_batch = torchvision.transforms.functional.rotate(patch_batch, rotation)
    mask_batch = torchvision.transforms.functional.rotate(mask_batch, rotation)

    # Remaining room to the right of / below the canvas at the requested offset.
    pad1 = image_batch.shape[3] - patch_batch.shape[3] - location[0]
    pad2 = image_batch.shape[2] - patch_batch.shape[2] - location[1]

    # Patch can only be on the image: when the room is negative, shift the
    # offset back by the overshoot and use no right/bottom padding.
    location = (location[0] + min(0, pad1), location[1] + min(0, pad2))
    pad1 = max(0, pad1)
    pad2 = max(0, pad2)

    patch_batch = torch.nn.functional.pad(patch_batch, (location[0], pad1, location[1], pad2))
    mask_batch = torch.nn.functional.pad(mask_batch, (location[0], pad1, location[1], pad2))

    # Blend: mask == 1 selects the patch, mask == 0 keeps the image.
    patched_batch = (1-mask_batch) * image_batch + mask_batch * patch_batch
    return patched_batch

def get_batched(tensor, batch_size):
    """Stack ``batch_size`` copies of ``tensor`` along a new leading dimension.

    Args:
        tensor: Tensor to replicate.
        batch_size: Number of copies.

    Returns:
        A tensor of shape ``(batch_size, *tensor.shape)``.
    """
    # One repeat count per existing dimension (kept at 1) plus the new batch axis.
    repeats = [batch_size] + [1] * tensor.dim()
    return tensor.repeat(*repeats)

def square_mask(shape):
    """Return an all-ones mask tensor with the requested shape.

    Args:
        shape: Shape forwarded to ``torch.ones``.

    Returns:
        A tensor of ones; every position is selected by the mask.
    """
    mask = torch.ones(shape)
    return mask

def save_tensor_img(i, filename):
    """Write the first image of batch tensor ``i`` to ``filename`` with OpenCV.

    Args:
        i: Batch tensor; element 0 is taken and its axes reordered from
            (channel, height, width) to (height, width, channel).
        filename: Destination path passed to ``cv2.imwrite``.
    """
    first_image = i.detach().cpu().numpy()[0]
    # Move the channel axis last, then scale the values by 255 before writing.
    channel_last = first_image.transpose(1, 2, 0)
    cv2.imwrite(filename, channel_last * 255)

def show_tensor_img(i):
    """Display the first image of batch tensor ``i`` with matplotlib.

    Args:
        i: Batch tensor; element 0 is taken and its axes reordered from
            (channel, height, width) to (height, width, channel).
            Assumes float values in [0, 1], like the patch that ``train``
            clamps to that range — TODO confirm for other inputs.
    """
    # matplotlib treats float RGB data as lying in [0, 1] and clips anything
    # outside it. The previous ``* 255`` scaling pushed nearly every pixel past
    # 1, so the image rendered almost entirely white. Show the values unscaled
    # and clamp explicitly to avoid the clipping warning.
    first_image = i.detach().cpu()[0].clamp(0, 1).numpy().transpose(1, 2, 0)
    plt.imshow(first_image)
    plt.show()

def loss(objs):
    """Return the largest first element found across ``objs``, floored at 0.

    Args:
        objs: Iterable of indexable entries; only ``entry[0]`` is read.

    Returns:
        The maximum of all ``entry[0]`` values and 0. The integer ``0`` is
        returned when ``objs`` is empty or every value is below zero.
    """
    highest = 0
    for entry in objs:
        candidate = entry[0]
        # Argument order matters on ties: max() keeps its first argument.
        highest = max(candidate, highest)
    return highest

def loss_targeted(a, c, img_sz, location):
    """Return the largest ``c[0][i][0]`` among boxes that contain ``location``.

    Args:
        a: Nested structure whose ``a[0][0]`` is the list of bounding boxes.
        c: Nested structure indexed in parallel as ``c[0][i][0]``.
        img_sz: Image size forwarded to ``point_in_bbox``.
        location: Point forwarded to ``point_in_bbox``.

    Returns:
        The maximum selected value, or the integer ``0`` when no box contains
        the point.
    """
    boxes = a[0][0]
    hits = []
    for idx in range(len(boxes)):
        if point_in_bbox(img_sz, location, boxes[idx]):
            hits.append(c[0][idx][0])
    if not hits:
        return 0
    return max(hits)

def train(model, patch, optimizer, train_imgs, target_classes=["dog"], placement_range=(0,0), batch_size=16, max_epochs=100, patch_snapshot_interval=100, preloaded=None):
    """Optimise ``patch`` against ``model`` over the images listed in ``train_imgs``.

    For every mini-batch, each image is read from disk, preprocessed with
    ``model.preprocessing`` and patched at a random location and rotation.
    ``loss`` is evaluated on the third value returned by the model, the
    per-image losses are averaged, and one optimizer step is taken on the
    gradient after dividing it by its L-infinity norm. The patch is clamped to
    [0, 1] after each step and saved to ``patch_<epoch>.pt`` whenever the epoch
    index is a multiple of ``patch_snapshot_interval``.

    Args:
        model: Detector exposing ``preprocessing(image)`` and returning three
            values when called on a patched image.
        patch: Tensor being optimised; its ``.grad`` is read after ``backward``.
        optimizer: Optimizer whose ``step`` / ``zero_grad`` update ``patch``.
        train_imgs: Sequence of image file paths. It is copied, so the caller's
            list is not reordered by the per-epoch shuffle.
        target_classes: Currently unused.
        placement_range: Currently unused.
        batch_size: Number of image paths per optimizer step.
        max_epochs: Number of passes over ``train_imgs``.
        patch_snapshot_interval: Save the patch every this many epochs.
        preloaded: Currently unused.
    """
    # Work on a copy so random.shuffle below does not reorder the caller's list.
    train_imgs = list(train_imgs)

    batches_per_epoch = math.ceil(len(train_imgs) / batch_size)

    for i in range(max_epochs):
        for j in range(batches_per_epoch):
            l = 0
            img_set = train_imgs[j * batch_size:(j + 1) * batch_size]
            items_done = 0
            for k in img_set:
                img = cv2.imread(k)
                if img is None:
                    # cv2.imread signals failure with None instead of raising.
                    print("Skipping unreadable image: " + str(k))
                    continue
                img = model.preprocessing(img)

                chosen_location = (math.floor(random.random() * img.shape[3]), math.floor(random.random() * img.shape[2]))

                patched_img = apply_patch(img, patch, location=chosen_location, rotation=random.random() * 360, scale=(64,64))
                a, b, c = model(patched_img)

                # loss() returns the int 0 when nothing scores above zero, so
                # ``l`` stays an int until a tensor-valued loss is added.
                l += loss(c)
                items_done += 1

                # Drop per-image references. The earlier element-wise
                # ``del b[z]`` loops raised IndexError for lists longer than
                # one, and the ``.detach().cpu()`` calls discarded their results.
                del patched_img, img, a, b, c
            gc.collect()

            # No tensor-valued loss in this batch: nothing to back-propagate.
            if type(l) == int:
                continue

            l /= items_done

            l.backward()

            # normalize gradients by dividing l_infinity norm
            grad_linf = patch.grad.detach().abs().max()
            if grad_linf > 0:
                patch.grad /= grad_linf

            optimizer.step()
            optimizer.zero_grad() # zero the gradient

            # Keep the patch inside the [0, 1] value range.
            patch.data = patch.data.clamp(0,1)

            del l
            print("Batch: " + str(j) + "/" + str(batches_per_epoch))

        print("Epoch: " + str(i))
        print('\n\n')

        random.shuffle(train_imgs)

        if i % patch_snapshot_interval == 0:
            torch.save(patch, 'patch_' + str(i) + '.pt')

def location_scale_from_bbox(img_size, bbox):
    """Derive a centre offset and a size from a bounding box.

    Args:
        img_size: Two-element image size; ``img_size[0]`` pairs with
            ``bbox[1]``/``bbox[3]`` and ``img_size[1]`` with ``bbox[0]``/``bbox[2]``.
        bbox: Four-element box; the size is ``(bbox[3] - bbox[1], bbox[2] - bbox[0])``.

    Returns:
        ``(center, size)`` where ``center`` is the box centre expressed
        relative to the image centre and ``size`` is the box size.
    """
    extent = (bbox[3] - bbox[1], bbox[2] - bbox[0])

    center_first = math.floor(extent[0] / 2) + bbox[1]
    center_second = math.floor(extent[1] / 2) + bbox[0]

    # Re-express the centre with the image midpoint as the origin.
    origin_first = math.trunc(img_size[0] / 2)
    origin_second = math.trunc(img_size[1] / 2)
    shifted_center = (center_first - origin_first, center_second - origin_second)

    return shifted_center, extent

def point_in_bbox(img_size, point, bbox):
    """Tell whether ``point`` lies inside ``bbox`` once the box is re-centred.

    The box is shifted so the image midpoint becomes the origin, then
    ``point[0]`` is checked against the ``bbox[1]``..``bbox[3]`` range and
    ``point[1]`` against the ``bbox[0]``..``bbox[2]`` range (bounds inclusive).

    Args:
        img_size: Two-element image size used to compute the midpoint.
        point: Two-element point, already relative to the image midpoint.
        bbox: Four-element bounding box.

    Returns:
        ``True`` when the point is inside or on the border, else ``False``.
    """
    half_first = math.trunc(img_size[0] / 2)
    half_second = math.trunc(img_size[1] / 2)

    lower_second, lower_first = bbox[0] - half_second, bbox[1] - half_first
    upper_second, upper_first = bbox[2] - half_second, bbox[3] - half_first

    if not (lower_first <= point[0] <= upper_first):
        return False
    return bool(lower_second <= point[1] <= upper_second)

In [23]:
# Detector the patch is trained and evaluated against: created on `device`, sized
# to the reference image (`input_size`), with confidence=0.05.
# NOTE(review): presumably a low detection-confidence threshold so weak
# detections are kept — confirm against YOLOV5TorchObjectDetector.
model = YOLOV5TorchObjectDetector("yolov5n.pt", device, img_size=input_size, confidence=0.05)

Fusing layers... 
Model Summary: 213 layers, 1867405 parameters, 0 gradients


[INFO] Model is loaded
[INFO] fetching names from coco file


In [24]:
# Directory holding the training images, defined once so the path is not
# repeated. Adjust for the local machine.
TRAIN_DIR = 'C:/Users/OP/Desktop/python311/dataset/train/unmasked/'
# Sort the listing because os.listdir returns entries in arbitrary order, and
# keep regular files only so sub-directories are never handed to cv2.imread.
train_imgs = [os.path.join(TRAIN_DIR, name) for name in sorted(os.listdir(TRAIN_DIR))]
train_imgs = [path for path in train_imgs if os.path.isfile(path)]

In [25]:
# Resume from a previously saved patch when one exists; otherwise start from a
# fresh random 3x64x64 patch. Previously the fresh patch was created and then
# unconditionally overwritten by torch.load, which also failed on a clean run
# where patch.pt had not been written yet.
if os.path.exists('patch.pt'):
    # map_location puts the patch on the active device even if it was saved
    # from a different one.
    e = torch.load('patch.pt', map_location=device)
else:
    e = empty_patch((3, 64, 64), device)

In [26]:
# Alternative optimizer kept for reference (SGD with momentum):
#optimizer = torch.optim.SGD([e], lr=0.1, momentum=0.9, weight_decay=0)
# Adam updates the patch tensor `e`, which is the only optimised parameter.
optimizer = torch.optim.Adam([e], lr=0.004)

In [None]:
# Launch patch training; with patch_snapshot_interval=1 a snapshot is saved after every epoch.
# NOTE(review): train() as defined in this notebook never reads target_classes or
# placement_range, so these two arguments currently have no effect.
train(model, e, optimizer, train_imgs, max_epochs=10000, target_classes=['car'], placement_range=(200,200), patch_snapshot_interval=1)

Batch: 0/7393
Batch: 1/7393
Batch: 2/7393
Batch: 3/7393
Batch: 4/7393
Batch: 5/7393
Batch: 6/7393
Batch: 7/7393
Batch: 8/7393
Batch: 9/7393
Batch: 10/7393
Batch: 11/7393
Batch: 12/7393
Batch: 13/7393
Batch: 14/7393
Batch: 15/7393
Batch: 16/7393
Batch: 17/7393
Batch: 18/7393
Batch: 19/7393
Batch: 20/7393
Batch: 21/7393
Batch: 22/7393
Batch: 23/7393
Batch: 24/7393
Batch: 25/7393
Batch: 26/7393
Batch: 27/7393
Batch: 28/7393
Batch: 29/7393
Batch: 30/7393
Batch: 31/7393
Batch: 32/7393
Batch: 33/7393
Batch: 34/7393
Batch: 35/7393
Batch: 36/7393
Batch: 37/7393
Batch: 38/7393
Batch: 39/7393
Batch: 40/7393
Batch: 41/7393
Batch: 42/7393
Batch: 43/7393
Batch: 44/7393
Batch: 45/7393
Batch: 46/7393
Batch: 47/7393
Batch: 48/7393
Batch: 49/7393
Batch: 50/7393
Batch: 51/7393
Batch: 52/7393
Batch: 53/7393
Batch: 54/7393
Batch: 55/7393
Batch: 56/7393
Batch: 57/7393
Batch: 58/7393
Batch: 59/7393
Batch: 60/7393
Batch: 61/7393
Batch: 62/7393
Batch: 63/7393
Batch: 64/7393
Batch: 65/7393
Batch: 66/7393
Batch

In [16]:
# Read the reference image and run it through the detector's preprocessing.
img = model.preprocessing(cv2.imread('images/dog.jpg'))

In [17]:
# Paste the current patch `e`, unrotated, at offset (200, 200) with a 64x64 size.
patched = apply_patch(img, e, location=(200, 200), rotation=0, scale=(64, 64))

In [None]:
# Reuse the notebook's save_tensor_img helper instead of repeating its
# detach / transpose / scale-by-255 logic inline.
save_tensor_img(patched, "t.png")

In [19]:
# Run the detector on the patched image. The three returned values are unpacked
# as a, b, c; train() feeds the third one to loss().
a, b, c = model(patched)

In [20]:
# Display the first model output. In the recorded output it holds, per image:
# bounding boxes, class ids, class names and confidence values.
a

[[[[77, 466, 170, 686],
   [253, 131, 545, 319],
   [151, 213, 209, 257],
   [106, 382, 153, 469],
   [145, 140, 449, 581],
   [4, 719, 276, 768],
   [123, 682, 156, 717],
   [86, 66, 124, 104],
   [142, 137, 209, 240],
   [6, 710, 272, 768],
   [86, 56, 124, 87],
   [148, 214, 197, 232],
   [86, 57, 112, 75]]],
 [[2, 16, 0, 13, 1, 0, 2, 0, 1, 2, 0, 0, 0]],
 [['car',
   'dog',
   'person',
   'bench',
   'bicycle',
   'person',
   'car',
   'person',
   'bicycle',
   'car',
   'person',
   'person',
   'person']],
 [[0.7809615135192871,
   0.5763532519340515,
   0.5121225118637085,
   0.20842045545578003,
   0.18769066035747528,
   0.10011961311101913,
   0.09605149924755096,
   0.08497133105993271,
   0.08189640939235687,
   0.07323143631219864,
   0.0708186998963356,
   0.05767548084259033,
   0.05339260399341583]]]

In [21]:
# Persist the current patch to patch.pt so it can be reloaded with torch.load later.
torch.save(e, "patch.pt")

In [None]:
# Reload the saved patch onto the active device. map_location avoids a failure
# when the file was saved from a device that is not available now.
# NOTE: this rebinds `e` to a new tensor. An optimizer created earlier still
# references the old one and must be rebuilt before training continues.
e = torch.load('patch.pt', map_location=device)

In [None]:
# Ask PyTorch to release cached GPU memory that is no longer in use.
torch.cuda.empty_cache()