In [1]:
import os

import numpy as np
import tensorflow as tf
from tensorflow import keras
from tensorflow.keras import layers
from tensorflow.python.keras import Model, Input
from tqdm import tqdm

from model_HANK import YOLO_HANK

2022-11-17 20:50:44.718537: I tensorflow/stream_executor/platform/default/dso_loader.cc:49] Successfully opened dynamic library libcudart.so.11.0


In [3]:
# Restrict TensorFlow to a single physical GPU and let it allocate memory on demand.
# GPU_INDEX is the position of the wanted device in the list of physical GPUs.
GPU_INDEX = 3

gpus = tf.config.experimental.list_physical_devices('GPU')
if gpus:
    try:
        # Currently, memory growth needs to be the same across GPUs
        for gpu in gpus:
            tf.config.experimental.set_memory_growth(gpu, True)
        # Select the GPU at GPU_INDEX; fall back to the first GPU on machines that have
        # fewer devices, instead of failing with an IndexError that the handler below
        # would not catch.
        selected_gpu = gpus[GPU_INDEX] if GPU_INDEX < len(gpus) else gpus[0]
        tf.config.experimental.set_visible_devices(selected_gpu, 'GPU')
        logical_gpus = tf.config.experimental.list_logical_devices('GPU')
        print(len(gpus), "Physical GPUs,", len(logical_gpus), "Logical GPUs")
    except RuntimeError as e:
        # Memory growth must be set before GPUs have been initialized
        print(e)

5 Physical GPUs, 1 Logical GPUs


In [4]:
# common params
# NOTE(review): presumably consumed by the model/training code rather than by the
# data-preparation cells — confirm.
IMAGE_SIZE = 448
BATCH_SIZE = 32
NUM_CLASSES = 20
MAX_OBJECTS_PER_IMAGE = 20

# dataset params
# Annotation list: one line per image, "<image name>" followed by groups of
# "xmin ymin xmax ymax class" integers.
TRAIN_DATA_PATH = './pascal_voc_training_data.txt'
# Directory holding the original training images named in TRAIN_DATA_PATH.
TRAIN_IMAGE_DIR = './VOCdevkit_train/VOC2007/JPEGImages/'
# Directory that receives the copies of the training images in which part of the
# class-14 boxes has been blacked out; the copy-paste augmentation reads from it.
CUT_HUMAN_DIR = './VOCdevkit_train/VOC2007/JPEGImages_cut_human/'
# NOTE(review): validation list/images, presumably in the same format as the
# training ones — confirm.
VALID_DATA_PATH = './pascal_voc_validation_data.txt'
VALID_IMAGE_DIR = './VOCdevkit_valid/VOC2007/JPEGImages/'

# model params
# NOTE(review): these look like YOLO grid size, boxes per grid cell and loss-term
# weights — TODO confirm against the model/loss code.
CELL_SIZE = 7
BOXES_PER_CELL = 2
OBJECT_SCALE = 1
NOOBJECT_SCALE = 0.5
CLASS_SCALE = 1
COORD_SCALE = 5

# training params
LEARNING_RATE = 1e-4
EPOCHS = 5

In [5]:
import cv2
from matplotlib import pyplot as plt

# Cut Objects and Store in Objects List

In [6]:
# Crop every annotated object out of the training images and group the crops by class.
# Each line of TRAIN_DATA_PATH is "<image name>" followed by groups of five integers:
# xmin ymin xmax ymax class.

# objects[c] collects every cropped instance of class c (slices of the source images)
objects = [[] for _ in range(20)]

image_names = []
record_list = []
# image_bboxes[k] lists the [xmin, ymin, xmax, ymax, class] boxes of image_names[k].
# It grows with the file instead of being pre-sized to a fixed number of images.
image_bboxes = []

# The context manager closes the annotation file even if a line fails to parse.
with open(TRAIN_DATA_PATH, 'r') as input_file:
    for line in input_file:
        # split() without arguments tolerates repeated/trailing whitespace
        ss = line.split()
        if not ss:
            continue  # skip blank lines
        img = cv2.imread(TRAIN_IMAGE_DIR + ss[0])
        if img is None:
            # cv2.imread signals a missing/unreadable file by returning None
            raise FileNotFoundError('cannot read image: ' + TRAIN_IMAGE_DIR + ss[0])
        image_names.append(ss[0])

        bboxes = []
        for i in range(1, len(ss), 5):
            xmin, ymin, xmax, ymax = int(ss[i]), int(ss[i+1]), int(ss[i+2]), int(ss[i+3])
            oclass = int(ss[i+4])
            crop_img = img[ymin:ymax, xmin:xmax]
            objects[oclass].append(crop_img)
            bboxes.append([xmin, ymin, xmax, ymax, oclass])
        image_bboxes.append(bboxes)

# number of images read (kept because the original cell left this counter behind)
count = len(image_names)

# Print the number of objects in each class
* We found that the person class (index 14) has far more instances than any other class.

In [10]:
# Report how many cropped instances were collected for each of the 20 class ids.
for class_id in range(20):
    crops_of_class = objects[class_id]
    print(class_id, " : ", len(crops_of_class))

0  :  289
1  :  380
2  :  558
3  :  373
4  :  566
5  :  234
6  :  1460
7  :  347
8  :  1268
9  :  315
10  :  280
11  :  493
12  :  371
13  :  329
14  :  4855
15  :  577
16  :  311
17  :  381
18  :  299
19  :  323


# Remove half of the person instances by blacking them out

In [11]:
# delete 1/2 of human
# Every second person box (class 14), counted across the whole data set, is painted
# black in the image and dropped from image_bboxes. The edited images are written to
# CUT_HUMAN_DIR under their original names.
# NOTE: this cell mutates image_bboxes, so running it twice removes persons again.
PERSON_CLASS = 14

# cv2.imwrite does not create directories and only reports failure via its return value
os.makedirs(CUT_HUMAN_DIR, exist_ok=True)

count = 0  # number of person boxes seen so far
for j, image_name in enumerate(image_names):
    img = cv2.imread(TRAIN_IMAGE_DIR + image_name)
    if img is None:
        raise FileNotFoundError('cannot read image: ' + TRAIN_IMAGE_DIR + image_name)

    # Build the list of surviving boxes instead of calling remove() on the list being
    # iterated: removing during iteration skips the box that follows each removal.
    kept_bboxes = []
    for image_bbox in image_bboxes[j]:
        xmin, ymin, xmax, ymax, obj_class = image_bbox
        if obj_class == PERSON_CLASS:
            count = count + 1
            if count % 2 == 0:
                # black out the region; anything overlapping it is covered as well
                img[ymin:ymax, xmin:xmax] = 0
                continue
        kept_bboxes.append(image_bbox)
    # slice assignment keeps the same list object for any other reference to it
    image_bboxes[j][:] = kept_bboxes

    if not cv2.imwrite(CUT_HUMAN_DIR + image_name, img):
        raise IOError('cannot write image: ' + CUT_HUMAN_DIR + image_name)

In [12]:
import random
from PIL import Image
from random import randrange, uniform

# Generate Copy-Paste Augmentation Data

In [17]:
# Copy-paste augmentation.
# For every class group below, make one pass over the person-reduced training images
# (CUT_HUMAN_DIR) and paste one randomly chosen, randomly scaled and possibly mirrored
# object crop into each image. Every result is saved as imageN.jpg (N counts up across
# all passes) and described by one line of Augmentation.txt:
#   imageN.jpg <pasted box + class> <surviving original boxes + classes>
txt_path = './Augmentation.txt'
AUGMENT_IMAGE_DIR = './VOCdevkit_train/VOC2007/Augement_image/'
# One pass over image_names per group; inside a pass the pasted class cycles through
# the group, image by image.
PASTE_CLASS_GROUPS = [
    [0, 3, 5, 7],
    [9, 10, 12, 13],
    [16, 17, 18, 19],
    [1, 2, 4, 11, 15],
]


def _pick_crop(class_crops, height, width):
    """Return a random non-empty crop strictly smaller than height x width.

    Returns None when no crop of the class fits, so the caller can skip the image
    instead of retrying forever.
    """
    fitting = [
        crop for crop in class_crops
        if 0 < crop.shape[0] < height and 0 < crop.shape[1] < width
    ]
    return random.choice(fitting) if fitting else None


def _paste_crop(img, crop):
    """Paste a randomly scaled, possibly mirrored copy of crop into img (in place).

    crop must be strictly smaller than img in both dimensions.
    Returns (col, row, paste_w, paste_h): top-left corner and size of the pasted patch.
    """
    height, width = img.shape[:2]

    scale = random.uniform(0.3, 1)
    # int() truncation can produce 0 for tiny crops, which cv2.resize rejects
    paste_h = max(1, int(crop.shape[0] * scale))
    paste_w = max(1, int(crop.shape[1] * scale))
    patch = cv2.resize(crop, (paste_w, paste_h))
    if random.random() > 0.5:
        patch = cv2.flip(patch, 1)

    # Double the patch while it is below half the image in both dimensions and covers
    # less than 1/16 of the image area; the result still fits inside the image.
    while (paste_h < 0.5 * height and paste_w < 0.5 * width
           and paste_h * paste_w < 0.0625 * width * height):
        paste_h, paste_w = paste_h * 2, paste_w * 2
        patch = cv2.resize(patch, (paste_w, paste_h))

    row = randrange(0, height - paste_h)
    col = randrange(0, width - paste_w)
    img[row:row + paste_h, col:col + paste_w] = patch
    return col, row, paste_w, paste_h


def _annotation_line(name, paste_box, paste_class, bboxes):
    """Build the annotation line for one augmented image.

    The pasted box comes first. An original box is kept when it does not intersect
    the pasted patch, or when its area is at least 6.25 times the patch area.
    """
    col, row, paste_w, paste_h = paste_box
    line = name
    line += " %d %d %d %d %d" % (col, row, col + paste_w, row + paste_h, paste_class)
    for xmin, ymin, xmax, ymax, obj_class in bboxes:
        outside_patch = (xmin >= col + paste_w or xmax <= col
                         or ymin >= row + paste_h or ymax <= row)
        much_larger = (xmax - xmin) * (ymax - ymin) >= 6.25 * paste_w * paste_h
        if outside_patch or much_larger:
            line += " %d %d %d %d %d" % (xmin, ymin, xmax, ymax, obj_class)
    return line


# cv2.imwrite does not create directories and only reports failure via its return value
os.makedirs(AUGMENT_IMAGE_DIR, exist_ok=True)

count = 1  # running number used in the output file names
# The context manager closes (and flushes) the annotation file even if a pass fails.
with open(txt_path, 'w') as f:
    for order in PASTE_CLASS_GROUPS:
        for j, image_name in enumerate(image_names):
            paste_class = order[j % len(order)]

            # origin image
            img = cv2.imread(CUT_HUMAN_DIR + image_name)
            if img is None:
                raise FileNotFoundError('cannot read image: ' + CUT_HUMAN_DIR + image_name)

            # paste image
            crop = _pick_crop(objects[paste_class], img.shape[0], img.shape[1])
            if crop is None:
                print('no crop of class %d fits into %s, image skipped' % (paste_class, image_name))
                continue
            paste_box = _paste_crop(img, crop)

            name = 'image' + '%d' % (count) + '.jpg'
            if not cv2.imwrite(AUGMENT_IMAGE_DIR + name, img):
                raise IOError('cannot write image: ' + AUGMENT_IMAGE_DIR + name)

            # write in txt file
            f.write(_annotation_line(name, paste_box, paste_class, image_bboxes[j]))
            f.write('\n')
            count = count + 1