In [1]:
import tensorflow as tf

tf.enable_eager_execution()
tf.executing_eagerly()

True

In [2]:
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Reshape, Conv2D, Input, MaxPooling2D, BatchNormalization, Dense, LeakyReLU
from tensorflow.keras.optimizers import Adam
from tensorflow.keras.applications import MobileNet
import numpy as np
from utils import read_labels, draw_image, image_to_yolo_input, Object, parse_annotation, Annotation, calculate_IoU, image_to_vgg_input, image_to_mobilenet_input, LabelEncoder
from augmentation import read_image, display_image, change_brightness_slightly, change_brightness_not_so_slightly, dropout, adjust_contrast, grayscale, noise, blur, sharpen
from PIL import Image as Img
import random
import os
import matplotlib.pyplot as plt
import time
import cv2
import uuid
import shutil

In [40]:
#TODO: read from config

image_width = 416
image_height = 416
grid_width = int(image_width / 32) # 13
grid_height = int(image_height / 32) # 13

cell_width = image_width / grid_width
cell_height = image_height / grid_height

boxes = 5

activation_alpha = 0.1

object_scale = 20
noobject_scale = 1
class_scale = 4
coord_scale = 4

threshhold = 0.5
nms_threshhold = 0.5#0.5

anchors = np.array([[1.05, 1.65], [2.44, 4.13], [4.01, 8.46], [7.62, 5.13], [9.97, 10.43]], dtype = np.float32) # obtained from KMeans experiments ipynb

batch_size = 128

labels_dir = "./labels.txt"

annotation_folder = '.\VOCdevkit\VOC2007\Annotations'
images_folder = '.\VOCdevkit\VOC2007\JPEGImages'

murka = r'.\mytestimages\murka.jpg'

test_annotation = r'.\VOCdevkit\VOC2007\Annotations\000113.xml'
test_image = r'.\VOCdevkit\VOC2007\JPEGImages\000113.jpg'

In [60]:
labels, labels_count = read_labels(labels_dir)

encoder = LabelEncoder(labels)

print(labels_count)
print(labels)

20
['horse', 'diningtable', 'sofa', 'train', 'bird', 'aeroplane', 'person', 'boat', 'bottle', 'motorbike', 'bus', 'cat', 'pottedplant', 'car', 'dog', 'bicycle', 'sheep', 'cow', 'tvmonitor', 'chair']


In [5]:
def get_tinyyolov2():
    layers = []

    layers.append(Input(shape=(image_width, image_height, 3)))

    layers.append(Conv2D(filters = 16, kernel_size = (3, 3), padding = "same", use_bias = False, name="conv_1"))
    layers.append(BatchNormalization(name = "norm_1"))
    layers.append(LeakyReLU(name = "leaky_1", alpha = activation_alpha))
    layers.append(MaxPooling2D(name = "maxpool_1"))

    layers.append(Conv2D(filters = 32, kernel_size = (3, 3), padding = "same", use_bias = False, name="conv_2"))
    layers.append(BatchNormalization(name = "norm_2"))
    layers.append(LeakyReLU(name = "leaky_2", alpha = activation_alpha))
    layers.append(MaxPooling2D(name = "maxpool_2"))

    layers.append(Conv2D(filters = 64, kernel_size = (3, 3), padding = "same", use_bias = False, name="conv_3"))
    layers.append(BatchNormalization(name = "norm_3"))
    layers.append(LeakyReLU(name = "leaky_3", alpha = activation_alpha))
    layers.append(MaxPooling2D(name = "maxpool_3"))

    layers.append(Conv2D(filters = 128, kernel_size = (3, 3), padding = "same", use_bias = False, name="conv_4"))
    layers.append(BatchNormalization(name = "norm_4"))
    layers.append(LeakyReLU(name = "leaky_4", alpha = activation_alpha))
    layers.append(MaxPooling2D(name = "maxpool_4"))

    layers.append(Conv2D(filters = 256, kernel_size = (3, 3), padding = "same", use_bias = False, name="conv_5"))
    layers.append(BatchNormalization(name = "norm_5"))
    layers.append(LeakyReLU(name = "leaky_5", alpha = activation_alpha))
    layers.append(MaxPooling2D(name = "maxpool_5"))

    layers.append(Conv2D(filters = 512, kernel_size = (3, 3), padding = "same", use_bias = False, name="conv_6"))
    layers.append(BatchNormalization(name = "norm_6"))
    layers.append(LeakyReLU(name = "leaky_6", alpha = activation_alpha))
    #layers.append(MaxPooling2D(name = "maxpool_6", pool_size = (2, 2), strides = (1, 1)))

    layers.append(Conv2D(filters = 1024, kernel_size = (3, 3), padding = "same", use_bias = False, name="conv_7"))
    layers.append(BatchNormalization(name = "norm_7"))
    layers.append(LeakyReLU(name = "leaky_7", alpha = activation_alpha))

    layers.append(Conv2D(filters = 1024, kernel_size = (3, 3), padding = "same", use_bias = False, name="conv_8"))
    layers.append(BatchNormalization(name = "norm_8"))
    layers.append(LeakyReLU(name = "leaky_8", alpha = activation_alpha))

    layers.append(Conv2D(filters = (boxes * (4 + 1 + labels_count)), kernel_size = (1, 1), padding = "same", name="conv_9"))

    layers.append(Reshape(target_shape = (grid_width, grid_height, boxes, 5 + labels_count), name = "output"))

    tinyyolov2 = Sequential(layers = layers, name = "tiny yolov2 voc")
    tinyyolov2.summary()
    
    return tinyyolov2

In [6]:
# get_annotations_images

In [8]:
assume_batch_size = 10
assume_grid_width = 3
assume_grid_height = 3
assume_boxes = 2

cell_x = np.reshape(np.repeat(np.tile(range(assume_grid_width), assume_batch_size * assume_grid_height), assume_boxes), (assume_batch_size, assume_grid_width, assume_grid_height, assume_boxes))
cell_y = np.transpose(cell_x, (0,2,1,3))

#print(cell_y.shape)
#print(cell_y)
#print('=====================================')
#print(cell_index)

cell_x = tf.to_float(tf.reshape(tf.keras.backend.repeat_elements(tf.tile(tf.range(assume_grid_width), [assume_batch_size * assume_grid_height]), assume_boxes, axis=0), 
                        (assume_batch_size, assume_grid_width, assume_grid_height, assume_boxes)))
cell_y = tf.transpose(cell_x, (0,2,1,3))

#print(cell_x)
#print('=====================================')
#print(cell_y)

W1228 00:46:59.509115 17396 deprecation.py:323] From <ipython-input-8-5e31b72149f0>:15: to_float (from tensorflow.python.ops.math_ops) is deprecated and will be removed in a future version.
Instructions for updating:
Use `tf.cast` instead.


In [9]:
'''
    intermins = tf.maximum(xy_true, xy_pred)
    intermaxes = tf.minimum(xy_true, xy_pred)

    interArea = tf.maximum(0.0, intermaxes[..., 0] - intermins[..., 0] + 1) * tf.maximum(0.0, intermaxes[..., 1] - intermins[..., 1] + 1)

    groundTruthArea = (wh_true[..., 0] + 1) * (wh_true[..., 1] + 1)
    predictedArea = (wh_pred[..., 0] + 1) * (wh_pred[..., 1] + 1)

    iou = interArea / (groundTruthArea + predictedArea - interArea)    
    
    xpred = y_pred[:, :, :, :, 1]   
    ypred = y_pred[:, :, :, :, 2]                
    xtrue = y_true[:, :, :, :, 1]
    ytrue = y_true[:, :, :, :, 2]

    wpred = y_pred[:, :, :, :, 3]
    hpred = y_pred[:, :, :, :, 4]                
    wtrue = y_true[:, :, :, :, 3]
    htrue = y_true[:, :, :, :, 4]

    xy_pred = y_pred[:, :, :, :, 1 : 3]
    xy_true = y_true[:, :, :, :, 1 : 3]

    wh_pred = y_pred[:, :, :, :, 3 : 5]
    wh_true = y_true[:, :, :, :, 3 : 5]
    '''   
    
    
    #loss = tf.reduce_mean(loss)
    #print(np.any(np.isnan(xywhcoef)))
    #print(np.any(np.isnan(((xtrue - xpred) ** 2))))
    #print(np.any(np.isnan(xywhcoef * ((xtrue - xpred) ** 2))))
    #print(loss)
    

'''
def custom_loss(y_true, y_pred):
    c_pred = tf.sigmoid(y_pred[:, :, :, :, 0])
    c_true = y_true[:, :, :, :, 0]
    
    greaters = tf.greater(c_true, 0.0)
    
    mask_shape = (batch_size, grid_width, grid_height, boxes)
    objs = tf.ones(shape = (mask_shape)) * object_scale
    noobjs = tf.ones(shape = (mask_shape)) * noobject_scale
    
    coef = tf.where(greaters, objs, noobjs) 
    
    
    
    return coef * (c_true - c_pred) ** 2
'''


#runningman = images[52]
#runningmanannot = annotations[52]

'\ndef custom_loss(y_true, y_pred):\nc_pred = tf.sigmoid(y_pred[:, :, :, :, 0])\nc_true = y_true[:, :, :, :, 0]\n\ngreaters = tf.greater(c_true, 0.0)\n\nmask_shape = (batch_size, grid_width, grid_height, boxes)\nobjs = tf.ones(shape = (mask_shape)) * object_scale\nnoobjs = tf.ones(shape = (mask_shape)) * noobject_scale\n\ncoef = tf.where(greaters, objs, noobjs) \n\n\n\nreturn coef * (c_true - c_pred) ** 2\n'

In [10]:
#print(np.array(anchors)[:, 0])

In [41]:
# (almost) complete yolov2 loss function

In [12]:
# decode, nms, feedforward

In [13]:
annotations_files, images = get_annotations_images(annotation_folder, images_folder)
#ins = np.array([image_to_mobilenet_input(image, inputshape = (image_width, image_height)) for image in images], dtype=np.float32)
#outs = np.array([encode_y_true_from_annotatoin(annotation) for annotation in annotations], dtype=np.float32)

#print(ins.shape)
#print(outs.shape)
print(f'Lens: {len(annotations_files)} {len(images)}')
print('Prepared ins & outs paths')

Lens: 5011 5011
Prepared ins & outs paths


In [14]:
start = time.time()
annotations = [parse_annotation(annotation) for annotation in annotations_files]
end = time.time()

print(f'Prepared annotations in {(end - start):.2f}s')

Prepared annotations in 1.05s


In [15]:
def batch_generator(annotations, images, batch_size, raw_files = True):
    ins = []
    outs = []
    
    
    while True:
        for index in range(len(images)):
            ins.append(image_to_mobilenet_input(images[index], inputshape = (image_width, image_height)))
            outs.append(encode_y_true_from_annotation(annotations[index], raw_files))
            
            if len(ins) == batch_size:
                yield (np.array(ins, dtype=np.float32), np.array(outs, dtype=np.float32))
                ins = []
                outs = []

def normalize_image_to_mobilenet_input(im):
    im /= 255
    im -= 0.5
    im *= 2.

    return im
                
def batch_generator_augmentation(annotations, images, batch_size, normalize_function, augmention_functions, aug_chance = 0.5, max_augs = 2, raw_files = True):
    ins = []
    outs = []
    
    
    while True:
        for index in range(len(images)):
            out = read_image(images[index], (image_width, image_height))
            
            if random.random() < aug_chance:
                
                augs = int(random.uniform(0, max_augs)) + 1
                for aug in random.choices(augmention_functions, k = augs):
                    out = aug(out)
            
        
            ins.append(out)
            outs.append(encode_y_true_from_annotation(annotations[index], raw_files))
            
            if len(ins) == batch_size:
                yield (np.array(ins, dtype=np.float32), np.array(outs, dtype=np.float32))
                ins = []
                outs = []

In [16]:
#assert not np.any(np.isnan(ins))
#assert not np.any(np.isnan(outs))

In [17]:
generator_preprocessed = batch_generator(annotations, images, batch_size, raw_files=False)

start = time.time()
next(generator_preprocessed)
end = time.time()

print(f'Batch generated in {(end - start):.2f}s')

Batch generated in 1.57s


In [18]:
generator_raw = batch_generator(annotations_files, images, batch_size, raw_files=True)

start = time.time()
next(generator_raw)
end = time.time()

print(f'Batch generated in {(end - start):.2f}s')

Batch generated in 1.12s


In [19]:
'''There's not a significant enough time difference for pre-loading of annotations to be worth it.'''

"There's not a significant enough time difference for pre-loading of annotations to be worth it."

In [20]:
augmenters = [blur, sharpen, noise, adjust_contrast, change_brightness_not_so_slightly, change_brightness_slightly, dropout, grayscale]
generator_raw = batch_generator_augmentation(annotations_files, images, batch_size, normalize_image_to_mobilenet_input, augmenters)

start = time.time()
next(generator_raw)
end = time.time()

print(f'Batch generated in {(end - start):.2f}s')

Batch generated in 2.26s


In [32]:
# augmenter

In [31]:
augmenters = [blur, adjust_contrast, change_brightness_not_so_slightly, change_brightness_slightly, dropout]
people_ann_folder = r"C:\Users\Gencho\Desktop\ObjectDetection\experiments\annotations"
people_im_folder = r"C:\Users\Gencho\Desktop\ObjectDetection\experiments\images"
testa, testb = get_annotations_images(people_ann_folder, people_im_folder)
available_count = len(testa)
generate_count = 1000

start = time.time()
augment_images(people_im_folder, people_ann_folder, augmenters, target_count = available_count + generate_count)
end = time.time()

print(f'Generated {generate_count} new images in {(end - start):.2f}s')

Generated 1000 new images in 55.22s


In [27]:
def image_to_mobilenet_input_opencv(path, inputshape):
    im = cv2.imread(path)
    im = cv2.resize(im, inputshape)
    
    im = np.array(im, np.float32)
    im = im[..., ::-1]
    im /= 255
    im -= 0.5
    im *= 2.
    
    return im

start = time.time()
a = image_to_mobilenet_input_opencv(test_image, inputshape = (image_width, image_height))
end = time.time()

print(f'Image prepared in: {(end - start):.5f}s')

start = time.time()
b = image_to_mobilenet_input(test_image, inputshape = (image_width, image_height))
end = time.time()

print(f'Image prepared in: {(end - start):.5f}s')
'''There's not a significant time difference when using opencv.'''
#assert np.array_equal(a, b)

Image prepared in: 0.00700s
Image prepared in: 0.00700s


"There's not a significant time difference when using opencv."

In [None]:
#print(a[0])
#print('==================')
#print(b[0])

In [38]:
# getmobilenetyolov2

In [42]:
mobilenetyolov2 = get_mobilenetyolov2()

adam = Adam(lr=0.5e-4, beta_1=0.9, beta_2=0.999, epsilon=1e-08, decay=0.0)
mobilenetyolov2.compile(optimizer = adam, loss = custom_loss)

print('Model compiled')

Model compiled


In [None]:
#testbatchins = ins[:batch_size]
#testbatchouts = outs[:batch_size]
#testbatchins = np.array([image_to_mobilenet_input(test_image, (image_width, image_height))], dtype=np.float32)
#testbatchouts = np.array([encode_y_true_from_annotatoin(test_annotation)], dtype=np.float32)
    
#pred = mobilenetyolov2.predict(testbatchins)
#loss = custom_loss(testbatchouts, pred)

#assert not np.any(np.isnan(pred))
#assert not np.any(np.isnan(loss))

In [None]:
#mobilenetyolov2.load_weights('./weights/mobilenetyolov2try07abitofaugmentation')

In [None]:
#gen = tf.keras.preprocessing.image.ImageDataGenerator()

epochs = 250
augmenters = [blur, sharpen, noise, adjust_contrast, change_brightness_not_so_slightly, change_brightness_slightly, dropout, grayscale]
generator_raw = batch_generator_augmentation(annotations_files, images, batch_size, normalize_image_to_mobilenet_input, augmenters)


h = mobilenetyolov2.fit_generator(generator_raw, steps_per_epoch = len(images) // batch_size, epochs = epochs)

In [None]:
#mobilenetyolov2.save_weights('./weights/mobilenetyolov2try07abitofaugmentation')

In [43]:
'''
def filter_people(annotations, images):
    people_ann = []
    people_im = []
    
    for i in range(len(images)):
        image, annotation = images[i], annotations[i]
        if any(obj.name == "person" for obj in parse_annotation(annotation).objects):
            people_ann.append(annotation)
            people_im.append(image)
                
    return people_ann, people_im

people_ann, people_im = filter_people(annotations_files, images)



ann_target_folder = r"C:\Users\Gencho\Desktop\ObjectDetection\experiments\annotations"
im_target_folder = r"C:\Users\Gencho\Desktop\ObjectDetection\experiments\images"

for i in range(len(people_ann)):
    shutil.copy(people_ann[i], f'{ann_target_folder}\\{i}.xml')
    shutil.copy(people_im[i], f'{im_target_folder}\\{i}.jpg')
'''

SyntaxError: (unicode error) 'unicodeescape' codec can't decode bytes in position 479-480: truncated \UXXXXXXXX escape (<ipython-input-43-a46673e52271>, line 24)

In [None]:
#threshhold = 0.5

In [None]:
index = 0

In [None]:
index -= 1

In [None]:
threshhold = 0.2
objs = feed_forward(mobilenetyolov2, images[index], True)
for obj in objs:
    print(obj)
objects_trough_nms = group_nms(objs)
draw_image(images[index], objects_trough_nms, draw_grid = True, grid_size = (grid_width, grid_height))
for obj in objects_trough_nms:
    print(obj)
threshhold = 0.5
index += 1

In [None]:
#index = 52
#runningman = images[52]
'''
encoded = encode_y_true_from_annotatoin(annotations[index])
for row in range(grid_width):
    for col in range(grid_height):
        for box in range(boxes):
            if encoded[row, col, box, 0]== 1:
                print(f'{row} {col} {box}')
'''

threshhold = 0.1
objs = feed_forward(mobilenetyolov2, murka, False)
objs = nms(objs)
for obj in objs:
    print(obj)
draw_image(murka, objs)
threshhold = 0.5

In [None]:
'''
things to consider:
Lambda layer which decodes output
'''

In [None]:
'''
TODO:
    possible consideration:
        if the network makes a prediction with an IoU > 0.6 in a detector which was not chosen for the object do not penalise it
      
    train on entire voc 2012 + 2007 not only 2007
'''

In [None]:
'''Sanity check if images are preprocessed correctly for mobilenet'''
#mobilenet = MobileNet(weights = 'imagenet')

#from tensorflow.keras.applications.mobilenet import decode_predictions

#test = np.array([image_to_mobilenet_input(r'.\VOCdevkit\VOC2007\JPEGImages\000019.jpg', inputshape=(224, 224))])
#res = mobilenet.predict(test)
#print(decode_predictions(res))