In [110]:
import numpy as np
import matplotlib.pyplot as plt
from tensorflow.keras.preprocessing import image
import os

import xml.etree.ElementTree as ET
import cv2

import tensorflow as tf
from tensorflow.keras.layers import Input, Dense, Flatten
from tensorflow.keras.layers import Conv2D, BatchNormalization, MaxPool2D, Dropout, MaxPooling2D
from tensorflow.keras.optimizers import Adam

In [111]:
def getBB(file_path):
    """Read the bounding box of the first <object> element in an XML annotation.

    Args:
        file_path: Path to an XML file containing an <object> element with a
            <bndbox> child holding <xmin>, <xmax>, <ymin> and <ymax>.

    Returns:
        [xmin, ymin, xmax, ymax] as ints, or None when the file does not exist.
    """
    try:
        document = ET.parse(file_path)
    except FileNotFoundError:
        return None

    # Only the first <object> in the document is consulted.
    bndbox = document.getroot().find('object').find('bndbox')

    # Collect the raw text of every corner first, then convert to int.
    raw = {tag: bndbox.find(tag).text for tag in ('xmin', 'xmax', 'ymin', 'ymax')}
    return [int(raw[tag]) for tag in ('xmin', 'ymin', 'xmax', 'ymax')]

In [112]:
def load_oxford_pets_3(target_size = (224,224)):
    """Load the images listed in ./annotations/list.txt and split them by box availability.

    Every listed image found under ./images/images/ is resized to
    ``target_size`` and converted to an array. Images with a matching XML file
    under ./annotations/xmls/ go to the training set, with their bounding box
    rescaled to the resized image. All other images go to the test set.

    Args:
        target_size: Size passed to the image ``resize`` call. Element 0 is
            treated as the width and element 1 as the height.

    Returns:
        (train_dataset, test_dataset): two dicts of NumPy arrays. Both have
        'name', 'label' (``int(species) - 1``) and 'image'; train_dataset also
        has 'box' as [xmin, ymin, xmax, ymax] in resized-pixel units.
    """
    input_file = './annotations/list.txt'
    # The context manager closes the handle even if reading raises.
    with open(input_file) as file:
        list_txt = file.readlines()
    # The first six lines of the index file are skipped as a header.
    list_txt = list_txt[6:]
    # Order is randomised with NumPy's global RNG (seed it for reproducibility).
    np.random.shuffle(list_txt)

    train_dataset = {'name':[], 'label':[], 'image':[],'box':[]}
    test_dataset = {'name':[], 'label':[],'image':[]}

    for line in list_txt:
        fields = line.split()
        if len(fields) != 4:
            # Blank or malformed line: skip it instead of failing the unpack.
            continue
        image_name, _class_id, species, _breed_id = fields
        image_file = './images/images/' + image_name + '.jpg'
        box_file = './annotations/xmls/' + image_name + '.xml'

        if not os.path.exists(image_file):
            continue

        img = image.load_img(image_file)
        # Scale factors that map original pixel coordinates to the resized image.
        sx = target_size[0] / img.width
        sy = target_size[1] / img.height

        img = img.resize(size = target_size)
        img = image.img_to_array(img)

        label = int(species) - 1

        # getBB returns None if the file vanished between the check and the read.
        box = getBB(box_file) if os.path.exists(box_file) else None

        if box is not None:
            scaled_box = [
                round(box[0] * sx),
                round(box[1] * sy),
                round(box[2] * sx),
                round(box[3] * sy),
            ]

            train_dataset['box'].append(scaled_box)
            train_dataset['name'].append(image_name)
            train_dataset['label'].append(label)
            train_dataset['image'].append(img)
        else:
            test_dataset['name'].append(image_name)
            test_dataset['label'].append(label)
            test_dataset['image'].append(img)

    # Convert the accumulated Python lists to NumPy arrays.
    for key in ('image', 'box', 'label', 'name'):
        train_dataset[key] = np.array(train_dataset[key])

    for key in ('image', 'label', 'name'):
        test_dataset[key] = np.array(test_dataset[key])

    return train_dataset, test_dataset

In [113]:
# Load every listed image at the default 224x224 size. Images with an XML box
# land in train_dataset, the remaining ones in test_dataset.
train_dataset, test_dataset = load_oxford_pets_3()

In [115]:
# Divide pixel values by 255.
x_train = train_dataset['image'] / 255.0
# Boxes are [xmin, ymin, xmax, ymax] in resized-pixel units. Normalise the x
# coordinates by the image width (axis 2) and the y coordinates by the image
# height (axis 1), so the targets stay correct for non-square images.
# For the default square 224x224 input this equals dividing by shape[1].
y_train = train_dataset['box'] / np.array([x_train.shape[2], x_train.shape[1]] * 2)
x_test = test_dataset['image'] / 255.0

In [116]:
def IOU(y_true, y_pred):
    """Intersection-over-union between ground-truth and predicted boxes.

    Args:
        y_true: Tensor whose last axis has size 4, laid out as
            [xmin, ymin, xmax, ymax].
        y_pred: Tensor with the same layout as ``y_true``.

    Returns:
        Tensor of IoU values with the last axis removed. Boxes whose max
        coordinate is below their min coordinate are treated as having zero
        area. When the union area is zero the result is 0 rather than NaN.
    """
    b1_xmin, b1_ymin, b1_xmax, b1_ymax = tf.unstack(y_true, 4, axis = -1)
    b2_xmin, b2_ymin, b2_xmax, b2_ymax = tf.unstack(y_pred, 4, axis = -1)

    zero = tf.convert_to_tensor(0.0, y_true.dtype)

    # Clamp at zero so inverted boxes contribute no area.
    b1_width = tf.maximum(zero, b1_xmax - b1_xmin)
    b1_height = tf.maximum(zero, b1_ymax - b1_ymin)
    b2_width = tf.maximum(zero, b2_xmax - b2_xmin)
    b2_height = tf.maximum(zero, b2_ymax - b2_ymin)

    b1_area = b1_width * b1_height
    b2_area = b2_width * b2_height

    # Corners of the overlapping rectangle (may be inverted when disjoint).
    intersect_ymin = tf.maximum(b1_ymin, b2_ymin)
    intersect_xmin = tf.maximum(b1_xmin, b2_xmin)
    intersect_ymax = tf.minimum(b1_ymax, b2_ymax)
    intersect_xmax = tf.minimum(b1_xmax, b2_xmax)

    intersect_width = tf.maximum(zero, intersect_xmax - intersect_xmin)
    intersect_height = tf.maximum(zero, intersect_ymax - intersect_ymin)
    intersect_area = intersect_width * intersect_height

    union_area = b1_area + b2_area - intersect_area
    # A plain division yields NaN when both boxes are degenerate (union == 0),
    # which would turn the batch-averaged metric into NaN. Return 0 instead.
    iou = tf.math.divide_no_nan(intersect_area, union_area)

    return iou

In [117]:
def creat_cnn2d(input_shape , num_units = 4):
    """Build a small convolutional network with a sigmoid output layer.

    Three Conv2D -> BatchNormalization -> MaxPool2D stages (16, 32 and 32
    filters, 3x3 kernels, ReLU) are stacked. The second and third stages are
    followed by Dropout(0.2). A Dense(128) head with batch normalisation and
    dropout feeds the output layer.

    Args:
        input_shape: Shape of one input sample, passed to ``Input``.
        num_units: Number of sigmoid output units (default 4).

    Returns:
        An uncompiled ``tf.keras.Model``.
    """
    inputs = Input(shape = input_shape)

    # (filters, apply dropout after pooling) for each convolutional stage.
    conv_stages = ((16, False), (32, True), (32, True))

    features = inputs
    for n_filters, with_dropout in conv_stages:
        features = Conv2D(filters = n_filters, kernel_size = (3,3), activation = 'relu')(features)
        features = BatchNormalization()(features)
        features = MaxPool2D()(features)
        if with_dropout:
            features = Dropout(rate = 0.2)(features)

    # Fully connected head.
    head = Flatten()(features)
    head = Dense(128, activation = 'relu')(head)
    head = BatchNormalization()(head)
    head = Dropout(rate = 0.2)(head)

    outputs = Dense(units = num_units, activation = 'sigmoid')(head)
    return tf.keras.Model(inputs = inputs, outputs = outputs)

In [118]:
# Build the network for a single-image input shape (the batch axis is dropped)
# and print its layer summary.
model = creat_cnn2d(input_shape=x_train.shape[1:])
model.summary()

Model: "model_1"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
input_3 (InputLayer)         [(None, 224, 224, 3)]     0         
_________________________________________________________________
conv2d_6 (Conv2D)            (None, 222, 222, 16)      448       
_________________________________________________________________
batch_normalization_8 (Batch (None, 222, 222, 16)      64        
_________________________________________________________________
max_pooling2d_6 (MaxPooling2 (None, 111, 111, 16)      0         
_________________________________________________________________
conv2d_7 (Conv2D)            (None, 109, 109, 32)      4640      
_________________________________________________________________
batch_normalization_9 (Batch (None, 109, 109, 32)      128       
_________________________________________________________________
max_pooling2d_7 (MaxPooling2 (None, 54, 54, 32)        0   

In [127]:
# Train with a default-configured Adam optimizer, minimising the mean-squared
# error between predicted and target box coordinates. IOU is only reported as
# a metric and is not part of the loss.
opt = Adam()
model.compile(optimizer=opt, loss='mse', metrics=[IOU])
# No validation data is passed, so only training-set numbers are logged.
model.fit(x_train, y_train, epochs=20, batch_size = 128, verbose = 1)

Train on 3671 samples
Epoch 1/20
Epoch 2/20
Epoch 3/20
Epoch 4/20
Epoch 5/20
Epoch 6/20
Epoch 7/20
Epoch 8/20
Epoch 9/20
Epoch 10/20
Epoch 11/20
Epoch 12/20
Epoch 13/20
Epoch 14/20
Epoch 15/20
Epoch 16/20
Epoch 17/20
Epoch 18/20
Epoch 19/20
Epoch 20/20


<tensorflow.python.keras.callbacks.History at 0x2dc8204cfd0>

In [42]:
# Score the model on the same samples it was trained on. This returns
# [loss, IOU] and is not a held-out evaluation.
model.evaluate(x_train, y_train, verbose=2)

3671/1 - 7s - loss: 0.1446 - IOU: 0.0877


[0.15923425983876047, 0.08770043]

In [43]:
# Number of leading samples to run through the model for inspection.
k = 8
# Predicted boxes for the first k training and test images.
train_box = model.predict(x_train[:k])
test_box = model.predict(x_test[:k])

In [44]:
# Show the raw sigmoid outputs for the first k training images, one row per
# image, in the same [xmin, ymin, xmax, ymax] order as y_train.
train_box

array([[2.0393729e-04, 9.9998629e-01, 4.4068098e-03, 2.4878979e-04],
       [1.9715995e-02, 0.0000000e+00, 8.8928497e-01, 1.4221370e-03],
       [0.0000000e+00, 0.0000000e+00, 9.9999899e-01, 9.7234720e-01],
       [1.3509393e-04, 2.9802322e-08, 2.6583672e-05, 9.7205961e-01],
       [6.6113472e-01, 1.9162893e-04, 0.0000000e+00, 1.8377930e-02],
       [1.9371510e-06, 0.0000000e+00, 5.1922947e-02, 3.7900895e-01],
       [5.2821934e-03, 5.1552057e-04, 4.2596978e-01, 8.8040102e-01],
       [1.1540949e-03, 0.0000000e+00, 7.4701631e-01, 6.6032773e-01]],
      dtype=float32)