In [None]:
from glob import glob
import cv2
import numpy as np
import xml.etree.ElementTree as ET
import keras
from keras import backend as K
from tensorflow.keras import optimizers
from keras.models import Sequential
from keras.utils import np_utils
from keras.preprocessing.image import ImageDataGenerator
from keras.layers import Dense, Activation, Flatten, Dropout, BatchNormalization, GlobalAveragePooling2D
from keras.layers import Conv2D, MaxPooling2D
from keras import regularizers
from keras.callbacks import LearningRateScheduler
import matplotlib.pyplot as plt
import os


# --- Training hyper-parameters ---
batch_size = 128
n_epochs = 12

# --- Pre-processing / regularisation switches ---
per_sample_normalization = True
data_augmentation = False
globalAVGPooling = True
drop_out = True

# --- Model / objective selection: change the trailing index to pick another option ---
capacity = ['low', 'high'][1]
last_layer_activation = ['softmax', 'sigmoid', None][1]
loss = ['categorical_crossentropy', 'binary_crossentropy', 'mean_squared_error', 'mean_absolute_error'][1]

# --- Data settings ---
txt = 'rnd'               # prefix of the figure file names written after training
img_size = 224            # images are resized to img_size x img_size
num_classes = 20
img_with_padding = False  # zero-pad the shorter image side before resizing

# Class name -> label index; the index is the position in this list.
voc_classes = {
    class_name: class_index
    for class_index, class_name in enumerate([
        'aeroplane', 'bicycle', 'bird', 'boat', 'bottle',
        'bus', 'car', 'cat', 'chair', 'cow',
        'diningtable', 'dog', 'horse', 'motorbike', 'person',
        'pottedplant', 'sheep', 'sofa', 'train', 'tvmonitor',
    ])
}


In [None]:
# Download and untar voc dataset
!wget http://host.robots.ox.ac.uk/pascal/VOC/voc2007/VOCtrainval_06-Nov-2007.tar
!tar xf VOCtrainval_06-Nov-2007.tar
print('VOCtrainval_06-Nov-2007.tar has been uncompressed successfully.')

In [None]:
# Read and format data
def read_content(xml_file: str):
    """Parse one annotation XML file.

    Args:
        xml_file: path of the XML file to parse.

    Returns:
        A pair ``(class_ids, boxes)`` with one entry per ``<object>`` element,
        in document order: ``class_ids`` holds the ``voc_classes`` index of the
        object's ``<name>``, ``boxes`` holds ``[xmin, ymin, xmax, ymax]`` as ints.

    Raises:
        KeyError: if an object name is not a key of ``voc_classes``.
    """
    annotation = ET.parse(xml_file).getroot()

    class_ids = []
    bounding_boxes = []
    for obj in annotation.iter('object'):
        class_ids.append(voc_classes[obj.find("name").text])

        # Coordinates are read in the order ymin, xmin, ymax, xmax ...
        corner = {}
        for tag in ("ymin", "xmin", "ymax", "xmax"):
            corner[tag] = int(obj.find("bndbox/" + tag).text)

        # ... and stored as [xmin, ymin, xmax, ymax].
        bounding_boxes.append(
            [corner["xmin"], corner["ymin"], corner["xmax"], corner["ymax"]]
        )

    return class_ids, bounding_boxes


def _pad_to_square(img):
    """Zero-pad the shorter side of an H x W x C image on both ends.

    The same amount ``abs(H - W) // 2`` is added on each side, so an odd
    difference leaves the result one pixel short of square. Square inputs are
    returned unchanged.
    """
    height, width, channels = img.shape[0], img.shape[1], img.shape[2]
    if height > width:
        border = np.zeros((height, (height - width) // 2, channels), img.dtype)
        return np.concatenate((border, img, border), axis=1)
    if height < width:
        border = np.zeros(((width - height) // 2, width, channels), img.dtype)
        return np.concatenate((border, img, border), axis=0)
    return img


# Sorted: glob returns files in filesystem order, which would make the seeded
# split below differ from one machine to another.
files = sorted(glob('VOCdevkit/VOC2007/JPEGImages/*.jpg'))

n_samples = len(files)
files = files[:n_samples]
x_train, y_train, x_test, y_test = [], [], [], []

np.random.seed(0)
# Draw the test indices WITHOUT replacement: randint() repeats indices, which
# silently shrinks the test split below the intended 20%.
ridx = np.random.choice(n_samples, int(n_samples*0.2), replace=False)
train_test_split = np.zeros(n_samples, dtype=bool)  # True -> sample goes to the test set
train_test_split[ridx] = True

for i, f in enumerate(files):
    img = cv2.imread(f)
    if img is None:
        # cv2.imread signals failure by returning None instead of raising
        raise IOError('Could not read image: ' + f)
    img = cv2.cvtColor(img, cv2.COLOR_BGR2RGB).astype(np.float32)
    if img_with_padding:
        img = _pad_to_square(img)
    img = cv2.resize(img, (img_size, img_size))

    # Multi-hot label vector: 1.0 for every class present in the annotation file.
    classes = np.zeros(num_classes)
    # .../JPEGImages/<id>.jpg  ->  .../Annotations/<id>.xml
    xml_path = os.path.splitext(f.replace('JPEGImages', 'Annotations', 1))[0] + '.xml'
    cnames, _ = read_content(xml_path)
    for c in cnames:
        classes[c] = 1.0

    if train_test_split[i]:
        x_test.append(img)
        y_test.append(classes)
    else:
        x_train.append(img)
        y_train.append(classes)

x_train = np.array(x_train)
y_train = np.array(y_train)
x_test = np.array(x_test)
y_test = np.array(y_test)

# Every image must end up in exactly one split, with one label vector each.
assert len(x_train) + len(x_test) == n_samples
assert len(x_train) == len(y_train) and len(x_test) == len(y_test)

In [None]:
# Check out the segmentation data
# Kept in its own variable so the JPEG list stored in `files` is not
# overwritten; sorted because glob order depends on the filesystem.
seg_files = sorted(glob('VOCdevkit/VOC2007/SegmentationClass/*.png'))
print(len(seg_files))
if seg_files:
    # Index 400 is just an arbitrary example; clamp it so a smaller set still works.
    im = cv2.imread(seg_files[min(400, len(seg_files) - 1)])
    im = cv2.cvtColor(im, cv2.COLOR_BGR2RGB)
    print(np.unique(im))
    plt.imshow(im)
    plt.show()
else:
    print('No segmentation masks found in VOCdevkit/VOC2007/SegmentationClass')
# The boundaries are there to separate objects that lie next to each other,
# but they will have to be removed.
# SegmentationObject can be used to see each object in a different colour.

In [None]:
# plot the data distribution: fraction of training images that contain each class
# Class names ordered by their label index, so bar k matches column k of y_train
# (a concrete list rather than a dict view, and not dependent on dict order).
labs = sorted(voc_classes, key=voc_classes.get)
data_balance = np.sum(y_train, 0) / y_train.shape[0]

fig, ax = plt.subplots()
ax.bar(labs, data_balance)
ax.set_xlabel('class')
ax.set_ylabel('fraction of training images')
ax.set_title('Training set class distribution')
# Tick count follows the number of classes instead of a hard-coded 20.
plt.xticks(range(len(labs)), labs, rotation='vertical')
plt.show()


In [None]:
weight_decay = 1e-4


def _add_conv_unit(net, filters, **conv_kwargs):
    """Append Conv2D(3x3, 'same', L2-regularised) -> ReLU -> BatchNormalization to ``net``."""
    net.add(Conv2D(filters, (3,3), padding='same',
                   kernel_regularizer=regularizers.l2(weight_decay), **conv_kwargs))
    net.add(Activation('relu'))
    net.add(BatchNormalization())


model = Sequential()
# Three stages with growing width and dropout rate. Each stage is one conv unit
# (two when capacity is 'high'), then 2x2 max-pooling and optional dropout.
for stage, (filters, drop_rate) in enumerate([(32, 0.2), (64, 0.3), (128, 0.4)]):
    # Only the very first layer declares the input shape.
    first_layer_kwargs = {'input_shape': x_train.shape[1:]} if stage == 0 else {}
    _add_conv_unit(model, filters, **first_layer_kwargs)
    # '==' instead of 'is': identity comparison against a string literal is not
    # a reliable equality test.
    if capacity == 'high':
        _add_conv_unit(model, filters)
    model.add(MaxPooling2D(pool_size=(2,2)))
    if drop_out:
        model.add(Dropout(drop_rate))

# Collapse the spatial dimensions before the classifier.
if globalAVGPooling:
    model.add(GlobalAveragePooling2D())
else:
    model.add(Flatten())

# One output unit per class.
model.add(Dense(num_classes, activation=last_layer_activation))

model.summary()

In [None]:
#training

def recall_m(y_true, y_pred):
    """Recall over all entries: true positives / (actual positives + epsilon).

    Both the element-wise product ``y_true * y_pred`` and ``y_true`` are clipped
    to [0, 1] and rounded before being summed.
    """
    hits = K.round(K.clip(y_true * y_pred, 0, 1))
    actual = K.round(K.clip(y_true, 0, 1))
    # epsilon keeps the division defined when there are no positives
    return K.sum(hits) / (K.sum(actual) + K.epsilon())

def precision_m(y_true, y_pred):
    """Precision over all entries: true positives / (predicted positives + epsilon).

    Both the element-wise product ``y_true * y_pred`` and ``y_pred`` are clipped
    to [0, 1] and rounded before being summed.
    """
    hits = K.round(K.clip(y_true * y_pred, 0, 1))
    predicted = K.round(K.clip(y_pred, 0, 1))
    # epsilon keeps the division defined when nothing is predicted positive
    return K.sum(hits) / (K.sum(predicted) + K.epsilon())

def f1_metric(y_true, y_pred):
    """F1 score: 2 * P * R / (P + R + epsilon), with P = precision_m and R = recall_m."""
    p = precision_m(y_true, y_pred)
    r = recall_m(y_true, y_pred)
    # epsilon keeps the ratio defined when both precision and recall are zero
    ratio = (p * r) / (p + r + K.epsilon())
    return 2 * ratio

#data augmentation
# Pre-processing shared by training and validation: per-sample standardisation
# when per_sample_normalization is set, otherwise a plain 1/255 rescale.
val_data_gen_args = dict(rescale=None if per_sample_normalization else 1./255,
                         samplewise_center=bool(per_sample_normalization),
                         samplewise_std_normalization=bool(per_sample_normalization))
# Training adds geometric augmentation on top of that, only when enabled.
train_data_gen_args = dict(val_data_gen_args,
                           rotation_range=20,
                           width_shift_range=0.1,
                           height_shift_range=0.1,
                           zoom_range=0.2) if data_augmentation else val_data_gen_args

# The settings MUST be unpacked with **. Passed positionally, the whole dict is
# bound to the first constructor parameter (featurewise_center) and none of the
# normalisation / augmentation options above takes effect.
training_datagen = ImageDataGenerator(**train_data_gen_args)
training_set = training_datagen.flow(x_train, y_train, batch_size=batch_size)
val_datagen = ImageDataGenerator(**val_data_gen_args)
val_set = val_datagen.flow(x_test, y_test, batch_size=batch_size)

opt_rms = optimizers.RMSprop(learning_rate=0.001, decay=1e-6)
# AUC: area under the curve, reported together with the custom F1 metric
model.compile(loss=loss, optimizer=opt_rms, metrics=['AUC', f1_metric])
# NOTE(review): fit_generator is deprecated in TF 2.x in favour of model.fit,
# which accepts generators too - confirm the target Keras/TF version before switching.
mdl_fit = model.fit_generator(training_set, steps_per_epoch=x_train.shape[0] // batch_size,
                              epochs=n_epochs, verbose=1, validation_data=val_set)

In [None]:
def _plot_history_curve(history, key, label, out_name):
    """Plot the train and validation curves of one logged quantity and save the figure.

    Args:
        history: dict of per-epoch value lists (``mdl_fit.history``).
        key: name of the training entry; the validation entry is ``'val_' + key``.
        label: short name used in the legend ('train <label>' / 'val <label>').
        out_name: file name handed to ``savefig``.
    """
    fig, ax = plt.subplots()
    ax.plot(history[key], label='train ' + label)
    ax.plot(history['val_' + key], label='val ' + label)
    ax.set_xlabel('epoch')
    ax.legend()
    # Save BEFORE showing: once plt.show() has displayed the figure, a following
    # plt.savefig() writes out an empty canvas.
    fig.savefig(out_name)
    plt.show()


# plot the loss
_plot_history_curve(mdl_fit.history, 'loss', 'loss', txt+'_LossVal_loss')

# plot the AUC
_plot_history_curve(mdl_fit.history, 'auc', 'auc', txt+'_aucVal')

# plot the F1
_plot_history_curve(mdl_fit.history, 'f1_metric', 'f1', txt+'_f1Val')

#save model to disk
model.save_weights('model.h5')


L'accuracy no és una bona opció per a multi-label, ja que els vectors d'etiquetes són esparsos: la majoria de valors són 0 i cada 0 ben predit compta com un encert (true negative), cosa que infla la mètrica. Però aquests encerts no ens interessen.
Per això utilitzem precision, recall i F1-score.

Crop objectes i afegir-los random
Per balancejar necessitarem + de 1 objecte per imatge
De les bounding boxes en tenim els extrems top-left i bottom-right (però els objectes no ocupen tota la bounding box)
Utilitzar IoU per mesurar overlapping dels objectes
SMPLR dataaugmentation
https://github.com/meysam-madadi/SMPLR/blob/master/Data/dataaugmentation.py