In [0]:
import copy
import json
import os

import cv2
import imgaug as ia
import numpy as np
import pandas

# Parsing the annotation and image arrays

In [0]:
def parse_annotation():

    colnames = ['filename', 'file_size', 'file_attributes', 'region_count', 'region_id', 'region_shape_attributes', 'region_attributes']
    data = pandas.read_csv('annotation/via_export_csv.csv', names=colnames)

    name      = data.filename.tolist()
    reg_shape = data.region_shape_attributes.tolist()

    all_ann = []
    all_img = []
    Y_train = [None]*14

    cones = 0
    dim = [None]*2

    while cones < len(name)-1:

        try:
            cones += 1
            
            image = cv2.imread("images/"+str(name[cones]))

            if image is None:
                print("File does not exist")

            else:
                all_img.append(image[:])
                for i in range(7):
                    string = reg_shape[cones].split(',')

                    for j in range(2):
                        temp = string[j+1].split(':')

                        if j==1:
                            temp[1] = temp[1].replace("}", "")
                        
                        dim[j] = int(temp[1])
                    
                    x, y = dim[0], dim[1]
                    
                    Y_train[2*i]   = x
                    Y_train[(2*i)+1] = y
                    cones += 1

                cones -=1
                all_ann.append(Y_train[:])
        
        except FileNotFoundError:
            n = name[cones]
            if n != t:
                print("File does not exist")
                t = name[cones]
            continue
   
    return all_img,all_ann

def normalize(X_train):
    new = []
    for i in range(len(X_train)):
        new.append(X_train[i]/255.)
    return new

In [0]:
from keras.models import Model
from keras.layers import Reshape, Activation, Conv2D, Input, MaxPooling2D, BatchNormalization, Flatten, Dense, Lambda
from keras.layers.advanced_activations import LeakyReLU
import tensorflow as tf
import keras.backend as K
from keras.layers.merge import concatenate
from keras.optimizers import SGD, Adam, RMSprop
from keras.initializers import glorot_uniform
from keras.callbacks import EarlyStopping, ModelCheckpoint, TensorBoard
from keras.applications.resnet50 import ResNet50

In [0]:
class keypoints(object):
    def __init__(self, backend,
                       input_size, 
                       labels,
                       classes=14):
        
        self.X_predicted = [None] * 7
        self.Y_predicted = [None] * 7
        self.X_groundtruth = [None] * 7
        self.Y_groundtruth = [None] * 7
        
        self.input_size = input_size
        
        self.labels   = list(labels)
        self.nb_class = len(self.labels)

        self.class_wt = np.ones(self.nb_class, dtype='float32')

        ##########################
        # Make the model
        ##########################

        # make the feature extractor layers
        # input_image = Input(shape=(self.input_size, self.input_size, 3))

        base_model = ResNet50(weights= None, include_top=False, input_shape= (self.input_size,self.input_size,3))

        x = base_model.output
        x = Flatten()(x)
        output = Dense(14, activation='relu', name='fc', kernel_initializer = 'normal')(x)

        self.model = Model(inputs = base_model.input, outputs = output)

        self.model.summary()


    def delta_(self, p1, p2, x, y):
        return K.sqrt((x[p1]-x[p1])**2 + (y[p2]-y[p2])**2)

    def cross_ratio(self, surf, x, y):
        if surf == "left":
            return (self.delta_(0, 2, x, y)/self.delta_(0, 3, x, y)) / (self.delta_(1, 2, x, y)/self.delta_(1, 3, x, y))
        elif surf == "right":
            return (self.delta_(0, 5, x, y)/self.delta_(0, 6, x, y)) / (self.delta_(4, 5, x, y)/self.delta_(4, 6, x, y))
    

    def custom_loss(self, groundtruth, pridection):      # ETH custom loss
        segma = 0.0001  # Cross ratio controlling factor
        Cr3D = 1.39408  # The 3D cross ratio of the cone
        loss = 0
        
        loss = (K.square(pridection-groundtruth))

        # for i in range(0, 14, 2):
        #     self.X_predicted[int(i/2)] = pridection[i]
        #     self.X_groundtruth[int(i/2)] = groundtruth[i]

        # for j in range(1, 14, 2):
        #     self.Y_predicted[int(j/2)] = pridection[j]
        #     self.Y_groundtruth[int(j/2)] = groundtruth[j]
        

        # for k in range(0,7):
        #     loss += (self.X_predicted[k]-self.X_groundtruth[k])**2 + (self.Y_predicted[k]-self.Y_groundtruth[k])**2 + \
        #         segma*((self.cross_ratio("left", self.X_predicted, self.Y_predicted)-Cr3D)**2 + (self.cross_ratio("right", self.X_predicted, self.Y_predicted)-Cr3D)**2)

        return loss

    def load_weights(self, weight_path):
        self.model.load_weights(weight_path)

    def train(self, train_imgs,     # the list of images to train the model
                    train_times,    # the number of time to repeat the training set, often used for small datasets
                    nb_epochs,      # number of epoches
                    learning_rate,  # the learning rate
                    batch_size,     # the size of the batch
                    saved_weights_name='best_weights.h5',
                    debug=False):     

        self.batch_size = batch_size

        self.debug = debug

        ############################################
        # Make train generators
        ############################################  

        X_train_orig ,Y_train_orig = parse_annotation()

        X_train = np.array(normalize(X_train_orig))
        Y_train = np.array(Y_train_orig)

        print("Shape of X_train is: " + str(np.array(X_train).shape))
        print("Shape of Y_train is: " + str(np.array(Y_train).shape))

        ############################################
        # Compile the model
        ############################################

        sgd = SGD(lr=0.0001, momentum=0.9)  
        self.model.compile(optimizer = sgd, loss = self.custom_loss, metrics = ['accuracy'])

        ############################################
        # Make a few callbacks
        ############################################

        early_stop = EarlyStopping(monitor='val_loss', 
                           min_delta=0.001, 
                           patience=3, 
                           mode='min', 
                           verbose=1)
        checkpoint = ModelCheckpoint(saved_weights_name, 
                                     monitor='val_loss', 
                                     verbose=1, 
                                     save_best_only=True, 
                                     mode='min', 
                                     period=1)
        tensorboard = TensorBoard(log_dir=os.path.expanduser('~/logs/'), 
                                  histogram_freq=0, 
                                  #write_batch_performance=True,
                                  write_graph=True, 
                                  write_images=False)

        ############################################
        # Start the training process
        ############################################        

        self.model.fit(X_train, Y_train, 
                        epochs = nb_epochs, 
                        batch_size = self.batch_size,
                        callbacks = [checkpoint], 
                        workers = 3,
                        max_queue_size = 8) 

        self.model.save_weights('weights.h5')


    def predict(self, image):
        image_h, image_w, _ = image.shape
        image = cv2.resize(image, (self.input_size, self.input_size))
        image = np.array(normalize(image))

        input_image = image[:,:,::-1]
        input_image = np.expand_dims(input_image, 0)

        points = self.model.predict(input_image)
        

        return points

# Training Process

In [0]:
os.environ["CUDA_DEVICE_ORDER"]="PCI_BUS_ID"
os.environ["CUDA_VISIBLE_DEVICES"]="0"

In [0]:
# Load every annotated image together with its keypoint list.
# NOTE(review): kp.train() calls parse_annotation() on its own and does not read
# the train_imgs it is given, so the dataset ends up being loaded twice.
train_imgs, train_labels = parse_annotation()

In [0]:
with open(config.json) as config_buffer:    
        config = json.loads(config_buffer.read())

In [0]:
kp = keypoints(backend             = config['model']['backend'],
                input_size          = config['model']['input_size'], 
                labels              = config['model']['labels'])

In [0]:
kp.train(train_imgs         = train_imgs,
          train_times        = config['train']['train_times'],
          nb_epochs          = config['train']['nb_epochs'], 
          learning_rate      = config['train']['learning_rate'], 
          batch_size         = config['train']['batch_size'],
          saved_weights_name = config['train']['saved_weights_name'],
          debug              = config['train']['debug'])

In [0]:
kp.load_weights(weights_path)

# Prediction

In [0]:
image = cv2.imread(image_path)
points = kp.predict(image)