In [1]:
from keras.models import Model
import tensorflow as tf
from keras.layers import Reshape, Activation, Conv2D, Input, MaxPooling2D, BatchNormalization, Flatten, Dense, Lambda
from keras.layers.advanced_activations import LeakyReLU
from keras.layers.merge import concatenate
from keras.applications.mobilenet import MobileNet
from keras.applications import InceptionV3
from keras.applications.vgg16 import VGG16
from keras.applications.resnet50 import ResNet50

Using TensorFlow backend.


In [2]:
FULL_YOLO_BACKEND_PATH  = "full_yolo_backend.h5"   # should be hosted on a server
TINY_YOLO_BACKEND_PATH  = "tiny_yolo_backend.h5"   # should be hosted on a server
SQUEEZENET_BACKEND_PATH = "squeezenet_backend.h5"  # should be hosted on a server
MOBILENET_BACKEND_PATH  = "mobilenet_backend.h5"   # should be hosted on a server
INCEPTION3_BACKEND_PATH = "inception_backend.h5"   # should be hosted on a server
VGG16_BACKEND_PATH      = "vgg16_backend.h5"       # should be hosted on a server
RESNET50_BACKEND_PATH   = "resnet50_backend.h5"  

In [3]:
class BaseFeatureExtractor(object):
    
    def __init__( self, input_size ) :
        raise NotImplementedError("Error Message")
        
    def normalize( self, image ):
        raise NotImplementedError("Error Message ")
        
    def get_output_shape(self):
        return self.feature_extractor.get_output_shape_at(-1)[1:3]

    def extract(self, input_image):
        return self.feature_extractor(input_image)

In [4]:
class FullYoloFeature(BaseFeatureExtractor):
    
    def __init__( self, input_size ):
        
        input_image = Input(shape = (input_size , input_size , 3))
        
        def space_to_depth_x2(x):
            return tf.nn.space_to_depth(x , block_size = 2 )
        
        
        def Conv_block( inputs  , filters , kernel_size = 3 , strides = 1 , name = 1 , use_bias = False , use_pooling= False   ):

            x = Conv2D(filters , kernel_size=(kernel_size , kernel_size) , strides=(strides , strides) ,
                      padding = 'same' , name = "conv_" + str(name) , use_bias=use_bias)(inputs)
            x = BatchNormalization(name = 'norm_' + str(name))(x)
            x = LeakyReLU(alpha=0.1)(x)
            if use_pooling:
                x = MaxPooling2D(pool_size=( 2 , 2 ))(x)
            return x 
        
        
        x = Conv_block(input_image ,32 , name = 1 , use_pooling = True)
        
        x = Conv_block( x , 64 , name =  2 , use_pooling = True )
        x = Conv_block( x , 128 , name =  3 , use_pooling=False)
        x = Conv_block( x , 64 ,  name =  4 , use_pooling=False )
        
        x = Conv_block( x , 128 , name =  5 , use_pooling=True)
        x = Conv_block( x , 256 , name =  6 , use_pooling=False)
        x = Conv_block( x , 128 ,  name =  7 , use_pooling=False )
        
        
        x = Conv_block( x , 256 , name =  8 , use_pooling=True)
        x = Conv_block( x , 512 , name =  9 , use_pooling=False)
        x = Conv_block( x , 256 ,  name = 10 , use_pooling=False )
        
        
        x = Conv_block( x , 512 , name =  11 , use_pooling=False)
        x = Conv_block( x , 256 ,  name = 12 , use_pooling=False )
        
        
        x = Conv_block( x , 512 , name =  13 , use_pooling=False)
        
        skip_connection  =  x
        x = MaxPooling2D(pool_size=( 2 , 2 ))(x)
        
        
        x = Conv_block( x , 1024 , kernel_size= 3 ,name =  14 , use_pooling=False)
        x = Conv_block( x , 512 , kernel_size= 1 ,name =  15 , use_pooling=False)
        x = Conv_block( x , 1024 , kernel_size= 3 ,name =  16 , use_pooling=False)
        x = Conv_block( x , 512 , kernel_size= 1 ,name =  17 , use_pooling=False)
        
        x = Conv_block( x , 1024 , kernel_size= 3 ,name =  18 , use_pooling=False)
        x = Conv_block( x , 1024 , kernel_size= 3 ,name =  19 , use_pooling=False)
        x = Conv_block( x , 1024 , kernel_size= 3 ,name =  20 , use_pooling=False)
       
    
        skip_connection = Conv2D(64, (1,1), strides=(1,1), padding='same', name='conv_21', use_bias=False)(skip_connection)
        skip_connection = BatchNormalization(name='norm_21')(skip_connection)
        skip_connection = LeakyReLU(alpha=0.1)(skip_connection)
        skip_connection = Lambda(space_to_depth_x2)(skip_connection)
        
        x = concatenate([skip_connection, x])
        
        x = Conv2D(1024, (3,3), strides=(1,1), padding='same', name='conv_22', use_bias=False)(x)
        x = BatchNormalization(name='norm_22')(x)
        x = LeakyReLU(alpha=0.1)(x)
        
        
        self.feature_extractor = Model(input_image, x) 
        #self.feature_extractor.load_weights(FULL_YOLO_BACKEND_PATH)
        
        
    
    def normalize( self, image ):
        return image /255.
        
        
                    
            

In [5]:
inputs_value = Input([512 , 512 , 3 ])
model = FullYoloFeature(512)

In [6]:
model.feature_extractor.summary()

Model: "model_1"
__________________________________________________________________________________________________
Layer (type)                    Output Shape         Param #     Connected to                     
input_2 (InputLayer)            (None, 512, 512, 3)  0                                            
__________________________________________________________________________________________________
conv_1 (Conv2D)                 (None, 512, 512, 32) 864         input_2[0][0]                    
__________________________________________________________________________________________________
norm_1 (BatchNormalization)     (None, 512, 512, 32) 128         conv_1[0][0]                     
__________________________________________________________________________________________________
leaky_re_lu_1 (LeakyReLU)       (None, 512, 512, 32) 0           norm_1[0][0]                     
____________________________________________________________________________________________

In [7]:
class  TinyYoloFeature(BaseFeatureExtractor):
    
    def __init__( self, input_size ):
        
        input_image  = Input(shape= ( input_size , input_size , 3 ))
        
        def Conv_Block(inputs , filters , kernel_size = 3 , stride = 1 , padding = 'same' , name = 1, use_bias = False  , use_maxpooling = False):
            
            x = Conv2D( filters , (kernel_size , kernel_size) , strides=stride , padding = padding , name = 'conv_'+ str(name), use_bias = False )(inputs)
            x = BatchNormalization(name = 'nor_' + str(name))(x)
            x = LeakyReLU(alpha=0.1)(x)
            if use_maxpooling:
                x = MaxPooling2D(pool_size=(2,2))(x)
            return x
        
        
        x = Conv_Block(input_image , 16 , kernel_size=3 , stride=1 , padding='same', name = 1, use_bias=False , use_maxpooling=True )
        
        for i in range(0 ,4):
            x = Conv_Block(x , 32*(2**i) , kernel_size=3 , stride= 1 , padding='same', name = i+2 , use_bias=False , use_maxpooling=True )
            
        
        x = Conv_Block( x , 512 , kernel_size= 3 , name = 6 ,  use_bias= False , use_maxpooling=True )
        
        for i  in range(0,2 ):
            x = Conv_Block( x , 1024 , kernel_size=3 , name = (i+7) , use_maxpooling=True )
        
        self.feature_extractor = Model(input_image, x)  
        #self.feature_extractor.load_weights(TINY_YOLO_BACKEND_PATH)
        
    def normalize( self, image ):
        return image /255.
        

In [8]:
model = TinyYoloFeature(512)

In [9]:
model.feature_extractor.summary()

Model: "model_2"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
input_3 (InputLayer)         (None, 512, 512, 3)       0         
_________________________________________________________________
conv_1 (Conv2D)              (None, 512, 512, 16)      432       
_________________________________________________________________
nor_1 (BatchNormalization)   (None, 512, 512, 16)      64        
_________________________________________________________________
leaky_re_lu_23 (LeakyReLU)   (None, 512, 512, 16)      0         
_________________________________________________________________
max_pooling2d_6 (MaxPooling2 (None, 256, 256, 16)      0         
_________________________________________________________________
conv_2 (Conv2D)              (None, 256, 256, 32)      4608      
_________________________________________________________________
nor_2 (BatchNormalization)   (None, 256, 256, 32)      128 

In [10]:
class MobileNetFeature(BaseFeatureExtractor):
    
    def __init__( self, input_size ):
        
        input_image  =  Input(shape = (input_size , input_size , 3 ))
        mobilenet    =  MobileNet(input_shape = (244 , 244 , 3 ) , include_top = False )
        #mobilenet.load_weights(MOBILENET_BACKEND_PATH)
        x = mobilenet(input_image)
        
        self.feature_extractor = Model(input_image, x) 
        
        
    
    def normalize(self, image):
        image = image / 255.
        image = image  - 0.5
        image = image *2.
        return image
    

        


In [11]:
model = MobileNetFeature(512)
model.feature_extractor.summary()



Model: "model_3"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
input_4 (InputLayer)         (None, 512, 512, 3)       0         
_________________________________________________________________
mobilenet_1.00_224 (Model)   multiple                  3228864   
Total params: 3,228,864
Trainable params: 3,206,976
Non-trainable params: 21,888
_________________________________________________________________


In [12]:
class SqueezeNetFeature(BaseFeatureExtractor):
    """docstring for ClassName"""
    def __init__(self, input_size):

        # define some auxiliary variables and the fire module
        sq1x1  = "squeeze1x1"
        exp1x1 = "expand1x1"
        exp3x3 = "expand3x3"
        relu   = "relu_"

        def fire_module(x, fire_id, squeeze=16, expand=64):
            s_id = 'fire' + str(fire_id) + '/'

            x     = Conv2D(squeeze, (1, 1), padding='valid', name=s_id + sq1x1)(x)
            x     = Activation('relu', name=s_id + relu + sq1x1)(x)

            left  = Conv2D(expand,  (1, 1), padding='valid', name=s_id + exp1x1)(x)
            left  = Activation('relu', name=s_id + relu + exp1x1)(left)

            right = Conv2D(expand,  (3, 3), padding='same',  name=s_id + exp3x3)(x)
            right = Activation('relu', name=s_id + relu + exp3x3)(right)

            x = concatenate([left, right], axis=3, name=s_id + 'concat')

            return x

        # define the model of SqueezeNet
        input_image = Input(shape=(input_size, input_size, 3))

        x = Conv2D(64, (3, 3), strides=(2, 2), padding='valid', name='conv1')(input_image)
        x = Activation('relu', name='relu_conv1')(x)
        x = MaxPooling2D(pool_size=(3, 3), strides=(2, 2), name='pool1')(x)

        x = fire_module(x, fire_id=2, squeeze=16, expand=64)
        x = fire_module(x, fire_id=3, squeeze=16, expand=64)
        x = MaxPooling2D(pool_size=(3, 3), strides=(2, 2), name='pool3')(x)

        x = fire_module(x, fire_id=4, squeeze=32, expand=128)
        x = fire_module(x, fire_id=5, squeeze=32, expand=128)
        x = MaxPooling2D(pool_size=(3, 3), strides=(2, 2), name='pool5')(x)

        x = fire_module(x, fire_id=6, squeeze=48, expand=192)
        x = fire_module(x, fire_id=7, squeeze=48, expand=192)
        x = fire_module(x, fire_id=8, squeeze=64, expand=256)
        x = fire_module(x, fire_id=9, squeeze=64, expand=256)

        self.feature_extractor = Model(input_image, x)  
        #self.feature_extractor.load_weights(SQUEEZENET_BACKEND_PATH)

    def normalize(self, image):
        image = image[..., ::-1]
        image = image.astype('float')

        image[..., 0] -= 103.939
        image[..., 1] -= 116.779
        image[..., 2] -= 123.68

        return image    


In [13]:
model = SqueezeNetFeature(512)
model.feature_extractor.summary()

Model: "model_4"
__________________________________________________________________________________________________
Layer (type)                    Output Shape         Param #     Connected to                     
input_6 (InputLayer)            (None, 512, 512, 3)  0                                            
__________________________________________________________________________________________________
conv1 (Conv2D)                  (None, 255, 255, 64) 1792        input_6[0][0]                    
__________________________________________________________________________________________________
relu_conv1 (Activation)         (None, 255, 255, 64) 0           conv1[0][0]                      
__________________________________________________________________________________________________
pool1 (MaxPooling2D)            (None, 127, 127, 64) 0           relu_conv1[0][0]                 
____________________________________________________________________________________________

In [14]:
class Inception3Feature(BaseFeatureExtractor):
    """docstring for ClassName"""
    def __init__(self, input_size):
        input_image = Input(shape=(input_size, input_size, 3))

        inception = InceptionV3(input_shape=(input_size,input_size,3), include_top=False)
        #inception.load_weights(INCEPTION3_BACKEND_PATH)

        x = inception(input_image)

        self.feature_extractor = Model(input_image, x)  

    def normalize(self, image):
        image = image / 255.
        image = image - 0.5
        image = image * 2.

        return image

In [17]:
model = Inception3Feature(512)
model.feature_extractor.summary()

Downloading data from https://github.com/fchollet/deep-learning-models/releases/download/v0.5/inception_v3_weights_tf_dim_ordering_tf_kernels_notop.h5
Model: "model_5"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
input_7 (InputLayer)         (None, 512, 512, 3)       0         
_________________________________________________________________
inception_v3 (Model)         (None, 14, 14, 2048)      21802784  
Total params: 21,802,784
Trainable params: 21,768,352
Non-trainable params: 34,432
_________________________________________________________________


In [15]:
class VGG16Feature(BaseFeatureExtractor):
    """docstring for ClassName"""
    def __init__(self, input_size):
        vgg16 = VGG16(input_shape=(input_size, input_size, 3), include_top=False)
        #vgg16.load_weights(VGG16_BACKEND_PATH)

        self.feature_extractor = vgg16

    def normalize(self, image):
        image = image[..., ::-1]
        image = image.astype('float')

        image[..., 0] -= 103.939
        image[..., 1] -= 116.779
        image[..., 2] -= 123.68

        return image 


In [18]:
model = VGG16Feature(512)
model.feature_extractor.summary()

Downloading data from https://github.com/fchollet/deep-learning-models/releases/download/v0.1/vgg16_weights_tf_dim_ordering_tf_kernels_notop.h5
Model: "vgg16"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
input_9 (InputLayer)         (None, 512, 512, 3)       0         
_________________________________________________________________
block1_conv1 (Conv2D)        (None, 512, 512, 64)      1792      
_________________________________________________________________
block1_conv2 (Conv2D)        (None, 512, 512, 64)      36928     
_________________________________________________________________
block1_pool (MaxPooling2D)   (None, 256, 256, 64)      0         
_________________________________________________________________
block2_conv1 (Conv2D)        (None, 256, 256, 128)     73856     
_________________________________________________________________
block2_conv2 (Conv2D)        (None, 256, 256, 128

In [19]:
class ResNet50Feature(BaseFeatureExtractor):
    """docstring for ClassName"""
    def __init__(self, input_size):
        resnet50 = ResNet50(input_shape=(input_size, input_size, 3), include_top=False)
        resnet50.layers.pop() # remove the average pooling layer
        #resnet50.load_weights(RESNET50_BACKEND_PATH)

        self.feature_extractor = Model(resnet50.layers[0].input, resnet50.layers[-1].output)

    def normalize(self, image):
        image = image[..., ::-1]
        image = image.astype('float')

        image[..., 0] -= 103.939
        image[..., 1] -= 116.779
        image[..., 2] -= 123.68
        return image

In [20]:
model = ResNet50Feature(512)
model.feature_extractor.summary()



Downloading data from https://github.com/fchollet/deep-learning-models/releases/download/v0.2/resnet50_weights_tf_dim_ordering_tf_kernels_notop.h5
Model: "model_6"
__________________________________________________________________________________________________
Layer (type)                    Output Shape         Param #     Connected to                     
input_10 (InputLayer)           (None, 512, 512, 3)  0                                            
__________________________________________________________________________________________________
conv1_pad (ZeroPadding2D)       (None, 518, 518, 3)  0           input_10[0][0]                   
__________________________________________________________________________________________________
conv1 (Conv2D)                  (None, 256, 256, 64) 9472        conv1_pad[0][0]                  
__________________________________________________________________________________________________
bn_conv1 (BatchNormalization)   (None, 256, 