In [1]:
import tensorflow as tf
from tensorflow.keras import layers, models

In [2]:
def inception_module(inputs, filters_1X1, filters_3X3_reduce, filters_3X3, filters_5X5_reduce, filters_5X5, filters_pool):
  """Build one Inception (v1) module with four parallel branches.

  Every branch reads the same `inputs` tensor; the branch outputs are then
  joined with `layers.concatenate` (default axis, i.e. the last one).

  Args:
    inputs: Keras tensor fed to all four branches.
    filters_1X1: Filters of the stand-alone 1x1 convolution branch.
    filters_3X3_reduce: Filters of the 1x1 convolution placed before the 3x3 convolution.
    filters_3X3: Filters of the 3x3 convolution.
    filters_5X5_reduce: Filters of the 1x1 convolution placed before the 5x5 convolution.
    filters_5X5: Filters of the 5x5 convolution.
    filters_pool: Filters of the 1x1 convolution applied after the 3x3 max pooling.

  Returns:
    The concatenation of the four branch outputs, in the order
    [1x1, 3x3, 5x5, pool projection].
  """

  # Branch 1: 1x1 convolution
  conv_1X1 = layers.Conv2D(filters=filters_1X1, kernel_size=(1,1), padding='same', activation='relu')(inputs)

  # Branch 2: 1x1 reduction -> 3x3 convolution.
  # The reduction must read `inputs` (not the output of branch 1) so that the
  # branches stay parallel.
  conv_3X3_reduce = layers.Conv2D(filters=filters_3X3_reduce, kernel_size=(1,1), padding='same', activation='relu')(inputs)
  conv_3X3 = layers.Conv2D(filters=filters_3X3, kernel_size=(3,3), padding='same', activation='relu')(conv_3X3_reduce)

  # Branch 3: 1x1 reduction -> 5x5 convolution (also fed directly from `inputs`).
  conv_5X5_reduce = layers.Conv2D(filters=filters_5X5_reduce, kernel_size=(1,1), padding='same', activation='relu')(inputs)
  conv_5X5 = layers.Conv2D(filters=filters_5X5, kernel_size=(5,5), padding='same', activation='relu')(conv_5X5_reduce)

  # Branch 4: 3x3 max pooling (stride 1, 'same' padding keeps the spatial size)
  # followed by a 1x1 projection.
  max_pool = layers.MaxPooling2D(pool_size=(3,3), strides=1, padding='same')(inputs)
  conv_pool = layers.Conv2D(filters=filters_pool, kernel_size=(1,1), padding='same', activation='relu')(max_pool)

  inception_out = layers.concatenate([conv_1X1, conv_3X3, conv_5X5, conv_pool])
  return inception_out

In [3]:
def auxilary_classifier(input_layer, number_of_class, activation):
  """Attach an auxiliary classification head to an intermediate feature map.

  The head is a straight pipeline: 5x5 average pooling (stride 3), a 1x1
  convolution with 128 filters, flatten, a 1024-unit dense layer, dropout
  with rate 0.7 and a final dense layer.

  Args:
    input_layer: Keras tensor the head is attached to.
    number_of_class: Number of units of the final dense layer.
    activation: Activation of the final dense layer.

  Returns:
    The output tensor of the final dense layer.
  """
  # Layers are created in the order they are applied.
  head_layers = [
      layers.AveragePooling2D(pool_size=(5,5), strides=3),
      layers.Conv2D(filters=128, kernel_size=(1,1), padding='same', activation='relu'),
      layers.Flatten(),
      layers.Dense(units=1024, activation='relu'),
      layers.Dropout(0.7),
      layers.Dense(units=number_of_class, activation=activation),
  ]

  tensor = input_layer
  for head_layer in head_layers:
    tensor = head_layer(tensor)
  return tensor

In [4]:
#GoogLeNet V1

def googlenet_v1(input_shape, number_of_class, activation):
  """Build the GoogLeNet (Inception v1) classifier with two auxiliary heads.

  Args:
    input_shape: Shape of a single input sample, forwarded to `tf.keras.Input`
      (the notebook uses (224, 224, 3)).
    number_of_class: Number of units of each of the three classification layers.
    activation: Activation of each of the three classification layers.

  Returns:
    A Keras `Model` with three outputs, in this order:
    [main output, auxiliary output attached after inception 4d,
     auxiliary output attached after inception 4a].
  """

  input_layer = tf.keras.Input(shape=input_shape)

  # Stage 1
  conv_1 = layers.Conv2D(filters=64, kernel_size=(7,7), strides=2, padding='same', activation='relu')(input_layer)
  max_pool_1 = layers.MaxPooling2D(pool_size=(3,3), strides=2, padding='same')(conv_1)

  # Stage 2
  conv_2_reduce = layers.Conv2D(filters=64, kernel_size=(1,1), padding='same', activation='relu')(max_pool_1)
  conv_2 = layers.Conv2D(filters=192, kernel_size=(3,3), padding='same', activation='relu')(conv_2_reduce)
  max_pool_2 = layers.MaxPooling2D(pool_size=(3,3), strides=2, padding='same')(conv_2)

  # Stage 3
  inception_3a = inception_module(inputs=max_pool_2, filters_1X1=64, filters_3X3_reduce=96, filters_3X3=128, filters_5X5_reduce=16, filters_5X5=32, filters_pool=32)
  inception_3b = inception_module(inputs=inception_3a, filters_1X1=128, filters_3X3_reduce=128, filters_3X3=192, filters_5X5_reduce=32, filters_5X5=96, filters_pool=64)
  max_pool_3 = layers.MaxPooling2D(pool_size=(3,3), strides=2, padding='same')(inception_3b)

  # Stage 4
  inception_4a = inception_module(inputs=max_pool_3, filters_1X1=192, filters_3X3_reduce=96, filters_3X3=208, filters_5X5_reduce=16, filters_5X5=48, filters_pool=64)
  # Auxiliary classifier 1 (branches off after inception 4a)
  auxilary_1 = auxilary_classifier(inception_4a, number_of_class, activation)

  inception_4b = inception_module(inputs=inception_4a, filters_1X1=160, filters_3X3_reduce=112, filters_3X3=224, filters_5X5_reduce=24, filters_5X5=64, filters_pool=64)
  inception_4c = inception_module(inputs=inception_4b, filters_1X1=128, filters_3X3_reduce=128, filters_3X3=256, filters_5X5_reduce=24, filters_5X5=64, filters_pool=64)
  inception_4d = inception_module(inputs=inception_4c, filters_1X1=112, filters_3X3_reduce=144, filters_3X3=288, filters_5X5_reduce=32, filters_5X5=64, filters_pool=64)
  # Auxiliary classifier 2 (branches off after inception 4d)
  auxilary_2 = auxilary_classifier(inception_4d, number_of_class, activation)

  inception_4e = inception_module(inputs=inception_4d, filters_1X1=256, filters_3X3_reduce=160, filters_3X3=320, filters_5X5_reduce=32, filters_5X5=128, filters_pool=128)
  max_pool_4 = layers.MaxPooling2D(pool_size=(3,3), strides=2, padding='same')(inception_4e)

  # Stage 5
  inception_5a = inception_module(inputs=max_pool_4, filters_1X1=256, filters_3X3_reduce=160, filters_3X3=320, filters_5X5_reduce=32, filters_5X5=128, filters_pool=128)
  inception_5b = inception_module(inputs=inception_5a, filters_1X1=384, filters_3X3_reduce=192, filters_3X3=384, filters_5X5_reduce=48, filters_5X5=128, filters_pool=128)

  # Final pooling and output layer.
  # Global average pooling averages over whatever spatial size stage 5
  # produces, so the trunk is not tied to a 7x7 final feature map
  # (i.e. 224x224 inputs); for a 7x7 map it equals a 7x7 average pool + flatten.
  avg_pool = layers.GlobalAveragePooling2D()(inception_5b)
  # 40% dropout before the classifier, as listed for GoogLeNet v1.
  drop_out = layers.Dropout(0.4)(avg_pool)
  output_layer = layers.Dense(units=number_of_class, activation=activation)(drop_out)

  model = models.Model(inputs=input_layer, outputs=[output_layer, auxilary_2, auxilary_1])
  return model

# Settings for the model built below.
input_shape = (224, 224, 3)
number_of_class = 20
activation = 'softmax'

# Build the network and print its layer-by-layer summary.
googlenet_v1_model = googlenet_v1(
    input_shape=input_shape,
    number_of_class=number_of_class,
    activation=activation,
)
googlenet_v1_model.summary()



Model: "model"
__________________________________________________________________________________________________
 Layer (type)                   Output Shape         Param #     Connected to                     
 input_1 (InputLayer)           [(None, 224, 224, 3  0           []                               
                                )]                                                                
                                                                                                  
 conv2d (Conv2D)                (None, 112, 112, 64  9472        ['input_1[0][0]']                
                                )                                                                 
                                                                                                  
 max_pooling2d (MaxPooling2D)   (None, 56, 56, 64)   0           ['conv2d[0][0]']                 
                                                                                              