# GoogLeNet Implementation in TensorFlow

In [1]:
import numpy as np
import tensorflow as tf

  return f(*args, **kwds)
  from ._conv import register_converters as _register_converters


## Building the Graph

In [2]:
# Clear the default graph before building the network (presumably so that re-running
# the notebook does not pile up duplicate ops and variables in the same graph).
tf.reset_default_graph()

### Inputs, Placeholders, and Constants

In [3]:
NUM_CLASSES = 10
IMAGE_WIDTH = 224   # Image width for the original GoogLeNet architecture for the ImageNet Challenge
IMAGE_HEIGHT = 224  # Image height for the original GoogLeNet architecture for the ImageNet Challenge

# Batch of IMAGE_HEIGHT x IMAGE_WIDTH (224 x 224) images with 3 color channels (RGB).
# Axis 1 is the vertical axis and axis 2 the horizontal one, matching the
# [1, stride_y, stride_x, 1] stride layout used by the conv helper.
x = tf.placeholder(tf.float32, shape=[None, IMAGE_HEIGHT, IMAGE_WIDTH, 3])

# Label batch with one column per class; sized from NUM_CLASSES so that the
# labels stay in sync with the width of the classification layers.
y_true = tf.placeholder(tf.float32, shape=[None, NUM_CLASSES])

# Keep probability fed to the dropout layer in front of the final classifier
hold_prob = tf.placeholder(tf.float32)

# Used for batch normalization - a boolean to indicate whether or not we are training
training = tf.placeholder(tf.bool)

### Helper Functions

In [4]:
def init_weights(shape):
    """Return a new weight variable of the given shape.

    The initial values are drawn from a truncated normal distribution with a
    standard deviation of 0.1.
    """
    return tf.Variable(tf.truncated_normal(shape, stddev=0.1))

def init_bias(shape):
    """Return a new bias variable of the given shape, filled with the constant 0.1."""
    return tf.Variable(tf.constant(0.1, shape=shape))

def conv2d(x, W, pad=True, strides=[1,1,1,1]):
    """Apply a 2D convolution of kernel ``W`` to ``x``.

    A truthy ``pad`` selects 'SAME' padding, a falsy one selects 'VALID'.
    ``strides`` is forwarded unchanged to ``tf.nn.conv2d``.
    """
    padding_mode = 'SAME' if pad else 'VALID'
    return tf.nn.conv2d(x, W, strides=strides, padding=padding_mode)

def max_pool_nbyn(x, name, filter_size=2, stride=2, pad=True):
    """Create a max pooling layer with a square window.

    The window is ``filter_size`` x ``filter_size`` and moves by ``stride`` in
    both spatial directions. A truthy ``pad`` selects 'SAME' padding, a falsy
    one selects 'VALID'. ``name`` becomes the name of the pooling op.
    """
    window = [1, filter_size, filter_size, 1]
    step = [1, stride, stride, 1]
    padding_mode = 'SAME' if pad else 'VALID'
    return tf.nn.max_pool(x, ksize=window, strides=step,
                          padding=padding_mode, name=name)
def average_pool_nbyn(x, name, filter_size=2, stride=2, pad=True):
    """Create an average pooling layer with a square window.

    The window is ``filter_size`` x ``filter_size`` and moves by ``stride`` in
    both spatial directions. A truthy ``pad`` selects 'SAME' padding, a falsy
    one selects 'VALID'. ``name`` becomes the name of the pooling op.
    """
    window = [1, filter_size, filter_size, 1]
    step = [1, stride, stride, 1]
    padding_mode = 'SAME' if pad else 'VALID'
    return tf.nn.avg_pool(x, ksize=window, strides=step,
                          padding=padding_mode, name=name)

def convolutional_layer(input_x, shape, strides=[1,1,1,1]):
    """Create a convolutional layer (weights, bias and ReLU activation).

    Args:
        input_x: Input tensor to convolve.
        shape: Kernel shape; its last entry (``shape[3]``) is the number of
            output channels and therefore the size of the bias vector.
        strides: Strides forwarded to the convolution.

    Returns:
        The ReLU-activated result of the SAME-padded convolution plus bias.
    """
    W = init_weights(shape)
    b = init_bias([shape[3]])
    # `strides` must be passed by keyword: the second positional slot after
    # the kernel in conv2d is `pad`, so a positional call would treat the
    # stride list as a (truthy) padding flag and silently drop the strides.
    return tf.nn.relu(conv2d(input_x, W, strides=strides) + b)

def normal_full_layer(input_layer, size):
    """Create a fully connected (affine) layer with ``size`` output units.

    The input width is read from the static size of axis 1 of
    ``input_layer``. No activation is applied: the result is
    ``input_layer @ W + b``.
    """
    fan_in = int(input_layer.get_shape()[1])
    weights = init_weights([fan_in, size])
    biases = init_bias([size])
    projected = tf.matmul(input_layer, weights)
    return projected + biases

def batch_normalization(input_layer, training):
    """Apply batch normalization to ``input_layer``.

    ``training`` is forwarded to ``tf.layers.batch_normalization`` and tells
    it whether the graph is being run in training mode.
    """
    return tf.layers.batch_normalization(inputs=input_layer, training=training)

def local_response_normalization(input_layer, radius, alpha, beta, name, bias=1.0):
    """Apply local response normalization to ``input_layer``.

    Args:
        input_layer: Tensor to normalize.
        radius: Forwarded as ``depth_radius``.
        alpha: Forwarded as ``alpha``.
        beta: Forwarded as ``beta``.
        name: Name of the resulting op.
        bias: Forwarded as ``bias``.

    Returns:
        The normalized tensor.
    """
    # Normalize the tensor passed in by the caller, not the module-level
    # input placeholder `x`.
    return tf.nn.local_response_normalization(input_layer, depth_radius=radius,
                                              alpha=alpha, beta=beta,
                                              bias=bias, name=name)


def conv(x, filter_height, filter_width, num_filters, stride_y, stride_x, name,
         padding='SAME', groups=1):
    """Create a (optionally grouped) convolution layer with bias and ReLU.

    Adapted from: https://github.com/ethereon/caffe-tensorflow

    Args:
        x: Input tensor; its last axis is taken as the channel axis.
        filter_height: Kernel height.
        filter_width: Kernel width.
        num_filters: Number of output channels.
        stride_y: Stride along axis 1.
        stride_x: Stride along axis 2.
        name: Variable scope holding the 'weights' and 'biases' variables;
            the scope name is also used as the name of the ReLU op.
        padding: Padding mode forwarded to ``tf.nn.conv2d``.
        groups: Number of groups. With more than one group, the input and
            the kernel are split along axis 3, convolved pairwise and
            concatenated again along axis 3.

    Returns:
        The ReLU-activated convolution output.

    Raises:
        ValueError: If the number of input channels or ``num_filters`` is
            not divisible by ``groups``.
    """
    # Get number of input channels
    input_channels = int(x.get_shape()[-1])

    if input_channels % groups != 0 or num_filters % groups != 0:
        raise ValueError(
            'input channels (%d) and num_filters (%d) must both be divisible '
            'by groups (%d)' % (input_channels, num_filters, groups))

    def convolve(inputs, kernel):
        # Shared convolution used for the single-group and the grouped case
        return tf.nn.conv2d(inputs, kernel,
                            strides=[1, stride_y, stride_x, 1],
                            padding=padding)

    with tf.variable_scope(name) as scope:
        # Create tf variables for the weights and biases of the conv layer.
        # Integer division keeps the shape made of ints; true division would
        # put a float into the variable shape.
        weights = tf.get_variable('weights', shape=[filter_height,
                                                    filter_width,
                                                    input_channels // groups,
                                                    num_filters])
        biases = tf.get_variable('biases', shape=[num_filters])

    if groups == 1:
        conv_out = convolve(x, weights)
    else:
        # Split input and weights and convolve them separately
        input_groups = tf.split(axis=3, num_or_size_splits=groups, value=x)
        weight_groups = tf.split(axis=3, num_or_size_splits=groups,
                                 value=weights)
        output_groups = [convolve(i, k) for i, k in zip(input_groups, weight_groups)]

        # Concat the convolved output together again
        conv_out = tf.concat(axis=3, values=output_groups)

    # Add biases (the reshape to the convolution's own shape is kept from the
    # adapted implementation)
    biased = tf.reshape(tf.nn.bias_add(conv_out, biases), tf.shape(conv_out))

    # Apply relu function
    return tf.nn.relu(biased, name=scope.name)

### Function for Generating Inception Module

In [5]:
def inception_module(input_layer, conv_1x1_depth, reduce_3x3_depth, conv_3x3_depth, 
                     reduce_5x5_depth, conv_5x5_depth, pool_proj_depth, name):
    """Build one Inception module and return its depth-concatenated output.

    Four branches are computed from ``input_layer``, all with stride 1:
      * a 1 x 1 convolution with ``conv_1x1_depth`` filters,
      * a 1 x 1 reduction (``reduce_3x3_depth``) followed by a 3 x 3
        convolution (``conv_3x3_depth``),
      * a 1 x 1 reduction (``reduce_5x5_depth``) followed by a 5 x 5
        convolution (``conv_5x5_depth``),
      * a 3 x 3 max pooling followed by a 1 x 1 projection
        (``pool_proj_depth``).

    ``name`` prefixes the scope/op name of every layer in the module. The
    branch outputs are concatenated along axis 3.
    """
    def _unit_stride_conv(inputs, kernel, depth, suffix):
        # Square kernel, stride 1 in both directions, single group
        return conv(inputs, kernel, kernel, depth, 1, 1, groups=1, name=(name+suffix))

    branch_1x1 = _unit_stride_conv(input_layer, 1, conv_1x1_depth, '_conv_1x1')

    # 1 x 1 bottlenecks that shrink the depth ahead of the larger kernels
    bottleneck_3x3 = _unit_stride_conv(input_layer, 1, reduce_3x3_depth, '_reduce_3x3')
    bottleneck_5x5 = _unit_stride_conv(input_layer, 1, reduce_5x5_depth, '_reduce_5x5')

    branch_3x3 = _unit_stride_conv(bottleneck_3x3, 3, conv_3x3_depth, '_conv_3x3')
    branch_5x5 = _unit_stride_conv(bottleneck_5x5, 5, conv_5x5_depth, '_conv_5x5')

    # Pooling branch: 3 x 3 max pooling, then a 1 x 1 projection
    pooled = max_pool_nbyn(input_layer, filter_size=3, stride=1, name=(name+'_max_pool'))
    branch_pool = _unit_stride_conv(pooled, 1, pool_proj_depth, '_pool_proj')

    return tf.concat([branch_1x1, branch_3x3, branch_5x5, branch_pool], axis=3)

### Function for Generating Auxiliary Network

In [6]:
def auxiliary_network(input_layer, name, dropout_prob=0.7):
    """Build an auxiliary classifier branch and return its softmax output.

    Pipeline: 5 x 5 average pooling with stride 3 (using the pooling helper's
    default padding) -> 1 x 1 convolution with 128 filters -> flatten ->
    fully connected layer with 1024 units and ReLU -> dropout -> linear
    layer with NUM_CLASSES units -> softmax.

    ``dropout_prob`` is the probability of dropping a unit; the dropout op is
    given ``keep_prob = 1 - dropout_prob``. Note that the dropout op is part
    of the branch unconditionally, i.e. it is not switched off by any
    training flag.
    """
    pooled = average_pool_nbyn(input_layer, filter_size=5, stride=3, name=(name+'_avg_pool_5x5'))

    # 1 x 1 convolution with 128 filters for dimensionality reduction
    reduced = conv(pooled, 1, 1, 128, 1, 1, groups=1, name=(name+'_conv_1x1'))

    # Collapse the three non-batch axes into a single feature axis
    flat_size = reduced.shape[1]*reduced.shape[2]*reduced.shape[3]
    flat = tf.reshape(reduced, [-1, flat_size])

    hidden = tf.nn.relu(normal_full_layer(flat, 1024))
    hidden_dropped = tf.nn.dropout(hidden, keep_prob=(1-dropout_prob))

    logits = normal_full_layer(hidden_dropped, NUM_CLASSES)
    return tf.nn.softmax(logits)

### CONV 1 - 64 7x7 filters with stride = 2

In [7]:
# First convolution on the input placeholder: 64 filters of 7 x 7, stride 2 in both
# directions, default SAME padding. The bare `.shape` expression shows the static shape.
conv_1 = conv(x, 7, 7, 64, 2, 2, groups=1, name='conv_1')
conv_1.shape

TensorShape([Dimension(None), Dimension(112), Dimension(112), Dimension(64)])

### POOL 1 - 3 x 3 max pooling with stride = 2

In [8]:
# 3 x 3 max pooling with stride 2 and SAME padding on the first convolution's output
pool_1 = max_pool_nbyn(conv_1, filter_size=3, stride=2, pad=True, name='pool_1')
pool_1.shape

TensorShape([Dimension(None), Dimension(56), Dimension(56), Dimension(64)])

### CONV 2 - 192 3 x 3 filters with stride = 1

In [9]:
# Second convolution: 192 filters of 3 x 3, stride 1, default SAME padding
conv_2 = conv(pool_1, 3, 3, 192, 1, 1, groups=1, name='conv_2')
conv_2.shape

TensorShape([Dimension(None), Dimension(56), Dimension(56), Dimension(192)])

### POOL 2 - 3 x 3 max pooling with stride = 2

In [10]:
# 3 x 3 max pooling with stride 2 and SAME padding on the second convolution's output
pool_2 = max_pool_nbyn(conv_2, filter_size=3, stride=2, pad=True, name='pool_2')
pool_2.shape

TensorShape([Dimension(None), Dimension(28), Dimension(28), Dimension(192)])


### Inception 1 - First Inception module

In [11]:
# Branch depths: 64 (1x1), 96 -> 128 (3x3 reduce -> 3x3), 16 -> 32 (5x5 reduce -> 5x5),
# 32 (pool projection); concatenated depth 64 + 128 + 32 + 32 = 256
inception_1 = inception_module(pool_2, 64, 96, 128, 16, 32, 32, name='inception_1')
inception_1.shape

TensorShape([Dimension(None), Dimension(28), Dimension(28), Dimension(256)])

### Inception 2 - Second Inception module

In [12]:
# Branch depths: 128 (1x1), 128 -> 192 (3x3 reduce -> 3x3), 32 -> 96 (5x5 reduce -> 5x5),
# 64 (pool projection); concatenated depth 128 + 192 + 96 + 64 = 480
inception_2 = inception_module(inception_1, 128, 128, 192, 32, 96, 64, name='inception_2')
inception_2.shape

TensorShape([Dimension(None), Dimension(28), Dimension(28), Dimension(480)])

### POOL 3 - 3 x 3 max pooling with stride = 2

In [13]:
# 3 x 3 max pooling with stride 2 and SAME padding on the second Inception module's output
pool_3 = max_pool_nbyn(inception_2, filter_size=3, stride=2, pad=True, name='pool_3')
pool_3.shape

TensorShape([Dimension(None), Dimension(14), Dimension(14), Dimension(480)])

### Inception 3 - Third Inception module

In [14]:
# Branch depths: 192 (1x1), 96 -> 208 (3x3 reduce -> 3x3), 16 -> 48 (5x5 reduce -> 5x5),
# 64 (pool projection); concatenated depth 192 + 208 + 48 + 64 = 512
inception_3 = inception_module(pool_3, 192, 96, 208, 16, 48, 64, name='inception_3')
inception_3.shape

TensorShape([Dimension(None), Dimension(14), Dimension(14), Dimension(512)])

### Auxiliary Network 1: Network on the side with softmax output

In [15]:
# First auxiliary classifier, branching off the third Inception module's output;
# uses the default dropout probability of auxiliary_network
softmax_0 = auxiliary_network(inception_3, name='aux_network_1')
softmax_0.shape

TensorShape([Dimension(None), Dimension(10)])

### Inception 4 - Fourth Inception module

In [16]:
# Branch depths: 160 (1x1), 112 -> 224 (3x3 reduce -> 3x3), 24 -> 64 (5x5 reduce -> 5x5),
# 64 (pool projection); concatenated depth 160 + 224 + 64 + 64 = 512
inception_4 = inception_module(inception_3, 160, 112, 224, 24, 64, 64, name='inception_4')
inception_4.shape

TensorShape([Dimension(None), Dimension(14), Dimension(14), Dimension(512)])

### Inception 5 - Fifth Inception module

In [17]:
# Branch depths: 128 (1x1), 128 -> 256 (3x3 reduce -> 3x3), 24 -> 64 (5x5 reduce -> 5x5),
# 64 (pool projection); concatenated depth 128 + 256 + 64 + 64 = 512
inception_5 = inception_module(inception_4, 128, 128, 256, 24, 64, 64, name='inception_5')
inception_5.shape

TensorShape([Dimension(None), Dimension(14), Dimension(14), Dimension(512)])

### Inception 6 - Sixth Inception module

In [18]:
# Branch depths: 112 (1x1), 144 -> 288 (3x3 reduce -> 3x3), 32 -> 64 (5x5 reduce -> 5x5),
# 64 (pool projection); concatenated depth 112 + 288 + 64 + 64 = 528
inception_6 = inception_module(inception_5, 112, 144, 288, 32, 64, 64, name='inception_6')
inception_6.shape

TensorShape([Dimension(None), Dimension(14), Dimension(14), Dimension(528)])

### Auxiliary Network 2: Network on the side with softmax output

In [19]:
# Second auxiliary classifier, branching off the sixth Inception module's output;
# uses the default dropout probability of auxiliary_network
softmax_1 = auxiliary_network(inception_6, name='aux_network_2')
softmax_1.shape

TensorShape([Dimension(None), Dimension(10)])

### Inception 7 - Seventh Inception module

In [20]:
# Branch depths: 256 (1x1), 160 -> 320 (3x3 reduce -> 3x3), 32 -> 128 (5x5 reduce -> 5x5),
# 128 (pool projection); concatenated depth 256 + 320 + 128 + 128 = 832
inception_7 = inception_module(inception_6, 256, 160, 320, 32, 128, 128, name='inception_7')
inception_7.shape

TensorShape([Dimension(None), Dimension(14), Dimension(14), Dimension(832)])

### POOL 4 - 3 x 3 max pooling with stride = 2

In [21]:
# 3 x 3 max pooling with stride 2 and SAME padding on the seventh Inception module's output
pool_4 = max_pool_nbyn(inception_7, filter_size=3, stride=2, pad=True, name='pool_4')
pool_4.shape

TensorShape([Dimension(None), Dimension(7), Dimension(7), Dimension(832)])

### Inception 8 - Eighth Inception module

In [22]:
# Same branch depths as the seventh module: 256 (1x1), 160 -> 320 (3x3 reduce -> 3x3),
# 32 -> 128 (5x5 reduce -> 5x5), 128 (pool projection); concatenated depth 832
inception_8 = inception_module(pool_4, 256, 160, 320, 32, 128, 128, name='inception_8')
inception_8.shape

TensorShape([Dimension(None), Dimension(7), Dimension(7), Dimension(832)])

### Inception 9 - Ninth Inception module

In [23]:
# Branch depths: 384 (1x1), 192 -> 384 (3x3 reduce -> 3x3), 48 -> 128 (5x5 reduce -> 5x5),
# 128 (pool projection); concatenated depth 384 + 384 + 128 + 128 = 1024
inception_9 = inception_module(inception_8, 384, 192, 384, 48, 128, 128, name='inception_9')
inception_9.shape

TensorShape([Dimension(None), Dimension(7), Dimension(7), Dimension(1024)])

### AVG POOL - 7 x 7 average pooling with stride = 1 and no padding

In [24]:
# 7 x 7 average pooling with stride 1 and VALID padding (pad=False); on the 7 x 7
# feature map of the last Inception module this leaves a single spatial position
avg_pool = average_pool_nbyn(inception_9, filter_size=7, stride=1, pad=False, name='avg_pool')
avg_pool.shape

TensorShape([Dimension(None), Dimension(1), Dimension(1), Dimension(1024)])

### Dropout Layer

In [25]:
# Dropout on the pooled features; the keep probability is fed at run time through
# the hold_prob placeholder
dropout_layer = tf.nn.dropout(avg_pool, keep_prob=hold_prob)
dropout_layer.shape

TensorShape([Dimension(None), Dimension(1), Dimension(1), Dimension(1024)])

### Linear Layer

In [26]:
# Collapse the 1 x 1 x 1024 pooled features into a [batch, 1024] matrix; the 1024
# must match the depth of the last Inception module's output
flattened = tf.reshape(dropout_layer, [-1, 1024])
# Linear (no activation) classification layer with one output per class
linear_layer = normal_full_layer(flattened, NUM_CLASSES)
linear_layer.shape

TensorShape([Dimension(None), Dimension(10)])

### Softmax Classification Output

In [27]:
# Turn the linear layer's outputs into a softmax distribution over the classes
y_pred = tf.nn.softmax(linear_layer)
y_pred.shape

TensorShape([Dimension(None), Dimension(10)])