In [1]:
import tensorflow as tf

import utils

### Convolutional Layer

A common practice is to group a convolutional layer and its non-linearity together.

We apply conv + ReLU twice, and use a variable scope so that the layer code is reusable.

* Input: [Batch_size, Height, Width, Channels]
* Filter: [F_Height, F_Width, Input_Channels, num_filters]
* Output: [Batch_size, new_Height, new_Width, num_filters]

In [6]:
def conv_relu(inputs, num_filters, k_size, stride, padding, scope_name):
    """Apply a 2-D convolution, add a bias, and pass the result through ReLU.

    The kernel and bias variables are created (or reused, via
    ``tf.AUTO_REUSE``) inside the variable scope ``scope_name``.

    Args:
        inputs: input tensor; its last dimension is used as the number of
            input channels of the kernel.
        num_filters: number of output filters (last dimension of the kernel
            and length of the bias vector).
        k_size: height and width of the square kernel.
        stride: stride used for both middle dimensions; the first and last
            stride entries are fixed to 1.
        padding: padding argument forwarded to ``tf.nn.conv2d``.
        scope_name: name of the variable scope holding ``kernel`` and ``bias``.

    Returns:
        The result of ``tf.nn.relu(conv + biases)``.
    """
    with tf.variable_scope(scope_name, reuse=tf.AUTO_REUSE) as scope:
        in_channels = inputs.shape[-1]
        kernel = tf.get_variable('kernel', [k_size, k_size, in_channels, num_filters],
                                initializer=tf.truncated_normal_initializer())
        biases = tf.get_variable('bias', [num_filters], initializer=tf.random_normal_initializer())

        # The op is tf.nn.conv2d; the previous spelling `con2d` raised AttributeError.
        conv = tf.nn.conv2d(inputs, kernel, strides=[1, stride, stride, 1], padding=padding)

        return tf.nn.relu(conv + biases)

### Pooling
Pooling is a downsampling technique that reduces the dimensionality of the feature map extracted from the conv layer, in order to reduce processing time.
It replaces each subregion of the data with its most representative feature (for max pooling, the maximum value).

In [9]:
def maxpool(inputs, k_size, stride, padding='VALID', scope_name='pool'):
    """Max-pool ``inputs`` with a square window inside a variable scope.

    Args:
        inputs: tensor forwarded unchanged to ``tf.nn.max_pool``.
        k_size: side length of the pooling window; the first and last
            window entries are fixed to 1.
        stride: stride for the two middle dimensions; the first and last
            stride entries are fixed to 1.
        padding: padding argument forwarded to ``tf.nn.max_pool``.
        scope_name: name of the variable scope the op is created in.

    Returns:
        The tensor produced by ``tf.nn.max_pool``.
    """
    window = [1, k_size, k_size, 1]
    step = [1, stride, stride, 1]
    with tf.variable_scope(scope_name, reuse=tf.AUTO_REUSE):
        return tf.nn.max_pool(inputs, ksize=window, strides=step, padding=padding)

### Fully Connected
Every neuron in this layer is connected to every neuron in the previous layer.

In [10]:
def fully_connected(inputs, out_dim, scope_name='fc'):
    """Compute the affine map ``inputs @ w + b`` inside a variable scope.

    No activation is applied here; callers wrap the result themselves.

    Args:
        inputs: input tensor; its last dimension is used as the number of
            rows of the weight matrix.
        out_dim: number of output units (columns of ``weights`` and length
            of ``biases``).
        scope_name: name of the variable scope holding ``weights`` and
            ``biases``; variables are reused via ``tf.AUTO_REUSE``.

    Returns:
        The result of ``tf.matmul(inputs, w) + b``.
    """
    # The previous spellings `tf.AUTO.REUSE` and `tf.truncted_normal_initializer`
    # both raised AttributeError.
    with tf.variable_scope(scope_name, reuse=tf.AUTO_REUSE) as scope:
        in_dim = inputs.shape[-1]
        w = tf.get_variable('weights', [in_dim, out_dim], initializer=tf.truncated_normal_initializer())
        b = tf.get_variable('biases', [out_dim], initializer=tf.constant_initializer(0.0))
        output = tf.matmul(inputs, w) + b
    return output

### Putting it together

In [1]:
def model(x):
    """Stack conv+ReLU -> max-pool -> conv+ReLU -> max-pool -> fully-connected+ReLU on ``x``.

    Args:
        x: input tensor passed to the first ``conv_relu`` call.
            Presumably laid out as [batch, height, width, channels], as the
            notebook text describes - TODO confirm against the caller.

    NOTE(review): in the portion of the function visible here, ``fc`` is
    computed but never returned, so the function yields ``None``. Confirm
    whether further layers and a ``return`` follow or are still to be written.
    """
    # First stage: 32 filters of size 5x5, stride 1, then 2x2 pooling with stride 2.
    conv_relu1 = conv_relu(x, num_filters=32, k_size=5, stride=1, padding='SAME', scope_name='conv1')
    pool1 = maxpool(conv_relu1, k_size=2, stride=2, padding='VALID', scope_name='pool1')
    
    # Second stage: 64 filters of size 5x5, stride 1, then 2x2 pooling with stride 2.
    conv_relu2 = conv_relu(pool1, num_filters=64, k_size=5, stride=1, padding='SAME', scope_name='conv2')
    pool2 = maxpool(conv_relu2, k_size=2, stride=2, padding='VALID', scope_name='pool2')
    
    # Collapse every dimension except the first into one feature axis so the
    # result can be fed to the matmul inside fully_connected.
    feature_dim = pool2.shape[1] * pool2.shape[2] * pool2.shape[3]
    pool2_flatten = tf.reshape(pool2, [-1, feature_dim])
    
    fc = tf.nn.relu(fully_connected(pool2_flatten, 1024, 'fc')) # use 1024 neurons
    
   
    