# AlexNet Graph Implementation in TensorFlow

In [1]:
import numpy as np
import tensorflow as tf

  return f(*args, **kwds)
  from ._conv import register_converters as _register_converters


## Building the Graph

In [2]:
# Clear the default graph so that re-running the cells below builds a fresh
# graph instead of adding a second copy of every op to the existing one.
tf.reset_default_graph()

### Inputs, Placeholders, and Constants

In [3]:
NUM_CLASSES = 2
IMAGE_WIDTH = 201
IMAGE_HEIGHT = 201

# Batch of RGB input images in NHWC order: [batch, height, width, channels].
# Height comes before width because the conv / pooling ops used below treat
# axis 1 as height and axis 2 as width. With the current 201 x 201 setting the
# two orderings are identical, so this only matters for non-square inputs.
x = tf.placeholder(tf.float32, shape=[None, IMAGE_HEIGHT, IMAGE_WIDTH, 3])

# One-hot encoded ground-truth labels, one row per image.
y_true = tf.placeholder(tf.float32, shape=[None, NUM_CLASSES])

# NOTE(review): not referenced by any layer in the visible part of this
# notebook; presumably intended as a dropout keep probability -- confirm.
hold_prob = tf.placeholder(tf.float32)

# Used for batch normalization - a boolean to indicate whether or not we are training.
training = tf.placeholder(tf.bool)

### Helper Functions

In [4]:
def init_weights(shape):
    """Create a weight variable of the given shape.

    The initial values are drawn from a truncated normal distribution with a
    standard deviation of 0.1.
    """
    return tf.Variable(tf.truncated_normal(shape, stddev=0.1))

def init_bias(shape):
    """Create a bias variable of the given shape with every entry set to 0.1."""
    return tf.Variable(tf.constant(0.1, shape=shape))

def max_pool_nbyn(x, name, filter_size=2, stride=2, pad=True):
    """Build a square max-pooling op over the two middle axes of `x`.

    Args:
        x: input tensor handed to `tf.nn.max_pool`.
        name: name given to the pooling op.
        filter_size: side length of the square pooling window.
        stride: step of the window along both pooled axes.
        pad: truthy selects 'SAME' padding, falsy selects 'VALID'.

    Returns:
        The tensor produced by `tf.nn.max_pool`.
    """
    padding_mode = 'SAME' if pad else 'VALID'
    window = [1, filter_size, filter_size, 1]
    step = [1, stride, stride, 1]
    return tf.nn.max_pool(x, ksize=window, strides=step,
                          padding=padding_mode, name=name)


def normal_full_layer(input_layer, size):
    """Build a fully connected layer without an activation.

    The number of input features is read from axis 1 of `input_layer`'s static
    shape. Fresh weights of shape [inputs, size] and biases of shape [size] are
    created, and `input_layer @ W + b` is returned.
    """
    n_inputs = int(input_layer.get_shape()[1])
    weights = init_weights([n_inputs, size])
    biases = init_bias([size])
    # Plain affine forward pass; callers apply any non-linearity themselves.
    return tf.matmul(input_layer, weights) + biases

def batch_normalization(input_layer, training):
    """Apply `tf.layers.batch_normalization` to `input_layer`.

    Args:
        input_layer: tensor to normalize.
        training: forwarded as the layer's `training` argument.

    Returns:
        The normalized tensor.
    """
    normalized = tf.layers.batch_normalization(input_layer, training=training)
    return normalized

def local_response_normalization(input_layer, radius, alpha, beta, name, bias=1.0):
    """Apply local response normalization to `input_layer`.

    Args:
        input_layer: tensor to normalize.
        radius: forwarded as `depth_radius`.
        alpha: forwarded as `alpha`.
        beta: forwarded as `beta`.
        name: name given to the op.
        bias: forwarded as `bias` (defaults to 1.0).

    Returns:
        The tensor produced by `tf.nn.local_response_normalization`.
    """
    # Normalize the tensor that was passed in. The previous version ignored
    # `input_layer` and always read the module-level placeholder `x`.
    return tf.nn.local_response_normalization(input_layer, depth_radius=radius,
                                              alpha=alpha, beta=beta,
                                              bias=bias, name=name)


def conv(x, filter_height, filter_width, num_filters, stride_y, stride_x, name, pad=None, add_relu=True):
    """Build a 2-D convolution layer with optional explicit zero padding.

    Args:
        x: input tensor; axes 1 and 2 are treated as the spatial axes.
        filter_height: kernel extent along axis 1.
        filter_width: kernel extent along axis 2.
        num_filters: number of output channels.
        stride_y: stride along axis 1.
        stride_x: stride along axis 2.
        name: name given to the conv2d layer.
        pad: if not None, the number of zeros added to both sides of each
            spatial axis before the convolution.
        add_relu: when true the layer uses a ReLU activation, otherwise none.

    Returns:
        The output tensor of `tf.layers.conv2d`.
    """
    inputs = x
    if pad is not None:
        # Explicit symmetric padding of the spatial axes only. No `padding`
        # argument is passed to conv2d, so its default still applies.
        inputs = tf.pad(x, [[0, 0], [pad, pad], [pad, pad], [0, 0]])

    # conv2d takes kernel_size and strides as (height, width). The previous
    # version passed (width, height) / (x, y), which is only correct for
    # square kernels and equal strides.
    return tf.layers.conv2d(inputs, filters=num_filters,
                            kernel_size=(filter_height, filter_width),
                            strides=(stride_y, stride_x),
                            kernel_initializer=tf.contrib.layers.xavier_initializer(),
                            use_bias=True, bias_initializer=tf.constant_initializer(0.1),
                            activation=tf.nn.relu if add_relu else None,
                            name=name)

### CONV 1: 96 11 x 11 filters with stride = 4

In [5]:
# First convolution: 96 filters of 11 x 11, stride 4, no explicit padding.
conv_1 = conv(x, filter_height=11, filter_width=11, num_filters=96,
              stride_y=4, stride_x=4, name='conv_1')
conv_1.shape

TensorShape([Dimension(None), Dimension(48), Dimension(48), Dimension(96)])

### POOL 1: 3 x 3 filters with stride =  2

In [6]:
# 3 x 3 max pooling with stride 2 and 'VALID' padding (pad=False).
pool_1 = max_pool_nbyn(conv_1, name='pool_1',
                       filter_size=3, stride=2, pad=False)
pool_1.shape

TensorShape([Dimension(None), Dimension(23), Dimension(23), Dimension(96)])

### NORM 1: Batch Normalization

In [7]:
# Batch-normalize the pooled activations; `training` is the boolean placeholder.
norm_1 = batch_normalization(input_layer=pool_1, training=training)
norm_1.shape

TensorShape([Dimension(None), Dimension(23), Dimension(23), Dimension(96)])

### CONV 2: 256 5 x 5 filters with stride = 1, pad = 2

In [8]:
# Second convolution: 256 filters of 5 x 5, stride 1, 2 pixels of zero padding.
conv_2 = conv(norm_1, filter_height=5, filter_width=5, num_filters=256,
              stride_y=1, stride_x=1, name='conv_2', pad=2)
conv_2.shape

TensorShape([Dimension(None), Dimension(23), Dimension(23), Dimension(256)])

### POOL 2: 3 x 3 filters with stride = 2

In [9]:
# 3 x 3 max pooling with stride 2 and 'VALID' padding (pad=False).
pool_2 = max_pool_nbyn(conv_2, name='pool_2',
                       filter_size=3, stride=2, pad=False)
pool_2.shape

TensorShape([Dimension(None), Dimension(11), Dimension(11), Dimension(256)])

### NORM 2: Batch Normalization

In [10]:
# Batch-normalize the pooled activations; `training` is the boolean placeholder.
norm_2 = batch_normalization(input_layer=pool_2, training=training)
norm_2.shape

TensorShape([Dimension(None), Dimension(11), Dimension(11), Dimension(256)])

### CONV 3: 384 3 x 3 filters with stride = 1, pad = 1

In [11]:
# Third convolution: 384 filters of 3 x 3, stride 1, 1 pixel of zero padding.
conv_3 = conv(norm_2, filter_height=3, filter_width=3, num_filters=384,
              stride_y=1, stride_x=1, name='conv_3', pad=1)
conv_3.shape

TensorShape([Dimension(None), Dimension(11), Dimension(11), Dimension(384)])

### CONV 4: 384 3 x 3 filters with stride = 1, pad = 1

In [12]:
# Fourth convolution: 384 filters of 3 x 3, stride 1, 1 pixel of zero padding.
conv_4 = conv(conv_3, filter_height=3, filter_width=3, num_filters=384,
              stride_y=1, stride_x=1, name='conv_4', pad=1)
conv_4.shape

TensorShape([Dimension(None), Dimension(11), Dimension(11), Dimension(384)])

### CONV 5: 256 3 x 3 filters with stride = 1, pad = 1

In [13]:
# Fifth convolution: 256 filters of 3 x 3, stride 1, 1 pixel of zero padding.
conv_5 = conv(conv_4, filter_height=3, filter_width=3, num_filters=256,
              stride_y=1, stride_x=1, name='conv_5', pad=1)
conv_5.shape

TensorShape([Dimension(None), Dimension(11), Dimension(11), Dimension(256)])

### POOL 3: 3 x 3 filters with stride = 2

In [14]:
# 3 x 3 max pooling with stride 2 and 'VALID' padding (pad=False).
pool_3 = max_pool_nbyn(conv_5, name='pool_3',
                       filter_size=3, stride=2, pad=False)
pool_3.shape

TensorShape([Dimension(None), Dimension(5), Dimension(5), Dimension(256)])

### FC 6: Fully Connected Layer with 3200 neurons

In [15]:
# Flatten every non-batch axis of pool_3 into one feature axis. The feature
# count comes from the tensor's static shape rather than a hard-coded
# 5*5*256, so the cell stays correct if the input size or any earlier layer
# changes.
pool_3_flat_size = int(np.prod(pool_3.get_shape().as_list()[1:]))
pool_3_flattened = tf.reshape(pool_3, [-1, pool_3_flat_size])

# Fully connected layer with 3200 units followed by ReLU.
fc_6 = tf.nn.relu(normal_full_layer(pool_3_flattened, 3200))
fc_6.shape

TensorShape([Dimension(None), Dimension(3200)])

### FC 7: Fully Connected Layer with 3200 neurons

In [16]:
# Second fully connected layer: 3200 units followed by ReLU.
fc_7 = tf.nn.relu(
    normal_full_layer(input_layer=fc_6, size=3200)
)
fc_7.shape

TensorShape([Dimension(None), Dimension(3200)])

### FC 8: Final Fully Connected Layer 

In [17]:
# Output layer: one unit per class, no activation applied here.
fc_8 = normal_full_layer(input_layer=fc_7, size=NUM_CLASSES)
fc_8.shape

TensorShape([Dimension(None), Dimension(2)])

### Classification Output

In [18]:
# y_pred is an alias for the raw fc_8 output (no softmax has been applied);
# it is passed as `logits` to the loss below.
y_pred = fc_8
y_pred.shape

TensorShape([Dimension(None), Dimension(2)])

### Loss Function

In [19]:
# Mean softmax cross-entropy over the batch, computed from the raw logits.
# Uses the `_v2` op because the original `softmax_cross_entropy_with_logits`
# is deprecated. `y_true` is a fed placeholder, so letting gradients flow into
# the labels (the v2 behaviour) does not change what is trained.
cross_entropy = tf.reduce_mean(
    tf.nn.softmax_cross_entropy_with_logits_v2(labels=y_true, logits=y_pred))

Instructions for updating:

Future major versions of TensorFlow will allow gradients to flow
into the labels input on backprop by default.

See @{tf.nn.softmax_cross_entropy_with_logits_v2}.



In [22]:
# Sanity check of the loss op on hand-copied values.
# NOTE(review): this cell's execution count (22) is out of sequence with the
# cells around it (19 and 20), so it was run after the rest of the notebook.
#
# `pred` holds 25 rows of two-class logits that will be fed in place of y_pred.
pred = np.array([[ -3.87687617e+04,   5.78726172e+04],
 [ -8.46322708e+01,  -1.57727783e+04],
 [  3.12215157e+01,  -1.54957100e+04],
 [  1.07811012e+02,  -1.59461592e+04],
 [ -6.13127556e+01,  -1.57387754e+04],
 [ -3.89429531e+04,   5.78887539e+04],
 [ -3.90034453e+04,   5.85213750e+04],
 [ -1.66022705e+02,  -1.58228965e+04],
 [ -3.86717590e+02,  -1.58087080e+04],
 [  2.07964935e+02,  -1.60525752e+04],
 [ -3.92422031e+04,   5.79715508e+04],
 [ -3.88493008e+04,   5.79881914e+04],
 [ -3.97632930e+04,   5.85155039e+04],
 [ -3.89650117e+04,   5.75835781e+04],
 [ -3.91298594e+04,   5.78670352e+04],
 [ -3.88932773e+04,   5.78557070e+04],
 [ -3.93219719e+01,  -1.58440771e+04],
 [ -1.54317963e+02,  -1.59467764e+04],
 [ -3.91408672e+04,   5.83769883e+04],
 [ -3.91114141e+04,   5.77723203e+04],
 [ -5.24858704e+01,  -1.58276045e+04],
 [ -3.88121562e+04,   5.77825625e+04],
 [ -3.91034258e+04,   5.84947461e+04],
 [ -3.95017539e+04,   5.80816758e+04],
 [ -3.90695508e+04,   5.80305508e+04]])

# Matching one-hot labels, one row per row of `pred`. In every row the hot
# column is the column holding the larger logit in `pred`.
true = np.array([[ 0.,  1.],
 [ 1.,  0.],
 [ 1.,  0.],
 [ 1.,  0.],
 [ 1.,  0.],
 [ 0.,  1.],
 [ 0.,  1.],
 [ 1.,  0.],
 [ 1.,  0.],
 [ 1.,  0.],
 [ 0.,  1.],
 [ 0.,  1.],
 [ 0.,  1.],
 [ 0.,  1.],
 [ 0.,  1.],
 [ 0.,  1.],
 [ 1.,  0.],
 [ 1.,  0.],
 [ 0.,  1.],
 [ 0.,  1.],
 [ 1.,  0.],
 [ 0.,  1.],
 [ 0.,  1.],
 [ 0.,  1.],
 [ 0.,  1.]])

# Evaluate only the loss, feeding values for both y_true and y_pred. No
# variable initializer is run in this session; the y_pred values come from
# the feed rather than from the network.
with tf.Session() as sess:
    print(sess.run(cross_entropy, feed_dict={y_true:true, y_pred:pred}))

0.0


### Adam Optimizer

In [20]:
optimizer = tf.train.AdamOptimizer(learning_rate=0.001)  # Adam optimizer

# tf.layers.batch_normalization registers the updates of its moving mean and
# variance in the UPDATE_OPS collection. They only run if the training op
# depends on them. Without this dependency the moving statistics are never
# updated and inference with training=False uses their initial values.
update_ops = tf.get_collection(tf.GraphKeys.UPDATE_OPS)
with tf.control_dependencies(update_ops):
    train = optimizer.minimize(cross_entropy)  # training operation

init = tf.global_variables_initializer()  # global variables initializer