## Convolutional Neural Network models

In [1]:
import tensorflow as tf
import numpy as np
import cifar_data as cf

def weight_variable(shape):
    initial = tf.truncated_normal(shape, stddev=0.1)
    return tf.Variable(initial)

def bias_variable(shape):
    initial = tf.constant(0.1, shape=shape)
    return tf.Variable(initial)

def conv2d(inp, W, S):
    return tf.nn.conv2d(inp, W, strides=[1,S,S,1], padding='SAME')

def conv_layer(inp, shape, stride):
    W = weight_variable(shape)
    b = bias_variable([shape[3]])
    return tf.nn.relu(conv2d(inp, W, stride) + b)

def pool_layer(inp, F, S):
    return tf.nn.max_pool(inp, ksize=[1,F,F,1], strides=[1,S,S,1], padding='SAME')

def full_layer(inp, dim):
    in_size = int(inp.get_shape()[1])
    W = weight_variable([in_size, dim])
    b = bias_variable([dim])
    return tf.matmul(inp, W) + b

### MNIST - Model

| layer   | description    | length     | dimensions(shape)                   | hyperparameters (other than length)  | 
|:-------:|:--------------:|:----------:|:-----------------------------------:|:------------------------------------:|
|         | Input          | n/a(batch) | 784                                 |                                      |
| Layer 1 | Input(reshaped)| colorbar: 1| 28 x 28                             |                                      |
| Layer 2 | Convolutional  | \*32       | weights(5 x 5 x 1), output(28 x 28) | fieldsize=\*5, stride=\*1, pad=2     |
| Layer 3 | Pooling        | 32         | output(14 x 14)                     | fieldsize=\*2, stride=\*2, pad=0     |
| Layer 4 | Convolutional  | \*64       | weights(5 x 5 x 32), output(14 x 14)| (same as layer 2)                    |
| Layer 5 | Pooling        | 64         | output(7 x 7)                       | (same as layer 3)                    |
| Layer 6 | Fully-Connected|            | input (3136)  output (\*1024)       | with dropout, keep_prob=\*0.5        | 
| Layer 7 | Output         |            | input (1024)  output(10)            | with dropout, keep_prob=\*0.5        |
|         |                |\* value set by user                              |

#### Load Input Data

In [2]:
from tensorflow.examples.tutorials.mnist import input_data
mnist = input_data.read_data_sets("MNIST_data/", one_hot=True)
XShape = mnist.train.images.shape
yShape = mnist.train.labels.shape
print ('XShape:', XShape) 
print ('yShape:', yShape)

Extracting MNIST_data/train-images-idx3-ubyte.gz
Extracting MNIST_data/train-labels-idx1-ubyte.gz
Extracting MNIST_data/t10k-images-idx3-ubyte.gz
Extracting MNIST_data/t10k-labels-idx1-ubyte.gz
XShape: (55000, 784)
yShape: (55000, 10)


#### Network Settings

In [3]:
len2, len4 = 32, 64
F_A, S_A = 5, 1
F_B, S_B = 2, 2
inp_shape = [None, XShape[1]]
spatial_dim, dpth = int(np.sqrt(XShape[1])), 1
reshape = [-1, spatial_dim, spatial_dim, dpth]
shp2 = [F_A, F_A, dpth, len2]
shp4 = [F_A, F_A, len2, len4]
shp6out = 1024
ydim = yShape[1]

#### Build Network

In [4]:
inp = tf.placeholder(tf.float32, inp_shape)
layer1 = tf.reshape(inp, reshape)
layer2 = conv_layer(layer1, shp2, S_A)
layer3 = pool_layer(layer2, F_B, S_B)
layer4 = conv_layer(layer3, shp4, S_A)
layer5 = pool_layer(layer4, F_B, S_B)

keep_prob = tf.placeholder(tf.float32)
shp6in = [ -1, int(layer5.shape[1]) * int(layer5.shape[2]) * int(layer5.shape[3]) ]
flattened = tf.reshape(layer5, shp6in)
droppedA = tf.nn.dropout(flattened, keep_prob=keep_prob)
layer6 = tf.nn.relu(full_layer(droppedA, shp6out))

droppedB = tf.nn.dropout(layer6, keep_prob=keep_prob) 
layer7 = full_layer(droppedB, ydim)

print (inp, '\n', layer2, '\n', layer3, '\n', layer4, '\n', layer5)
print (layer6, '\n', layer7)

yShape = [None, ydim]
lbls = tf.placeholder(tf.float32, shape=yShape)
results = tf.nn.softmax_cross_entropy_with_logits(logits=layer7, labels=lbls)
cross_entropy = tf.reduce_mean(results)
train_step = tf.train.AdamOptimizer(1e-4).minimize(cross_entropy)
correct_prediction = tf.equal(tf.argmax(layer7, 1), tf.argmax(lbls, 1))
accuracy = tf.reduce_mean(tf.cast(correct_prediction, tf.float32))

Tensor("Placeholder:0", shape=(?, 784), dtype=float32) 
 Tensor("Relu:0", shape=(?, 28, 28, 32), dtype=float32) 
 Tensor("MaxPool:0", shape=(?, 14, 14, 32), dtype=float32) 
 Tensor("Relu_1:0", shape=(?, 14, 14, 64), dtype=float32) 
 Tensor("MaxPool_1:0", shape=(?, 7, 7, 64), dtype=float32)
Tensor("Relu_2:0", shape=(?, 1024), dtype=float32) 
 Tensor("add_3:0", shape=(?, 10), dtype=float32)


#### Train Model

In [5]:
batchSize = 50
numSteps = 901  # recommended 5000
nTest = 1000
with tf.Session() as sess:
    sess.run(tf.global_variables_initializer())
    for i in range(numSteps):
        batch = mnist.train.next_batch(batchSize)
        
        # display accuracy on in-sample images (every 100 steps)
        if i % 100 == 0:
            train_accuracy = sess.run(accuracy, feed_dict={inp: batch[0], lbls: batch[1],
                                                           keep_prob: 1.0})
            print("step {}, training accuracy {}".format(i, train_accuracy))
            
        # train next batch
        sess.run(train_step, feed_dict={inp: batch[0], lbls: batch[1], keep_prob: 0.5})
        
    # test accuracy on out-of-sample images
    X = mnist.test.images.reshape(ydim, nTest, XShape[1])
    Y = mnist.test.labels.reshape(ydim, nTest, ydim)
    acc = np.mean([sess.run(accuracy, feed_dict={inp: X[i], lbls: Y[i], keep_prob: 1.0}) 
                   for i in range(ydim)]) 
    
print("test accuracy: {}".format(acc))

step 0, training accuracy 0.05999999865889549
step 100, training accuracy 0.6200000047683716
step 200, training accuracy 0.9200000166893005
step 300, training accuracy 0.8799999952316284
step 400, training accuracy 0.8600000143051147
step 500, training accuracy 0.8999999761581421
step 600, training accuracy 0.8600000143051147
step 700, training accuracy 0.9800000190734863
step 800, training accuracy 0.8999999761581421
step 900, training accuracy 0.8799999952316284
test accuracy: 0.947100043296814


### CIFAR - Model 1

| layer   | description    | length     | dimensions(shape)                   | hyperparameters (other than length)  | 
|:-------:|:--------------:|:----------:|:-----------------------------------:|:------------------------------------:|
| Layer 1 | Input          | colorbar: 3| 32 x 32 x 3                         |                                      |
| Layer 2 | Convolutional  | \*32       | weights(5 x 5 x 3), output(32 x 32) | fieldsize=\*5, stride=\*1, pad=2     |
| Layer 3 | Pooling        | 32         | output(16 x 16)                     | fieldsize=\*2, stride=\*2, pad=0     |
| Layer 4 | Convolutional  | \*64       | weights(5 x 5 x 32), output(16 x 16)| (same as layer 2)                    |
| Layer 5 | Pooling        | 64         | output(8 x 8)                       | (same as layer 3)                    |
| Layer 6 | Convolutional  | \*128      | weights(5 x 5 x 64), output(8 x 8)  | (same as layer 2)                    |
| Layer 7 | Pooling        | 128        | output(4 x 4)                       | (same as layer 3)                    |
| Layer 8 | Fully-Connected|            | input (2048)  output (\*512)        | with dropout, keep_prob=\*0.5        | 
| Layer 9 | Output         |            | input (512)  output(10)             | with dropout, keep_prob=\*0.5        |
|         |                |\* value set by user                              |

#### Load Input Data

In [7]:
import cifar_data as cf
d = cf.CifarDataManager()
images = d.train.images
XShape = d.train.images.shape
yShape = d.train.labels.shape
print ('XShape:', XShape)
print ('yShape:', yShape)

XShape: (50000, 32, 32, 3)
yShape: (50000, 10)


#### Network Settings

In [8]:
F_A, S_A = 5, 1
F_B, S_B = 2, 2
len2, len4, len6  = 32, 64, 128
shp1 = [None, XShape[1], XShape[2], XShape[3]]
shp2 = [F_A, F_A, XShape[3], len2]
shp4 = [F_A, F_A, len2, len4]
shp6 = [F_A, F_A, len4, len6]
shp8out = 512
ydim = yShape[1]

#### Build Network

In [9]:
inp = tf.placeholder(tf.float32, shape=shp1)
layer2 = conv_layer(inp, shp2, S_A)
layer3 = pool_layer(layer2, F_B, S_B)
layer4 = conv_layer(layer3, shp4, S_A)
layer5 = pool_layer(layer4, F_B, S_B)
layer6 = conv_layer(layer5, shp6, S_A)
layer7 = pool_layer(layer6, F_B, S_B)

keep_prob = tf.placeholder(tf.float32)
shp8in = [ -1, int(layer7.shape[1]) * int(layer7.shape[2]) * int(layer7.shape[3]) ]
flattened = tf.reshape(layer7, shp8in)
droppedA = tf.nn.dropout(flattened, keep_prob=keep_prob)
layer8 = tf.nn.relu(full_layer(droppedA, shp8out))

droppedB = tf.nn.dropout(layer8, keep_prob=keep_prob) 
layer9 = full_layer(droppedB, ydim)

print (inp, '\n', layer2, '\n', layer3, '\n', layer4, '\n', layer5)
print (layer6, '\n', layer7, '\n', layer8, '\n', layer9)

yShape = [None, ydim]
lbls = tf.placeholder(tf.float32, shape=yShape)
results = tf.nn.softmax_cross_entropy_with_logits(logits=layer9, labels=lbls)
cross_entropy = tf.reduce_mean(results)
train_step = tf.train.AdamOptimizer(1e-3).minimize(cross_entropy)
correct_prediction = tf.equal(tf.argmax(layer9, 1), tf.argmax(lbls, 1))
accuracy = tf.reduce_mean(tf.cast(correct_prediction, tf.float32))

Tensor("Placeholder_3:0", shape=(?, 32, 32, 3), dtype=float32) 
 Tensor("Relu_3:0", shape=(?, 32, 32, 32), dtype=float32) 
 Tensor("MaxPool_2:0", shape=(?, 16, 16, 32), dtype=float32) 
 Tensor("Relu_4:0", shape=(?, 16, 16, 64), dtype=float32) 
 Tensor("MaxPool_3:0", shape=(?, 8, 8, 64), dtype=float32)
Tensor("Relu_5:0", shape=(?, 8, 8, 128), dtype=float32) 
 Tensor("MaxPool_4:0", shape=(?, 4, 4, 128), dtype=float32) 
 Tensor("Relu_6:0", shape=(?, 512), dtype=float32) 
 Tensor("add_8:0", shape=(?, 10), dtype=float32)


#### Train Model

In [10]:
batchSize = 50
numSteps = 501  # recommended 500000
nTest = 1000
with tf.Session() as sess:
    sess.run(tf.global_variables_initializer())
    for i in range(numSteps):
        batch = d.train.next_batch(batchSize)
        
        # display accuracy on in-sample images (every 100 steps)
        if i % 100 == 0:
            train_accuracy = sess.run(accuracy, feed_dict={inp: batch[0], lbls: batch[1],
                                                           keep_prob: 1.0})
            print("step {}, training accuracy {}".format(i, train_accuracy))
            
        # train next batch
        sess.run(train_step, feed_dict={inp: batch[0], lbls: batch[1], keep_prob: 0.5})
        
    # test accuracy on out-of-sample images
    X = d.test.images.reshape(ydim, nTest, XShape[1], XShape[2], XShape[3])
    Y = d.test.labels.reshape(ydim, nTest, ydim)
    acc = np.mean([sess.run(accuracy, feed_dict={inp: X[i], lbls: Y[i], keep_prob: 1.0}) 
                   for i in range(ydim)]) 
    
print("test accuracy: {}".format(acc))

step 0, training accuracy 0.019999999552965164
step 100, training accuracy 0.2800000011920929
step 200, training accuracy 0.3400000035762787
step 300, training accuracy 0.30000001192092896
step 400, training accuracy 0.4000000059604645
step 500, training accuracy 0.5799999833106995
test accuracy: 0.4301000237464905


### CIFAR - Model II

note: length parameters match with example from text (not github site, https://github.com/Hezi-Resheff/Oreilly-Learning-TensorFlow)

| layer   | description    | length     | dimensions(shape)                   | hyperparameters (other than length)  | 
|:-------:|:--------------:|:----------:|:-----------------------------------:|:------------------------------------:|
| Layer 1 | Input          | colorbar: 3| 32 x 32 x 3                         |                                      |
| Layer 2a| Convolutional  | \*30       | weights(3 x 3 x 3), output(32 x 32) | fieldsize=\*3, stride=\*1, pad=1     |
| Layer 2b| Convolutional  | \*30       | weights(3 x 3 x 30), output(32 x 32)| same as 2a                           |
| Layer 2c| Convolutional  | \*30       | weights(3 x 3 x 30), output(32 x 32)| same as 2a                           |
| Layer 2d| Pooling        | 30         | output(16 x 16)                     | fieldsize=\*2, stride=\*2, pad=0     |
| Layer 3a| Convolutional  | \*50       | weights(3 x 3 x 30), output(16 x 16)| same as 2a                           |
| Layer 3b| Convolutional  | \*50       | weights(3 x 3 x 50), output(16 x 16)| same as 2a                           |
| Layer 3c| Convolutional  | \*50       | weights(3 x 3 x 50), output(16 x 16)| same as 2a                           |
| Layer 3d| Pooling        | 50         | output(8 x 8)                       | same as 2d                           |
| Layer 4a| Convolutional  | \*80       | weights(3 x 3 x 50), output(8 x 8)  | same as 2a                           |
| Layer 4b| Convolutional  | \*80       | weights(3 x 3 x 80), output(8 x 8)  | same as 2a                           |
| Layer 4c| Convolutional  | \*80       | weights(3 x 3 x 80), output(8 x 8)  | same as 2a                           |
| Layer 4d| Pooling        | 80         | output(1 x 1)                       | fieldsize=\*8, stride=\*8, pad=0     |
| Layer 5 | Fully-Connected|     	    | input (80) output (\*500)         | with dropout, keep_prob=\*0.5        |
| Layer 6 | Output         |            | input (500)  output(10)             | with dropout, keep_prob=\*0.5        |
|         |                |\* value set by user                              |

#### Load Input Data

In [11]:
d = cf.CifarDataManager()
images = d.train.images
XShape = d.train.images.shape
yShape = d.train.labels.shape
print ('XShape:', XShape)
print ('yShape:', yShape)

XShape: (50000, 32, 32, 3)
yShape: (50000, 10)


#### Network Setting

In [12]:
F_A, S_A = 3, 1
F_B, S_B = 2, 2
F_C, S_C = 8, 8
len2, len3, len4 = 30, 50, 80
shp1 = [None, XShape[1], XShape[2], XShape[3]]
shp2a = [F_A, F_A, XShape[3], len2]
shp2b = [F_A, F_A, len2, len2]
shp2c = [F_A, F_A, len2, len2]
shp3a = [F_A, F_A, len2, len3]
shp3b = [F_A, F_A, len3, len3]
shp3c = [F_A, F_A, len3, len3]
shp4a = [F_A, F_A, len3, len4]
shp4b = [F_A, F_A, len4, len4]
shp4c = [F_A, F_A, len4, len4]
shp5in = [-1, len4]
shp5out = 500
ydim = yShape[1]

#### Build Network

In [13]:
inp = tf.placeholder(tf.float32, shape=shp1)
layer2a = conv_layer(inp, shp2a, S_A)
layer2b = conv_layer(layer2a, shp2b, S_A)
layer2c = conv_layer(layer2b, shp2c, S_A)
layer2d = pool_layer(layer2c, F_B, S_B)

keep_prob = tf.placeholder(tf.float32)
droppedA = tf.nn.dropout(layer2d, keep_prob=keep_prob) 
layer3a = conv_layer(droppedA, shp3a, S_A)
layer3b = conv_layer(layer3a, shp3b, S_A)
layer3c = conv_layer(layer3b, shp3c, S_A)
layer3d = pool_layer(layer3c, F_B, S_B)

droppedB = tf.nn.dropout(layer3d, keep_prob=keep_prob) 
layer4a = conv_layer(droppedB, shp4a, S_A)
layer4b = conv_layer(layer4a, shp4b, S_A)
layer4c = conv_layer(layer4b, shp4c, S_A)
layer4d = pool_layer(layer4c, F_C, S_C)

flattened = tf.reshape(layer4d, shp5in)
droppedC = tf.nn.dropout(flattened, keep_prob=keep_prob)
layer5 = tf.nn.relu(full_layer(droppedC, shp5out))

droppedD = tf.nn.dropout(layer5, keep_prob=keep_prob)
layer6 =  full_layer(droppedD, ydim)

print (inp, '\n', layer2a, '\n', layer2b, '\n', layer2c, '\n', layer2d)
print (layer3a, '\n', layer3b, '\n', layer3c, '\n', layer3d, '\n', layer4a)
print (layer4b, '\n', layer4c, '\n', layer4d, '\n', layer5, '\n', layer6)

yShape = [None, ydim]
lbls = tf.placeholder(tf.float32, shape=yShape)
results = tf.nn.softmax_cross_entropy_with_logits(logits=layer6, labels=lbls)
cross_entropy = tf.reduce_mean(results)
train_step = tf.train.AdamOptimizer(5e-4).minimize(cross_entropy)
correct_prediction = tf.equal(tf.argmax(layer6, 1), tf.argmax(lbls, 1))
accuracy = tf.reduce_mean(tf.cast(correct_prediction, tf.float32))

Tensor("Placeholder_6:0", shape=(?, 32, 32, 3), dtype=float32) 
 Tensor("Relu_7:0", shape=(?, 32, 32, 30), dtype=float32) 
 Tensor("Relu_8:0", shape=(?, 32, 32, 30), dtype=float32) 
 Tensor("Relu_9:0", shape=(?, 32, 32, 30), dtype=float32) 
 Tensor("MaxPool_5:0", shape=(?, 16, 16, 30), dtype=float32)
Tensor("Relu_10:0", shape=(?, 16, 16, 50), dtype=float32) 
 Tensor("Relu_11:0", shape=(?, 16, 16, 50), dtype=float32) 
 Tensor("Relu_12:0", shape=(?, 16, 16, 50), dtype=float32) 
 Tensor("MaxPool_6:0", shape=(?, 8, 8, 50), dtype=float32) 
 Tensor("Relu_13:0", shape=(?, 8, 8, 80), dtype=float32)
Tensor("Relu_14:0", shape=(?, 8, 8, 80), dtype=float32) 
 Tensor("Relu_15:0", shape=(?, 8, 8, 80), dtype=float32) 
 Tensor("MaxPool_7:0", shape=(?, 1, 1, 80), dtype=float32) 
 Tensor("Relu_16:0", shape=(?, 500), dtype=float32) 
 Tensor("add_19:0", shape=(?, 10), dtype=float32)


#### Train Model

In [14]:
batchSize = 50
numSteps = 501  # recommended 500000
nTest = 1000
with tf.Session() as sess:
    sess.run(tf.global_variables_initializer())
    for i in range(numSteps):
        batch = d.train.next_batch(batchSize)
        
        # display accuracy on in-sample images (every 100 steps)
        if i % 100 == 0:
            train_accuracy = sess.run(accuracy, feed_dict={inp: batch[0], lbls: batch[1],
                                                           keep_prob: 1.0})
            print("step {}, training accuracy {}".format(i, train_accuracy))
            
        # train next batch
        sess.run(train_step, feed_dict={inp: batch[0], lbls: batch[1], keep_prob: 0.5})
        
    # test accuracy on out-of-sample images
    X = d.test.images.reshape(ydim, nTest, XShape[1], XShape[2], XShape[3])
    Y = d.test.labels.reshape(ydim, nTest, ydim)
    acc = np.mean([sess.run(accuracy, feed_dict={inp: X[i], lbls: Y[i], keep_prob: 1.0}) 
                   for i in range(ydim)]) 
    
print("test accuracy: {}".format(acc))

step 0, training accuracy 0.03999999910593033
step 100, training accuracy 0.11999999731779099
step 200, training accuracy 0.019999999552965164
step 300, training accuracy 0.14000000059604645
step 400, training accuracy 0.10000000149011612
step 500, training accuracy 0.10000000149011612
test accuracy: 0.10000000149011612
