# Chapter 15 - Classifying images with deep CNN's

In [1]:
#1d convulution, can be done with hand or using np.convolve
import numpy as np
def conv1d(x, w, p=0, s=1):
    w_rot = np.array(w[::-1])
    x_padded = np.array(x)
    if p > 0:
        zero_pad = np.zeros(shape = p)
        x_padded = np.concatenate([zero_pad, x_padded, zero_pad])
        
    res = []
    for i in range(0, int(len(x)/s),s):
        res.append(np.sum(x_padded[i:i+w_rot.shape[0]] * w_rot))
    
    return np.array(res)

#testing
x = [1, 3, 2, 4, 5, 6, 1, 3]
w = [1, 0, 3, 1, 2]

print('Conv1d Implementation:', conv1d(x, w, p=2, s=1))
print('Numpy Results:', np.convolve(x, w, mode = 'same'))

Conv1d Implementation: [ 5. 14. 16. 26. 24. 34. 19. 22.]
Numpy Results: [ 5 14 16 26 24 34 19 22]


In [2]:
#convulation of 2x2 matrix
import numpy as np
import scipy.signal
def conv2d(X, W, p = (0,0), s = (1,1)):
    W_rot = np.array(W)[::-1,::-1]
    X_orig = np.array(X)
    n1 = X_orig.shape[0] + 2*p[0]
    n2 = X_orig.shape[1] + 2*p[1]
    X_padded = np.zeros(shape=(n1,n2))
    X_padded[p[0]:p[0] + X_orig.shape[0],
               p[1]:p[1] + X_orig.shape[1]] = X_orig
    
    res = []
    for i in range(0, int((X_padded.shape[0] - W_rot.shape[0])/s[0])+1, s[0]):
        res.append([])
        for j in range(0, int((X_padded.shape[1] - W_rot.shape[1])/s[1])+1, s[1]):
            X_sub = X_padded[i:i+W_rot.shape[0],
                            j:j+W_rot.shape[1]]
            res[-1].append(np.sum(X_sub*W_rot))
    return(np.array(res))

X = [[1,3,2,4], [5,6,1,3], [1,2,0,2], [3,4,3,2]]
W = [[1,0,3], [1,2,1], [0,1,1]]

print('Conv2d Implementation: \n',
     conv2d(X,W, p = (1,1), s = (1,1)))
print('SciPy Results:\n', scipy.signal.convolve2d(X, W, mode = 'same'))

Conv2d Implementation: 
 [[11. 25. 32. 13.]
 [19. 25. 24. 13.]
 [13. 28. 25. 17.]
 [11. 17. 14.  9.]]
SciPy Results:
 [[11 25 32 13]
 [19 25 24 13]
 [13 28 25 17]
 [11 17 14  9]]


## Subsampling

for example max/ or mean/average pooling. Pooling introduces some sort of local invariance. This means that small changes in local neighborhood do not change the results. Therefore it makes the data more robust to noise. In addition, pooling decreases the size of features, which results in higher computational efficiency.

In [3]:
import imageio
img = imageio.imread('./example-image.png', pilmode = 'RGB')
print('Image shape:', img.shape)
print('Number of channels:', img.shape[2])
print('Image data type:', img.dtype)

Image shape: (252, 221, 3)
Number of channels: 3
Image data type: uint8


RGB images can be read as unit8 as these images store values in in a range between 0,255 which is sufficient for RGB images (and computationally effective compared to 16 or 32 bit int.

## Implementing a deep convolutional NN using TF

Architecture
* Input [batchsize*28*28*1]
* Conv_1 [batchsize*24*24*32]
* Pooling_1 [batchsize*12*12*32]
* Conv_2 [batchsize*8*8*64]
* Pooling_2 [batchsize*4*4*128]
* FC_1 [batchsize*1024]
* FC_2 and softmax layer [batchsize * 10]

In [4]:
import struct
import numpy as np
import os

def load_mnist(path, kind='train'):
    """Load MNIST data from `path`"""
    labels_path = os.path.join(path,
                               '%s-labels.idx1-ubyte'
                                % kind)
    images_path = os.path.join(path,
                               '%s-images.idx3-ubyte'
                               % kind)

    with open(labels_path, 'rb') as lbpath:
        magic, n = struct.unpack('>II',
                                 lbpath.read(8))
        labels = np.fromfile(lbpath,
                             dtype=np.uint8)

    with open(images_path, 'rb') as imgpath:
        magic, num, rows, cols = struct.unpack(">IIII",
                                               imgpath.read(16))
        images = np.fromfile(imgpath,
                             dtype=np.uint8).reshape(len(labels), 784)

    return images, labels


X_data, y_data = load_mnist('./MNIST', kind='train')
print('Rows: %d,  Columns: %d' % (X_data.shape[0], X_data.shape[1]))
X_test, y_test = load_mnist('./MNIST', kind='t10k')
print('Rows: %d,  Columns: %d' % (X_test.shape[0], X_test.shape[1]))

X_train, y_train = X_data[:50000,:], y_data[:50000]
X_valid, y_valid = X_data[50000:,:], y_data[50000:]

print('Training:   ', X_train.shape, y_train.shape)
print('Validation: ', X_valid.shape, y_valid.shape)
print('Test Set:   ', X_test.shape, y_test.shape)

Rows: 60000,  Columns: 784
Rows: 10000,  Columns: 784
Training:    (50000, 784) (50000,)
Validation:  (10000, 784) (10000,)
Test Set:    (10000, 784) (10000,)


In [5]:
X_data, y_data = load_mnist('./MNIST', kind='train')
print('Rows: %d,  Columns: %d' % (X_data.shape[0], X_data.shape[1]))
X_test, y_test = load_mnist('./MNIST', kind='t10k')
print('Rows: %d,  Columns: %d' % (X_test.shape[0], X_test.shape[1]))

X_train, y_train = X_data[:50000,:], y_data[:50000]
X_valid, y_valid = X_data[50000:,:], y_data[50000:]

print('Training:   ', X_train.shape, y_train.shape)
print('Validation: ', X_valid.shape, y_valid.shape)
print('Test Set:   ', X_test.shape, y_test.shape)

Rows: 60000,  Columns: 784
Rows: 10000,  Columns: 784
Training:    (50000, 784) (50000,)
Validation:  (10000, 784) (10000,)
Test Set:    (10000, 784) (10000,)


In [6]:
def batch_generator(X, y, batch_size=64, 
                    shuffle=False, random_seed=None):
    
    idx = np.arange(y.shape[0])
    
    if shuffle:
        rng = np.random.RandomState(random_seed)
        rng.shuffle(idx)
        X = X[idx]
        y = y[idx]
    
    for i in range(0, X.shape[0], batch_size):
        yield (X[i:i+batch_size, :], y[i:i+batch_size])

In [7]:
#Standardize data
mean_vals = np.mean(X_train, axis = 0)
std_vals = np.std(X_train)

X_train_centered = (X_train - mean_vals)/std_vals
X_valid_centered = (X_valid - mean_vals)/std_vals
X_test_centered = (X_test - mean_vals)/std_vals

In [8]:
import tensorflow as tf
import numpy as np


## wrapper functions 

def conv_layer(input_tensor, name,
               kernel_size, n_output_channels, 
               padding_mode='SAME', strides=(1, 1, 1, 1)):
    with tf.variable_scope(name):
        ## get n_input_channels:
        ##   input tensor shape: 
        ##   [batch x width x height x channels_in]
        input_shape = input_tensor.get_shape().as_list()
        n_input_channels = input_shape[-1] 

        weights_shape = (list(kernel_size) + 
                         [n_input_channels, n_output_channels])

        weights = tf.get_variable(name='_weights',
                                  shape=weights_shape)
        print(weights)
        biases = tf.get_variable(name='_biases',
                                 initializer=tf.zeros(
                                     shape=[n_output_channels]))
        print(biases)
        conv = tf.nn.conv2d(input=input_tensor, 
                            filter=weights,
                            strides=strides, 
                            padding=padding_mode)
        print(conv)
        conv = tf.nn.bias_add(conv, biases, 
                              name='net_pre-activation')
        print(conv)
        conv = tf.nn.relu(conv, name='activation')
        print(conv)
        
        return conv
    

## testing
g = tf.Graph()
with g.as_default():
    x = tf.placeholder(tf.float32, shape=[None, 28, 28, 1])
    conv_layer(x, name='convtest', kernel_size=(3, 3), n_output_channels=32)
    
del g, x

W0820 08:07:10.453307  8440 deprecation.py:506] From C:\Users\rikkr\Anaconda3\lib\site-packages\tensorflow\python\ops\init_ops.py:1251: calling VarianceScaling.__init__ (from tensorflow.python.ops.init_ops) with dtype is deprecated and will be removed in a future version.
Instructions for updating:
Call initializer instance with the dtype argument instead of passing it to the constructor


<tf.Variable 'convtest/_weights:0' shape=(3, 3, 1, 32) dtype=float32_ref>
<tf.Variable 'convtest/_biases:0' shape=(32,) dtype=float32_ref>
Tensor("convtest/Conv2D:0", shape=(?, 28, 28, 32), dtype=float32)
Tensor("convtest/net_pre-activation:0", shape=(?, 28, 28, 32), dtype=float32)
Tensor("convtest/activation:0", shape=(?, 28, 28, 32), dtype=float32)


In [9]:
g = tf.Graph()
with g.as_default():
    x = tf.placeholder(tf.float32, shape=[None, 28, 28, 1])
    conv_layer(x, name = 'convtest',
              kernel_size = (3,3),
              n_output_channels = 32)

del g,x

<tf.Variable 'convtest/_weights:0' shape=(3, 3, 1, 32) dtype=float32_ref>
<tf.Variable 'convtest/_biases:0' shape=(32,) dtype=float32_ref>
Tensor("convtest/Conv2D:0", shape=(?, 28, 28, 32), dtype=float32)
Tensor("convtest/net_pre-activation:0", shape=(?, 28, 28, 32), dtype=float32)
Tensor("convtest/activation:0", shape=(?, 28, 28, 32), dtype=float32)


In [10]:
def fc_layer(input_tensor, name, 
             n_output_units, activation_fn=None):
    with tf.variable_scope(name):
        input_shape = input_tensor.get_shape().as_list()[1:]
        n_input_units = np.prod(input_shape)
        if len(input_shape) > 1:
            input_tensor = tf.reshape(input_tensor, 
                                      shape=(-1, n_input_units))

        weights_shape = [n_input_units, n_output_units]

        weights = tf.get_variable(name='_weights',
                                  shape=weights_shape)
        print(weights)
        biases = tf.get_variable(name='_biases',
                                 initializer=tf.zeros(
                                     shape=[n_output_units]))
        print(biases)
        layer = tf.matmul(input_tensor, weights)
        print(layer)
        layer = tf.nn.bias_add(layer, biases,
                              name='net_pre-activation')
        print(layer)
        if activation_fn is None:
            return layer
        
        layer = activation_fn(layer, name='activation')
        print(layer)
        return layer

    
## testing:
g = tf.Graph()
with g.as_default():
    x = tf.placeholder(tf.float32, 
                       shape=[None, 28, 28, 1])
    fc_layer(x, name='fctest', n_output_units=32, 
             activation_fn=tf.nn.relu)
    
del g, x

<tf.Variable 'fctest/_weights:0' shape=(784, 32) dtype=float32_ref>
<tf.Variable 'fctest/_biases:0' shape=(32,) dtype=float32_ref>
Tensor("fctest/MatMul:0", shape=(?, 32), dtype=float32)
Tensor("fctest/net_pre-activation:0", shape=(?, 32), dtype=float32)
Tensor("fctest/activation:0", shape=(?, 32), dtype=float32)


In [11]:
g = tf.Graph()
with g.as_default():
    x = tf.placeholder(tf.float32, shape = [None, 28,28,1])
    fc_layer(x, name = 'fctest', n_output_units=32, activation_fn = tf.nn.relu)
    
del g, x

<tf.Variable 'fctest/_weights:0' shape=(784, 32) dtype=float32_ref>
<tf.Variable 'fctest/_biases:0' shape=(32,) dtype=float32_ref>
Tensor("fctest/MatMul:0", shape=(?, 32), dtype=float32)
Tensor("fctest/net_pre-activation:0", shape=(?, 32), dtype=float32)
Tensor("fctest/activation:0", shape=(?, 32), dtype=float32)


In [12]:
#Building the cnn
def build_cnn(learning_rate = 1e-4):
    #placeholders
    tf_x = tf.placeholder(tf.float32, shape = [None, 784], name = 'tf_x')
    tf_y = tf.placeholder(tf.int32, shape = [None], name = 'tf_y')
    
    #rehsape x to 4D Tensor
    #[batchsize, width, height, 1]
    tf_x_image = tf.reshape(tf_x, shape = [-1, 28,28,1], name = 'tf_x_reshaped')
    
    #One hot encoding
    tf_y_onehot = tf.one_hot(indices=tf_y, depth = 10, dtype = tf.float32, name = 'tf_y_onehot')
    
    ##1st layer: COnv_1
    print('\n Building 1st layer:')
    h1 = conv_layer(tf_x_image, name = 'conv_1', kernel_size = (5,5), padding_mode = 'VALID', n_output_channels = 32)
    
    ##Max pooling
    h1_pool = tf.nn.max_pool(h1, ksize = [1,2,2,1], strides = [1,2,2,1], padding = 'SAME', name = 'maxpool1')
    
    ##2nd layer: Conv_2
    print('\n Building 2nd layer:')
    h2 = conv_layer(h1_pool, name = 'conv_2', kernel_size = (5,5), padding_mode = 'VALID', n_output_channels = 64)
    
    ##Max pooling
    h2_pool = tf.nn.max_pool(h2, ksize = [1,2,2,1], strides = [1,2,2,1], padding = 'SAME', name = 'maxpool2')
    
    #3rd layer: Fully connected
    print('\nBuilding 3rd layer:')
    h3 = fc_layer(h2_pool, name = 'fc_3', n_output_units = 1024, activation_fn = tf.nn.relu)
    
    ##Dropout
    keep_prob = tf.placeholder(tf.float32, name = 'fc_keep_prob')
    h3_drop = tf.nn.dropout(h3, keep_prob = keep_prob, name = 'dropout_layer')
    
    ## 4th layer: Fully connected (linear activation)
    print('\nBuilding 4th layer:')
    h4 = fc_layer(h3_drop, name = 'fc_4', n_output_units = 10, activation_fn = None)
    
    ##Predictions
    predictions = {
        'probabilities' : tf.nn.softmax(h4, name = 'probabilities'),
        'labels': tf.cast(tf.argmax(h4, axis = 1), tf.int32, name = 'labels')
    }
    
    ##Loss function and optimzation
    cross_entropy_loss = tf.reduce_mean(
        tf.nn.softmax_cross_entropy_with_logits(logits = h4, labels = tf_y_onehot),
        name = 'cross_entropy_loss')
    
    #Optimzer
    optimizer = tf.train.AdamOptimizer(learning_rate)
    optimizer = optimizer.minimize(cross_entropy_loss, name = 'train_op')
    
    ##Computing the predictions accuracy
    correct_predictions = tf.equal(predictions['labels'], tf_y, name = 'correct_preds')
    
    accuracy = tf.reduce_mean(tf.cast(correct_predictions, tf.float32), name = 'accuracy')

In [13]:
def save(saver, sess, epoch, path='./model/'):
    if not os.path.isdir(path):
        os.makedirs(path)
    print('Saving model in %s' % path)
    saver.save(sess, os.path.join(path,'cnn-model.ckpt'),
               global_step=epoch)

    
def load(saver, sess, path, epoch):
    print('Loading model from %s' % path)
    saver.restore(sess, os.path.join(
            path, 'cnn-model.ckpt-%d' % epoch))

    
def train(sess, training_set, validation_set=None,
          initialize=True, epochs=20, shuffle=True,
          dropout=0.5, random_seed=None):

    X_data = np.array(training_set[0])
    y_data = np.array(training_set[1])
    training_loss = []

    ## initialize variables
    if initialize:
        sess.run(tf.global_variables_initializer())

    np.random.seed(random_seed) # for shuflling in batch_generator
    for epoch in range(1, epochs+1):
        batch_gen = batch_generator(
                        X_data, y_data, 
                        shuffle=shuffle)
        avg_loss = 0.0
        for i,(batch_x,batch_y) in enumerate(batch_gen):
            feed = {'tf_x:0': batch_x, 
                    'tf_y:0': batch_y, 
                    'fc_keep_prob:0': dropout}
            loss, _ = sess.run(
                    ['cross_entropy_loss:0', 'train_op'],
                    feed_dict=feed)
            avg_loss += loss

        training_loss.append(avg_loss / (i+1))
        print('Epoch %02d Training Avg. Loss: %7.3f' % (
            epoch, avg_loss), end=' ')
        if validation_set is not None:
            feed = {'tf_x:0': validation_set[0],
                    'tf_y:0': validation_set[1],
                    'fc_keep_prob:0':1.0}
            valid_acc = sess.run('accuracy:0', feed_dict=feed)
            print(' Validation Acc: %7.3f' % valid_acc)
        else:
            print()

            
def predict(sess, X_test, return_proba=False):
    feed = {'tf_x:0': X_test, 
            'fc_keep_prob:0': 1.0}
    if return_proba:
        return sess.run('probabilities:0', feed_dict=feed)
    else:
        return sess.run('labels:0', feed_dict=feed)

In [14]:
import tensorflow as tf
import numpy as np

## Define random seed
random_seed = 123

np.random.seed(random_seed)


## create a graph
g = tf.Graph()
with g.as_default():
    tf.set_random_seed(random_seed)
    ## build the graph
    build_cnn()
    
    #Create graph
    file_writer = tf.summary.FileWriter(logdir='./logs/', graph = g)

    ## saver:
    saver = tf.train.Saver()


 Building 1st layer:
<tf.Variable 'conv_1/_weights:0' shape=(5, 5, 1, 32) dtype=float32_ref>
<tf.Variable 'conv_1/_biases:0' shape=(32,) dtype=float32_ref>
Tensor("conv_1/Conv2D:0", shape=(?, 24, 24, 32), dtype=float32)
Tensor("conv_1/net_pre-activation:0", shape=(?, 24, 24, 32), dtype=float32)
Tensor("conv_1/activation:0", shape=(?, 24, 24, 32), dtype=float32)

 Building 2nd layer:
<tf.Variable 'conv_2/_weights:0' shape=(5, 5, 32, 64) dtype=float32_ref>

W0820 08:07:10.718598  8440 deprecation.py:506] From <ipython-input-12-da56e103817f>:34: calling dropout (from tensorflow.python.ops.nn_ops) with keep_prob is deprecated and will be removed in a future version.
Instructions for updating:
Please use `rate` instead of `keep_prob`. Rate should be set to `rate = 1 - keep_prob`.



<tf.Variable 'conv_2/_biases:0' shape=(64,) dtype=float32_ref>
Tensor("conv_2/Conv2D:0", shape=(?, 8, 8, 64), dtype=float32)
Tensor("conv_2/net_pre-activation:0", shape=(?, 8, 8, 64), dtype=float32)
Tensor("conv_2/activation:0", shape=(?, 8, 8, 64), dtype=float32)

Building 3rd layer:
<tf.Variable 'fc_3/_weights:0' shape=(1024, 1024) dtype=float32_ref>
<tf.Variable 'fc_3/_biases:0' shape=(1024,) dtype=float32_ref>
Tensor("fc_3/MatMul:0", shape=(?, 1024), dtype=float32)
Tensor("fc_3/net_pre-activation:0", shape=(?, 1024), dtype=float32)
Tensor("fc_3/activation:0", shape=(?, 1024), dtype=float32)

Building 4th layer:

W0820 08:07:10.753504  8440 deprecation.py:323] From <ipython-input-12-da56e103817f>:48: softmax_cross_entropy_with_logits (from tensorflow.python.ops.nn_ops) is deprecated and will be removed in a future version.
Instructions for updating:

Future major versions of TensorFlow will allow gradients to flow
into the labels input on backprop by default.

See `tf.nn.softmax_cross_entropy_with_logits_v2`.




<tf.Variable 'fc_4/_weights:0' shape=(1024, 10) dtype=float32_ref>
<tf.Variable 'fc_4/_biases:0' shape=(10,) dtype=float32_ref>
Tensor("fc_4/MatMul:0", shape=(?, 10), dtype=float32)
Tensor("fc_4/net_pre-activation:0", shape=(?, 10), dtype=float32)


In [15]:
## create a TF session 
## and train the CNN model

with tf.Session(graph=g) as sess:
    train(sess, 
          training_set=(X_train_centered, y_train), 
          validation_set=(X_valid_centered, y_valid), 
          initialize=True,
          random_seed=123)
    save(saver, sess, epoch=20)

Epoch 01 Training Avg. Loss: 273.930  Validation Acc:   0.975
Epoch 02 Training Avg. Loss:  74.840  Validation Acc:   0.982
Epoch 03 Training Avg. Loss:  52.032  Validation Acc:   0.986
Epoch 04 Training Avg. Loss:  39.439  Validation Acc:   0.986
Epoch 05 Training Avg. Loss:  31.870  Validation Acc:   0.988
Epoch 06 Training Avg. Loss:  26.404  Validation Acc:   0.989
Epoch 07 Training Avg. Loss:  23.792  Validation Acc:   0.988
Epoch 08 Training Avg. Loss:  20.259  Validation Acc:   0.991
Epoch 09 Training Avg. Loss:  17.445  Validation Acc:   0.989
Epoch 10 Training Avg. Loss:  16.318  Validation Acc:   0.991
Epoch 11 Training Avg. Loss:  13.152  Validation Acc:   0.992
Epoch 12 Training Avg. Loss:  12.091  Validation Acc:   0.991
Epoch 13 Training Avg. Loss:  10.568  Validation Acc:   0.992
Epoch 14 Training Avg. Loss:  10.421  Validation Acc:   0.992
Epoch 15 Training Avg. Loss:   8.069  Validation Acc:   0.993
Epoch 16 Training Avg. Loss:   7.446  Validation Acc:   0.992
Epoch 17

In [16]:
#Restore a saved model, delete graph g, create a new graph g2 and reload the trained model to do predictions

### Calculate prediction accuracy on test set
### Restoring the saved model

del g

## Create graph
## and build the model

g2 = tf.Graph()
with g2.as_default():
    tf.set_random_seed(random_seed)
    ##Build the graph
    build_cnn()
    
    ##saver:
    saver = tf.train.Saver()
    
#Create a new session
#And restore the model
with tf.Session(graph = g2) as sess:
    load(saver, sess, epoch = 20, path = './model')
    
    preds = predict(sess, X_test_centered, return_proba = False)
    print('Test Accuracy: %.3f%%' % (100 * np.sum(preds == y_test)/len(y_test)))


 Building 1st layer:
<tf.Variable 'conv_1/_weights:0' shape=(5, 5, 1, 32) dtype=float32_ref>
<tf.Variable 'conv_1/_biases:0' shape=(32,) dtype=float32_ref>
Tensor("conv_1/Conv2D:0", shape=(?, 24, 24, 32), dtype=float32)
Tensor("conv_1/net_pre-activation:0", shape=(?, 24, 24, 32), dtype=float32)
Tensor("conv_1/activation:0", shape=(?, 24, 24, 32), dtype=float32)

 Building 2nd layer:
<tf.Variable 'conv_2/_weights:0' shape=(5, 5, 32, 64) dtype=float32_ref>
<tf.Variable 'conv_2/_biases:0' shape=(64,) dtype=float32_ref>
Tensor("conv_2/Conv2D:0", shape=(?, 8, 8, 64), dtype=float32)
Tensor("conv_2/net_pre-activation:0", shape=(?, 8, 8, 64), dtype=float32)
Tensor("conv_2/activation:0", shape=(?, 8, 8, 64), dtype=float32)

Building 3rd layer:
<tf.Variable 'fc_3/_weights:0' shape=(1024, 1024) dtype=float32_ref>
<tf.Variable 'fc_3/_biases:0' shape=(1024,) dtype=float32_ref>
Tensor("fc_3/MatMul:0", shape=(?, 1024), dtype=float32)
Tensor("fc_3/net_pre-activation:0", shape=(?, 1024), dtype=float32

W0820 08:08:52.683917  8440 deprecation.py:323] From C:\Users\rikkr\Anaconda3\lib\site-packages\tensorflow\python\training\saver.py:1276: checkpoint_exists (from tensorflow.python.training.checkpoint_management) is deprecated and will be removed in a future version.
Instructions for updating:
Use standard file APIs to check for files with this prefix.


Loading model from ./model
Test Accuracy: 99.290%


In [17]:
#Predicted labels and probabilities
##run the prediction on some test samples
np.set_printoptions(precision = 2, suppress = True)

with tf.Session(graph = g2) as sess:
    load(saver, sess, epoch = 20, path = './model')
    
    print(predict(sess, X_test_centered[:10], return_proba = False))
    print(predict(sess, X_test_centered[:10], return_proba = True))

Loading model from ./model
[7 2 1 0 4 1 4 9 5 9]
[[0. 0. 0. 0. 0. 0. 0. 1. 0. 0.]
 [0. 0. 1. 0. 0. 0. 0. 0. 0. 0.]
 [0. 1. 0. 0. 0. 0. 0. 0. 0. 0.]
 [1. 0. 0. 0. 0. 0. 0. 0. 0. 0.]
 [0. 0. 0. 0. 1. 0. 0. 0. 0. 0.]
 [0. 1. 0. 0. 0. 0. 0. 0. 0. 0.]
 [0. 0. 0. 0. 1. 0. 0. 0. 0. 0.]
 [0. 0. 0. 0. 0. 0. 0. 0. 0. 1.]
 [0. 0. 0. 0. 0. 1. 0. 0. 0. 0.]
 [0. 0. 0. 0. 0. 0. 0. 0. 0. 1.]]


In [18]:
## continue training for 20 more epochs
## without re-initializzing :: initizalize - false
# Create a new session and restore the model

with tf.Session(graph = g2) as sess:
    load(saver, sess, epoch = 20, path = './model')
    
    train(sess,
         training_set = (X_train_centered, y_train),
         validation_set = (X_valid_centered, y_valid),
          initialize = False,
          epochs = 20,
          random_seed = 123)
    
    save(saver, sess, epoch = 40, path = './model')
    
    preds = predict(sess, X_test_centered, return_proba = False)
    
    print('Test Accuracy: %.3f%%' % (100 * np.sum(preds == y_test)/len(y_test)))

Loading model from ./model
Epoch 01 Training Avg. Loss:   4.349  Validation Acc:   0.993
Epoch 02 Training Avg. Loss:   4.063  Validation Acc:   0.992
Epoch 03 Training Avg. Loss:   4.028  Validation Acc:   0.993
Epoch 04 Training Avg. Loss:   3.596  Validation Acc:   0.993
Epoch 05 Training Avg. Loss:   3.208  Validation Acc:   0.993
Epoch 06 Training Avg. Loss:   3.474  Validation Acc:   0.993
Epoch 07 Training Avg. Loss:   2.720  Validation Acc:   0.991
Epoch 08 Training Avg. Loss:   3.014  Validation Acc:   0.992
Epoch 09 Training Avg. Loss:   1.978  Validation Acc:   0.992
Epoch 10 Training Avg. Loss:   2.454  Validation Acc:   0.993
Epoch 11 Training Avg. Loss:   2.115  Validation Acc:   0.991
Epoch 12 Training Avg. Loss:   1.907  Validation Acc:   0.992
Epoch 13 Training Avg. Loss:   2.296  Validation Acc:   0.993
Epoch 14 Training Avg. Loss:   2.360  Validation Acc:   0.993
Epoch 15 Training Avg. Loss:   1.680  Validation Acc:   0.993
Epoch 16 Training Avg. Loss:   2.313  Valid

In [19]:
# Implementing a CNN in the tensorflow layers API

import tensorflow as tf
import numpy as np


class ConvNN(object):
    def __init__(self, batchsize=64,
                 epochs=20, learning_rate=1e-4, 
                 dropout_rate=0.5,
                 shuffle=True, random_seed=None):
        np.random.seed(random_seed)
        self.batchsize = batchsize
        self.epochs = epochs
        self.learning_rate = learning_rate
        self.dropout_rate = dropout_rate
        self.shuffle = shuffle
                
        g = tf.Graph()
        with g.as_default():
            ## set random-seed:
            tf.set_random_seed(random_seed)
            
            ## build the network:
            self.build()

            ## initializer
            self.init_op = \
                tf.global_variables_initializer()

            ## saver
            self.saver = tf.train.Saver()
            
        ## create a session
        self.sess = tf.Session(graph=g)
                
    def build(self):
        
        ## Placeholders for X and y:
        tf_x = tf.placeholder(tf.float32, 
                              shape=[None, 784],
                              name='tf_x')
        tf_y = tf.placeholder(tf.int32, 
                              shape=[None],
                              name='tf_y')
        is_train = tf.placeholder(tf.bool, 
                              shape=(),
                              name='is_train')

        ## reshape x to a 4D tensor: 
        ##  [batchsize, width, height, 1]
        tf_x_image = tf.reshape(tf_x, shape=[-1, 28, 28, 1],
                              name='input_x_2dimages')
        ## One-hot encoding:
        tf_y_onehot = tf.one_hot(indices=tf_y, depth=10,
                              dtype=tf.float32,
                              name='input_y_onehot')

        ## 1st layer: Conv_1
        h1 = tf.layers.conv2d(tf_x_image, 
                              kernel_size=(5, 5), 
                              filters=32, 
                              activation=tf.nn.relu)
        ## MaxPooling
        h1_pool = tf.layers.max_pooling2d(h1, 
                              pool_size=(2, 2), 
                              strides=(2, 2))
        ## 2n layer: Conv_2
        h2 = tf.layers.conv2d(h1_pool, kernel_size=(5,5), 
                              filters=64, 
                              activation=tf.nn.relu)
        ## MaxPooling 
        h2_pool = tf.layers.max_pooling2d(h2, 
                              pool_size=(2, 2), 
                              strides=(2, 2))

        ## 3rd layer: Fully Connected
        input_shape = h2_pool.get_shape().as_list()
        n_input_units = np.prod(input_shape[1:])
        h2_pool_flat = tf.reshape(h2_pool, 
                              shape=[-1, n_input_units])
        h3 = tf.layers.dense(h2_pool_flat, 1024, 
                              activation=tf.nn.relu)

        ## Dropout
        h3_drop = tf.layers.dropout(h3, 
                              rate=self.dropout_rate,
                              training=is_train)
        
        ## 4th layer: Fully Connected (linear activation)
        h4 = tf.layers.dense(h3_drop, 10, 
                              activation=None)

        ## Prediction
        predictions = {
            'probabilities': tf.nn.softmax(h4, 
                              name='probabilities'),
            'labels': tf.cast(tf.argmax(h4, axis=1), 
                              tf.int32, name='labels')}
        
        ## Loss Function and Optimization
        cross_entropy_loss = tf.reduce_mean(
            tf.nn.softmax_cross_entropy_with_logits(
                logits=h4, labels=tf_y_onehot),
            name='cross_entropy_loss')
        
        ## Optimizer
        optimizer = tf.train.AdamOptimizer(self.learning_rate)
        optimizer = optimizer.minimize(cross_entropy_loss,
                                       name='train_op')

        ## Finding accuracy
        correct_predictions = tf.equal(
            predictions['labels'], 
            tf_y, name='correct_preds')
        
        accuracy = tf.reduce_mean(
            tf.cast(correct_predictions, tf.float32),
            name='accuracy')

    def save(self, epoch, path='./tflayers-model/'):
        if not os.path.isdir(path):
            os.makedirs(path)
        print('Saving model in %s' % path)
        self.saver.save(self.sess, 
                        os.path.join(path, 'model.ckpt'),
                        global_step=epoch)
        
    def load(self, epoch, path):
        print('Loading model from %s' % path)
        self.saver.restore(self.sess, 
             os.path.join(path, 'model.ckpt-%d' % epoch))
        
    def train(self, training_set, 
              validation_set=None,
              initialize=True):
        ## initialize variables
        if initialize:
            self.sess.run(self.init_op)

        self.train_cost_ = []
        X_data = np.array(training_set[0])
        y_data = np.array(training_set[1])

        for epoch in range(1, self.epochs + 1):
            batch_gen = \
                batch_generator(X_data, y_data, 
                                 shuffle=self.shuffle)
            avg_loss = 0.0
            for i, (batch_x,batch_y) in \
                enumerate(batch_gen):
                feed = {'tf_x:0': batch_x, 
                        'tf_y:0': batch_y,
                        'is_train:0': True} ## for dropout
                loss, _ = self.sess.run(
                        ['cross_entropy_loss:0', 'train_op'], 
                        feed_dict=feed)
                avg_loss += loss
                
            print('Epoch %02d: Training Avg. Loss: '
                  '%7.3f' % (epoch, avg_loss), end=' ')
            if validation_set is not None:
                feed = {'tf_x:0': batch_x, 
                        'tf_y:0': batch_y,
                        'is_train:0': False} ## for dropout
                valid_acc = self.sess.run('accuracy:0',
                                          feed_dict=feed)
                print('Validation Acc: %7.3f' % valid_acc)
            else:
                print()
                    
    def predict(self, X_test, return_proba = False):
        feed = {'tf_x:0': X_test,
                'is_train:0': False} ## for dropout
        if return_proba:
            return self.sess.run('probabilities:0',
                                 feed_dict=feed)
        else:
            return self.sess.run('labels:0',
                                 feed_dict=feed)




In [20]:
cnn = ConvNN(random_seed=123)

#Train and save after 20 epochs
cnn.train(training_set=(X_train_centered, y_train), 
          validation_set=(X_valid_centered, y_valid))

cnn.save(epoch=20)


W0820 08:10:31.195315  8440 deprecation.py:323] From <ipython-input-19-8f6df128752e>:63: conv2d (from tensorflow.python.layers.convolutional) is deprecated and will be removed in a future version.
Instructions for updating:
Use `tf.keras.layers.Conv2D` instead.
W0820 08:10:31.443673  8440 deprecation.py:323] From <ipython-input-19-8f6df128752e>:67: max_pooling2d (from tensorflow.python.layers.pooling) is deprecated and will be removed in a future version.
Instructions for updating:
Use keras.layers.MaxPooling2D instead.
W0820 08:10:31.566291  8440 deprecation.py:323] From <ipython-input-19-8f6df128752e>:83: dense (from tensorflow.python.layers.core) is deprecated and will be removed in a future version.
Instructions for updating:
Use keras.layers.dense instead.
W0820 08:10:31.795825  8440 deprecation.py:323] From <ipython-input-19-8f6df128752e>:88: dropout (from tensorflow.python.layers.core) is deprecated and will be removed in a future version.
Instructions for updating:
Use keras.la

Epoch 01: Training Avg. Loss: 268.570 Validation Acc:   0.875
Epoch 02: Training Avg. Loss:  74.659 Validation Acc:   0.938
Epoch 03: Training Avg. Loss:  52.260 Validation Acc:   0.938
Epoch 04: Training Avg. Loss:  39.475 Validation Acc:   1.000
Epoch 05: Training Avg. Loss:  33.259 Validation Acc:   1.000
Epoch 06: Training Avg. Loss:  27.794 Validation Acc:   1.000
Epoch 07: Training Avg. Loss:  22.607 Validation Acc:   1.000
Epoch 08: Training Avg. Loss:  20.239 Validation Acc:   1.000
Epoch 09: Training Avg. Loss:  17.094 Validation Acc:   1.000
Epoch 10: Training Avg. Loss:  14.963 Validation Acc:   1.000
Epoch 11: Training Avg. Loss:  13.567 Validation Acc:   1.000
Epoch 12: Training Avg. Loss:  11.026 Validation Acc:   0.938
Epoch 13: Training Avg. Loss:  10.850 Validation Acc:   1.000
Epoch 14: Training Avg. Loss:   9.199 Validation Acc:   1.000
Epoch 15: Training Avg. Loss:   8.078 Validation Acc:   1.000
Epoch 16: Training Avg. Loss:   7.241 Validation Acc:   1.000
Epoch 17

In [22]:
preds = cnn.predict(X_test_centered)

print('Test Accuracy: %.2f%%' % (100*
      np.sum(y_test == preds)/len(y_test)))

Test Accuracy: 99.35%
