# CIFAR-10 Convolutional neural network

## Exercise - Load data

> **Exercise**: Load the CIFAR-10 data. Normalize the images and split them into train, validation and test sets. Define a `get_batches(X, y, batch_size)` function to generate random X/y batches of size `batch_size` using a Python generator.

In [1]:
import numpy as np
import os

# loading the data
with np.load(os.path.join('data', 'cifar10-10k.npz'), allow_pickle=False) as npz_file:
    cifar10 = dict(npz_file.items())


# changing the data type from uint8 to float32
data = cifar10['data'].astype(np.float32)


# standardize the images
data = (data-128)/255

from sklearn.model_selection import train_test_split

# Split into train and validation sets
X_train, X_valid, y_train, y_valid = train_test_split(
    # Reshape images (32 by 32)
    data.reshape(-1, 32, 32, 3), # 3 channels RGB
    cifar10['labels'],
    test_size=500, random_state=0
)

# Print shape
print('Train:', X_train.shape, y_train.shape)
# Returns: (19500, 28, 28, 1) (19500,)
print('Valid:', X_valid.shape, y_valid.shape)
# Returns: (500, 28, 28, 1) (500,)

Train: (9500, 32, 32, 3) (9500,)
Valid: (500, 32, 32, 3) (500,)


In [2]:
# Batch generator
def get_batches(X, y, batch_size):
    # Shuffle X,y
    shuffled_idx = np.arange(len(y)) # 1,2,...,n
    np.random.shuffle(shuffled_idx)

    # Enumerate indexes by steps of batch_size
    # i: 0, b, 2b, 3b, 4b, .. where b is the batch size
    for i in range(0, len(y), batch_size):
        # Batch indexes
        batch_idx = shuffled_idx[i:i+batch_size]
        yield X[batch_idx], y[batch_idx]


## Exercise - Create and train a ConvNet

> **Exercise:** Create a convolutional neural network and train it using your batch generator. Evaluate the accuracy on the validation set after each epoch. Test different architectures and parameters. Evaluate your best network on the test set. Save the trained kernel weights of the first convolutional layer in a variable.

In [3]:
import tensorflow as tf

# Redefine graph
graph = tf.Graph()

with graph.as_default():
    # Placeholders
    X = tf.placeholder(dtype=tf.float32, shape=[None, 32, 32, 3])
    y = tf.placeholder(dtype=tf.int32, shape=[None])

    hidden = tf.layers.conv2d(
        X, # Input data
        filters=128, # 64 filters
        kernel_size=(5, 5), # Kernel size: 5x5
        strides=(2, 2), # Stride: 2
        padding='SAME', # "same" padding
        activation=tf.nn.relu, # ReLU
        kernel_initializer=tf.truncated_normal_initializer(
            stddev=0.01, seed=0), # Small standard deviation
        name='hidden' # Add name
    )    
    
    # Convolutional layer
    #conv = tf.layers.conv2d(
    #    X, # Input data
    #    filters=64, # 64 filters
    #    kernel_size=(5, 5), # Kernel size: 5x5
    #    strides=(2, 2), # Stride: 2
    #    padding='SAME', # "same" padding
    #    activation=tf.nn.relu, # ReLU
    #    kernel_initializer=tf.truncated_normal_initializer(
    #        stddev=0.01, seed=0), # Small standard deviation
    #    name='conv' # Add name
    #)

print(hidden.shape) # Prints: (?, 14, 14, 16)

with graph.as_default():
    # Max pooling layer
    pool = tf.layers.max_pooling2d(
        hidden,
#        conv, # Convolution output
        pool_size=(2, 2), # Pool size: 2
        strides=(2, 2), # Stride: 2
        padding='SAME' # "same" padding
    )

print(pool.shape) # Prints: (?, 7, 7, 16)

with graph.as_default():
    # Convolutional layer
    conv2 = tf.layers.conv2d(
        pool, # Max pooling output
        filters=16, # 16 filters
        kernel_size=(3, 3), # Kernel size: 3x3
        strides=(1, 1), # Stride: 1
        padding='SAME', # "same" padding
        activation=tf.nn.relu, # ReLU
        kernel_initializer=tf.truncated_normal_initializer(
            stddev=0.01, seed=0), # Small standard deviation
        name='conv2' # Add name
    )

    # Max pooling layer (2x2, stride: 2)
    pool2 = tf.layers.max_pooling2d(
        conv2, pool_size=(2, 2), strides=(2, 2), padding='SAME')

print(conv2.shape)
print(pool2.shape)


# adding a forth layer
with graph.as_default():
    # Convolutional layer
    conv3 = tf.layers.conv2d(
        pool2, # Max pooling output
        filters=8, # 16 filters
        kernel_size=(3, 3), # Kernel size: 3x3
        strides=(1, 1), # Stride: 1
        padding='SAME', # "same" padding
        activation=tf.nn.relu, # ReLU
        kernel_initializer=tf.truncated_normal_initializer(
            stddev=0.01, seed=0), # Small standard deviation
        name='conv3' # Add name
    )

    # Max pooling layer (2x2, stride: 2)
    pool3 = tf.layers.max_pooling2d(
        conv3, pool_size=(2, 2), strides=(2, 2), padding='SAME')

print(conv3.shape)
print(pool3.shape)




with graph.as_default():
    # Flatten output
    flat_output = tf.contrib.layers.flatten(pool2)

print(flat_output.shape)


# adding the dropout
with graph.as_default():
    # placeholder
    training = tf.placeholder(dtype=tf.bool)
    
    # apply dropout
    hidden = tf.layers.dropout(
        hidden, rate=0.5, seed=0, training=training)


with graph.as_default():
    # Output layer
    logits = tf.layers.dense(
        flat_output, 10, # Output units: 10
        activation=None, # No activation function
        kernel_initializer=tf.variance_scaling_initializer(scale=1, seed=0),
        bias_initializer=tf.zeros_initializer(),
        name='dense'
    )

print(logits.shape) # Prints: (?, 10)


with graph.as_default():
    # Mean cross-entropy
    mean_ce = tf.reduce_mean(
        tf.nn.sparse_softmax_cross_entropy_with_logits(
            labels=y, logits=logits))

    # Adam optimizer
    lr = tf.placeholder(dtype=tf.float32)
    gd = tf.train.AdamOptimizer(learning_rate=lr)

    # Minimize cross-entropy
    train_op = gd.minimize(mean_ce)

    # Compute predictions and accuracy
    predictions = tf.argmax(logits, axis=1, output_type=tf.int32)
    is_correct = tf.equal(y, predictions)
    accuracy = tf.reduce_mean(tf.cast(is_correct, dtype=tf.float32))


#Looking at the variables in the chart
with graph.as_default():
    # Get variables in the graph
    for v in tf.trainable_variables():
        print(v)

with graph.as_default():
    # Kernel weights of the 1st conv. layer
    with tf.variable_scope('hidden', reuse=True):
        conv_kernels = tf.get_variable('kernel')
print(conv_kernels.shape)

with graph.as_default():
    # Kernel weights of the 1st conv. layer
    with tf.variable_scope('conv2', reuse=True):
        conv2_kernels = tf.get_variable('kernel')

print(conv2_kernels.shape)

(?, 16, 16, 128)
(?, 8, 8, 128)
(?, 8, 8, 16)
(?, 4, 4, 16)
(?, 4, 4, 8)
(?, 2, 2, 8)
(?, 256)
(?, 10)
<tf.Variable 'hidden/kernel:0' shape=(5, 5, 3, 128) dtype=float32_ref>
<tf.Variable 'hidden/bias:0' shape=(128,) dtype=float32_ref>
<tf.Variable 'conv2/kernel:0' shape=(3, 3, 128, 16) dtype=float32_ref>
<tf.Variable 'conv2/bias:0' shape=(16,) dtype=float32_ref>
<tf.Variable 'conv3/kernel:0' shape=(3, 3, 16, 8) dtype=float32_ref>
<tf.Variable 'conv3/bias:0' shape=(8,) dtype=float32_ref>
<tf.Variable 'dense/kernel:0' shape=(256, 10) dtype=float32_ref>
<tf.Variable 'dense/bias:0' shape=(10,) dtype=float32_ref>
(5, 5, 3, 128)
(3, 3, 128, 16)


In [None]:
#Train the network

# Validation accuracy
valid_acc_values = []

with tf.Session(graph=graph) as sess:
    # Initialize variables
    sess.run(tf.global_variables_initializer())

    # Set seed
    np.random.seed(0)

    # Train several epochs
    for epoch in range(10):
        # Accuracy values (train) after each batch
        batch_acc = []

        for X_batch, y_batch in get_batches(X_train, y_train, 128):
            # Run training and evaluate accuracy
            _, acc_value = sess.run([train_op, accuracy], feed_dict={
                X: X_batch,
                y: y_batch,
                lr: 0.001, # Learning rate
                training: True
            })

            # Save accuracy (current batch)
            batch_acc.append(acc_value)

        # Evaluate validation accuracy
        valid_acc = sess.run(accuracy, feed_dict={
            X: X_valid,
            y: y_valid,
            training: False
        })
        valid_acc_values.append(valid_acc)

        # Print progress
        print('Epoch {} - valid: {:.3f} train: {:.3f} (mean)'.format(
            epoch+1, valid_acc, np.mean(batch_acc)
        ))

    # Get 1st conv. layer kernels
    # get the convolutional layers kernels
    ker = conv_kernels.eval()
    ker2 = conv2_kernels.eval()

Epoch 1 - valid: 0.326 train: 0.257 (mean)
Epoch 2 - valid: 0.418 train: 0.385 (mean)
Epoch 3 - valid: 0.436 train: 0.436 (mean)
Epoch 4 - valid: 0.468 train: 0.477 (mean)
Epoch 5 - valid: 0.504 train: 0.495 (mean)


In [None]:
%matplotlib inline
import matplotlib.pyplot as plt

plt.plot(valid_acc_values)
plt.title('Validation accuracy')
plt.xlabel('epoch')
plt.ylabel('accuracy')
plt.show()

## Exercise - Visualize kernels

> **Exercise**: Plot the kernels from the first convolutional layer with the `imshow()` function.

**Hint**: Remember that the `imshow()` function expects values between 0 and 1 for 3-dimensional arrays.

In [None]:
%matplotlib inline
import matplotlib.pyplot as plt

ker_norm = (ker / 255)
#plt.imshow(ker_norm[1,1,:])
#plt.imshow()

In [None]:
ker.shape

In [None]:
plt.imshow(ker_norm[1,1,:])
plt.show()

In [None]:
plt.imshow(ker_norm[:,:,0,4])

In [None]:
%matplotlib inline
import matplotlib.pyplot as plt

#ker_norm = (ker / 255)

# create figure with subplots

fig, axes = plt.subplots(nrows=8, ncols=8, figsize=(8,8))
for i, axis in enumerate(axes.flatten()):
    # getting the kernel i
    kernel = ker_norm[:,:,0,i]
    
    # plot kernel with imshow
    plt.imshow(kernel)
    plt.show()

NameError: name 'ker_norm' is not defined