# CNN CIFAR-10 Normalize Input

Ng [youtube](https://www.youtube.com/watch?v=FDCfw-YqWTE&index=9&list=PLkDaE6sCZn6Hn0vK8co82zjQtt3T2Nkqc)

<div align="center"><img src="img/Screen Shot 2018-12-07 at 12.49.17 PM.png" width="80%"></div>

##### Centering
$$
\begin{array}{lcl}
\mu&=&\frac{1}{m}\sum_{i=1}^m{\bf x}^{(i)}\\
{\bf x}^{(i)}&-=&\mu
\end{array}$$

##### Standardizing
$$
\begin{array}{lcl}
\sigma^2&=&\frac{1}{m}\sum_{i=1}^m\left({\bf x}^{(i)}\right)^2\\
{\bf x}^{(i)}&/=&\sigma
\end{array}$$

# Normalize Input

In [1]:
import numpy as np
import matplotlib.pyplot as plt
import tensorflow as tf
import os
from sklearn.metrics import confusion_matrix

# download these four from https://github.com/Hvass-Labs/TensorFlow-Tutorials
# we use these to download CIFAR10 dataset
import cache    # module from Hvass Labs
import dataset  # module from Hvass Labs
import download # module from Hvass Labs
import cifar10  # module from Hvass Labs

import utils

# this line should be commented out for regular python run 
%matplotlib inline  
# this line should be commented out for regular python run 


""" Hyperparameter """
# Dataset sizes are hard-coded; they are expected to equal x_train.shape[0] and
# x_test.shape[0] (50000 and 10000 according to the shapes printed by load_cifar10).
data_size_train = 50000
data_size_test = 10000
# Number of examples per mini-batch in both the training and the evaluation loops.
batch_size = 100
# Learning rate handed to the Adam optimizer.
lr = 1e-3
# Number of full passes over the training set.
epoch = 50


""" Data Loading """
def load_cifar10():
    """Download (if needed), load and standardize the CIFAR-10 dataset.

    The per-pixel mean and standard deviation are computed once from the raw
    training images and then applied to both the training and the test
    images, so both splits go through exactly the same transformation.

    Returns:
        A tuple ``(x_train, y_train_cls, y_train, x_test, y_test_cls, y_test,
        class_names)``. ``x_*`` and ``y_*`` arrays are cast to float32 and
        ``*_cls`` arrays to int32; ``class_names`` is returned as loaded.
    """
    # make the download directory (and its parent) if it does not exist yet
    os.makedirs("data/CIFAR-10", exist_ok=True)

    # download and extract if not done yet
    # data is downloaded
    # from data_url = "https://www.cs.toronto.edu/~kriz/cifar-10-python.tar.gz"
    # to data_path  = "data/CIFAR-10/"
    cifar10.data_path = "data/CIFAR-10/"
    cifar10.maybe_download_and_extract()

    # load data
    x_train, y_train_cls, y_train = cifar10.load_training_data()
    x_test, y_test_cls, y_test = cifar10.load_test_data()
    class_names = cifar10.load_class_names()

    # The statistics must come from the RAW training set and be captured before
    # x_train is overwritten. Recomputing them from the standardized x_train
    # yields mean ~0 and std ~1, which would leave x_test effectively unnormalized.
    train_mean = np.mean(x_train, axis=0)  # per-pixel, per-channel mean
    train_std = np.std(x_train, axis=0)    # per-pixel, per-channel std
    x_train = (x_train - train_mean) / train_std
    x_test = (x_test - train_mean) / train_std

    x_train = x_train.astype(np.float32)
    y_train_cls = y_train_cls.astype(np.int32)
    y_train = y_train.astype(np.float32)
    x_test = x_test.astype(np.float32)
    y_test_cls = y_test_cls.astype(np.int32)
    y_test = y_test.astype(np.float32)

    data = (x_train, y_train_cls, y_train, x_test, y_test_cls, y_test, class_names)

    # report the array shapes as a quick sanity check
    print()
    print(x_train.shape)
    print(x_test.shape)
    print(y_train.shape)
    print(y_test.shape)
    print(y_train_cls.shape)
    print(y_test_cls.shape)

    # shapes of the normalization statistics
    print()
    print(train_mean.shape) # (32, 32, 3)
    print(train_std.shape) # (32, 32, 3)

    return data

# Expected array shapes returned by load_cifar10():
# x_train.shape     :  (50000, 32, 32, 3)
# x_test.shape      :  (10000, 32, 32, 3)
# y_train.shape     :  (50000, 10)
# y_test.shape      :  (10000, 10)
# y_train_cls.shape :  (50000,)
# y_test_cls.shape  :  (10000,)
# Load the dataset once at module level and unpack the tuple into the names
# used by the graph-construction and training code.
data = load_cifar10()
x_train, y_train_cls, y_train, x_test, y_test_cls, y_test, class_names = data


""" Graph Construction """
# Set the graph-level seed used by TensorFlow's random ops (here: the weight
# initializers). NOTE: the NumPy shuffling in the training loop is not seeded.
tf.random.set_random_seed(337)

# placeholders
# x     : batch of input images
# y     : per-class label vectors fed to the cross-entropy cost (presumably one-hot)
# y_cls : integer class labels, used only for the accuracy computation
x = tf.placeholder(tf.float32, shape=(None, 32, 32, 3), name='x')
y = tf.placeholder(tf.float32, shape=(None, 10), name='y')
y_cls = tf.placeholder(tf.int32, shape=(None,), name='y_cls')

# weights and layers #################################################################
# NOTE: every layer below uses a weight tensor only; no bias terms are added.

# convolution layer 1: 3x3 conv (3 -> 32 channels), ReLU, 2x2 max-pool
# input and input.shape:   x,     (None, 32, 32, 3)
# output and output.shape: conv1, (None, 16, 16, 32)
conv1_W = tf.get_variable("conv1_W", \
                          shape=(3,3,3,32), \
                          initializer=tf.truncated_normal_initializer(stddev=0.1))
conv1 = tf.nn.conv2d(x, conv1_W, \
                     strides=(1,1,1,1), padding='SAME') # (None, 32, 32, 32)
conv1 = tf.nn.relu(conv1) # (None, 32, 32, 32)
conv1 = tf.nn.max_pool(conv1, ksize=(1,2,2,1), strides=(1,2,2,1), \
                       padding='SAME') # (None, 16, 16, 32)

# convolution layer 2: 3x3 conv (32 -> 64 channels), ReLU, 2x2 max-pool
# input and input.shape:   conv1, (None, 16, 16, 32)
# output and output.shape: conv2, (None, 8, 8, 64)
conv2_W = tf.get_variable("conv2_W", \
                          shape=(3,3,32,64), \
                          initializer=tf.truncated_normal_initializer(stddev=0.1))
conv2 = tf.nn.conv2d(conv1, conv2_W, strides=(1,1,1,1), \
                     padding='SAME') # (None, 16, 16, 64)
conv2 = tf.nn.relu(conv2) # (None, 16, 16, 64)
conv2 = tf.nn.max_pool(conv2, ksize=(1,2,2,1), strides=(1,2,2,1), \
                       padding='SAME') # (None, 8, 8, 64)

# fully connected layer
# input and input.shape:   conv2, (None, 8, 8, 64)
# output and output.shape: fc,    (None, 256)
# 4096 = 8 * 8 * 64, the number of features per example after the second pooling
flatten = tf.reshape(conv2, (-1, 4096)) # (None, 4096) 
fc_W = tf.get_variable("fc_W", \
                        shape=(4096,256), \
                        initializer=tf.truncated_normal_initializer(stddev=0.1))
fc = tf.matmul(flatten, fc_W) # (None, 256) 
fc = tf.nn.relu(fc) # (None, 256)

# output layer (linear; the softmax is applied separately below)
# input and input.shape:   fc,     (None, 256)
# output and output.shape: logits, (None, 10)
out_W = tf.get_variable("out_W", \
                        shape=(256, 10), \
                        initializer=tf.truncated_normal_initializer(stddev=0.1))
logits = fc @ out_W # (None, 10) 

# weights and layers #################################################################

# y_pred, and y_pred_cls
# y_pred_cls takes the argmax of the logits directly
y_pred = tf.nn.softmax(logits, name='y_pred') # probabilities
y_pred_cls = tf.argmax(logits, axis=1, output_type=tf.int32)

# cross_entropy cost function, averaged over the batch
cross_entropy = tf.nn.softmax_cross_entropy_with_logits_v2(logits=logits,
                                                           labels=y)
cost = tf.reduce_mean(cross_entropy)

# optimizer
# This graph contains no batch-normalization layer, so there are no
# moving-average update ops to attach to the training step; the Adam
# minimization op is created directly.
train_op = tf.train.AdamOptimizer(learning_rate=lr).minimize(cost)

# accuracy on whatever batch is fed
# feed x and y_cls for the evaluation batch to compute it
correct_bool = tf.equal(y_cls, y_pred_cls)
test_accuracy = tf.reduce_mean(tf.cast(correct_bool, tf.float32))


""" Train and Test """
with tf.Session() as sess:
    # initialise every variable in the graph before the first training step
    sess.run(tf.global_variables_initializer())

    n_train_batches = data_size_train // batch_size
    n_test_batches = data_size_test // batch_size
    # flip to False to feed the training set in its stored order every epoch
    reshuffle = True

    # training: one pass over all mini-batches per epoch
    for i in range(epoch):
        if reshuffle:
            order = np.random.permutation(np.arange(data_size_train))
            x_epoch, y_epoch = x_train[order], y_train[order]
        else:
            x_epoch, y_epoch = x_train, y_train

        # summed (not averaged) mini-batch cost for this epoch
        cost_this_epoch = 0
        for b in range(n_train_batches):
            rows = slice(b * batch_size, (b + 1) * batch_size)
            _, batch_cost = sess.run(
                [train_op, cost],
                feed_dict={x: x_epoch[rows], y: y_epoch[rows]},
            )
            cost_this_epoch += batch_cost

        print('epoch_number    :', i)
        print('cost_this_epoch :', cost_this_epoch)
        print()

    # evaluation: per-batch accuracy plus the predicted class of every test image
    y_test_cls_pred = np.zeros(shape=(data_size_test), dtype=np.int32)
    test_accuracy_list = []
    for b in range(n_test_batches):
        rows = slice(b * batch_size, (b + 1) * batch_size)
        batch_accuracy, batch_pred = sess.run(
            [test_accuracy, y_pred_cls],
            feed_dict={x: x_test[rows], y: y_test[rows], y_cls: y_test_cls[rows]},
        )
        test_accuracy_list.append(batch_accuracy)
        y_test_cls_pred[rows] = batch_pred

    # overall accuracy is the mean of the per-batch accuracies
    print('Test Accuracy: ', np.mean(np.array(test_accuracy_list)))

    cm = confusion_matrix(y_true=y_test_cls, y_pred=y_test_cls_pred)
    print(cm)

Data has apparently already been downloaded and unpacked.
Loading data: data/CIFAR-10/cifar-10-batches-py/data_batch_1
Loading data: data/CIFAR-10/cifar-10-batches-py/data_batch_2
Loading data: data/CIFAR-10/cifar-10-batches-py/data_batch_3
Loading data: data/CIFAR-10/cifar-10-batches-py/data_batch_4
Loading data: data/CIFAR-10/cifar-10-batches-py/data_batch_5
Loading data: data/CIFAR-10/cifar-10-batches-py/test_batch
Loading data: data/CIFAR-10/cifar-10-batches-py/batches.meta

(50000, 32, 32, 3)
(10000, 32, 32, 3)
(50000, 10)
(10000, 10)
(50000,)
(10000,)

(32, 32, 3)
(32, 32, 3)
epoch_number    : 0
cost_this_epoch : 703.3905563354492

epoch_number    : 1
cost_this_epoch : 481.24912345409393

epoch_number    : 2
cost_this_epoch : 385.63674914836884

epoch_number    : 3
cost_this_epoch : 306.9986826479435

epoch_number    : 4
cost_this_epoch : 233.5288823246956

epoch_number    : 5
cost_this_epoch : 162.51460602879524

epoch_number    : 6
cost_this_epoch : 104.21656459569931

epoch_nu

# Normalize Input using Batch_Norm

In [1]:
import numpy as np
import matplotlib.pyplot as plt
import tensorflow as tf
import os
from sklearn.metrics import confusion_matrix

import cache    # module from Hvass Labs
import dataset  # module from Hvass Labs
import download # module from Hvass Labs
import cifar10  # module from Hvass Labs

import utils

# this line should be commented out for regular python run 
%matplotlib inline  
# this line should be commented out for regular python run 


""" Hyperparameter """
# Dataset sizes are hard-coded; they are expected to equal x_train.shape[0] and
# x_test.shape[0] (see the shape comments next to the load_cifar10() call).
data_size_train = 50000
data_size_test = 10000
# Number of examples per mini-batch in both the training and the evaluation loops.
batch_size = 100
# Learning rate handed to the Adam optimizer.
lr = 1e-3
# Number of full passes over the training set.
epoch = 50


""" Data Loading """
def load_cifar10():
    """Fetch CIFAR-10 (downloading on first use) and return it as typed arrays.

    No input normalization is applied here; the arrays are only cast.

    Returns:
        tuple: ``(x_train, y_train_cls, y_train, x_test, y_test_cls, y_test,
        class_names)`` with ``x_*``/``y_*`` as float32 and ``*_cls`` as int32.
    """
    # create the target directories one level at a time when they are missing
    for folder in ("data", "data/CIFAR-10"):
        if not os.path.isdir(folder):
            os.mkdir(folder)

    # the archive from https://www.cs.toronto.edu/~kriz/cifar-10-python.tar.gz
    # is fetched and unpacked into data/CIFAR-10/ unless that was already done
    cifar10.data_path = "data/CIFAR-10/"
    cifar10.maybe_download_and_extract()

    # read the training split, the test split and the class names, in that order
    train_images, train_cls, train_labels = cifar10.load_training_data()
    test_images, test_cls, test_labels = cifar10.load_test_data()
    names = cifar10.load_class_names()

    # cast to the dtypes expected by the float32 / int32 placeholders
    return (
        train_images.astype(np.float32),
        train_cls.astype(np.int32),
        train_labels.astype(np.float32),
        test_images.astype(np.float32),
        test_cls.astype(np.int32),
        test_labels.astype(np.float32),
        names,
    )

# Expected array shapes returned by load_cifar10():
# x_train.shape     :  (50000, 32, 32, 3)
# x_test.shape      :  (10000, 32, 32, 3)
# y_train.shape     :  (50000, 10)
# y_test.shape      :  (10000, 10)
# y_train_cls.shape :  (50000,)
# y_test_cls.shape  :  (10000,)
# Load the dataset once at module level and unpack the tuple into the names
# used by the graph-construction and training code.
data = load_cifar10()
x_train, y_train_cls, y_train, x_test, y_test_cls, y_test, class_names = data


""" Graph Construction """
# Set the graph-level seed used by TensorFlow's random ops (here: the weight
# initializers). NOTE: the NumPy shuffling in the training loop is not seeded.
tf.random.set_random_seed(337)

# placeholders
# x        : batch of input images (not normalized by load_cifar10 in this version)
# y        : per-class label vectors fed to the cross-entropy cost (presumably one-hot)
# y_cls    : integer class labels, used only for the accuracy computation
# is_train : scalar bool passed as `training` to the batch-normalization layer;
#            fed True in the training loop and False during evaluation
x = tf.placeholder(tf.float32, shape=(None, 32, 32, 3), name='x')
y = tf.placeholder(tf.float32, shape=(None, 10), name='y')
y_cls = tf.placeholder(tf.int32, shape=(None,), name='y_cls')
is_train = tf.placeholder(tf.bool, shape=(), name='is_train')

# weights and layers #################################################################
# NOTE: the conv / fc / output layers below use a weight tensor only; no bias terms.

# input normalization + convolution layer 1
# The raw input goes through a batch-normalization layer instead of being
# standardized with precomputed dataset statistics.
# input and input.shape:   x_normalized, (None, 32, 32, 3)
# output and output.shape: conv1,        (None, 16, 16, 32)
x_normalized = tf.layers.batch_normalization(x, training=is_train)
conv1_W = tf.get_variable("conv1_W", \
                          shape=(3,3,3,32), \
                          initializer=tf.truncated_normal_initializer(stddev=0.1))
conv1 = tf.nn.conv2d(x_normalized, conv1_W, \
                     strides=(1,1,1,1), padding='SAME') # (None, 32, 32, 32)
conv1 = tf.nn.relu(conv1) # (None, 32, 32, 32)
conv1 = tf.nn.max_pool(conv1, ksize=(1,2,2,1), strides=(1,2,2,1), \
                       padding='SAME') # (None, 16, 16, 32)

# convolution layer 2: 3x3 conv (32 -> 64 channels), ReLU, 2x2 max-pool
# input and input.shape:   conv1, (None, 16, 16, 32)
# output and output.shape: conv2, (None, 8, 8, 64)
conv2_W = tf.get_variable("conv2_W", \
                          shape=(3,3,32,64), \
                          initializer=tf.truncated_normal_initializer(stddev=0.1))
conv2 = tf.nn.conv2d(conv1, conv2_W, strides=(1,1,1,1), \
                     padding='SAME') # (None, 16, 16, 64)
conv2 = tf.nn.relu(conv2) # (None, 16, 16, 64)
conv2 = tf.nn.max_pool(conv2, ksize=(1,2,2,1), strides=(1,2,2,1), \
                       padding='SAME') # (None, 8, 8, 64)

# fully connected layer
# input and input.shape:   conv2, (None, 8, 8, 64)
# output and output.shape: fc,    (None, 256)
# 4096 = 8 * 8 * 64, the number of features per example after the second pooling
flatten = tf.reshape(conv2, (-1, 4096)) # (None, 4096) 
fc_W = tf.get_variable("fc_W", \
                        shape=(4096,256), \
                        initializer=tf.truncated_normal_initializer(stddev=0.1))
fc = tf.matmul(flatten, fc_W) # (None, 256) 
fc = tf.nn.relu(fc) # (None, 256)

# output layer (linear; the softmax is applied separately below)
# input and input.shape:   fc,     (None, 256)
# output and output.shape: logits, (None, 10)
out_W = tf.get_variable("out_W", \
                        shape=(256, 10), \
                        initializer=tf.truncated_normal_initializer(stddev=0.1))
logits = fc @ out_W # (None, 10) 

# weights and layers #################################################################

# y_pred, and y_pred_cls
# y_pred_cls takes the argmax of the logits directly
y_pred = tf.nn.softmax(logits, name='y_pred') # probabilities
y_pred_cls = tf.argmax(logits, axis=1, output_type=tf.int32)

# cross_entropy cost function, averaged over the batch
cross_entropy = tf.nn.softmax_cross_entropy_with_logits_v2(logits=logits,
                                                           labels=y)
cost = tf.reduce_mean(cross_entropy)

# optimizer
# https://towardsdatascience.com/batch-normalization-theory-and-how-to-use-it-with-tensorflow-1892ca0173ad
# Per the TensorFlow documentation, the batch-normalization layer's moving mean
# and moving variance are updated by ops collected under tf.GraphKeys.UPDATE_OPS.
# Making train_op depend on them runs those updates on every training step;
# without this the moving statistics used when is_train is False would not be
# updated and the network would not behave as expected at evaluation time.
update_ops = tf.get_collection(tf.GraphKeys.UPDATE_OPS)
with tf.control_dependencies(update_ops):
    train_op = tf.train.AdamOptimizer(learning_rate=lr).minimize(cost)

# accuracy on whatever batch is fed
# feed x, y_cls and is_train for the evaluation batch to compute it
correct_bool = tf.equal(y_cls, y_pred_cls)
test_accuracy = tf.reduce_mean(tf.cast(correct_bool, tf.float32))


""" Train and Test """
with tf.Session() as sess:
    # initialise every variable in the graph before the first training step
    sess.run(tf.global_variables_initializer())

    n_train_batches = data_size_train // batch_size
    n_test_batches = data_size_test // batch_size
    # flip to False to feed the training set in its stored order every epoch
    reshuffle = True

    # training: one pass over all mini-batches per epoch, batch norm in training mode
    for i in range(epoch):
        if reshuffle:
            order = np.random.permutation(np.arange(data_size_train))
            x_epoch, y_epoch = x_train[order], y_train[order]
        else:
            x_epoch, y_epoch = x_train, y_train

        # summed (not averaged) mini-batch cost for this epoch
        cost_this_epoch = 0
        for b in range(n_train_batches):
            rows = slice(b * batch_size, (b + 1) * batch_size)
            _, batch_cost = sess.run(
                [train_op, cost],
                feed_dict={x: x_epoch[rows], y: y_epoch[rows], is_train: True},
            )
            cost_this_epoch += batch_cost

        print('epoch_number    :', i)
        print('cost_this_epoch :', cost_this_epoch)
        print()

    # evaluation: is_train is fed as False for every test batch
    y_test_cls_pred = np.zeros(shape=(data_size_test), dtype=np.int32)
    test_accuracy_list = []
    for b in range(n_test_batches):
        rows = slice(b * batch_size, (b + 1) * batch_size)
        batch_accuracy, batch_pred = sess.run(
            [test_accuracy, y_pred_cls],
            feed_dict={x: x_test[rows], y: y_test[rows],
                       y_cls: y_test_cls[rows], is_train: False},
        )
        test_accuracy_list.append(batch_accuracy)
        y_test_cls_pred[rows] = batch_pred

    # overall accuracy is the mean of the per-batch accuracies
    print('Test Accuracy: ', np.mean(np.array(test_accuracy_list)))

    cm = confusion_matrix(y_true=y_test_cls, y_pred=y_test_cls_pred)
    print(cm)

Data has apparently already been downloaded and unpacked.
Loading data: data/CIFAR-10/cifar-10-batches-py/data_batch_1
Loading data: data/CIFAR-10/cifar-10-batches-py/data_batch_2
Loading data: data/CIFAR-10/cifar-10-batches-py/data_batch_3
Loading data: data/CIFAR-10/cifar-10-batches-py/data_batch_4
Loading data: data/CIFAR-10/cifar-10-batches-py/data_batch_5
Loading data: data/CIFAR-10/cifar-10-batches-py/test_batch
Loading data: data/CIFAR-10/cifar-10-batches-py/batches.meta
epoch_number    : 0
cost_this_epoch : 706.8615829348564

epoch_number    : 1
cost_this_epoch : 486.4096092581749

epoch_number    : 2
cost_this_epoch : 395.7249400615692

epoch_number    : 3
cost_this_epoch : 320.5073572397232

epoch_number    : 4
cost_this_epoch : 246.44318306446075

epoch_number    : 5
cost_this_epoch : 175.8351795077324

epoch_number    : 6
cost_this_epoch : 119.34511479735374

epoch_number    : 7
cost_this_epoch : 77.02031491324306

epoch_number    : 8
cost_this_epoch : 54.3723362069577

epo