# CNN

In [1]:
!type python 
from tqdm import tqdm
from platform import python_version
from warnings import filterwarnings
filterwarnings('ignore')
import tensorflow as tf
import numpy as np
import os
import sys
# report interpreter and TensorFlow versions (one per line)
print(python_version(), tf.__version__, sep="\n")
# session config shared by every tf.Session below: enable GPU memory growth
conf = tf.ConfigProto()
conf.gpu_options.allow_growth = True
# print(tf.test.is_gpu_available())

python is /home/kddlab/anaconda3/envs/swyoo_dl/bin/python
3.6.9
1.12.0


In [2]:
# the dataset is downloaded through tf.keras (checked with tensorflow_gpu=1.12.0)
(train_x, train_y), (test_x, test_y) = tf.keras.datasets.mnist.load_data()

# hold out the first 5000 training samples as validation set;
# the validation slices must be taken before train_x / train_y are rebound
valid_x, valid_y = train_x[:5000], train_y[:5000]
train_x, train_y = train_x[5000:], train_y[5000:]

print("Size of:")
print("\n".join(
    template.format(features.shape, labels.shape)
    for template, features, labels in (
        ("- Training-set:\t\t\t{} \t label: \t{}", train_x, train_y),
        ("- Validation-set:\t\t{} \t\t label: \t{}", valid_x, valid_y),
        ("- Test-set:\t\t\t{} \t label: \t{}", test_x, test_y),
    )
))

Size of:
- Training-set:			(55000, 28, 28) 	 label: 	(55000,)
- Validation-set:		(5000, 28, 28) 		 label: 	(5000,)
- Test-set:			(10000, 28, 28) 	 label: 	(10000,)


In [3]:
""" helper functions """
# yield mini-batch function
def get_minibatch(X, Y, b):
    """
    Yield shuffled mini-batches that together cover the dataset exactly once.

    inputs: 
    - X: data features, shape (N, D) (any array indexable by an integer index array)
    - Y: labels, shape (N, )
    - b: batch size, shape (); must be a positive integer
    
    outputs:
    - x_mini, y_mini: minibatch dataset

    notes:
    - the N shuffled indices are split into max(1, N // b) nearly equal parts,
      so every batch holds at least b samples when N >= b and the whole
      dataset forms a single batch when N < b
    - nothing is yielded for an empty dataset
    - because this is a generator, the checks below run on the first iteration

    raises:
    - ValueError: if b is not positive or X and Y differ in length
    """
    n = len(X)
    if b <= 0:
        raise ValueError("batch size b must be positive, got {}".format(b))
    if len(Y) != n:
        raise ValueError("X and Y must have the same length, got {} and {}".format(n, len(Y)))
    if n == 0:
        return
    # at least one section: np.array_split rejects 0 sections (happens when n < b)
    step = max(1, n // b)
    # yield minibatch for each step
    for indices in np.array_split(np.random.permutation(n), step):
        yield X[indices], Y[indices]

## layers module

In [4]:
""" layers module"""
class Model:
    """
    CNN classifier built as a TensorFlow 1.x graph:
    conv -> max-pool -> flatten -> fully connected -> dropout -> fully connected.

    Constructing an instance adds the placeholders, layers, loss and accuracy
    ops to the current default graph. `fit` trains (and optionally saves) the
    model, `test` restores saved weights and reports accuracy.
    """

    # computation graph 
    def __init__(self, F, FW, FS, PW, PS, H, W, HD, L, beta):
        """
        Build the forward pass, the loss and the evaluation metric.

        hyper parameters
        - F: number of convolution filters
        - FW, FS: convolution window size (FW x FW) and stride
        - PW, PS: pooling window size (PW x PW) and stride
        - H, W: height and width of one input image (e.g. _, H, W = train_x.shape)
        - HD: hidden dimension of the first fully connected layer (e.g. 128)
        - L: number of labels (e.g. 10)
        - beta: scale of the L2 regularizer
        """
        # feed
        self.X = tf.placeholder(dtype=tf.float32, shape=(None, H, W))
        # (None,) declares a rank-1 label vector; a bare (None) is just None, i.e. unknown shape
        self.Y = tf.placeholder(dtype=tf.int64, shape=(None,))
        # NOTE: despite its name, this placeholder is fed the *keep* probability (see tf.nn.dropout below)
        self.dropout = tf.placeholder(tf.float32)
        
        # helper
        reg = tf.contrib.layers.l2_regularizer(scale=beta)
        init = tf.contrib.layers.xavier_initializer()
        
        with tf.variable_scope('CNN', initializer=init, regularizer=reg):
            # CNN inputs need channel, so expand_dims function is used
            conv = tf.layers.conv2d(inputs=tf.expand_dims(self.X, axis=-1), filters=F, kernel_size=(FW,FW), strides=(FS,FS), activation=tf.nn.relu)
            pool = tf.layers.max_pooling2d(inputs=conv, pool_size=(PW,PW), strides=(PS,PS))
            
        with tf.variable_scope('FC', initializer=init, regularizer=reg):
            # flattened, so spatial information ignored
            L0 = tf.contrib.layers.flatten(pool)
            L1 = tf.contrib.layers.fully_connected(inputs=L0, num_outputs=HD, activation_fn=tf.nn.relu)
            L1 = tf.nn.dropout(L1, keep_prob=self.dropout)
            logits = tf.contrib.layers.fully_connected(inputs=L1, num_outputs=L, activation_fn=None)
        with tf.variable_scope('loss'):
            # data term (mean cross entropy) plus the sum of all collected regularization losses
            reg_loss = tf.get_collection(tf.GraphKeys.REGULARIZATION_LOSSES)
            self.loss = tf.reduce_mean(tf.nn.sparse_softmax_cross_entropy_with_logits(labels=self.Y, logits=logits))
            self.loss += tf.reduce_sum(reg_loss) # scalar
        
        # evaluation metric
        self.pred = tf.argmax(logits, axis=1)
        self.accuracy = tf.reduce_mean(tf.cast(tf.equal(self.pred, self.Y), dtype=tf.float32))
        
        
    # train and evaluation  
    def fit(self, config, train_x, train_y, valid_x, valid_y, epoch, lr, b, dr, keep_prob=0.7, 
            save=False, SAVE_FILE='./models_mnist/model', log=False, LOG_DIR='./models_mnist'):
        """
        Train with Adam on shuffled minibatches and report validation accuracy after every epoch.

        parameters
        - config: session configuration passed to tf.Session
        - train_x, train_y: training features / labels
        - valid_x, valid_y: validation features / labels (evaluated in a single feed)
        - save, SAVE_FILE: when save is True the trainable variables are written to SAVE_FILE
        - log, LOG_DIR: when log is True the graph and per-epoch summaries are written to LOG_DIR

        hyper parameters
        - epoch = 10
        - lr = 0.001
        - b = 150 # minibatch size
        - dr = 0.97 # learning rate decay rate
        - keep_prob = 0.7

        NOTE(review): this method adds new variables and ops (global_step, optimizer,
        summaries) to the default graph on every call; it looks intended to be called
        once per freshly built graph - confirm before calling it repeatedly.
        """
        step = tf.get_variable(name="global_step", shape=(), initializer=tf.zeros_initializer(), trainable=False)
        # each decay_step, learning rate will be decreased by decay_rate
        # decayed_learning_rate = learning_rate * decay_rate^(global_step / decay_steps) 
        lr = tf.train.exponential_decay(learning_rate=lr, global_step=step, decay_steps=100, decay_rate=dr, staircase=True)
        optimizer = tf.train.AdamOptimizer(learning_rate=lr).minimize(self.loss, global_step=step)
        
        # save options: only trainable variables are stored
        saver = tf.train.Saver(var_list=tf.get_collection(tf.GraphKeys.TRAINABLE_VARIABLES))
        
        # tensorboard visualization
        if log:
            train_loss = tf.get_variable(name='train_loss', shape=(), dtype=tf.float32, initializer=tf.zeros_initializer(), trainable=False)
            valid_acc = tf.get_variable(name='valid_acc', shape=(), dtype=tf.float32, initializer=tf.zeros_initializer(), trainable=False)
            train_loss_summ = tf.summary.scalar('train_loss', train_loss)
            valid_acc_summ = tf.summary.scalar('valid_acc', valid_acc)
            summ_op = tf.summary.merge([train_loss_summ, valid_acc_summ])
            # build the assign ops once and feed the values; creating them inside
            # the epoch loop would add new nodes to the graph on every epoch
            loss_feed = tf.placeholder(tf.float32, shape=())
            acc_feed = tf.placeholder(tf.float32, shape=())
            update_stats = [train_loss.assign(loss_feed), valid_acc.assign(acc_feed)]
        
        with tf.Session(config=config) as sess:
            # initialize all variables 
            sess.run(tf.global_variables_initializer())
            # add graph to tensorboard
            writer = None
            if log:
                print("@terminal: $ tensorboard --logdir={}".format(LOG_DIR))
                writer = tf.summary.FileWriter(LOG_DIR, session=sess)
                writer.add_graph(sess.graph)
            try:
                # minibatch training 
                for e in range(epoch):
                    loss_sum, n_batches = 0.0, 0
                    for x, y in tqdm(get_minibatch(train_x, train_y, b), desc='Train'):
                        feed = {self.X: x, self.Y: y, self.dropout: keep_prob}
                        loss_mini, _ = sess.run([self.loss, optimizer], feed_dict=feed)
                        loss_sum += loss_mini
                        n_batches += 1
                    # mean minibatch loss of this epoch (guarded against an empty epoch)
                    loss_train = loss_sum / max(n_batches, 1)
                    
                    # evaluation (dropout disabled: keep probability 1)
                    acc_valid = self.accuracy.eval(feed_dict={self.X: valid_x, self.Y: valid_y, self.dropout: 1})
                    print('epoch {} \t| loss: {:.4f} \t| acc_valid: {:.4f} \t| lr: {:0.5} '.format(e+1, loss_train, acc_valid, lr.eval()))
                    if log:
                        sess.run(update_stats, feed_dict={loss_feed: loss_train, acc_feed: acc_valid})
                        summary = sess.run(summ_op)
                        # summaries are indexed by the zero-based epoch number
                        writer.add_summary(summary, global_step=e)
            finally:
                # flush and release the event file even if training is interrupted
                if writer is not None:
                    writer.close()
            print("Training End")
            if save: 
                # make sure the target folder exists before writing the checkpoint
                save_dir = os.path.dirname(SAVE_FILE)
                if save_dir:
                    os.makedirs(save_dir, exist_ok=True)
                saver.save(sess, SAVE_FILE)
                print("save model @{}".format(SAVE_FILE))

    # restore model and evaluation
    def test(self, config, test_x, test_y, SAVE_FILE='./models_mnist/model'):
        """
        restore trained weights and evaluate a model

        parameters
        - config: session configuration passed to tf.Session
        - test_x, test_y: features / labels the accuracy is computed on
        - SAVE_FILE: checkpoint path previously written by fit(save=True)
        """
        # must list the same variables that fit() saved
        saver = tf.train.Saver(var_list=tf.get_collection(tf.GraphKeys.TRAINABLE_VARIABLES))
        
        with tf.Session(config=config) as sess:
            saver.restore(sess, SAVE_FILE)
            print("restore is completed")
            acc_test = self.accuracy.eval(feed_dict={self.X: test_x, self.Y: test_y, self.dropout: 1})
            print("acc_test: {}".format(acc_test))
            
    def summary(self):
        """
        Print every variable in the TRAINABLE_VARIABLES collection of the default graph.
        Other collections (e.g. tf.global_variables() or
        tf.GraphKeys.REGULARIZATION_LOSSES) can be listed the same way.
        """
        print("=============================================")
        print("list of all trainable parameters")
        print("=============================================")
        for x in tf.get_collection(tf.GraphKeys.TRAINABLE_VARIABLES):
            print(x)

In [5]:
# build the model on a fresh default graph
tf.reset_default_graph()
_, H, W = train_x.shape
model = Model(
    F=8, FW=9, FS=1,   # convolution: filters, window, stride
    PW=3, PS=3,        # pooling: window, stride
    H=H, W=W,          # input image size
    HD=128, L=10,      # hidden units, number of labels
    beta=0.1,          # L2 regularization scale
)
# train, checkpoint to the default SAVE_FILE and log to the default LOG_DIR
model.fit(
    conf, train_x, train_y, valid_x, valid_y,
    epoch=10, lr=0.001, b=150, dr=0.97, keep_prob=0.7,
    save=True, log=True,
)
model.summary()
# restore the checkpoint written above and evaluate on the test split
model.test(conf, test_x, test_y)

Train: 0it [00:00, ?it/s]

@terminal: $ tensorboard --logdir=./models_mnist


Train: 366it [00:01, 317.37it/s]
Train: 77it [00:00, 763.45it/s]

epoch 1 	| loss: 9.9526 	| acc_valid: 0.9350 	| lr: 0.00091267 


Train: 366it [00:00, 782.94it/s]
Train: 79it [00:00, 780.44it/s]

epoch 2 	| loss: 3.7436 	| acc_valid: 0.9618 	| lr: 0.00080798 


Train: 366it [00:00, 763.54it/s]
Train: 77it [00:00, 769.51it/s]

epoch 3 	| loss: 1.9982 	| acc_valid: 0.9722 	| lr: 0.00073742 


Train: 366it [00:00, 787.08it/s]
Train: 78it [00:00, 776.23it/s]

epoch 4 	| loss: 1.1881 	| acc_valid: 0.9722 	| lr: 0.00065284 


Train: 366it [00:00, 760.97it/s]
Train: 78it [00:00, 777.15it/s]

epoch 5 	| loss: 0.7784 	| acc_valid: 0.9786 	| lr: 0.00057795 


Train: 366it [00:00, 779.86it/s]
Train: 77it [00:00, 766.31it/s]

epoch 6 	| loss: 0.5543 	| acc_valid: 0.9772 	| lr: 0.00052748 


Train: 366it [00:00, 762.51it/s]
Train: 77it [00:00, 767.33it/s]

epoch 7 	| loss: 0.4278 	| acc_valid: 0.9792 	| lr: 0.00046698 


Train: 366it [00:00, 772.38it/s]
Train: 75it [00:00, 748.66it/s]

epoch 8 	| loss: 0.3485 	| acc_valid: 0.9796 	| lr: 0.00041341 


Train: 366it [00:00, 751.95it/s]
Train: 77it [00:00, 769.80it/s]

epoch 9 	| loss: 0.2979 	| acc_valid: 0.9776 	| lr: 0.00037731 


Train: 366it [00:00, 777.88it/s]


epoch 10 	| loss: 0.2631 	| acc_valid: 0.9804 	| lr: 0.00033403 
Training End
save model @./models_mnist/model
list of all trainable parameters
<tf.Variable 'CNN/conv2d/kernel:0' shape=(9, 9, 1, 8) dtype=float32_ref>
<tf.Variable 'CNN/conv2d/bias:0' shape=(8,) dtype=float32_ref>
<tf.Variable 'FC/fully_connected/weights:0' shape=(288, 128) dtype=float32_ref>
<tf.Variable 'FC/fully_connected/biases:0' shape=(128,) dtype=float32_ref>
<tf.Variable 'FC/fully_connected_1/weights:0' shape=(128, 10) dtype=float32_ref>
<tf.Variable 'FC/fully_connected_1/biases:0' shape=(10,) dtype=float32_ref>
INFO:tensorflow:Restoring parameters from ./models_mnist/model
restore is completed
acc_test: 0.9789999723434448


## keras module

In [6]:
from tensorflow.keras import Model
from tensorflow.keras.layers import Dense, Flatten, Dropout, Conv2D, MaxPool2D

How do you define a customized loss function in Keras? See this [blog post](https://towardsdatascience.com/advanced-keras-constructing-complex-custom-losses-and-metrics-c07ca130a618).

In [15]:
class MyModel(tf.keras.Model):
    """
    Keras counterpart of the graph-mode CNN above:
    conv (ReLU) -> max-pool -> flatten -> dense (ReLU) -> dropout -> dense (softmax).

    The base class is spelled tf.keras.Model so it cannot be confused with the
    notebook's own `Model` class.
    """

    def __init__(self):
        super(MyModel, self).__init__()
        
        # helper
        # The regularizer has to be handed to each layer: Keras layers create
        # their weights lazily on the first call, i.e. outside any
        # tf.variable_scope opened here, so a scope-level regularizer is never applied.
        reg = tf.keras.regularizers.l2(l=0.01)
        
        # CNN
        self.c1 = Conv2D(filters=8, kernel_size=(9,9), strides=(1,1),
                         activation='relu', kernel_regularizer=reg)
        self.p1 = MaxPool2D(pool_size=(3,3), strides=(3,3))

        # FC
        self.flat = Flatten()
        self.d1 = Dense(units=128, activation='relu', kernel_regularizer=reg)
        # Keras `rate` is the fraction of units to DROP; 0.3 matches keep_prob=0.7
        self.dr1 = Dropout(rate=0.3)
        self.d2 = Dense(units=10, activation='softmax', kernel_regularizer=reg)

    # overriding 
    def call(self, x, training=None):
        """
        params: 
        - x: inputs when it fits 
        - training: forwarded to the dropout layer; when None the layer falls
          back to the Keras learning phase
        return: predictions (softmax output of the last dense layer)
        """ 
        # CNN call: add the channel axis expected by Conv2D
        x = tf.expand_dims(x, axis=-1)
        x = self.p1(self.c1(x))
        
        # FC call
        x = self.d1(self.flat(x))
        x = self.dr1(x, training=training)
        
        return self.d2(x)

In [16]:
# build and compile the Keras model; labels are integer class ids,
# hence the sparse categorical cross entropy
model = MyModel()
model.compile(
    optimizer='adam',
    loss=tf.keras.losses.sparse_categorical_crossentropy,
    metrics=['accuracy'],
)

# train for 5 epochs on minibatches of 150 samples
history = model.fit(
    x=train_x,
    y=train_y,
    batch_size=150,
    epochs=5,
    verbose=1,
)
# optional: inspect the per-epoch metrics recorded by fit
# history_dict = history.history
# for key, values in history_dict.items():
#     print(key, values)

model.summary()

# evaluate on the test split and print the returned metrics
results = model.evaluate(x=test_x, y=test_y, verbose=1)
print(results)

Epoch 1/5
Epoch 2/5
Epoch 3/5
Epoch 4/5
Epoch 5/5
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
conv2d_5 (Conv2D)            multiple                  656       
_________________________________________________________________
max_pooling2d_5 (MaxPooling2 multiple                  0         
_________________________________________________________________
flatten_5 (Flatten)          multiple                  0         
_________________________________________________________________
dense_8 (Dense)              multiple                  36992     
_________________________________________________________________
dropout_4 (Dropout)          multiple                  0         
_________________________________________________________________
dense_9 (Dense)              multiple                  1290      
Total params: 38,938
Trainable params: 38,938
Non-trainable params: 0
______________________