# <div style="text-align: center"> CNN Architectures </div>
#### <div style="text-align: right"> 2019.09.19 Thursday </div>
#### <div style="text-align: right"> Prof. Changho Suh, TA Jaewoong Cho, Junhyung Ahn</div>

**실습목표**
- 수업시간에 배운 AlexNet, ResNet 생성
- CIFAR10 Data 불러오기, 데이터 확인 
- Validation 및 Early stopping 개념 이해
- AlexNet과 ResNet으로 CIFAR10 training
- 성능비교 

In [None]:
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
import tensorflow as tf
from tensorflow import keras as keras
import numpy as np

import os
import matplotlib.pyplot as plt

from sklearn.model_selection import train_test_split

from tensorflow.keras.callbacks import ModelCheckpoint, EarlyStopping, LearningRateScheduler
from tensorflow.keras.datasets import cifar10, cifar100
from tensorflow.python.keras import backend as K
from tensorflow.python.keras.datasets.cifar import load_batch
from tensorflow.python.keras.utils.data_utils import get_file
from tensorflow.python.util.tf_export import keras_export
from tensorflow.keras.models import load_model

In [None]:
from tensorflow.compat.v1 import ConfigProto
from tensorflow.compat.v1 import InteractiveSession

# Start from a default TF1-compat session configuration.
config = ConfigProto()
# allow_growth is the TensorFlow GPU option for growing the GPU memory
# allocation as needed instead of reserving it all at start-up
# (per the TensorFlow GPU-options documentation).
config.gpu_options.allow_growth = True
# Open an interactive session with that configuration; the handle is kept in
# `session` so it stays referenced for the rest of the notebook.
session = InteractiveSession(config=config)

## 1) Functions

In [None]:
def get_smooth_batch_losses(array, smoothing_alpha=0.95):
    """Return a bias-corrected exponential moving average of ``array``.

    The running average is updated as
    ``value = alpha * value + (1 - alpha) * x`` and every emitted element is
    divided by ``1 - alpha ** (i + 1)`` (``i`` being the 0-based position),
    which compensates for the running average starting at zero.

    Args:
        array: Iterable of numeric values (e.g. per-batch losses or
            accuracies). Any iterable works; it does not need ``len()``.
        smoothing_alpha: Weight kept from the previous running average.
            Must satisfy ``0 <= smoothing_alpha < 1``. Defaults to 0.95.

    Returns:
        A list with one smoothed value per input element (empty list for
        empty input).

    Raises:
        ValueError: If ``smoothing_alpha`` is outside ``[0, 1)``; a value of
            1 would make the bias-correction denominator zero.
    """
    if not 0 <= smoothing_alpha < 1:
        raise ValueError(
            'smoothing_alpha must be in [0, 1), got %r' % (smoothing_alpha,))

    output = []
    value = 0
    # `step` is the 1-based position, i.e. the exponent of the bias correction.
    for step, sample in enumerate(array, start=1):
        value = smoothing_alpha * value + (1 - smoothing_alpha) * sample
        output.append(value / (1 - smoothing_alpha ** step))
    return output

In [None]:
def plot(cnn_batch_stats):
    """Draw a 2x2 grid of training curves from a stats-collecting callback.

    Top row: smoothed per-batch loss and per-batch training accuracy against
    training steps. Bottom row: per-epoch train/validation loss and
    per-epoch train/validation accuracy.

    Args:
        cnn_batch_stats: Object exposing ``batch_losses``, ``batch_acc``,
            ``epoch_train_loss``, ``epoch_val_loss``, ``epoch_train_acc`` and
            ``epoch_val_acc`` sequences.
    """
    label_size = 15
    plt.figure(figsize=(15, 15))

    # (subplot slot, y-axis label, y-axis limits, attribute holding the series)
    per_step_panels = (
        (1, "Loss", [0, 2], "batch_losses"),
        (2, "Train Accuracy", [0, 1], "batch_acc"),
    )
    for slot, y_label, y_limits, attr in per_step_panels:
        plt.subplot(2, 2, slot)
        plt.ylabel(y_label, fontsize=label_size)
        plt.xlabel("Training Steps", fontsize=label_size)
        plt.ylim(y_limits)
        # Batch-level series are noisy, so they are smoothed before plotting.
        plt.plot(get_smooth_batch_losses(getattr(cnn_batch_stats, attr)))

    # (subplot slot, y-axis label, y-axis limits, ((attribute, legend label), ...))
    per_epoch_panels = (
        (3, "Loss", [0, 3], (
            ("epoch_train_loss", 'train loss per epoch'),
            ("epoch_val_loss", 'validation loss per epoch'),
        )),
        (4, "Accuracy", [0, 1], (
            ("epoch_train_acc", 'train accuracy per epoch'),
            ("epoch_val_acc", 'validation accuracy per epoch'),
        )),
    )
    for slot, y_label, y_limits, curves in per_epoch_panels:
        plt.subplot(2, 2, slot)
        plt.ylabel(y_label, fontsize=label_size)
        plt.xlabel("Epochs", fontsize=label_size)
        plt.ylim(y_limits)
        for attr, legend_label in curves:
            plt.plot(getattr(cnn_batch_stats, attr), label=legend_label)
        plt.legend()

In [None]:
# For logging
class CollectBatchStats(tf.keras.callbacks.Callback):
    """Keras callback that records loss and accuracy per batch and per epoch.

    Attributes (all plain lists, appended to during training):
        batch_losses / batch_acc: training loss / accuracy after each batch.
        epoch_train_loss / epoch_train_acc: training loss / accuracy per epoch.
        epoch_val_loss / epoch_val_acc: validation loss / accuracy per epoch;
            only filled when the epoch logs contain validation entries.
    """

    # The accuracy entry is read under either spelling so the callback does
    # not depend on which name the model was compiled with.
    _ACC_KEYS = ('acc', 'accuracy')
    _VAL_ACC_KEYS = ('val_acc', 'val_accuracy')

    def __init__(self):
        # Let the base Callback set up its own attributes first.
        super(CollectBatchStats, self).__init__()
        self.batch_losses = []
        self.batch_acc = []
        self.epoch_train_loss = []
        self.epoch_train_acc = []
        self.epoch_val_loss = []
        self.epoch_val_acc = []

    @staticmethod
    def _first_available(logs, keys):
        """Return ``logs[key]`` for the first key in ``keys`` present in ``logs``.

        Raises:
            KeyError: If none of ``keys`` is present.
        """
        for key in keys:
            if key in logs:
                return logs[key]
        raise KeyError('none of %r found in logs (available keys: %r)'
                       % (keys, sorted(logs)))

    def on_batch_end(self, batch, logs=None):
        """Record the loss and accuracy reported for the finished batch."""
        if not logs:
            # Nothing was reported for this batch; there is nothing to record.
            return
        self.batch_losses.append(logs['loss'])
        self.batch_acc.append(self._first_available(logs, self._ACC_KEYS))

    def on_epoch_end(self, epoch, logs=None):
        """Record train (and, when present, validation) metrics for the epoch."""
        if not logs:
            return
        self.epoch_train_loss.append(logs['loss'])
        self.epoch_train_acc.append(self._first_available(logs, self._ACC_KEYS))
        # Validation entries are recorded as a pair so both lists stay aligned.
        if 'val_loss' in logs:
            self.epoch_val_loss.append(logs['val_loss'])
            self.epoch_val_acc.append(
                self._first_available(logs, self._VAL_ACC_KEYS))

In [None]:
def scheduler(epoch, lr, milestones=(30, 60), factor=0.1):
    """Step-decay learning-rate schedule, usable with ``LearningRateScheduler``.

    Args:
        epoch: Epoch index supplied by the caller.
        lr: Current learning rate.
        milestones: Epoch indices at which the rate is scaled.
            Defaults to ``(30, 60)``.
        factor: Multiplier applied to ``lr`` at each milestone.
            Defaults to ``0.1``.

    Returns:
        ``lr * factor`` when ``epoch`` is one of ``milestones``, otherwise
        ``lr`` unchanged.
    """
    if epoch in milestones:
        return lr * factor
    return lr

## 2) Models

### 2-1) AlexNet

In [None]:
# AlexNet-style network without batch normalization or dropout layers.
# Five 3x3 'same'-padded convolutions (each followed by ReLU, with 2x2 max
# pooling after the 1st, 2nd and 5th) feed three fully connected layers.
alexnet_model = tf.keras.models.Sequential([
    # 1. First layer (convolutional) - also declares the 32x32x3 input
    tf.keras.layers.Conv2D(filters=48, input_shape=(32, 32, 3), kernel_size=(3, 3),
                           strides=(1, 1), padding='same'),
    tf.keras.layers.Activation('relu'),
    tf.keras.layers.MaxPool2D(pool_size=(2, 2), strides=(2, 2), padding='valid'),

    # 2. Second layer (convolutional)
    tf.keras.layers.Conv2D(filters=96, kernel_size=(3, 3), strides=(1, 1), padding='same'),
    tf.keras.layers.Activation('relu'),
    tf.keras.layers.MaxPool2D(pool_size=(2, 2), strides=(2, 2), padding='valid'),

    # 3. Third layer (convolutional)
    tf.keras.layers.Conv2D(filters=192, kernel_size=(3, 3), strides=(1, 1), padding='same'),
    tf.keras.layers.Activation('relu'),

    # 4. Fourth layer (convolutional)
    tf.keras.layers.Conv2D(filters=192, kernel_size=(3, 3), strides=(1, 1), padding='same'),
    tf.keras.layers.Activation('relu'),

    # 5. Fifth layer (convolutional)
    tf.keras.layers.Conv2D(filters=256, kernel_size=(3, 3), strides=(1, 1), padding='same'),
    tf.keras.layers.Activation('relu'),
    tf.keras.layers.MaxPool2D(pool_size=(2, 2), strides=(2, 2), padding='valid'),

    # 6. Sixth layer (fully connected)
    tf.keras.layers.Flatten(),
    tf.keras.layers.Dense(512),
    tf.keras.layers.Activation('relu'),

    # 7. Seventh layer (fully connected)
    tf.keras.layers.Dense(256),
    tf.keras.layers.Activation('relu'),

    # 8. Eighth layer (fully connected) - softmax over the 10 output units
    tf.keras.layers.Dense(10, activation=tf.nn.softmax),
])

alexnet_model.summary()



### 2-2) ResNet

In [None]:
#ResNet
def _build_resnet():
    """Build the small residual network for 32x32x3 inputs and 10 classes.

    Layout: two conv-BN-ReLU units (32 filters), 2x2 max pooling, a
    conv-BN-ReLU unit plus one residual block (64 filters), 2x2 max pooling,
    a conv-BN-ReLU unit plus one residual block (128 filters), 8x8 average
    pooling, flatten, and a 10-way softmax layer.

    Building inside a function keeps the intermediate tensors local, so the
    notebook namespace is not polluted and the builtin ``input`` is not
    shadowed.

    Returns:
        An uncompiled ``keras.models.Model``.
    """
    layers = keras.layers

    def conv_bn(tensor, filters, relu=True):
        # 3x3 'same' convolution + batch normalization, optionally + ReLU.
        tensor = layers.Conv2D(filters, kernel_size=3, padding='same', strides=(1, 1))(tensor)
        tensor = layers.BatchNormalization()(tensor)
        if relu:
            tensor = layers.ReLU()(tensor)
        return tensor

    def residual_block(tensor, filters):
        # Two conv-BN units; the block input is added back before the last ReLU.
        shortcut = tensor
        tensor = conv_bn(tensor, filters)
        tensor = conv_bn(tensor, filters, relu=False)
        tensor = layers.Add()([tensor, shortcut])
        return layers.ReLU()(tensor)

    inputs = layers.Input(shape=(32, 32, 3))

    x = conv_bn(inputs, 32)
    x = conv_bn(x, 32)
    x = layers.MaxPool2D(2)(x)

    x = conv_bn(x, 64)
    x = residual_block(x, 64)
    x = layers.MaxPool2D(2)(x)

    x = conv_bn(x, 128)
    x = residual_block(x, 128)

    x = layers.AveragePooling2D(pool_size=(8, 8))(x)
    x = layers.Flatten()(x)

    # Fully-connected layer
    outputs = layers.Dense(10, activation=tf.nn.softmax)(x)
    return keras.models.Model(inputs, outputs)


resnet = _build_resnet()

resnet.summary()

In [None]:
# Registry used by the training cells to pick a network by name.
models = dict(alexnet=alexnet_model, resnet=resnet)

## 3) Dataset

In [None]:
# Load the CIFAR-10 train/test arrays through the Keras dataset helper.
(x_train, y_train), (x_test, y_test) = cifar10.load_data()

# Cast the images to float32 and divide by 255.
x_train = x_train.astype('float32') / 255.
x_test = x_test.astype('float32') / 255.

# Hold out one fifth of the training data as a validation set, stratified on
# the labels.
x_train, x_val, y_train, y_val = train_test_split(
    x_train, y_train,
    test_size=0.2,
    stratify=y_train,
)

print('x_train shape:', x_train.shape)
print(len(x_train), 'train samples')
print(len(x_test), 'test samples')

In [None]:
num_classes = 10
class_names = ['airplane', 'automobile', 'bird', 'cat', 'deer',
               'dog', 'frog', 'horse', 'ship', 'truck']

# Show one randomly chosen training image per class on a 2x5 grid.
fig = plt.figure(figsize=(8, 3))

for class_id in range(num_classes):
    ax = fig.add_subplot(2, 5, class_id + 1, xticks=[], yticks=[])
    # Row indices of the training labels equal to this class id.
    member_rows = np.where(y_train == class_id)[0]
    class_images = x_train[member_rows]
    chosen = class_images[np.random.randint(class_images.shape[0])]
    ax.set_title(class_names[class_id])
    plt.imshow(chosen)
plt.show()

## 4) Training

### 4-1) AlexNet training

In [None]:
# Experiment settings for the AlexNet run
epochs = 200            # maximum number of training epochs
batch_size = 128        # samples per gradient step
learning_rate = 0.001   # SGD learning rate
momentum = 0.9          # SGD momentum
# True  -> the training cell uses the learning-rate schedule callback
# False -> the training cell uses the early-stopping callback instead
LRSchedule_flag = False

In [None]:
model_name = 'alexnet'
cnn_model = models[model_name]

# Compile CNN model.
# The accuracy metric is requested as 'acc' so that the log keys are
# 'acc' / 'val_acc' - the names monitored by the checkpoint and
# early-stopping callbacks below. With 'accuracy', newer Keras versions
# report 'accuracy' / 'val_accuracy' and those callbacks never see their metric.
cnn_model.compile(optimizer=tf.keras.optimizers.SGD(learning_rate=learning_rate, momentum=momentum),
                  loss='sparse_categorical_crossentropy',
                  metrics=['acc'])

# Checkpoint directory: ./model/<model_name> (created with any missing parents)
model_save_folder_path = os.path.join('./model', model_name)
if not os.path.isdir(model_save_folder_path):
    os.makedirs(model_save_folder_path)

model_path = os.path.join(model_save_folder_path, 'best_model.hdf5')

# Callback collecting per-batch / per-epoch statistics for plotting
cnn_batch_stats = CollectBatchStats()
# Checkpoint callback: save only when validation accuracy improves
cb_checkpoint = ModelCheckpoint(filepath=model_path,
                                monitor='val_acc',
                                verbose=1,
                                save_best_only=True)
# Early-stopping callback: stop after 20 epochs without validation-accuracy improvement
cb_early_stopping = EarlyStopping(monitor='val_acc', patience=20)
# Learning-rate schedule callback
cb_learning_rate_schedule = LearningRateScheduler(scheduler)

# Either follow the learning-rate schedule for all epochs, or keep a constant
# rate and rely on early stopping.
if LRSchedule_flag:
    callbacks = [cnn_batch_stats, cb_checkpoint, cb_learning_rate_schedule]
else:
    callbacks = [cnn_batch_stats, cb_checkpoint, cb_early_stopping]

# Train CNN model
cnn_model.fit(x_train, y_train,
              batch_size=batch_size,
              epochs=epochs,
              callbacks=callbacks,
              validation_data=(x_val, y_val))

In [None]:
# Draw the training curves collected by the stats callback during the run above.
plot(cnn_batch_stats)

In [None]:
# Evaluate the in-memory model (its weights as of the end of training) on the test set.
test_loss, test_acc = cnn_model.evaluate(x_test, y_test)
print('Test acc = ', test_acc)

In [None]:
# Reload the checkpoint written by ModelCheckpoint (save_best_only=True) and
# evaluate it on the test set for comparison with the final-epoch model.
saved_model = load_model(model_path)
test_loss, test_acc = saved_model.evaluate(x_test, y_test)
print('Test acc = ', test_acc)

### 4-2) ResNet training

In [None]:
# Experiment settings for the ResNet run
epochs = 200            # maximum number of training epochs
batch_size = 128        # samples per gradient step
learning_rate = 0.01    # SGD learning rate
momentum = 0.9          # SGD momentum
# True  -> the training cell uses the learning-rate schedule callback
# False -> the training cell uses the early-stopping callback instead
LRSchedule_flag = False

In [None]:
model_name = 'resnet'
cnn_model = models[model_name]

# Compile CNN model.
# The accuracy metric is requested as 'acc' so that the log keys are
# 'acc' / 'val_acc' - the names monitored by the checkpoint and
# early-stopping callbacks below. With 'accuracy', newer Keras versions
# report 'accuracy' / 'val_accuracy' and those callbacks never see their metric.
cnn_model.compile(optimizer=tf.keras.optimizers.SGD(learning_rate=learning_rate, momentum=momentum),
                  loss='sparse_categorical_crossentropy',
                  metrics=['acc'])

# Checkpoint directory: ./model/<model_name> (created with any missing parents)
model_save_folder_path = os.path.join('./model', model_name)
if not os.path.isdir(model_save_folder_path):
    os.makedirs(model_save_folder_path)

model_path = os.path.join(model_save_folder_path, 'best_model.hdf5')

# Callback collecting per-batch / per-epoch statistics for plotting
cnn_batch_stats = CollectBatchStats()
# Checkpoint callback: save only when validation accuracy improves
cb_checkpoint = ModelCheckpoint(filepath=model_path,
                                monitor='val_acc',
                                verbose=1,
                                save_best_only=True)
# Early-stopping callback: stop after 20 epochs without validation-accuracy improvement
cb_early_stopping = EarlyStopping(monitor='val_acc', patience=20)
# Learning-rate schedule callback
cb_learning_rate_schedule = LearningRateScheduler(scheduler)

# Either follow the learning-rate schedule for all epochs, or keep a constant
# rate and rely on early stopping.
if LRSchedule_flag:
    callbacks = [cnn_batch_stats, cb_checkpoint, cb_learning_rate_schedule]
else:
    callbacks = [cnn_batch_stats, cb_checkpoint, cb_early_stopping]

# Train CNN model
cnn_model.fit(x_train, y_train,
              batch_size=batch_size,
              epochs=epochs,
              callbacks=callbacks,
              validation_data=(x_val, y_val))

In [None]:
# Draw the training curves collected by the stats callback during the run above.
plot(cnn_batch_stats)

In [None]:
# Evaluate the in-memory model (its weights as of the end of training) on the test set.
test_loss, test_acc = cnn_model.evaluate(x_test, y_test)
print('Test acc = ', test_acc)

In [None]:
# Reload the checkpoint written by ModelCheckpoint (save_best_only=True) and
# evaluate it on the test set for comparison with the final-epoch model.
saved_model = load_model(model_path)
test_loss, test_acc = saved_model.evaluate(x_test, y_test)
print('Test acc = ', test_acc)