In [1]:
import keras
from keras.datasets import cifar10
from keras.preprocessing.image import ImageDataGenerator
from keras.models import Sequential
from keras.layers import Dense, Dropout, Activation, Flatten
from keras.layers import Conv2D, MaxPooling2D
from keras.optimizers import RMSprop, Adam
import os

Using TensorFlow backend.


In [2]:
# Training configuration.
# Alternative setting: batch_size = 128 (lower the value if an OOM error occurs).
batch_size = 1024   # samples per batch; lower this value if an OOM error occurs
num_classes = 10    # number of classes; CIFAR-10 has 10
epochs = 10         # number of training epochs

# Load the data and report the size of each split.
(x_train, y_train), (x_test, y_test) = cifar10.load_data()
print('x_train shape:', x_train.shape)
print(len(x_train), 'train samples')
print(len(x_test), 'test samples')

# One-hot encode the labels (y_train / y_test start out as plain integers).
y_train, y_test = (
    keras.utils.to_categorical(labels, num_classes)
    for labels in (y_train, y_test)
)

x_train shape: (50000, 32, 32, 3)
50000 train samples
10000 test samples


## 首先我們使用一般的 DNN (MLP) 來訓練
由於 DNN 只能輸入一維的資料，我們要先將影像進行攤平，若 (50000, 32, 32, 3) 的影像，攤平後會變成 `(50000, 32*32*3)` = `(50000, 3072)`

In [3]:
# 將資料攤平成一維資料
# Flatten every sample into a 1-D feature vector for the fully connected
# network. The sample count comes from the array itself and -1 lets NumPy
# infer the flattened length, so nothing is hard-coded to 50000/10000 x 3072.
x_train = x_train.reshape(x_train.shape[0], -1)
x_test = x_test.reshape(x_test.shape[0], -1)

# Convert to float32 and divide by 255.
# NOTE: this cell rebinds x_train / x_test. Re-running it without reloading
# the data first divides by 255 a second time.
x_train = x_train.astype('float32')
x_test = x_test.astype('float32')
x_train /= 255
x_test /= 255
print(x_train.shape[0], 'train samples')
print(x_test.shape[0], 'test samples')

50000 train samples
10000 test samples


In [4]:
# Baseline MLP: two 512-unit ReLU layers, each followed by Dropout(0.2),
# then a softmax layer with one unit per class.
model = Sequential([
    Dense(512, activation='relu', input_shape=(3072,)),
    Dropout(0.2),
    Dense(512, activation='relu'),
    Dropout(0.2),
    Dense(num_classes, activation='softmax'),
])

model.summary()

model.compile(
    loss='categorical_crossentropy',
    optimizer=RMSprop(),
    metrics=['accuracy'],
)

# The test split is also passed as the validation data during training.
history = model.fit(
    x_train,
    y_train,
    batch_size=batch_size,
    epochs=epochs,
    verbose=1,
    validation_data=(x_test, y_test),
)

# evaluate() returns the loss first, then the compiled metrics.
score = model.evaluate(x_test, y_test, verbose=0)
print('Test loss:', score[0])
print('Test accuracy:', score[1])

Instructions for updating:
Colocations handled automatically by placer.
Instructions for updating:
Please use `rate` instead of `keep_prob`. Rate should be set to `rate = 1 - keep_prob`.
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
dense_1 (Dense)              (None, 512)               1573376   
_________________________________________________________________
dropout_1 (Dropout)          (None, 512)               0         
_________________________________________________________________
dense_2 (Dense)              (None, 512)               262656    
_________________________________________________________________
dropout_2 (Dropout)          (None, 512)               0         
_________________________________________________________________
dense_3 (Dense)              (None, 10)                5130      
Total params: 1,841,162
Trainable params: 1,841,162
Non-trainable params: 0
___________

## 接下來我們使用 CNN 來訓練神經網路
CNN 的原理非常適合處理影像類的資料，就讓我們來看看，同樣的訓練條件，CNN 是否顯著優於 DNN 呢?

In [5]:
# Reload the dataset: the previous section flattened x_train / x_test,
# while the CNN below takes the samples in their original shape.
(x_train, y_train), (x_test, y_test) = cifar10.load_data()
print('x_train shape:', x_train.shape)
print(len(x_train), 'train samples')
print(len(x_test), 'test samples')

# Cast to float32 and divide by 255.
x_train = x_train.astype('float32') / 255
x_test = x_test.astype('float32') / 255

# Convert the integer class vectors to one-hot class matrices.
y_train, y_test = (
    keras.utils.to_categorical(labels, num_classes)
    for labels in (y_train, y_test)
)

x_train shape: (50000, 32, 32, 3)
50000 train samples
10000 test samples


In [6]:
model = Sequential([
    # Block 1: two 32-filter 3x3 convolutions, 2x2 max pooling, Dropout(0.25).
    Conv2D(32, (3, 3), padding='same', input_shape=x_train.shape[1:]),
    Activation('relu'),
    Conv2D(32, (3, 3)),
    Activation('relu'),
    MaxPooling2D(pool_size=(2, 2)),
    Dropout(0.25),

    # Block 2: same layout with 64 filters.
    Conv2D(64, (3, 3), padding='same'),
    Activation('relu'),
    Conv2D(64, (3, 3)),
    Activation('relu'),
    MaxPooling2D(pool_size=(2, 2)),
    Dropout(0.25),

    # Classifier head: flatten, 512-unit ReLU layer, Dropout(0.5), softmax.
    Flatten(),
    Dense(512),
    Activation('relu'),
    Dropout(0.5),
    Dense(num_classes),
    Activation('softmax'),
])
model.summary()

model.compile(
    loss='categorical_crossentropy',
    optimizer=RMSprop(),
    metrics=['accuracy'],
)

# The test split is also passed as the validation data during training.
history = model.fit(
    x_train,
    y_train,
    batch_size=batch_size,
    epochs=epochs,
    verbose=1,
    validation_data=(x_test, y_test),
)

# evaluate() returns the loss first, then the compiled metrics.
score = model.evaluate(x_test, y_test, verbose=0)
print('Test loss:', score[0])
print('Test accuracy:', score[1])

_________________________________________________________________
Layer (type)                 Output Shape              Param #   
conv2d_1 (Conv2D)            (None, 32, 32, 32)        896       
_________________________________________________________________
activation_1 (Activation)    (None, 32, 32, 32)        0         
_________________________________________________________________
conv2d_2 (Conv2D)            (None, 30, 30, 32)        9248      
_________________________________________________________________
activation_2 (Activation)    (None, 30, 30, 32)        0         
_________________________________________________________________
max_pooling2d_1 (MaxPooling2 (None, 15, 15, 32)        0         
_________________________________________________________________
dropout_3 (Dropout)          (None, 15, 15, 32)        0         
_________________________________________________________________
conv2d_3 (Conv2D)            (None, 15, 15, 64)        18496     
__________

## 同樣運算 10 個 epochs，但 CNN 在 test data 的準確率顯著優於 DNN!

---

## 作業
1. 請試著調整各個超參數，並說明哪些超參數對於結果有明顯的影響?
2. CNN 與 DNN 哪個模型的參數數量比較多? 造成參數的數量不同的原因在哪?

In [7]:
batch_size = 128 # batch size; lower this value if an OOM error occurs
num_classes = 10 # number of classes; CIFAR-10 has 10
epochs = 10 # number of training epochs

In [8]:
# Dictionary keys for the tunable hyperparameters:
# KEY_CV_* entries configure the Conv2D layer, KEY_PL_* the MaxPooling2D layer.
KEY_CV_FILTERS     = 'cv_filters'
KEY_CV_KERNEL_SIZE = 'cv_kernel_size'
KEY_CV_PADDING     = 'cv_padding'
KEY_CV_STRIDES     = 'cv_strides'
KEY_PL_POOL_SIZE   = 'pl_pool_size'
KEY_PL_PADDING     = 'pl_padding'
KEY_PL_STRIDES     = 'pl_strides'

In [9]:
def build_CNN(input_shape, params, num_classes=10):
    """Assemble (without compiling) a single-conv-block CNN classifier.

    Layer order: Conv2D -> ReLU -> MaxPooling2D -> Flatten -> Dense(512)
    -> ReLU -> Dense(num_classes) -> softmax.

    Args:
        input_shape: Shape of one input sample, handed to the Conv2D layer.
        params: Mapping holding every KEY_CV_* and KEY_PL_* entry. Kernel
            size, strides and pool size are each given as a single value
            that is duplicated into a 2-tuple; the padding entries are
            passed through to the layers unchanged.
        num_classes: Number of units in the final Dense layer.

    Returns:
        The assembled Sequential model.

    Raises:
        KeyError: If `params` lacks one of the expected keys.
    """
    # Read every hyperparameter up front so that a missing key fails
    # before any layer is created.
    (filters, kernel, conv_padding, conv_stride,
     pool, pool_padding, pool_stride) = (
        params[name]
        for name in (
            KEY_CV_FILTERS,
            KEY_CV_KERNEL_SIZE,
            KEY_CV_PADDING,
            KEY_CV_STRIDES,
            KEY_PL_POOL_SIZE,
            KEY_PL_PADDING,
            KEY_PL_STRIDES,
        )
    )

    def square(size):
        # Use the same value for both entries of the 2-tuple.
        return (size, size)

    return Sequential([
        Conv2D(filters, square(kernel), padding=conv_padding,
               strides=square(conv_stride), input_shape=input_shape),
        Activation('relu'),
        MaxPooling2D(pool_size=square(pool), padding=pool_padding,
                     strides=square(pool_stride)),
        Flatten(),
        Dense(512),
        Activation('relu'),
        Dense(num_classes),
        Activation('softmax'),
    ])

In [10]:
## 調整超參數
'''
卷積 (Convolution) 的 超參數(Hyper parameter )
• 內核⼤⼩ (Kernel size)
• 深度(Depth, Kernel的總數)
• 填充(Padding)
• 選框每次移動的步數(Stride)
池化層參數
• 池化窗口大小 (Pool size)
• 填充(Padding)
• 選框每次移動的步數(Stride)
'''

# ref: Day083
## Candidate values for each hyperparameter.
## The first entry of every list is the one used in base_params below.
## Convolution-layer hyperparameter options
opts_cv_filters     = [32, 16, 48]
opts_cv_kernel_size = [5, 7, 3]
opts_cv_padding     = ['valid', 'same']
opts_cv_strides     = [3, 5, 1]
## Pooling-layer hyperparameter options
opts_pl_pool_size   = [3, 4, 2]
opts_pl_padding     = ['valid', 'same']
opts_pl_strides     = [2, 3, 1]

# Base configuration: the first option of each hyperparameter.
base_params = {
    KEY_CV_FILTERS     : opts_cv_filters[0],
    KEY_CV_KERNEL_SIZE : opts_cv_kernel_size[0],
    KEY_CV_PADDING     : opts_cv_padding[0],
    KEY_CV_STRIDES     : opts_cv_strides[0],
    KEY_PL_POOL_SIZE   : opts_pl_pool_size[0],
    KEY_PL_PADDING     : opts_pl_padding[0],
    KEY_PL_STRIDES     : opts_pl_strides[0],
}

# Short label for each hyperparameter key.
opts_abbr = {
    KEY_CV_FILTERS     : 'cv_fl',
    KEY_CV_KERNEL_SIZE : 'cv_kr',
    KEY_CV_PADDING     : 'cv_pd',
    KEY_CV_STRIDES     : 'cv_st',
    KEY_PL_POOL_SIZE   : 'pl_pl',
    KEY_PL_PADDING     : 'pl_pd',
    KEY_PL_STRIDES     : 'pl_st',
}

def to_key(params):
    """Return a string identifying a hyperparameter configuration.

    Each entry of `params` becomes an 'abbr:value' pair, with the
    abbreviation looked up in `opts_abbr`; pairs follow the dict's iteration
    order and are joined with '-'. A key absent from `opts_abbr` raises
    KeyError.
    """
    parts = []
    for k, v in params.items():
        parts.append(f"{opts_abbr[k]}:{v}")
    return '-'.join(parts)

import json


def pretty(d, indent=4):
    """Print `d` serialized as JSON, using `indent` spaces per nesting level."""
    rendered = json.dumps(d, indent=indent)
    print(rendered)

# One sweep per hyperparameter, stored as
# (case_name, case_opts, param_key, base_params).
# The case name and the parameter key are the same string, and every sweep
# receives its own copy of base_params.
test_cases = [
    (param_key, case_opts, param_key, dict(base_params))
    for param_key, case_opts in (
        (KEY_CV_FILTERS,     opts_cv_filters),
        (KEY_CV_KERNEL_SIZE, opts_cv_kernel_size),
        (KEY_CV_PADDING,     opts_cv_padding),
        (KEY_CV_STRIDES,     opts_cv_strides),
        (KEY_PL_POOL_SIZE,   opts_pl_pool_size),
        (KEY_PL_PADDING,     opts_pl_padding),
        (KEY_PL_STRIDES,     opts_pl_strides),
    )
]


In [11]:
# Train one model per distinct hyperparameter configuration and collect its
# training curves and final test metrics in `results`, keyed by to_key().
results = {}

for case_name, case_opts, param_key, params in test_cases:
    print(f"{'='*20} [{case_name:30s}] {'='*20}")
    for opts in case_opts:
        # Only the swept hyperparameter changes within this case's dict.
        params[param_key] = opts

        # Skip configurations that were already trained in an earlier case.
        key = to_key(params)
        if key in results:
            continue

        keras.backend.clear_session()  # clear the previous graph

        model = build_CNN(x_train.shape[1:], params)

        print()
        print('-'*60)
        pretty(params)
        print()
        # summary() prints the table itself and returns None, so wrapping it
        # in print() would emit an extra "None" line.
        model.summary()
        print()

        model.compile(loss='categorical_crossentropy',
                      optimizer=RMSprop(),
                      metrics=['accuracy'])

        # Keep the History object returned by fit() rather than reading it
        # back from the model afterwards.
        history = model.fit(x_train, y_train,
                            batch_size=batch_size,
                            epochs=epochs,
                            verbose=1,
                            validation_data=(x_test, y_test))

        # evaluate() returns the loss first, then the compiled metrics.
        test_loss, test_acc = model.evaluate(x_test, y_test, verbose=0)

        # Depending on the Keras version the accuracy curve is stored under
        # 'acc' or 'accuracy'; use whichever is present.
        curves = history.history
        acc_key = 'acc' if 'acc' in curves else 'accuracy'

        # Collect results
        results[key] = {'train-loss': curves['loss'],
                        'valid-loss': curves['val_loss'],
                        'train-acc': curves[acc_key],
                        'valid-acc': curves['val_' + acc_key],
                        'test-loss': test_loss,
                        'test-acc': test_acc,
                        }



------------------------------------------------------------
{
    "cv_filters": 32,
    "cv_kernel_size": 5,
    "cv_padding": "valid",
    "cv_strides": 3,
    "pl_pool_size": 3,
    "pl_padding": "valid",
    "pl_strides": 2
}

_________________________________________________________________
Layer (type)                 Output Shape              Param #   
conv2d_1 (Conv2D)            (None, 10, 10, 32)        2432      
_________________________________________________________________
activation_1 (Activation)    (None, 10, 10, 32)        0         
_________________________________________________________________
max_pooling2d_1 (MaxPooling2 (None, 4, 4, 32)          0         
_________________________________________________________________
flatten_1 (Flatten)          (None, 512)               0         
_________________________________________________________________
dense_1 (Dense)              (None, 512)               262656    
__________________________________________

Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10

------------------------------------------------------------
{
    "cv_filters": 32,
    "cv_kernel_size": 7,
    "cv_padding": "valid",
    "cv_strides": 3,
    "pl_pool_size": 3,
    "pl_padding": "valid",
    "pl_strides": 2
}

_________________________________________________________________
Layer (type)                 Output Shape              Param #   
conv2d_1 (Conv2D)            (None, 9, 9, 32)          4736      
_________________________________________________________________
activation_1 (Activation)    (None, 9, 9, 32)          0         
_________________________________________________________________
max_pooling2d_1 (MaxPooling2 (None, 4, 4, 32)          0         
_________________________________________________________________
flatten_1 (Flatten)          (None, 512)               0         
_________________________________________________________________
dense_1 (Dense)              (

Train on 50000 samples, validate on 10000 samples
Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10

------------------------------------------------------------
{
    "cv_filters": 32,
    "cv_kernel_size": 5,
    "cv_padding": "valid",
    "cv_strides": 5,
    "pl_pool_size": 3,
    "pl_padding": "valid",
    "pl_strides": 2
}

_________________________________________________________________
Layer (type)                 Output Shape              Param #   
conv2d_1 (Conv2D)            (None, 6, 6, 32)          2432      
_________________________________________________________________
activation_1 (Activation)    (None, 6, 6, 32)          0         
_________________________________________________________________
max_pooling2d_1 (MaxPooling2 (None, 2, 2, 32)          0         
_________________________________________________________________
flatten_1 (Flatten)          (None, 128)               0         
_____________

Train on 50000 samples, validate on 10000 samples
Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10

------------------------------------------------------------
{
    "cv_filters": 32,
    "cv_kernel_size": 5,
    "cv_padding": "valid",
    "cv_strides": 3,
    "pl_pool_size": 2,
    "pl_padding": "valid",
    "pl_strides": 2
}

_________________________________________________________________
Layer (type)                 Output Shape              Param #   
conv2d_1 (Conv2D)            (None, 10, 10, 32)        2432      
_________________________________________________________________
activation_1 (Activation)    (None, 10, 10, 32)        0         
_________________________________________________________________
max_pooling2d_1 (MaxPooling2 (None, 5, 5, 32)          0         
_________________________________________________________________
flatten_1 (Flatten)          (None, 800)               0         
_____________

Train on 50000 samples, validate on 10000 samples
Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10

------------------------------------------------------------
{
    "cv_filters": 32,
    "cv_kernel_size": 5,
    "cv_padding": "valid",
    "cv_strides": 3,
    "pl_pool_size": 3,
    "pl_padding": "valid",
    "pl_strides": 1
}

_________________________________________________________________
Layer (type)                 Output Shape              Param #   
conv2d_1 (Conv2D)            (None, 10, 10, 32)        2432      
_________________________________________________________________
activation_1 (Activation)    (None, 10, 10, 32)        0         
_________________________________________________________________
max_pooling2d_1 (MaxPooling2 (None, 8, 8, 32)          0         
_________________________________________________________________
flatten_1 (Flatten)          (None, 2048)              0         
_____________

In [12]:
# Print the comparison: for every tested option, its test accuracy and the
# difference from the base configuration's test accuracy.
ref_params = dict(base_params)
print("base params")
pretty(ref_params)
print()

ref_key = to_key(ref_params)
ref_test_acc = results[ref_key]['test-acc']

for case_name, case_opts, param_key, params in test_cases:
    print(f"{'='*20} [{case_name:30s}] {'='*20}")
    for opts_idx, opts in enumerate(case_opts):
        # Rebuild the configuration's key to look up its stored result.
        params[param_key] = opts
        test_acc = results[to_key(params)]['test-acc']
        print(f"{str(opts_idx+1):>5s}. {case_name}:{opts}\t\taccuracy={test_acc:.7f}  ({(test_acc-ref_test_acc):+.7f})")

    print('-'*74)
    print()


base params
{
    "cv_filters": 32,
    "cv_kernel_size": 5,
    "cv_padding": "valid",
    "cv_strides": 3,
    "pl_pool_size": 3,
    "pl_padding": "valid",
    "pl_strides": 2
}

    1. cv_filters:32		accuracy=0.6319000  (+0.0000000)
    2. cv_filters:16		accuracy=0.5538000  (-0.0781000)
    3. cv_filters:48		accuracy=0.6311000  (-0.0008000)
--------------------------------------------------------------------------

    1. cv_kernel_size:5		accuracy=0.6319000  (+0.0000000)
    2. cv_kernel_size:7		accuracy=0.6479000  (+0.0160000)
    3. cv_kernel_size:3		accuracy=0.5646000  (-0.0673000)
--------------------------------------------------------------------------

    1. cv_padding:valid		accuracy=0.6319000  (+0.0000000)
    2. cv_padding:same		accuracy=0.6385000  (+0.0066000)
--------------------------------------------------------------------------

    1. cv_strides:3		accuracy=0.6319000  (+0.0000000)
    2. cv_strides:5		accuracy=0.4995000  (-0.1324000)
    3. cv_strides:1		accurac

## 作業
1. 請試著調整各個超參數，並說明哪些超參數對於結果有明顯的影響?
> 結果比較輸出如上 <br/>
> 最差: (Conv2D) strides=3 -> 5 <br/>
> 最佳: (MaxPooling2D) strides=2 -> 3 <br/>
2. CNN 與 DNN 哪個模型的參數數量比較多? 造成參數的數量不同的原因在哪?
> - DNN Total params: 1,841,162 <br/>
> - CNN Total params: 1,250,858 <br/>
>  
> 兩者網路拓撲不同，不易直接比較。 <br/>
> 但一般來說 DNN 模型的參數數量會比較多，因全連接層組合參數最多; <br/> 
> CNN網路若透過適當選取前段卷積層(Conv2D)及池化層(MaxPooling2D)參數，可有效減少後段全連接層輸入參數維度 <br/>
> 若卷積層特徵數(filter)選取過多，有時反而會增加後段全連接層輸入 <br/>