In [1]:
import tensorflow as tf
from tensorflow import keras
from tensorflow.keras import layers
import numpy as np

In [2]:
(x_train, y_train), (x_test, y_test) = keras.datasets.cifar10.load_data()

Downloading data from https://www.cs.toronto.edu/~kriz/cifar-10-python.tar.gz


In [3]:
y_train = keras.utils.to_categorical(y_train, 10)
y_test = keras.utils.to_categorical(y_test, 10)

x_train = x_train / 255.0
x_test = x_test / 255.0

In [4]:
# 裝回tf.data.Dataset

ds_train = tf.data.Dataset.from_tensor_slices((x_train, y_train))
ds_train

<TensorSliceDataset shapes: ((32, 32, 3), (10,)), types: (tf.float64, tf.float32)>

In [5]:
# 取得單一batch

one_batch = ds_train.batch(batch_size=128).take(1)

for (x, y) in one_batch:
    print(x.shape)
    print(y.shape)

(128, 32, 32, 3)
(128, 10)


In [6]:
# 透過模型去擬合


model = keras.Sequential([
    layers.InputLayer(input_shape=(32, 32, 3)),
    layers.Conv2D(64, 3, padding='same'),
    layers.MaxPooling2D(),
    layers.GlobalMaxPooling2D(),
    layers.Dense(10, activation='softmax')
])

model.compile(
    optimizer=keras.optimizers.Adam(),
    loss=keras.losses.CategoricalCrossentropy(from_logits=False),
    metrics='acc'
)

model.fit(one_batch.repeat(),epochs=20, steps_per_epoch=100, validation_data=(x_test, y_test))

Epoch 1/20
Epoch 2/20
Epoch 3/20
Epoch 4/20
Epoch 5/20
Epoch 6/20
Epoch 7/20
Epoch 8/20
Epoch 9/20
Epoch 10/20
Epoch 11/20
Epoch 12/20
Epoch 13/20
Epoch 14/20
Epoch 15/20
Epoch 16/20
Epoch 17/20
Epoch 18/20
Epoch 19/20
Epoch 20/20


<tensorflow.python.keras.callbacks.History at 0x7f7bef33fb50>

In [7]:
model.summary()

Model: "sequential"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
conv2d (Conv2D)              (None, 32, 32, 64)        1792      
_________________________________________________________________
max_pooling2d (MaxPooling2D) (None, 16, 16, 64)        0         
_________________________________________________________________
global_max_pooling2d (Global (None, 64)                0         
_________________________________________________________________
dense (Dense)                (None, 10)                650       
Total params: 2,442
Trainable params: 2,442
Non-trainable params: 0
_________________________________________________________________


In [12]:
# 上面是的確適合圖片的神經網路，那麼讓我們看看另外一個神經網路，單純用MLP去做

model_mlp = keras.Sequential([
    layers.InputLayer(input_shape=(32, 32, 3)),
    layers.Flatten(),
    layers.Dense(32, activation='relu'),
    layers.Dense(10, activation='softmax')
])

model_mlp.compile(
    optimizer=keras.optimizers.Adam(),
    loss=keras.losses.CategoricalCrossentropy(from_logits=False),
    metrics=['acc']
)
model_mlp.fit(
    x=one_batch.repeat(),
    epochs=20,
    steps_per_epoch=100,
    validation_data=(x_test, y_test)
)

Epoch 1/20
Epoch 2/20
Epoch 3/20
Epoch 4/20
Epoch 5/20
Epoch 6/20
Epoch 7/20
Epoch 8/20
Epoch 9/20
Epoch 10/20
Epoch 11/20
Epoch 12/20
Epoch 13/20
Epoch 14/20
Epoch 15/20
Epoch 16/20
Epoch 17/20
Epoch 18/20
Epoch 19/20
Epoch 20/20


<tensorflow.python.keras.callbacks.History at 0x7f7b8d93c250>

In [13]:
model_mlp.summary()

Model: "sequential_4"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
flatten_3 (Flatten)          (None, 3072)              0         
_________________________________________________________________
dense_7 (Dense)              (None, 32)                98336     
_________________________________________________________________
dense_8 (Dense)              (None, 10)                330       
Total params: 98,666
Trainable params: 98,666
Non-trainable params: 0
_________________________________________________________________


> 透過single_batch去測試模型fit能力，雖然模型不大，但fit能力足夠，但很顯然general是差的。

In [15]:
# 接著，去fit整個dataset

model.fit(x_train, y_train, batch_size=128, epochs=10, validation_data=(x_test, y_test))

Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10


<tensorflow.python.keras.callbacks.History at 0x7f7b8ae32d10>

In [16]:
model_mlp.fit(x_train, y_train, batch_size=128, epochs=10, validation_data=(x_test, y_test))

Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10


<tensorflow.python.keras.callbacks.History at 0x7f7b88de25d0>

> 這邊用全部資料可以發現，CNN模型雖然參數小，但是用正確的方式學習到參數，雖然只跑了10個epoch不足以達到適用的效果，但整體趨勢樂觀。而MLP則是效果不好，但也是有學習的趨勢。

> 那我們再試看看，多一點epoch會如何(因為兩個模型可以看到都在學習中)


In [17]:
model.fit(x_train, y_train, batch_size=128, epochs=90, validation_data=(x_test, y_test))

Epoch 1/90
Epoch 2/90
Epoch 3/90
Epoch 4/90
Epoch 5/90
Epoch 6/90
Epoch 7/90
Epoch 8/90
Epoch 9/90
Epoch 10/90
Epoch 11/90
Epoch 12/90
Epoch 13/90
Epoch 14/90
Epoch 15/90
Epoch 16/90
Epoch 17/90
Epoch 18/90
Epoch 19/90
Epoch 20/90
Epoch 21/90
Epoch 22/90
Epoch 23/90
Epoch 24/90
Epoch 25/90
Epoch 26/90
Epoch 27/90
Epoch 28/90
Epoch 29/90
Epoch 30/90
Epoch 31/90
Epoch 32/90
Epoch 33/90
Epoch 34/90
Epoch 35/90
Epoch 36/90
Epoch 37/90
Epoch 38/90
Epoch 39/90
Epoch 40/90
Epoch 41/90
Epoch 42/90
Epoch 43/90
Epoch 44/90
Epoch 45/90
Epoch 46/90
Epoch 47/90
Epoch 48/90
Epoch 49/90
Epoch 50/90
Epoch 51/90
Epoch 52/90
Epoch 53/90
Epoch 54/90
Epoch 55/90
Epoch 56/90
Epoch 57/90
Epoch 58/90
Epoch 59/90
Epoch 60/90
Epoch 61/90
Epoch 62/90
Epoch 63/90
Epoch 64/90
Epoch 65/90
Epoch 66/90
Epoch 67/90
Epoch 68/90
Epoch 69/90
Epoch 70/90
Epoch 71/90
Epoch 72/90
Epoch 73/90
Epoch 74/90
Epoch 75/90
Epoch 76/90
Epoch 77/90
Epoch 78/90
Epoch 79/90
Epoch 80/90
Epoch 81/90
Epoch 82/90
Epoch 83/90
Epoch 84/90
E

<tensorflow.python.keras.callbacks.History at 0x7f7b88578b50>

In [18]:
model_mlp.fit(x_train, y_train, batch_size=128, epochs=90, validation_data=(x_test, y_test))

Epoch 1/90
Epoch 2/90
Epoch 3/90
Epoch 4/90
Epoch 5/90
Epoch 6/90
Epoch 7/90
Epoch 8/90
Epoch 9/90
Epoch 10/90
Epoch 11/90
Epoch 12/90
Epoch 13/90
Epoch 14/90
Epoch 15/90
Epoch 16/90
Epoch 17/90
Epoch 18/90
Epoch 19/90
Epoch 20/90
Epoch 21/90
Epoch 22/90
Epoch 23/90
Epoch 24/90
Epoch 25/90
Epoch 26/90
Epoch 27/90
Epoch 28/90
Epoch 29/90
Epoch 30/90
Epoch 31/90
Epoch 32/90
Epoch 33/90
Epoch 34/90
Epoch 35/90
Epoch 36/90
Epoch 37/90
Epoch 38/90
Epoch 39/90
Epoch 40/90
Epoch 41/90
Epoch 42/90
Epoch 43/90
Epoch 44/90
Epoch 45/90
Epoch 46/90
Epoch 47/90
Epoch 48/90
Epoch 49/90
Epoch 50/90
Epoch 51/90
Epoch 52/90
Epoch 53/90
Epoch 54/90
Epoch 55/90
Epoch 56/90
Epoch 57/90
Epoch 58/90
Epoch 59/90
Epoch 60/90
Epoch 61/90
Epoch 62/90
Epoch 63/90
Epoch 64/90
Epoch 65/90
Epoch 66/90
Epoch 67/90
Epoch 68/90
Epoch 69/90
Epoch 70/90
Epoch 71/90
Epoch 72/90
Epoch 73/90
Epoch 74/90
Epoch 75/90
Epoch 76/90
Epoch 77/90
Epoch 78/90
Epoch 79/90
Epoch 80/90
Epoch 81/90
Epoch 82/90
Epoch 83/90
Epoch 84/90
E

<tensorflow.python.keras.callbacks.History at 0x7f7bef05ac10>

> 可以發現MLP似乎到達了一個瓶頸，兩個模型其實都有些underfitting，那麼我們選擇表現較好的CNN類型去做擴充

In [20]:
model_cnn_multi = keras.Sequential([
    layers.InputLayer(input_shape=(32, 32, 3)),
    layers.Conv2D(64, 3, padding='same'),
    layers.MaxPooling2D(),
    layers.Conv2D(64, 3, padding='same'),
    layers.MaxPooling2D(),
    layers.Conv2D(64, 3, padding='same'),
    layers.MaxPooling2D(),
    layers.Conv2D(128, 3, padding='same'),
    layers.MaxPooling2D(),
    layers.Conv2D(128, 3, padding='same'),
    layers.MaxPooling2D(),
    layers.GlobalMaxPooling2D(),
    layers.Dense(10, activation='softmax')
])


model_cnn_multi.compile(
    optimizer=keras.optimizers.Adam(),
    loss=keras.losses.CategoricalCrossentropy(from_logits=False),
    metrics=['acc']
)

model_cnn_multi.fit(
    x=x_train,
    y=y_train,
    epochs=100,
    batch_size=128,
    validation_data=(x_test, y_test)
)

Epoch 1/100
Epoch 2/100
Epoch 3/100
Epoch 4/100
Epoch 5/100
Epoch 6/100
Epoch 7/100
Epoch 8/100
Epoch 9/100
Epoch 10/100
Epoch 11/100
Epoch 12/100
Epoch 13/100
Epoch 14/100
Epoch 15/100
Epoch 16/100
Epoch 17/100
Epoch 18/100
Epoch 19/100
Epoch 20/100
Epoch 21/100
Epoch 22/100
Epoch 23/100
Epoch 24/100
Epoch 25/100
Epoch 26/100
Epoch 27/100
Epoch 28/100
Epoch 29/100
Epoch 30/100
Epoch 31/100
Epoch 32/100
Epoch 33/100
Epoch 34/100
Epoch 35/100
Epoch 36/100
Epoch 37/100
Epoch 38/100
Epoch 39/100
Epoch 40/100
Epoch 41/100
Epoch 42/100
Epoch 43/100
Epoch 44/100
Epoch 45/100
Epoch 46/100
Epoch 47/100
Epoch 48/100
Epoch 49/100
Epoch 50/100
Epoch 51/100
Epoch 52/100
Epoch 53/100

KeyboardInterrupt: ignored

> 可以看到模型學習非常快，效果也不錯！但是馬上就overfitting了，這邊需要加強泛化能力

> 發現沒有用activation 先用一下!

- BatchNormalization
    - 激活函數之前
- Dropout

In [22]:
model_cnn_multi_av = keras.Sequential([
    layers.InputLayer(input_shape=(32, 32, 3)),
    layers.Conv2D(64, 3, padding='same'),
    layers.Activation('relu'),
    layers.MaxPooling2D(),
    layers.Conv2D(64, 3, padding='same'),
    layers.Activation('relu'),
    layers.MaxPooling2D(),
    layers.Conv2D(64, 3, padding='same'),
    layers.Activation('relu'),
    layers.MaxPooling2D(),
    layers.Conv2D(128, 3, padding='same'),
    layers.Activation('relu'),
    layers.MaxPooling2D(),
    layers.Conv2D(128, 3, padding='same'),
    layers.Activation('relu'),
    layers.MaxPooling2D(),
    layers.GlobalMaxPooling2D(),
    layers.Dense(10, activation='softmax')
])


model_cnn_multi_av.compile(
    optimizer=keras.optimizers.Adam(),
    loss=keras.losses.CategoricalCrossentropy(from_logits=False),
    metrics=['acc']
)

model_cnn_multi_av.fit(
    x=x_train,
    y=y_train,
    epochs=30,
    batch_size=128,
    validation_data=(x_test, y_test)
)

Epoch 1/30
Epoch 2/30
Epoch 3/30
Epoch 4/30
Epoch 5/30
Epoch 6/30
Epoch 7/30
Epoch 8/30
Epoch 9/30
Epoch 10/30
Epoch 11/30
Epoch 12/30
Epoch 13/30
Epoch 14/30
Epoch 15/30
Epoch 16/30
Epoch 17/30
Epoch 18/30
Epoch 19/30
Epoch 20/30
Epoch 21/30
Epoch 22/30
Epoch 23/30
Epoch 24/30
Epoch 25/30
Epoch 26/30
Epoch 27/30
Epoch 28/30
Epoch 29/30
Epoch 30/30


<tensorflow.python.keras.callbacks.History at 0x7f7b92fdae50>

> 有activation function透過非線性的擬合能力，幫助模型效果更好了一些!

In [23]:
model_cnn_multi_av_bn = keras.Sequential([
    layers.InputLayer(input_shape=(32, 32, 3)),
    layers.Conv2D(64, 3, padding='same'),
    layers.BatchNormalization(),
    layers.Activation('relu'),
    layers.MaxPooling2D(),
    layers.Conv2D(64, 3, padding='same'),
    layers.BatchNormalization(),
    layers.Activation('relu'),
    layers.MaxPooling2D(),
    layers.Conv2D(64, 3, padding='same'),
    layers.BatchNormalization(),
    layers.Activation('relu'),
    layers.MaxPooling2D(),
    layers.Conv2D(128, 3, padding='same'),
    layers.BatchNormalization(),
    layers.Activation('relu'),
    layers.MaxPooling2D(),
    layers.Conv2D(128, 3, padding='same'),
    layers.BatchNormalization(),
    layers.Activation('relu'),
    layers.MaxPooling2D(),
    layers.GlobalMaxPooling2D(),
    layers.Dense(10, activation='softmax')
])


model_cnn_multi_av_bn.compile(
    optimizer=keras.optimizers.Adam(),
    loss=keras.losses.CategoricalCrossentropy(from_logits=False),
    metrics=['acc']
)

model_cnn_multi_av_bn.fit(
    x=x_train,
    y=y_train,
    epochs=30,
    batch_size=128,
    validation_data=(x_test, y_test)
)

Epoch 1/30
Epoch 2/30
Epoch 3/30
Epoch 4/30
Epoch 5/30
Epoch 6/30
Epoch 7/30
Epoch 8/30
Epoch 9/30
Epoch 10/30
Epoch 11/30
Epoch 12/30
Epoch 13/30
Epoch 14/30
Epoch 15/30
Epoch 16/30
Epoch 17/30
Epoch 18/30
Epoch 19/30
Epoch 20/30
Epoch 21/30
Epoch 22/30
Epoch 23/30
Epoch 24/30
Epoch 25/30
Epoch 26/30
Epoch 27/30
Epoch 28/30
Epoch 29/30
Epoch 30/30


<tensorflow.python.keras.callbacks.History at 0x7f7b92db7e90>

> 加入BN之後，效果: 可以看到generalize有些許提升，但並不穩定。

> [試試看Dropout](https://kknews.cc/zh-tw/code/mnvee3p.html) --> CNN Dropout 意義不大，因為神經元本來就很少!


In [25]:
model_cnn_multi_av_drop = keras.Sequential([
    layers.InputLayer(input_shape=(32, 32, 3)),
    layers.Conv2D(64, 3, padding='same'),
    layers.Dropout(0.5),
    layers.Activation('relu'),
    layers.MaxPooling2D(),
    layers.Conv2D(64, 3, padding='same'),
    layers.Dropout(0.5),
    layers.Activation('relu'),
    layers.MaxPooling2D(),
    layers.Conv2D(64, 3, padding='same'),
    layers.Dropout(0.5),
    layers.Activation('relu'),
    layers.MaxPooling2D(),
    layers.Conv2D(128, 3, padding='same'),
    layers.Dropout(0.5),
    layers.Activation('relu'),
    layers.MaxPooling2D(),
    layers.Conv2D(128, 3, padding='same'),
    layers.Dropout(0.5),
    layers.Activation('relu'),
    layers.MaxPooling2D(),
    layers.GlobalMaxPooling2D(),
    layers.Dense(10, activation='softmax')
])


model_cnn_multi_av_drop.compile(
    optimizer=keras.optimizers.Adam(),
    loss=keras.losses.CategoricalCrossentropy(from_logits=False),
    metrics=['acc']
)

model_cnn_multi_av_drop.fit(
    x=x_train,
    y=y_train,
    epochs=30,
    batch_size=128,
    validation_data=(x_test, y_test)
)

Epoch 1/30
Epoch 2/30
Epoch 3/30
Epoch 4/30
Epoch 5/30
Epoch 6/30
Epoch 7/30
Epoch 8/30
Epoch 9/30
Epoch 10/30
Epoch 11/30
Epoch 12/30
Epoch 13/30
Epoch 14/30
Epoch 15/30
Epoch 16/30
Epoch 17/30
Epoch 18/30
Epoch 19/30
Epoch 20/30
Epoch 21/30
Epoch 22/30
Epoch 23/30
Epoch 24/30
Epoch 25/30
Epoch 26/30
Epoch 27/30
Epoch 28/30
Epoch 29/30
Epoch 30/30


<tensorflow.python.keras.callbacks.History at 0x7f7b92a4ddd0>

> 可以看到，的確反而讓模型更難以學習!