# **06. Basic Computer Vision**

**Topic**: Using CNNs for computer vision and visualizing the feature maps

In [1]:
from keras import layers
from keras import models
from keras import optimizers
from keras.datasets import mnist
from keras.utils import to_categorical

Using TensorFlow backend.


## <font color='blue'>**初嘗 CNN**</font>

### <font color='dark'>**Build a basic CNN**</font>

In [2]:
# Small convnet for 28x28 single-channel images, ending in a 10-way softmax.
# Three 3x3 convolutions (the first two each followed by 2x2 max pooling)
# extract features; the dense head classifies the flattened feature map.
model = models.Sequential([
    layers.Conv2D(32, (3, 3), activation='relu', input_shape=(28, 28, 1)),
    layers.MaxPool2D((2, 2)),
    layers.Conv2D(64, (3, 3), activation='relu'),
    layers.MaxPool2D((2, 2)),
    layers.Conv2D(64, (3, 3), activation='relu'),
    layers.Flatten(),
    layers.Dense(64, activation='relu'),
    layers.Dense(10, activation='softmax'),
])

In [3]:
# Print the per-layer table of output shapes and parameter counts.
model.summary()

_________________________________________________________________
Layer (type)                 Output Shape              Param #   
conv2d_1 (Conv2D)            (None, 26, 26, 32)        320       
_________________________________________________________________
max_pooling2d_1 (MaxPooling2 (None, 13, 13, 32)        0         
_________________________________________________________________
conv2d_2 (Conv2D)            (None, 11, 11, 64)        18496     
_________________________________________________________________
max_pooling2d_2 (MaxPooling2 (None, 5, 5, 64)          0         
_________________________________________________________________
conv2d_3 (Conv2D)            (None, 3, 3, 64)          36928     
_________________________________________________________________
flatten_1 (Flatten)          (None, 576)               0         
_________________________________________________________________
dense_1 (Dense)              (None, 64)                36928     
__________

### <font color='dark'>**Load Data (MNIST)**</font>

In [4]:
# Load the MNIST train/test split shipped with Keras.
(train_images, train_labels), (test_images, test_labels) = mnist.load_data()

# Append a channel axis of size 1 (the layout the Conv2D input expects) and
# scale the values by 1/255 as float32. The sample count is read from the
# array itself instead of being hard-coded, so the cell does not silently
# depend on the exact split sizes.
train_images = train_images.reshape((len(train_images), 28, 28, 1)).astype('float32') / 255
test_images = test_images.reshape((len(test_images), 28, 28, 1)).astype('float32') / 255

# One-hot encode the labels. The class count is pinned to 10 so that both
# splits always get the same width, rather than each inferring it from its
# own largest label.
train_labels = to_categorical(train_labels, num_classes=10)
test_labels = to_categorical(test_labels, num_classes=10)

In [5]:
# Report the shape of each array produced by the preprocessing cell.
for caption, array in (
        ("Train images : ", train_images),
        ("Train labels : ", train_labels),
        ("Test images : ", test_images),
        ("Test labels : ", test_labels)):
    print(caption, array.shape)

Train images :  (60000, 28, 28, 1)
Train labels :  (60000, 10)
Test images :  (10000, 28, 28, 1)
Test labels :  (10000, 10)


In [6]:
# Multi-class setup: categorical cross-entropy against the one-hot labels,
# optimized with RMSprop, tracking accuracy during training and evaluation.
model.compile(
    loss='categorical_crossentropy',
    optimizer='rmsprop',
    metrics=['accuracy'],
)

In [7]:
# Train on the full training split for 5 epochs in mini-batches of 64 samples.
model.fit(train_images, train_labels, epochs=5, batch_size=64)

Epoch 1/5
Epoch 2/5
Epoch 3/5
Epoch 4/5
Epoch 5/5


<keras.callbacks.History at 0x28d3ceee320>

In [8]:
# Evaluate on the held-out test split; the result unpacks into the loss
# followed by the metric requested at compile time (accuracy).
test_loss, test_acc = model.evaluate(test_images, test_labels)
test_acc  # bare last expression so the notebook displays the test accuracy



0.9908

**Difference between Convolutional Layer and Fully connected Layer**
* Fully connected Layer 是學習全域的特徵，每個小pixel都是參考點
* Convolutional Layer 則是學習局部的特徵，將一個小區域作為特徵點

**Advantages of Convolutional Layer**
* Translation invariant: 例如今天我們可以把右下角學習到的特徵，應用在圖片左上角上面，這符合我們現實生活特徵可以平移的特點
* Spatial hierarchies of patterns: CNN 通常將原本又寬又薄的 input，變得又小又深，就像是我們看東西會從輪廓看到細節，具有層次的結構

## <font color='blue'>**以少量資料來訓練 CNN**</font>

有 3 種方法可以幫忙處理
* data augmentation (資料擴充法)
* feature extraction with a pretrained network (使用 pretrained model 來幫助 feature extraction)
* fine-tuning a pretrained model (微調預先訓練的 model)

### <font color='dark'>Prepare Dataset</font>

這邊使用 kaggle 的 dataset, 辨別 貓 和 狗 的圖片,<br>
網址: https://www.kaggle.com/c/dogs-vs-cats/data<br>
我們先把它切成小的 dataset

In [9]:
import os, shutil

# NOTE(review): both paths are machine-specific absolute paths; adjust them
# to wherever the Kaggle archive was extracted before running this cell.
original_dataset_dir = 'C:\\Users\\azaz4\\Desktop\\data\\train'  # directory holding the original images
base_dir = 'C:\\Users\\azaz4\\Desktop\\data\\small_dataset'  # root directory of the reduced dataset


def _ensure_dir(*parts):
    """Join ``parts`` into one path, create that directory if it is missing, and return the path.

    ``exist_ok=True`` makes the cell safe to re-run and avoids the separate
    exists-then-mkdir check; missing parent directories are created as well.
    """
    path = os.path.join(*parts)
    os.makedirs(path, exist_ok=True)
    return path


def _copy_images(label, indices, dst_dir):
    """Copy ``<label>.<i>.jpg`` for every ``i`` in ``indices`` from the original dataset into ``dst_dir``.

    Existing destination files are overwritten, so a re-run refreshes the copies.
    """
    for i in indices:
        fname = '{}.{}.jpg'.format(label, i)
        shutil.copyfile(os.path.join(original_dataset_dir, fname),
                        os.path.join(dst_dir, fname))


#----------------------------------------------------------------------------
# Create the base directory if it does not exist yet
#----------------------------------------------------------------------------
_ensure_dir(base_dir)

#----------------------------------------------------------------------------
# One sub-directory per split: train, test and validation
#----------------------------------------------------------------------------
train_dir = _ensure_dir(base_dir, 'train')
test_dir = _ensure_dir(base_dir, 'test')
val_dir = _ensure_dir(base_dir, 'val')

#----------------------------------------------------------------------------
# Inside every split, one directory per class: cats and dogs
#----------------------------------------------------------------------------
train_cat_dir = _ensure_dir(train_dir, 'cats')
train_dog_dir = _ensure_dir(train_dir, 'dogs')
test_cat_dir = _ensure_dir(test_dir, 'cats')
test_dog_dir = _ensure_dir(test_dir, 'dogs')
val_cat_dir = _ensure_dir(val_dir, 'cats')
val_dog_dir = _ensure_dir(val_dir, 'dogs')

#----------------------------------------------------------------------------
# Copy the images of each class into its train / test / val directory:
# indices 0-999 -> train (1000), 1000-1499 -> test (500), 1500-1999 -> val (500)
#----------------------------------------------------------------------------
for label, train_dst, test_dst, val_dst in (
        ('cat', train_cat_dir, test_cat_dir, val_cat_dir),
        ('dog', train_dog_dir, test_dog_dir, val_dog_dir)):
    _copy_images(label, range(1000), train_dst)
    _copy_images(label, range(1000, 1500), test_dst)
    _copy_images(label, range(1500, 2000), val_dst)

In [10]:
# Sanity check: count the files that ended up in each split/class directory.
for template, directory in (
        ('Train dog dataset : {}', train_dog_dir),
        ('Train cat dataset : {}', train_cat_dir),
        ('Test dog dataset : {}', test_dog_dir),
        ('Test cat dataset : {}', test_cat_dir),
        ('Val dog dataset : {}', val_dog_dir),
        ('Val cat dataset : {}', val_cat_dir)):
    print(template.format(len(os.listdir(directory))))

Train dog dataset : 1000
Train cat dataset : 1000
Test dog dataset : 500
Test cat dataset : 500
Val dog dataset : 500
Val cat dataset : 500


### <font color='dark'>Build a Neural Network</font>

In [11]:
model = models.Sequential()
# Convnet for 150x150 RGB inputs with a single sigmoid output unit.
# Four conv + max-pool stages, then a dense classifier on the flattened
# feature map. This is still the empty Sequential created above, filled
# layer by layer in the same order as before.
for layer in (
        layers.Conv2D(filters=32, kernel_size=(3, 3), activation='relu', input_shape=(150, 150, 3)),
        layers.MaxPooling2D(2, 2),
        layers.Conv2D(filters=64, kernel_size=(3, 3), activation='relu'),
        layers.MaxPooling2D(2, 2),
        layers.Conv2D(filters=128, kernel_size=(3, 3), activation='relu'),
        layers.MaxPooling2D(2, 2),
        layers.Conv2D(filters=128, kernel_size=(3, 3), activation='relu'),
        layers.MaxPooling2D(2, 2),
        layers.Flatten(),
        layers.Dense(512, activation='relu'),
        layers.Dense(1, activation='sigmoid')):
    model.add(layer)

In [12]:
# Print the per-layer table of output shapes and parameter counts.
model.summary()

_________________________________________________________________
Layer (type)                 Output Shape              Param #   
conv2d_4 (Conv2D)            (None, 148, 148, 32)      896       
_________________________________________________________________
max_pooling2d_3 (MaxPooling2 (None, 74, 74, 32)        0         
_________________________________________________________________
conv2d_5 (Conv2D)            (None, 72, 72, 64)        18496     
_________________________________________________________________
max_pooling2d_4 (MaxPooling2 (None, 36, 36, 64)        0         
_________________________________________________________________
conv2d_6 (Conv2D)            (None, 34, 34, 128)       73856     
_________________________________________________________________
max_pooling2d_5 (MaxPooling2 (None, 17, 17, 128)       0         
_________________________________________________________________
conv2d_7 (Conv2D)            (None, 15, 15, 128)       147584    
__________

In [13]:
# Two-class setup: binary cross-entropy on the single sigmoid output,
# optimized with RMSprop at a learning rate of 1e-4, tracking accuracy.
model.compile(
    optimizer=optimizers.RMSprop(lr=1e-4),
    loss='binary_crossentropy',
    metrics=['acc'],
)