# 使用Functional API 的函數建立新的神經網路，並加入分歧

## 合併Sequential模型建立的神經網路函數

In [1]:
# Select TensorFlow as the Keras backend; this cell runs before keras is imported further down
%env KERAS_BACKEND=tensorflow

env: KERAS_BACKEND=tensorflow


In [2]:
%matplotlib inline
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd 
from ipywidgets import interact, IntSlider, Button

In [3]:
# Keras functions
from keras.models import Sequential
from keras.layers import Dense, Activation
from keras.optimizers import SGD

# Keras dataset
from keras.datasets import mnist

# Keras utils
from keras.utils import np_utils

Using TensorFlow backend.


In [4]:
# Load MNIST as raw (images, labels) pairs for the training and test splits;
# the "0" suffix marks the untouched originals that later cells derive from.
(x_train0, y_train0), (x_test0, y_test0) = mnist.load_data()

In [5]:
# Each .shape here is a 3-tuple (count, height, width), so it fills the three %d placeholders
print("There are %d training data with size %d x %d" %x_train0.shape)
print("There are %d testing  data with size %d x %d" %x_test0.shape)

# print("共 %d 訓練資料，每筆資料的大小為 %d x %d" %x_train.shape)
# print("共 %d 測試資料，每筆資料的大小為 %d x %d" %x_test.shape)

There are 60000 training data with size 28 x 28
There are 10000 testing  data with size 28 x 28


In [6]:
# Flatten every image into one row vector. `astype` returns a float copy, so the
# raw arrays (x_train0 / x_test0, still used for plotting) are never modified.
x_train = x_train0.reshape(len(x_train0), -1).astype('float32')
x_test = x_test0.reshape(len(x_test0), -1).astype('float32')

# Min-max scale to [0, 1] using the TRAINING statistics, and apply the very same
# transform to the test set so that validation data is on the scale the network
# is trained on (previously x_test was left unscaled).
x_min, x_max = x_train.min(), x_train.max()
x_train = (x_train - x_min) / (x_max - x_min)
x_test = (x_test - x_min) / (x_max - x_min)

In [7]:
# Sanity check: after the linear rescaling the training data should span [0, 1]
x_train.min(), x_train.max()

(0.0, 1.0)

In [8]:
# One-hot encode the integer digit labels into 10-dimensional target vectors
y_train, y_test = (
    np_utils.to_categorical(labels, num_classes=10)
    for labels in (y_train0, y_test0)
)

### 互動模式畫個圖

In [9]:
def plotNumber(idx):
    """Draw training image number `idx` in greyscale, titled with its digit label."""
    digit_label = y_train0[idx]
    plt.imshow(x_train0[idx], cmap='Greys')
    plt.title("Number: %d " % digit_label)
    # The picture is an image, not a chart: hide the axes
    plt.axis('off')

In [10]:
# Slider covering every valid training index; moving it redraws the selected digit
interact(
    plotNumber,
    idx=IntSlider(description='Data Index', value=0, min=0, max=len(x_train0) - 1),
)

interactive(children=(IntSlider(value=0, description='Data Index', max=59999), Output()), _dom_classes=('widge…

<function __main__.plotNumber(idx)>

### 合併Sequential模型建立的神經網路函數

Sequential 模型所表示的函數為：

$$\hat{f} \colon \mathbb{R}^{784} \to \mathbb{R}^{10}$$


建立一個具有兩個隱藏層的神經網路函數如下：

$$\mathbb{R}^{784} \overset{f_1}{\to} \mathbb{R}^{200} \overset{f_2}{\to} \mathbb{R}^{75} \overset{f_3}{\to} \mathbb{R}^{10}$$

$$x \overset{f_1}{\mapsto} h_1 \overset{f_2}{\mapsto} h_2 \overset{f_3}{\mapsto} y$$

In [11]:
# Hidden part of the network: 784 -> 200 -> 75, with a sigmoid after each Dense layer
all_except_last = [
    Dense(200, input_dim=784),
    Activation('sigmoid'),
    Dense(75),
    Activation('sigmoid'),
]

# Classification head: Dense(10) followed by softmax
output_layer = [Dense(10), Activation('softmax')]

# Concatenate the two layer lists and hand the combined list to `Sequential`
model_num = Sequential(all_except_last + output_layer)
model_num.summary()

_________________________________________________________________
Layer (type)                 Output Shape              Param #   
dense_1 (Dense)              (None, 200)               157000    
_________________________________________________________________
activation_1 (Activation)    (None, 200)               0         
_________________________________________________________________
dense_2 (Dense)              (None, 75)                15075     
_________________________________________________________________
activation_2 (Activation)    (None, 75)                0         
_________________________________________________________________
dense_3 (Dense)              (None, 10)                760       
_________________________________________________________________
activation_3 (Activation)    (None, 10)                0         
Total params: 172,835
Trainable params: 172,835
Non-trainable params: 0
_________________________________________________________________


In [12]:
# Inspect the layer objects held by model_num
model_num.layers

[<keras.layers.core.Dense at 0x14178f203c8>,
 <keras.layers.core.Activation at 0x14178f53898>,
 <keras.layers.core.Dense at 0x14178f53710>,
 <keras.layers.core.Activation at 0x14178f53278>,
 <keras.layers.core.Dense at 0x14178f200b8>,
 <keras.layers.core.Activation at 0x14178f424a8>]

In [13]:
# Plain SGD with learning rate 0.09, cross-entropy loss on the one-hot targets, accuracy reported as 'acc'
model_num.compile(optimizer=SGD(lr=0.09), loss='categorical_crossentropy', metrics=['acc'])

In [14]:
# Train for 5 epochs in mini-batches of 100; (x_test, y_test) is passed as validation data for the run
model_num.fit(x_train, y_train, 
          batch_size=100, 
          epochs=5, 
          verbose=1, 
          validation_data=(x_test, y_test))

Train on 60000 samples, validate on 10000 samples
Epoch 1/5
Epoch 2/5
Epoch 3/5
Epoch 4/5
Epoch 5/5


<keras.callbacks.History at 0x141737cd668>

In [15]:
# NOTE: this evaluates on the TRAINING set, so the figures below are training loss/accuracy.
# score[0] is the loss and score[1] the 'acc' metric requested at compile time.
score = model_num.evaluate(x_train, y_train, batch_size=1000)
print("Loss: %f" %score[0])
# The printed label means "accuracy"; the fraction is scaled to a percentage
print("準確率: %f" %(score[1]*100))

Loss: 0.324406
準確率: 90.681666


# 使用 Functional API 的操作方式

In [16]:
from keras.models import Model
from keras.layers import Input

In [17]:
# Layers defined as standalone callables for the Functional API; the activation is
# folded into each Dense layer instead of being a separate Activation layer.
f_1 = Dense(200, activation='sigmoid')
f_2 = Dense(75, activation='sigmoid')
f_3 = Dense(10, activation='softmax')

In [18]:
# Symbolic input for one flattened image (784 features); the batch dimension is left implicit
x = Input(shape=(784,))

In [19]:
# Show the symbolic tensor: its name, shape and dtype
print(x)

Tensor("input_1:0", shape=(?, 784), dtype=float32)


In [20]:
# Calling a layer on a tensor returns a new tensor; chain x -> h_1 -> h_2 -> y
h_1 = f_1(x)
h_2 = f_2(h_1)
y = f_3(h_2)

變數 $h_1, h_2, y$ 是以張量 (tensor) 類別來表示。

In [21]:
# Print each intermediate tensor to confirm the shapes produced by f_1, f_2 and f_3
print(h_1)
print(h_2)
print(y)

Tensor("dense_4/Sigmoid:0", shape=(?, 200), dtype=float32)
Tensor("dense_5/Sigmoid:0", shape=(?, 75), dtype=float32)
Tensor("dense_6/Softmax:0", shape=(?, 10), dtype=float32)


透過 `Model` 將一個模型的輸入/輸出包裝起來，完成模型建立

In [22]:
# Wrap the graph between input tensor x and output tensor y into a model
model_Com = Model(x, y)
model_Com.summary()

_________________________________________________________________
Layer (type)                 Output Shape              Param #   
input_1 (InputLayer)         (None, 784)               0         
_________________________________________________________________
dense_4 (Dense)              (None, 200)               157000    
_________________________________________________________________
dense_5 (Dense)              (None, 75)                15075     
_________________________________________________________________
dense_6 (Dense)              (None, 10)                760       
Total params: 172,835
Trainable params: 172,835
Non-trainable params: 0
_________________________________________________________________


## 原 Sequential 模型所表示的函數

$$\hat{f} \colon \mathbb{R}^{784} \to \mathbb{R}^{10}$$


建立一個具有兩個隱藏層的神經網路函數如下：

$$\mathbb{R}^{784} \overset{f_1}{\to} \mathbb{R}^{200} \overset{f_2}{\to} \mathbb{R}^{75} \overset{f_3}{\to} \mathbb{R}^{10}$$

$$x \overset{f_1}{\mapsto} h_1 \overset{f_2}{\mapsto} h_2 \overset{f_3}{\mapsto} y$$

其中，$f_1, f_2, f_3$ 代表的是全連結層所代表的函數，其他變數如下：

* x: 784 個輸入神經元（向量）
* h_1: x 經過第一隱藏層運算後的結果，即 f_1(x)，是 200 個神經元的向量
* h_2: h_1 經過第二隱藏層運算後的結果，即 f_2(h_1)，是 75 個神經元的向量
* y: h_2 經過最後一層運算後的結果，即 f_3(h_2)，輸出是 10 個神經元


# 建立具分歧及合併結構的神經網路模型

In [23]:
from keras.layers import concatenate, add

在模型中間，將原本 75 個神經元的隱藏層分歧為 50 個與 25 個神經元的兩個分支，且這兩個分支會在輸出層之前合併，則神經網路的結構會變成：

<img src="branch-and-merge_final.png" alt="drawing" style="width: 400px;"/>


* $x$: 代表的是輸入模型的圖片向量，為 784 維的向量。
* $h_1$: $x$ 經過 $f_1$ 隱藏層運算後的結果，即為 $f_1(x)$，為 200 維的向量。
* $h_2$: $h_1$ 經過 $f_2$ 隱藏層運算後的結果，即為 $f_2(h_1)$，為 50 維的向量。

* $z$: $h_1$ 經過 $f_4$ 運算後的結果，即為 $f_4(h_1)$，為 25 維的向量。
* $y$: $h_2$ 和 $z$ 經過新的 $f_3$ 運算後的結果，即為 $f_3(h_2, z)$，為 10 維的向量，代表的是 $x$ 為哪個數字的機率。

其中，$f_1$ 同之前；
$f_2:\mathbb{R}^{200}\to\mathbb{R}^{50}$ 為全連接層，但 `Activation` 改用 `ReLU`。

In [24]:
# Rebinds f_2 and h_2: a new 50-unit ReLU layer applied to the existing h_1
# (the earlier 75-unit f_2 / h_2 are no longer reachable through these names)
f_2 = Dense(50, activation='relu')
h_2 = f_2(h_1)

$f_4:\mathbb{R}^{200}\to\mathbb{R}^{25}$ 為全連接層，`Activation` 同樣使用 `ReLU`。

In [25]:
# Second branch off h_1: a 25-unit ReLU layer, parallel to the f_2 branch built in the previous cell
f_4 = Dense(25, activation='relu')
z = f_4(h_1)

$f_3$ 的定義域改變，為 $\mathbb{R}^{50}\times\mathbb{R}^{25}\to\mathbb{R}^{10}$ 函數，所以需要重新定義。

In [26]:
# new f_3
f_3 = Dense(10, activation='softmax')

在這裡，我們將 $h_2$ 與 $z$ `concatenate` 接在一起，稱做 $u$。

In [27]:
# Merge the two branches (h_2 and z) into one tensor u, then classify it with the new f_3
u = concatenate([h_2, z])
y = f_3(u)

In [28]:
# Check the shapes of the merged tensor and of the final output
print(u)
print(y)

Tensor("concatenate_1/concat:0", shape=(?, 75), dtype=float32)
Tensor("dense_9/Softmax:0", shape=(?, 10), dtype=float32)


In [29]:
# Rebinds model_Com: same input x, but y is now the output of the branch-and-merge graph
model_Com = Model(x, y)
model_Com.summary()

__________________________________________________________________________________________________
Layer (type)                    Output Shape         Param #     Connected to                     
input_1 (InputLayer)            (None, 784)          0                                            
__________________________________________________________________________________________________
dense_4 (Dense)                 (None, 200)          157000      input_1[0][0]                    
__________________________________________________________________________________________________
dense_7 (Dense)                 (None, 50)           10050       dense_4[0][0]                    
__________________________________________________________________________________________________
dense_8 (Dense)                 (None, 25)           5025        dense_4[0][0]                    
__________________________________________________________________________________________________
concatenat

In [30]:
# Same optimizer, loss and metric settings as used for model_num
model_Com.compile(optimizer=SGD(lr=0.09), loss='categorical_crossentropy', metrics=['acc'])

In [31]:
# Same training schedule as model_num: 5 epochs, batches of 100, (x_test, y_test) as validation data
model_Com.fit(x_train, y_train, 
          batch_size=100, 
          epochs=5, 
          verbose=1, 
          validation_data=(x_test, y_test))

Train on 60000 samples, validate on 10000 samples
Epoch 1/5
Epoch 2/5
Epoch 3/5
Epoch 4/5
Epoch 5/5


<keras.callbacks.History at 0x1417a3cd7b8>

In [32]:
# NOTE: evaluated on the TRAINING set, mirroring the model_num evaluation cell.
# score_Com[0] is the loss and score_Com[1] the 'acc' metric requested at compile time.
score_Com = model_Com.evaluate(x_train, y_train, batch_size=1000)
print("Loss: %f" %score_Com[0])
# The printed label means "accuracy"; the fraction is scaled to a percentage
print("準確率: %f" %(score_Com[1]*100))

Loss: 0.234982
準確率: 93.140000
