In [1]:
import keras
from keras.models import Model
from keras.layers import Input,Dense
from keras.callbacks import TensorBoard

import numpy as np
import matplotlib.pyplot as plt
from miniBatch import random_mini_batche  
from data_utils import get_CIFAR10_data

Using TensorFlow backend.


### Load Data

- train set: 2000

- val set: 100

- test set: 100

In [2]:
# Load the (preprocessed) CIFAR10 data.
# `data` is iterated as a mapping from split name to array; each name is
# printed together with the shape of its array as a quick sanity check.
data = get_CIFAR10_data()
for k, v in data.items():
    print('%s: ' % k, v.shape)

X_train:  (49000, 3, 32, 32)
y_train:  (49000,)
X_val:  (1000, 3, 32, 32)
y_val:  (1000,)
X_test:  (1000, 3, 32, 32)
y_test:  (1000,)


In [3]:
def load_data(index_train,index_val,index_test,dataset=None):
    """
    Take the leading samples of each split and flatten every sample to 1-D.

    Parameters:
    ----------
       index_train: number of leading training samples to keep.
       index_val: number of leading validation samples to keep.
       index_test : number of leading test samples to keep.
       dataset: optional mapping with the keys 'X_train', 'y_train', 'X_val',
           'y_val', 'X_test' and 'y_test'. Defaults to the notebook-level
           ``data`` variable, which keeps existing three-argument calls working.

    Returns:
    -------
       train_x: train set data, shape (n_kept, n_features)
       train_y: train set labels
       val_x: val set data, shape (n_kept, n_features)
       val_y: val set labels
       test_x: test set data, shape (n_kept, n_features)
       test_y: test set labels.

    If a requested count exceeds the number of available samples, all
    available samples of that split are returned.
    """
    if dataset is None:
        dataset = data

    def _take(split, count):
        """Return the first `count` flattened samples and labels of one split."""
        features = dataset['X_' + split][:count]
        labels = dataset['y_' + split][:count]
        # Reshape with the row count the slice actually produced, not the
        # requested one: reshape(count, -1) raises, or silently yields a wrong
        # shape, when fewer than `count` samples exist. The feature width is
        # computed explicitly so that an empty selection also works.
        n_features = int(np.prod(features.shape[1:], dtype=int))
        return features.reshape(features.shape[0], n_features), labels

    train_x, train_y = _take('train', index_train)
    val_x, val_y = _take('val', index_val)
    test_x, test_y = _take('test', index_test)

    return train_x,train_y,val_x,val_y,test_x,test_y

In [4]:
# Work on a small subset: the first 2000 training, 100 validation and 100 test
# samples, each flattened to a single row by load_data.
train_x,train_y,val_x,val_y,test_x,test_y = load_data(2000,100,100)

参考(在Keras层中使用softmax激活时如何指定axis): https://stackoverflow.com/questions/45947111/how-to-specify-the-axis-when-using-the-softmax-activation-in-a-keras-layer

### Build Keras models: BN (Batch Normalization) and BL (Baseline)

函数解释:

(1)

```python
def __init__(self,layers,mode,epochs,lr,batch_size,fielname):
    ...
```
    
该函数只负责保存超参数,为后续构建和训练模型做初始化准备.


    
(2) 由于我们需要对比Batch Normalization(BN)和Baseline(BL),所以我们需要构建两个函数来分别搭建这两种网络:

(2.1)

```python
def fit_BN(self,Z):
    ...
```

(2.1.1) 对于BN层,我们需要使用```keras.layers.BatchNormalization```并指定```axis=1```. ```axis```一般选择特征轴:Dense层的输出形状为(batch, features),所以特征轴是1. 与TF,Pytorch不同的是,Keras不需要对BN的Testing阶段做额外处理,Keras会自动处理. 更多详细内容请查看[BatchNormalization](https://keras.io/zh/layers/normalization/#batchnormalization)

(2.1.2) 由于BN层自身带有可学习的偏移参数(beta),Dense层不再需要bias,所以指定```use_bias=False```.

(2.1.3) 使用output layer:softmax,hidden layers:relu.


(2.2)

```python
def fit_BL(self,Z):
    ...
```

BL网络就是之前的普通全连接网络(不含BN层),这里就不多说了.


(3)

```python
def fit(self,x,y,val_x,val_y):
    ...
```

(3.1) 由于我们需要使用softmax做多分类,所以我们需要将labels转换成one-hot形式:```keras.utils.to_categorical```.

(3.2) 构建Model

(3.3) 构建optimizer:SGD

(3.4) 构建Loss:categorical_crossentropy

(3.5) 编译model:```model.compile```

(3.6) 训练model,通过```validation_data```指定验证样本,并使用```callbacks=[TensorBoard(...)]```将训练信息储存为TensorBoard日志.


In [5]:
class BN_BL:
    """
    Fully connected Keras classifier built either with Batch Normalization
    ("BN") or as a plain baseline ("BL"), so that the two can be compared.

    The network consists of one ReLU Dense layer per entry of ``layers``
    (each followed by a BatchNormalization layer in "BN" mode) and a final
    softmax Dense layer with ``layers[-1]`` units.
    """

    # Modes accepted by fit().
    VALID_MODES = ("BN", "BL")

    def __init__(self,layers,mode,epochs,lr,batch_size,fielname):
        """
        Store the hyper-parameters; no Keras object is created here.

        Parameters:
        ----------
            layers: sizes of the ReLU Dense layers, e.g. [100,80,50,30,10].
                The softmax output layer reuses the last entry as its size.
            mode: "BN" (Batch Normalization) or "BL" (baseline).
            epochs: number of training epochs.
            lr: learning rate of the SGD optimizer.
            batch_size: mini-batch size.
            fielname: TensorBoard log directory (the spelling is kept so that
                existing keyword callers continue to work).
        """
        self.layers = layers
        self.mode = mode
        self.epochs = epochs
        self.lr = lr
        self.batch_size = batch_size
        self.fielname = fielname

    def _require_layers(self):
        """Raise ValueError when no layer size was configured."""
        if not self.layers:
            raise ValueError('layers must contain at least one layer size')

    def _build_outputs(self,Z,batch_norm):
        """Stack the Dense (and optional BN) layers on Z; return the softmax output."""
        self._require_layers()
        # Dense layers of the BN network are created without a bias term.
        use_bias = not batch_norm
        for units in self.layers:
            Z = Dense(units,activation='relu',use_bias=use_bias)(Z)
            if batch_norm:
                # Dense outputs are (batch, features): normalise over the
                # feature axis (1). axis=0 would refer to the batch axis.
                Z = keras.layers.BatchNormalization(axis=1)(Z)
        # The output layer reuses the last configured size as its unit count.
        return Dense(self.layers[-1],activation='softmax',use_bias=use_bias)(Z)

    def fit_BN(self,Z):
        """
        Build the Batch Normalization network on top of Z.

        Parameters:
        ----------
            Z: input tensor of the first hidden layer.
        Return:
        ------
            outputs: output tensor of the final softmax layer.
        """
        return self._build_outputs(Z,batch_norm=True)

    def fit_BL(self,Z):
        """
        Build the baseline network (no BN layers) on top of Z.

        Parameters:
        ----------
            Z: input tensor of the first hidden layer.
        Return:
        ------
            outputs: output tensor of the final softmax layer.
        """
        return self._build_outputs(Z,batch_norm=False)

    def fit(self,x,y,val_x,val_y):
        """
        Build, compile and train the model.

        Parameters:
        ----------
            x: training data set, 2-D (samples, features).
            y: training labels (integer class ids).
            val_x: validation data.
            val_y: validation labels.

        Return:
        ------
            self.model: the trained Keras model.

        Raises:
        ------
            ValueError: if ``mode`` is not "BN"/"BL", if ``layers`` is empty,
                or if ``layers[-1]`` differs from the number of classes in y.
        """
        # Fail fast on a bad configuration instead of crashing later on an
        # unbound ``outputs`` variable.
        if self.mode not in self.VALID_MODES:
            raise ValueError('Invalid mode %s; expected one of %s'
                             % (self.mode, ', '.join(self.VALID_MODES)))
        self._require_layers()

        _, n_features = x.shape
        self.n_classes = len(np.unique(y))
        # The softmax layer has layers[-1] units while the one-hot targets
        # have n_classes columns; report a mismatch with a clear message.
        if self.layers[-1] != self.n_classes:
            raise ValueError('layers[-1] is %s but the training labels contain %s classes'
                             % (self.layers[-1], self.n_classes))

        # Convert integer labels to one-hot vectors.
        y_hot = keras.utils.to_categorical(y,self.n_classes)
        val_y_hot = keras.utils.to_categorical(val_y,self.n_classes)
        self.L = len(self.layers)

        inputs = Input((n_features,)) # create input layer

        # Build the graph for the selected mode (validated above).
        if self.mode == "BN":
            outputs = self.fit_BN(inputs)
        else:
            outputs = self.fit_BL(inputs)

        # create Model
        self.model = Model(inputs=inputs, outputs=outputs)
        # create optimizer
        optimizer = keras.optimizers.SGD(lr=self.lr)
        # create loss function
        Loss_func = keras.losses.categorical_crossentropy
        # compile model
        self.model.compile(optimizer=optimizer,loss=Loss_func,metrics=['accuracy'])
        # Train silently; progress is written to the TensorBoard log directory.
        self.model.fit(x=x,y=y_hot,batch_size=self.batch_size,epochs=self.epochs,
                  validation_data=(val_x,val_y_hot),verbose=0,callbacks=[TensorBoard(log_dir=self.fielname)])

        return self.model

    def score(self,x,y):
        """
        Evaluate the trained model and print its loss and accuracy.

        Must be called after fit(), which creates ``self.model`` and
        ``self.n_classes``.

        Parameters:
        ----------
            x: score data.
            y: score labels (integer class ids).
        """
        y = keras.utils.to_categorical(y,self.n_classes)
        loss,acc = self.model.evaluate(x,y)
        print('The loss {} acc {}'.format(loss,acc))
    

### Testing BN and BaseLine

测试不同情况下,BN网络与普通网络之间的差异:

(1) 小权重,小学习率

(2) 小权重,大学习率

**Ps:**

```keras.backend.clear_session()```清空session,这样Keras才不会进行session叠加,否则产生的网络结构是累加的.

#### 小权重,小学习率

BN:```TensorBoard:small_weights_small_lr_BN```

BL:```TensorBoard:small_weights_small_lr_BL```

In [6]:
# Reset the Keras backend session before building a model, so that layers from
# an earlier model do not accumulate in the same graph.
keras.backend.clear_session()

In [7]:
# Experiment 1 (BN): Batch Normalization network with a small learning rate
# (lr=0.001). TensorBoard logs are written to "small_weights_small_lr_BN".
layers = [100,80,50,30,10]
clf = BN_BL(layers=layers,mode="BN",epochs=1000,lr=0.001,batch_size=60,fielname="small_weights_small_lr_BN")

In [8]:
# Train on the training subset; val_x/val_y are passed as validation data.
clf.fit(train_x,train_y,val_x,val_y)

Instructions for updating:
Colocations handled automatically by placer.
Instructions for updating:
Use tf.cast instead.


<keras.engine.training.Model at 0x127eebe10>

In [9]:
# Evaluate on the test subset; score() prints the loss and accuracy.
clf.score(test_x,test_y)

The loss 3.3252144145965574 acc 0.29


In [10]:
# Reset the Keras backend session so the baseline model starts from a clean graph.
keras.backend.clear_session()

In [11]:
# Experiment 1 (BL): baseline network without BN, same small learning rate
# (lr=0.001). TensorBoard logs are written to "small_weights_small_lr_BL".
layers = [100,80,50,30,10]
clf = BN_BL(layers=layers,mode="BL",epochs=1000,lr=0.001,batch_size=60,fielname="small_weights_small_lr_BL")
clf.fit(train_x,train_y,val_x,val_y)

<keras.engine.training.Model at 0xb354c3b38>

In [12]:
# Evaluate the baseline on the test subset; score() prints the loss and accuracy.
clf.score(test_x,test_y)

The loss 7.965495491027832 acc 0.26


#### 小权重,大学习率

BN:```TensorBoard:small_weights_big_lr_BN```

BL:```TensorBoard:small_weights_big_lr_BL```

In [13]:
# Reset the Keras backend session before the large-learning-rate BN run.
keras.backend.clear_session()

In [14]:
# Experiment 2 (BN): Batch Normalization network with a large learning rate
# (lr=0.1). TensorBoard logs are written to "small_weights_big_lr_BN".
layers = [100,80,50,30,10]
clf = BN_BL(layers=layers,mode="BN",epochs=1000,lr=0.1,batch_size=60,fielname="small_weights_big_lr_BN")
clf.fit(train_x,train_y,val_x,val_y)

<keras.engine.training.Model at 0xb369b9f28>

In [15]:
# Evaluate on the test subset; score() prints the loss and accuracy.
clf.score(test_x,test_y)

The loss 5.36355712890625 acc 0.38


In [16]:
# Reset the Keras backend session before the large-learning-rate baseline run.
keras.backend.clear_session()

In [17]:
# Experiment 2 (BL): baseline network without BN, same large learning rate
# (lr=0.1). TensorBoard logs are written to "small_weights_big_lr_BL".
layers = [100,80,50,30,10]
clf = BN_BL(layers=layers,mode="BL",epochs=1000,lr=0.1,batch_size=60,fielname="small_weights_big_lr_BL")
clf.fit(train_x,train_y,val_x,val_y)

<keras.engine.training.Model at 0xb378c0ba8>

In [18]:
# Evaluate the baseline on the test subset; score() prints the loss and accuracy.
clf.score(test_x,test_y)

The loss 14.8286474609375 acc 0.08


#### TensorBoard

BN 网络结构

<img src="../../../picture/59.png" width="300" height="300">

BL 网络结构

<img src="../../../picture/60.png" width="300" height="300">

### Summary 

BN的效果显著优于BL,具体详情查看对应的TensorBoard.

**Ps:**

val_loss会出现先下降后上升的情况,val_loss开始上升时即为开始过拟合.这是正常现象,因为我们没有对数据集和网络做任何(例如正则化)处理,此处只是在对比BN与BL.