# Network in network  
Article: [Network In Network](https://arxiv.org/pdf/1312.4400.pdf)  
Code: <https://zhuanlan.zhihu.com/p/28339912>

### 1. NIN Structure
![NIN](Figure/NIN.png)

### 2. NIN Parameter
![NIN_Parameter](Figure/NIN_Parameter.png)

### 3. MLPconv层
MLPconv层可以看成是在每个卷积的局部感受野中还包含了一个微型的多层网络。
![MLPCONV](Figure/MLPConv.png)

### 4. Global Average Pooling

传统的CNN最后一层都是全连接层，参数个数非常之多，容易引起过拟合（如AlexNet）。一个CNN模型中，大部分的参数都被全连接层占用了，  
故这篇paper提出采用全局均值池化（Global Average Pooling）替代全连接层。

In [5]:
import keras
import numpy as np
from keras.datasets import cifar10
from keras.preprocessing.image import ImageDataGenerator
from keras.models import Sequential
from keras.layers import Dense, Dropout, Activation, Flatten
from keras.layers import Conv2D,MaxPooling2D,GlobalAveragePooling2D,AveragePooling2D

In [6]:
from keras.initializers import RandomNormal
from keras import optimizers
from keras.callbacks import LearningRateScheduler,TensorBoard
from keras.layers.normalization import BatchNormalization

In [7]:
# Mini-batch size passed to datagen.flow().
batch_size = 128
# Number of training epochs passed to fit_generator().
epochs = 10
# Used as steps_per_epoch; presumably ceil(50000 / batch_size) for the
# CIFAR-10 training set — TODO confirm.
iterations = 391
# Number of target classes used when one-hot encoding the labels.
num_classes = 10
# Rate used by the Dropout layers in build_model().
dropout = 0.5
# L2 regularization factor applied to the Conv2D kernels in build_model().
weight_decay = 0.0001
# Directory the TensorBoard callback writes its logs to.
log_filepath = './nin'

In [8]:
from keras import backend as K

In [10]:
# When Keras runs on the TensorFlow backend, use a session that grows its GPU
# memory allocation on demand instead of reserving all of it up front.
if 'tensorflow' == K.backend():
    import tensorflow as tf
    from keras.backend.tensorflow_backend import set_session
    config = tf.ConfigProto()
    config.gpu_options.allow_growth = True
    sess = tf.Session(config=config)
    # Register the session with Keras; without this call the configured
    # session is never used and allow_growth has no effect.
    set_session(sess)

In [11]:
def color_processing(x_train, x_test):
    """Standardize the colour channels of the train and test images.

    Both arrays are converted to float32 copies (the inputs are not modified)
    and every channel on the last axis is transformed as
    ``(x - mean) / std`` using the same fixed per-channel constants for both
    sets.

    Args:
        x_train: Image array with 3 channels on the last axis.
        x_test: Image array with 3 channels on the last axis.

    Returns:
        Tuple ``(x_train, x_test)`` of standardized float32 arrays.
    """
    x_train = x_train.astype('float32')
    x_test = x_test.astype('float32')
    # Presumably the per-channel CIFAR-10 training statistics — TODO confirm.
    mean = [125.307, 122.95, 111.865]
    std = [62.9932, 62.0887, 66.7048]
    for channel, (mu, sigma) in enumerate(zip(mean, std)):
        x_train[..., channel] = (x_train[..., channel] - mu) / sigma
        # The test set must get the identical transform; the subtraction has
        # to happen before the division.
        x_test[..., channel] = (x_test[..., channel] - mu) / sigma
    return x_train, x_test

In [12]:
def scheduler(epoch):
    """Return the learning rate for the given epoch index.

    The rate is 0.01 up to and including epoch 80, 0.005 up to and including
    epoch 140, and 0.001 afterwards.
    """
    # (last epoch of the stage, learning rate), checked in ascending order.
    stages = ((80, 0.01), (140, 0.005))
    for last_epoch, rate in stages:
        if epoch <= last_epoch:
            return rate
    return 0.001

In [21]:
def build_model(input_shape=None):
    """Build and compile the Network-in-Network classifier.

    The network is three stacks of one spatial convolution followed by two
    1x1 convolutions, each convolution followed by ReLU. The first two stacks
    end in 3x3/stride-2 max pooling and dropout; the last stack outputs
    ``num_classes`` feature maps that are reduced by global average pooling
    and passed through softmax.

    Args:
        input_shape: Shape of one input image (without the batch axis). When
            omitted, the shape is taken from the module-level ``x_train``.

    Returns:
        A compiled ``Sequential`` model using categorical cross-entropy and
        SGD with Nesterov momentum.
    """
    if input_shape is None:
        # Keep the no-argument call working by reading the global data.
        input_shape = x_train.shape[1:]

    model = Sequential()

    def add_conv_relu(filters, kernel_size, **extra):
        # Every convolution shares 'same' padding and L2 weight decay.
        model.add(Conv2D(filters, kernel_size, padding='same',
                         kernel_regularizer=keras.regularizers.l2(weight_decay),
                         **extra))
        model.add(Activation('relu'))

    # Stack 1
    add_conv_relu(192, (5, 5), input_shape=input_shape)
    add_conv_relu(160, (1, 1))
    add_conv_relu(96, (1, 1))
    model.add(MaxPooling2D(pool_size=(3, 3), strides=(2, 2), padding='same'))
    model.add(Dropout(dropout))

    # Stack 2
    add_conv_relu(192, (5, 5))
    add_conv_relu(192, (1, 1))
    add_conv_relu(192, (1, 1))
    model.add(MaxPooling2D(pool_size=(3, 3), strides=(2, 2), padding='same'))
    model.add(Dropout(dropout))

    # Stack 3: one feature map per class instead of a hard-coded 10.
    add_conv_relu(192, (3, 3))
    add_conv_relu(192, (1, 1))
    add_conv_relu(num_classes, (1, 1))

    # Global average pooling replaces the fully connected classifier head.
    model.add(GlobalAveragePooling2D())
    model.add(Activation('softmax'))

    # NOTE(review): this initial lr is presumably overridden per epoch when
    # the LearningRateScheduler callback is used — confirm.
    sgd = optimizers.SGD(lr=0.1, momentum=0.9, nesterov=True)
    model.compile(loss='categorical_crossentropy', optimizer=sgd, metrics=['accuracy'])
    return model

In [23]:
# Load the CIFAR-10 train and test splits as (images, labels) pairs.
(x_train,y_train),(x_test,y_test) = cifar10.load_data()
# One-hot encode the labels to match the categorical_crossentropy loss.
y_train = keras.utils.to_categorical(y_train,num_classes)
y_test = keras.utils.to_categorical(y_test,num_classes)
# NOTE(review): color_processing() is defined above but is not called anywhere
# in the visible notebook, so the images appear to reach the model
# unnormalized — confirm whether that is intended.
# build_model() reads the global x_train to determine the input shape.
model = build_model()

In [24]:
# Print the layer-by-layer architecture with output shapes and parameter counts.
model.summary()

_________________________________________________________________
Layer (type)                 Output Shape              Param #   
conv2d_1 (Conv2D)            (None, 32, 32, 192)       14592     
_________________________________________________________________
activation_1 (Activation)    (None, 32, 32, 192)       0         
_________________________________________________________________
conv2d_2 (Conv2D)            (None, 32, 32, 160)       30880     
_________________________________________________________________
activation_2 (Activation)    (None, 32, 32, 160)       0         
_________________________________________________________________
conv2d_3 (Conv2D)            (None, 32, 32, 96)        15456     
_________________________________________________________________
activation_3 (Activation)    (None, 32, 32, 96)        0         
_________________________________________________________________
max_pooling2d_1 (MaxPooling2 (None, 16, 16, 96)        0         
__________

In [25]:
# Write TensorBoard logs to log_filepath; histogram_freq=0 turns histogram
# computation off (per the Keras TensorBoard callback docs).
tb_cb = TensorBoard(log_dir=log_filepath,histogram_freq=0)
# Set the learning rate each epoch from scheduler().
change_lr = LearningRateScheduler(scheduler)
# Callbacks handed to fit_generator().
cbks = [change_lr,tb_cb]

In [26]:
# Training-time augmentation: random horizontal flips plus horizontal and
# vertical shifts (0.125 is presumably a fraction of the image size — see the
# Keras ImageDataGenerator docs); pixels exposed by a shift are filled with
# the constant value 0.
datagen = ImageDataGenerator(horizontal_flip=True,
                             height_shift_range=0.125,
                             fill_mode='constant',
                             cval=0.,
                             width_shift_range=0.125)

In [28]:
# NOTE(review): per the Keras docs, fit() is only required when
# featurewise_center, featurewise_std_normalization or zca_whitening is
# enabled, and none of them is set on this generator — this call looks
# unnecessary; confirm before removing.
datagen.fit(x_train)

In [29]:
# Train on augmented mini-batches and validate against the test split.
# With epochs = 10, scheduler() returns 0.01 for every epoch, so the 0.005 and
# 0.001 stages of the schedule are never reached.
model.fit_generator(datagen.flow(x_train,y_train,batch_size=batch_size),
                    steps_per_epoch=iterations,
                    epochs=epochs,
                    callbacks=cbks,
                    validation_data=(x_test,y_test))

Epoch 1/10

KeyboardInterrupt: 

### 5. Batch Normalization
Source(CN): https://www.zhihu.com/question/38102762  