# NiN模型
网络中的网络（NiN）。它提出了另外一个思路，即串联多个由卷积层和“全连接”层构成的小网络来构建一个深层网络。
## NiN块
卷积层的输入和输出通常是四维数组（样本，通道，高，宽），而全连接层的输入和输出则通常是二维数组（样本，特征）。如果想在全连接层后再接上卷积层，则需要将全连接层的输出变换为四维。$1\times 1$卷积层。它可以看成全连接层，其中空间维度（高和宽）上的每个元素相当于样本，通道相当于特征。因此，NiN使用$1\times 1$卷积层来替代全连接层，从而使空间信息能够自然传递到后面的层中去。
NiN块是NiN中的基础块。它由一个卷积层加两个充当全连接层的$1\times 1$卷积层串联而成。其中第一个卷积层的超参数可以自行设置，而第二和第三个卷积层的超参数一般是固定的。
## NiN模型
NiN是在AlexNet问世不久后提出的。它们的卷积层设定有类似之处。NiN使用卷积窗口形状分别为$11\times 11$、$5\times 5$和$3\times 3$的卷积层，相应的输出通道数也与AlexNet中的一致。每个NiN块后接一个步幅为2、窗口形状为$3\times 3$的最大池化层。

除使用NiN块以外，NiN还有一个设计与AlexNet显著不同：NiN去掉了AlexNet最后的3个全连接层，取而代之地，NiN使用了输出通道数等于标签类别数的NiN块，然后使用全局平均池化层对每个通道中所有元素求平均并直接用于分类。这里的全局平均池化层即窗口形状等于输入空间维形状的平均池化层。NiN的这个设计的好处是可以显著减小模型参数尺寸，从而缓解过拟合。然而，该设计有时会造成获得有效模型的训练时间的增加。

In [25]:
import tensorflow as tf
from tensorflow.keras import layers, models, optimizers
import datetime
print(tf.__version__)

for gpu in tf.config.experimental.list_physical_devices('GPU'):
    tf.config.experimental.set_memory_growth(gpu, True)


2.2.0


In [27]:
class NiN:
    def __init__(self):
        fashion_mnist = tf.keras.datasets.fashion_mnist
        (self.train_images, self.train_labels), (self.test_images, self.test_labels) = fashion_mnist.load_data()
        self.train_images = np.expand_dims(self.train_images.astype(np.float32)/255.0,axis=-1)
        self.test_images = np.expand_dims(self.test_images.astype(np.float32)/255.0,axis=-1)
        self.train_labels = self.train_labels.astype(np.int32)
        self.test_labels = self.test_labels.astype(np.int32)
        self.num_train, self.num_test = self.train_images.shape[0], self.test_images.shape[0]

    def nin_block(self, num_channels, kernel_size, strides, padding):
        blk = models.Sequential()
        blk.add(tf.keras.layers.Conv2D(num_channels, kernel_size,
                                       strides=strides, padding=padding, activation='relu')) 
        blk.add(tf.keras.layers.Conv2D(num_channels, kernel_size=1,activation='relu')) 
        blk.add(tf.keras.layers.Conv2D(num_channels, kernel_size=1,activation='relu'))    
        return blk
    
    def build_nin(self):
        tf.keras.backend.clear_session()
        net = models.Sequential()
        net.add(self.nin_block(96, kernel_size=11, strides=4, padding='valid'))
        net.add(layers.MaxPool2D(pool_size=3, strides=2))
        net.add(self.nin_block(256, kernel_size=5, strides=1, padding='same'))
        net.add(layers.MaxPool2D(pool_size=3, strides=2))
        net.add(self.nin_block(384, kernel_size=3, strides=1, padding='same'))
        net.add(layers.MaxPool2D(pool_size=3, strides=2))
        net.add(layers.Dropout(0.5))
        net.add(self.nin_block(10, kernel_size=3, strides=1, padding='same'))
        net.add(layers.GlobalAveragePooling2D())
        net.add(layers.Flatten())
        net.compile(optimizer= optimizers.Adam(lr=1e-7),
                    loss='sparse_categorical_crossentropy',metrics=['accuracy'])
        return net
    
    def get_batch_train(self, batch_size):
        index = np.random.randint(0, np.shape(self.train_images)[0], batch_size)
        #need to resize images to (224,224)
        resized_images = tf.image.resize_with_pad(self.train_images[index],224,224,)
        return resized_images.numpy(), self.train_labels[index]

    def get_batch_test(self, batch_size):
        index = np.random.randint(0, np.shape(self.test_images)[0], batch_size)
        #need to resize images to (224,224)
        resized_images = tf.image.resize_with_pad(self.test_images[index],224,224,)
        return resized_images.numpy(), self.test_labels[index]
    
    def see_output_shape(self, net):
        X = tf.random.uniform((1,224,224,1))
        for blk in net.layers:
            X = blk(X)
            print(blk.name, 'output shape:\t', X.shape)
            
    def train_nin(self, net, epoch, batch_size = 128):
        #net.load_weights("./ModelTrain/nin_weights.h5")
        
        %load_ext tensorboard
        log_dir = './log/nin1/' + datetime.datetime.now().strftime('%Y%m%d-%H%M%S')
        tensorboard_callback = tf.keras.callbacks.TensorBoard(log_dir = log_dir, histogram_freq=1)
        num_iter = self.num_train//batch_size
        with tf.device('/gpu:0'):
            for e in range(epoch):
                for n in range(num_iter):
                    x_batch, y_batch = self.get_batch_train(batch_size)
                    hitory = net.fit(x_batch, y_batch, callbacks=[tensorboard_callback])
                    if n%20 == 0:
                        net.save_weights("./ModelTrain/nin_weights.h5")
        %tensorboard --logdir log/min1
        return net


In [28]:
nin = NiN()
net = nin.build_nin()
net = nin.train_nin(net,5)











































































































ERROR: Failed to launch TensorBoard (exited with 1).
Contents of stderr:
2020-06-18 19:20:12.542118: I tensorflow/stream_executor/platform/default/dso_loader.cc:44] Successfully opened dynamic library cudart64_101.dll
Traceback (most recent call last):
  File "c:\programdata\anaconda3\lib\runpy.py", line 193, in _run_module_as_main
    "__main__", mod_spec)
  File "c:\programdata\anaconda3\lib\runpy.py", line 85, in _run_code
    exec(code, run_globals)
  File "C:\ProgramData\Anaconda3\Scripts\tensorboard.exe\__main__.py", line 7, in <module>
  File "c:\programdata\anaconda3\lib\site-packages\tensorboard\main.py", line 75, in run_main
    app.run(tensorboard.main, flags_parser=tensorboard.configure)
  File "c:\programdata\anaconda3\lib\site-packages\absl\app.py", line 299, in run
    _run_main(main, args)
  File "c:\programdata\anaconda3\lib\site-packages\absl\app.py", line 250, in _run_main
    sys.exit(main(argv))
  File "c:\programdata\anaconda3\lib\site-packages\tensorboard\program

In [31]:
#net.load_weights("5.8_nin_weights.h5")

x_test, y_test = nin.get_batch_test(2000)
net.evaluate(x_test, y_test, verbose=2)
## 因为算力有限，的迭代次数少，训练效果差

63/63 - 1s - loss: 1.5181 - accuracy: 0.3855


[1.5180714130401611, 0.3855000138282776]

In [2]:
%load_ext tensorboard
%tensorboard --logdir log/min1

ERROR: Failed to launch TensorBoard (exited with 1).
Contents of stderr:
2020-06-18 22:14:48.870569: I tensorflow/stream_executor/platform/default/dso_loader.cc:44] Successfully opened dynamic library cudart64_101.dll
Traceback (most recent call last):
  File "c:\programdata\anaconda3\lib\runpy.py", line 193, in _run_module_as_main
    "__main__", mod_spec)
  File "c:\programdata\anaconda3\lib\runpy.py", line 85, in _run_code
    exec(code, run_globals)
  File "C:\ProgramData\Anaconda3\Scripts\tensorboard.exe\__main__.py", line 7, in <module>
  File "c:\programdata\anaconda3\lib\site-packages\tensorboard\main.py", line 75, in run_main
    app.run(tensorboard.main, flags_parser=tensorboard.configure)
  File "c:\programdata\anaconda3\lib\site-packages\absl\app.py", line 299, in run
    _run_main(main, args)
  File "c:\programdata\anaconda3\lib\site-packages\absl\app.py", line 250, in _run_main
    sys.exit(main(argv))
  File "c:\programdata\anaconda3\lib\site-packages\tensorboard\program