# GoogLeNet中的基础卷积块叫 Inception

In [1]:
import sys 
sys.path.append("../")

In [2]:
import gluonbook as gb
from mxnet import gluon, init, nd
from mxnet.gluon import nn,data as gdata
import mxnet as mx

  from ._conv import register_converters as _register_converters


In [3]:
class Inception(nn.Block):
    #c1,c2,c3,c4为每个路线输出的通道个数
    def __init__(self,c1,c2,c3,c4,**kwargs):
        super(Inception,self).__init__(**kwargs)
        self.p1_1 = nn.Conv2D(c1,kernel_size=1,activation='relu')
        self.p2_1 = nn.Conv2D(c2[0],kernel_size=1,activation='relu')
        self.p2_2 = nn.Conv2D(c2[1],kernel_size=3,padding=1,activation='relu')
        self.p3_1 = nn.Conv2D(c3[0],kernel_size=1,activation='relu')
        self.p3_2 = nn.Conv2D(c3[1],kernel_size=5,padding=2,activation='relu')
        self.p4_1 = nn.MaxPool2D(pool_size=3,padding=1,strides=1)
        self.p4_2 = nn.Conv2D(c4,kernel_size=1,activation='relu')
    def forward(self,X):
        p1 = self.p1_1(X)
        p2 = self.p2_2(self.p2_1(X))
        p3 = self.p3_2(self.p3_1(X))
        p4 = self.p4_2(self.p4_1(X))
        
        #最后所有将结果在输出通道上连接
        return nd.concat(p1,p2,p3,p4,dim=1)

In [4]:
inception = Inception(10,(5,10),(5,10),10)

In [5]:
X = nd.random.uniform(shape =(1,1,96,96),ctx=mx.gpu())
inception.initialize(ctx=mx.gpu(),force_reinit=True)
print(inception(X).shape) #输出为四个通道结构相加

(1, 40, 96, 96)


## GoogLeNet模型

### 主体卷积部分使用了五个模块，每个模块之间使用步幅为2的3 x 3 最大池化层来减小输出高宽

* 第一个模块使用了64通道的7 * 7 卷积层

In [6]:
b1 = nn.Sequential()
b1.add(nn.Conv2D(channels=64,kernel_size=7,strides=2,padding=3,activation='relu'),
       nn.MaxPool2D(pool_size=3,strides=2,padding=1))

* 第二个模块使用了两个卷积层：首先是64通道的1 x 1卷积层，然后是将通道增大3倍的3x3卷积层。它对应Inception块中的第二条线路

In [7]:
b2 =nn.Sequential()
b2.add(nn.Conv2D(channels=64,kernel_size=1,activation='relu'),
       nn.Conv2D(channels=192,kernel_size=3,padding=1),
       nn.MaxPool2D(pool_size=3,strides=2,padding=1))

* 第三个模块串联两个完整的Inception块。第一个Inception块的输出通道数为256，其中四个线路的输出通道比例为2:4:1:1,。且第二、三线路分别将输入通减小2倍和12倍后再进入第二层卷积层。第⼆个 Inception 块输出通道数增⾄ 480，每个线路的通道⽐例为 4:6:3:2。且第⼆、三线路先分别减少 2 倍和 8 倍通道数。

In [8]:
b3 = nn.Sequential()
b3.add(Inception(64,(96,128),(16,32),32),
       Inception(128,(128,192),(32,96),64),
       nn.MaxPool2D(pool_size=3,strides=2,padding=1))


+ 第四模块更加复杂。它串联了五个 Inception 块，其输出通道数分别是 192 + 208 + 48 + 64 =512、 160 + 224 + 64 + 64 = 512、 128 + 256 + 64 + 64 = 512、 112 + 288 + 64 + 64 = 528 和256 + 320 + 128 + 128 = 832。这些线路的通道数分配和第三模块中的类似：含 3 × 3 卷积层的第⼆条线路输出最多通道，其次是仅含 1 × 1 卷积层的第⼀条线路，之后是含 5 × 5 卷积层的第三条线路和含 3 × 3 最⼤池化层的第四条线路。其中第⼆、第三条线路都会先按⽐例减小通道数。这些⽐例在各个 Inception 块中都略有不同。

In [9]:
b4 = nn.Sequential()
b4.add(Inception(192, (96, 208), (16, 48), 64),
Inception(160, (112, 224), (24, 64), 64),
Inception(128, (128, 256), (24, 64), 64),
Inception(112, (144, 288), (32, 64), 64),
Inception(256, (160, 320), (32, 128), 128),
nn.MaxPool2D(pool_size=3, strides=2, padding=1))

+ 第五模块有输出通道数为 256 + 320 + 128 + 128 = 832 和 384 + 384 + 128 + 128 = 1024 的两个Inception 块。其中每条线路的通道数分配思路和第三、第四模块中的⼀致，只是在具体数值上有所不同。需要注意的是，第五模块的后⾯紧跟输出层，该模块同 NiN ⼀样使⽤全局平均池化层来将每个通道的⾼和宽变成 1。最后我们将输出变成⼆维数组后接上⼀个输出个数为标签类数的全连接层。

In [10]:
b5 = nn.Sequential()
b5.add(Inception(256, (160, 320), (32, 128), 128),
       Inception(384, (192, 384), (48, 128), 128),
        nn.GlobalMaxPool2D(),
        )

GoogLeNet = nn.Sequential()
GoogLeNet.add(b1,b2,b3,b4,b5,nn.Dense(10))

In [11]:
 X = nd.random.uniform(shape=(1, 1, 96, 96),ctx=mx.gpu())
GoogLeNet.initialize(ctx=mx.gpu())
for layer in GoogLeNet:
    X = layer(X)
    print(layer.name, 'output shape:\t', X.shape)

sequential0 output shape:	 (1, 64, 24, 24)
sequential1 output shape:	 (1, 192, 12, 12)
sequential2 output shape:	 (1, 480, 6, 6)
sequential3 output shape:	 (1, 832, 3, 3)
sequential4 output shape:	 (1, 1024, 1, 1)
dense0 output shape:	 (1, 10)


In [12]:
def load_data_fashion_mnist(batch_size,resize=None):
    transformer = []
    path = '../chapter1_baseKnowledge/FashionMNIST/'
    if resize:
        transformer += [gdata.vision.transforms.Resize(resize)]
    transformer += [gdata.vision.transforms.ToTensor()]
    transformer = gdata.vision.transforms.Compose(transformer)
    mnist_train =gdata.vision.FashionMNIST(root=path,train=True)
    mnist_test =gdata.vision.FashionMNIST(root= path,train=False)
    
    train_iter = gdata.DataLoader(mnist_train.transform_first(transformer),batch_size,shuffle = True)
    test_iter = gdata.DataLoader(mnist_test.transform_first(transformer),batch_size,shuffle=False)
    
    return train_iter,test_iter

In [13]:
lr, num_epochs, batch_size, ctx = 0.1, 5, 32, gb.try_gpu()
GoogLeNet.initialize(force_reinit=True,init=init.Xavier(),ctx=ctx)
train_iter,test_iter = load_data_fashion_mnist(batch_size =batch_size,resize=224)
trainer = gluon.Trainer(GoogLeNet.collect_params(), 'sgd', {'learning_rate': lr})

In [14]:
gb.train_ch5(GoogLeNet,train_iter,test_iter,batch_size,trainer,ctx,num_epochs)

training on gpu(0)


MXNetError: [21:50:43] c:\jenkins\workspace\mxnet\mxnet\src\operator\nn\./cudnn/cudnn_convolution-inl.h:663: Check failed: e == CUDNN_STATUS_SUCCESS (2 vs. 0) cuDNN: CUDNN_STATUS_ALLOC_FAILED