# Inception

In [1]:
import mxnet as mx
import sys
import time
from mxnet import autograd, gluon, init, nd
from mxnet.gluon import loss as gloss, nn

In [2]:
class Inception(nn.Block):
    # c1 - c4 are the number of output channels for each layer in the path.
    def __init__(self, c1, c2, c3, c4, **kwargs):
        super(Inception, self).__init__(**kwargs)
        # Path 1 is a single 1 x 1 convolutional layer.
        self.p1_1 = nn.Conv2D(c1, kernel_size=1, activation='relu')
        # Path 2 is a 1 x 1 convolutional layer followed by a 3 x 3 convolutional layer.
        self.p2_1 = nn.Conv2D(c2[0], kernel_size=1, activation='relu')
        self.p2_2 = nn.Conv2D(c2[1], kernel_size=3, padding=1, activation='relu')
        # Path 3 is a 1 x 1 convolutional layer followed by a 5 x 5 convolutional layer.
        self.p3_1 = nn.Conv2D(c3[0], kernel_size=1, activation='relu')
        self.p3_2 = nn.Conv2D(c3[1], kernel_size=5, padding=2, activation='relu')
        # Path 4 is a 3 x 3 maximum pooling layer followed by a 1 x 1 convolutional layer.
        self.p4_1 = nn.MaxPool2D(pool_size=3, strides=1, padding=1)
        self.p4_2 = nn.Conv2D(c4, kernel_size=1, activation='relu')

    def forward(self, x):
        p1 = self.p1_1(x)
        p2 = self.p2_2(self.p2_1(x))
        p3 = self.p3_2(self.p3_1(x))
        p4 = self.p4_2(self.p4_1(x))
        # Concatenate the outputs on the channel dimension.
        return nd.concat(p1, p2, p3, p4, dim=1)

### Inception Model - Stage 1

In [3]:
b1 = nn.Sequential()
b1.add(nn.Conv2D(64, kernel_size=7, strides=2, padding=3, activation='relu'),
       nn.MaxPool2D(pool_size=3, strides=2, padding=1))

### Inception Model - Stage 2

In [4]:
b2 = nn.Sequential()
b2.add(nn.Conv2D(64, kernel_size=1),
       nn.Conv2D(192, kernel_size=3, padding=1),
       nn.MaxPool2D(pool_size=3, strides=2, padding=1))

### Inception Model - Stage 3

In [5]:
b3 = nn.Sequential()
b3.add(Inception(64, (96, 128), (16, 32), 32),
       Inception(128, (128, 192), (32, 96), 64),
       nn.MaxPool2D(pool_size=3, strides=2, padding=1))

### Inception Model - Stage 4

We use a total of 512 channels (128 + 256 + 64 + 64) 

In [6]:
b4 = nn.Sequential()
b4.add(Inception(192, (96, 208), (16, 48), 64),
       Inception(160, (112, 224), (24, 64), 64),
       Inception(128, (128, 256), (24, 64), 64),
       Inception(112, (144, 288), (32, 64), 64),
       Inception(256, (160, 320), (32, 128), 128),
       nn.MaxPool2D(pool_size=3, strides=2, padding=1))

### Inception Model - Stage 5

In [7]:
b5 = nn.Sequential()
b5.add(Inception(256, (160, 320), (32, 128), 128),
       Inception(384, (192, 384), (48, 128), 128),
       nn.GlobalAvgPool2D())

net = nn.Sequential()
net.add(b1, b2, b3, b4, b5, nn.Dense(38))

Priming the network (at full size)

In [8]:
X = nd.random.uniform(shape=(1, 1, 96, 96))
net.initialize()
for layer in net:
    X = layer(X)
    print(layer.name, 'output shape:\t', X.shape)

sequential0 output shape:	 (1, 64, 24, 24)
sequential1 output shape:	 (1, 192, 12, 12)
sequential2 output shape:	 (1, 480, 6, 6)
sequential3 output shape:	 (1, 832, 3, 3)
sequential4 output shape:	 (1, 1024, 1, 1)
dense0 output shape:	 (1, 38)


In [23]:
def load_data_fashion(batch_size, resize=None):
    data = gluon.data.vision
    trans = [data.transforms.Resize(resize)] if resize else []
    trans.append(data.transforms.ToTensor())
    trans = data.transforms.Compose(trans)
    mnist_train = gluon.data.vision.ImageFolderDataset('/kaggle/input/new-plant-diseases-dataset/train', flag=38).transform_first(trans)
    mnist_test = gluon.data.vision.ImageFolderDataset('/kaggle/input/new-plant-diseases-dataset/test', flag=38).transform_first(trans)
    return (gluon.data.DataLoader(mnist_train, batch_size, shuffle=True,
                                  num_workers=get_dataloader_workers()),
            gluon.data.DataLoader(mnist_test, batch_size, shuffle=False,
                                  num_workers=get_dataloader_workers()))

In [19]:
def evaluate_accuracy(data_iter, net, ctx):
    acc_sum, n = nd.array([0], ctx=ctx), 0
    for X, y in data_iter:
        # If ctx is the GPU, copy the data to the GPU.
        X, y = X.as_in_context(ctx), y.as_in_context(ctx).astype('float32')
        acc_sum += (net(X).argmax(axis=1) == y).sum()
        n += y.size
    return acc_sum.asscalar() / n

def train(net, train_iter, test_iter, batch_size, trainer, ctx,
              num_epochs):
    print('training on', ctx)
    loss = gloss.SoftmaxCrossEntropyLoss()
    for epoch in range(num_epochs):
        train_l_sum, train_acc_sum, n, start = 0.0, 0.0, 0, time.time()
        for X, y in train_iter:
            X, y = X.as_in_context(ctx), y.as_in_context(ctx)
            with autograd.record():
                y_hat = net(X)
                l = loss(y_hat, y).sum()
            l.backward()
            trainer.step(batch_size)
            y = y.astype('float32')
            train_l_sum += l.asscalar()
            train_acc_sum += (y_hat.argmax(axis=1) == y).sum().asscalar()
            n += y.size
        test_acc = evaluate_accuracy(test_iter, net, ctx)
        print('epoch %d, loss %.4f, train acc %.3f, test acc %.3f, '
              'time %.1f sec'
              % (epoch + 1, train_l_sum / n, train_acc_sum / n, test_acc,
                 time.time() - start))

In [1]:
lr, num_epochs, batch_size, ctx = 0.1, 5, 128, mx.gpu()
net.initialize(force_reinit=True, ctx=ctx, init=init.Xavier())
trainer = gluon.Trainer(net.collect_params(), 'sgd', {'learning_rate': lr})
train_iter, test_iter = load_data_fashion_mnist(batch_size, resize=96)
train(net, train_iter, test_iter, batch_size, trainer, ctx, num_epochs)

Epoch 1/69

Epoch 00001: val_loss improved from inf to 1.51326, saving model to model.h5
23/23 - 40s - loss: 2.1999 - accuracy: 0.3083 - val_loss: 1.5133 - val_accuracy: 0.4822
Epoch 2/69

Epoch 00002: val_loss improved from 1.51326 to 1.17247, saving model to model.h5
23/23 - 36s - loss: 1.2464 - accuracy: 0.5900 - val_loss: 1.1725 - val_accuracy: 0.5770
Epoch 3/69

Epoch 00003: val_loss improved from 1.17247 to 0.99582, saving model to model.h5
23/23 - 35s - loss: 0.9852 - accuracy: 0.6541 - val_loss: 0.9958 - val_accuracy: 0.6396
Epoch 4/69

Epoch 00004: val_loss improved from 0.99582 to 0.80038, saving model to model.h5
23/23 - 37s - loss: 0.4359 - accuracy: 0.8422 - val_loss: 0.2800 - val_accuracy: 0.8676
Epoch 5/69

Epoch 00005: val_loss improved from 0.80038 to 0.77558, saving model to model.h5
23/23 - 34s - loss: 0.3299 - accuracy: 0.8837 - val_loss: 0.3682 - val_accuracy: 0.8962
