[Inception网络的缺点分析](https://blog.csdn.net/qq_38807688/article/details/84589563)  
[原文地址](https://arxiv.org/pdf/1512.00567.pdf)  
1. Inception架构的复杂性使得对网络进行更改变得更加困难。如果只是单纯地放大架构，大部分计算收益可能会立即丧失（即计算效率高的优势随之消失）。

2. 缺少对Inception网络各项设计决策的明确说明，这使得它难以在适应新用例的同时保持效率。例如，如果需要提升某个Inception模型的容量，简单地将所有滤波器组的大小加倍，会使计算成本和参数数量都增加到原来的4倍。在许多实际场景中，这样的代价可能过高或不合理，尤其是当相应的收益并不显著时。

In [5]:
import os
import numpy as np
import matplotlib.pyplot as plt
import tensorflow as tf
from tensorflow.keras import datasets, utils, layers, models, optimizers, metrics

# Seed NumPy's and TensorFlow's global random generators with one shared value.
seed = 13
np.random.seed(seed)
tf.random.set_seed(seed)

In [9]:

def data_scale_down(train, test, scale):
    """Keep only the leading ``1/scale`` portion of each split.

    Args:
        train: ``(inputs, labels)`` pair for the training split.
        test: ``(inputs, labels)`` pair for the test split.
        scale: Integer divisor; each split keeps its first
            ``len(inputs) // scale`` samples.

    Returns:
        ``(train_inputs, train_labels), (test_inputs, test_labels)`` after
        truncation.
    """
    def _head(split):
        # The number of kept samples is derived from the inputs and applied
        # to the labels as well, so both stay aligned.
        inputs, labels = split
        keep = len(inputs) // scale
        return inputs[:keep], labels[:keep]

    return _head(train), _head(test)

def data_normalize(train, test, num_classes=None):
    """Rescale the inputs and one-hot encode the labels of both splits.

    Args:
        train: ``(inputs, labels)`` pair for the training split.
        test: ``(inputs, labels)`` pair for the test split.
        num_classes: Width of the one-hot label vectors. When ``None`` it is
            inferred once as ``max(label) + 1`` over *both* splits, so the
            train and test label matrices always have the same number of
            columns even if one split happens to lack the highest class.

    Returns:
        ``(x_train, y_train), (x_test, y_test)`` with float32 inputs divided
        by 255 and carrying an extra axis at position 3, and one-hot labels.
    """
    (x_train, y_train) = train
    (x_test, y_test) = test

    x_train = x_train.astype(np.float32) / 255.
    x_test = x_test.astype(np.float32) / 255.
    # Insert an axis at position 3, e.g. [b, 28, 28] => [b, 28, 28, 1]
    x_train = np.expand_dims(x_train, axis=3)
    x_test = np.expand_dims(x_test, axis=3)

    if num_classes is None:
        # Letting to_categorical infer the width per split can yield
        # different column counts for train and test; use one shared width.
        num_classes = int(max(np.max(y_train), np.max(y_test))) + 1

    # [b] => [b, num_classes]
    y_train = utils.to_categorical(y_train, num_classes=num_classes)
    y_test = utils.to_categorical(y_test, num_classes=num_classes)

    return (x_train, y_train), (x_test, y_test)

def mnist_dataset(scale=10, batch_size=256):
    """Load MNIST, shrink it, normalize it and wrap it in batched datasets.

    Args:
        scale: Divisor handed to ``data_scale_down``; each split keeps its
            first ``1/scale`` portion. Defaults to 10.
        batch_size: Batch size applied to both returned datasets.
            Defaults to 256.

    Returns:
        ``(db_train, db_test)``: batched ``tf.data.Dataset`` objects built
        from the processed ``(inputs, labels)`` arrays.
    """
    train, test = datasets.mnist.load_data()
    train, test = data_scale_down(train, test, scale=scale)
    (x_train, y_train), (x_test, y_test) = data_normalize(train, test)

    db_train = tf.data.Dataset.from_tensor_slices((x_train, y_train)).batch(batch_size)
    db_test = tf.data.Dataset.from_tensor_slices((x_test, y_test)).batch(batch_size)

    # Report the array shapes that went into the datasets.
    print("x_train.shape", x_train.shape)
    print("y_train.shape", y_train.shape)
    print("x_test.shape", x_test.shape)
    print("y_test.shape", y_test.shape)

    return db_train, db_test
    
# Build the batched train/test datasets at module level (prints the array shapes).
db_train, db_test = mnist_dataset()

x_train.shape (6000, 28, 28, 1)
y_train.shape (6000, 10)
x_test.shape (1000, 28, 28, 1)
y_test.shape (1000, 10)


In [10]:

class ConvBNRelu(tf.keras.Model):
    """3x3 'same'-padded Conv2D followed by BatchNormalization and ReLU.

    Args:
        channel: Number of output filters of the convolution.
    """

    def __init__(self, channel):
        # Zero-argument super() does not look the class up by its global
        # name, so it keeps working if that name is rebound later on.
        super().__init__()
        self.model = self.create_model(channel)

    def call(self, x, training=None):
        """Run ``x`` through the stack, forwarding the ``training`` flag."""
        return self.model(x, training=training)

    @staticmethod
    def create_model(channel):
        """Return the Conv2D -> BatchNormalization -> ReLU Sequential stack.

        Declared as a static method: the original signature had no ``self``,
        so ``self.create_model(channel)`` in ``__init__`` raised TypeError.
        """
        model = models.Sequential()
        # The activation is a separate layer placed after BatchNormalization,
        # matching the order the class name spells out.
        model.add(layers.Conv2D(filters=channel, kernel_size=3, padding="same"))
        model.add(layers.BatchNormalization())
        model.add(layers.ReLU())
        return model


In [None]:
import  os
import  tensorflow as tf
import  numpy as np
from    tensorflow import keras


# In[1]:


# Seed TensorFlow's and NumPy's global random generators.
tf.random.set_seed(22)
np.random.seed(22)
# NOTE(review): this is assigned after `import tensorflow` has already run
# earlier in the file, so it presumably cannot affect messages emitted during
# the import itself — confirm whether it should be set before the import.
os.environ['TF_CPP_MIN_LOG_LEVEL'] = '2'
# Stop early unless a TensorFlow 2.x release is installed.
assert tf.__version__.startswith('2.')


# In[2]:


(x_train, y_train), (x_test, y_test) = keras.datasets.mnist.load_data()

# Cast to float32 and divide by 255, then add an axis at position 3:
# [b, 28, 28] => [b, 28, 28, 1]
x_train = np.expand_dims(x_train.astype(np.float32) / 255., axis=3)
x_test = np.expand_dims(x_test.astype(np.float32) / 255., axis=3)

# Labels are passed through unchanged; both datasets use batches of 256.
db_train = tf.data.Dataset.from_tensor_slices((x_train, y_train)).batch(256)
db_test = tf.data.Dataset.from_tensor_slices((x_test, y_test)).batch(256)

# In[3]:


# Show the input and label array shapes of each split.
print(x_train.shape, y_train.shape)
print(x_test.shape, y_test.shape)


# In[4]:


class ConvBNRelu(keras.Model):
    """Conv2D -> BatchNormalization -> ReLU, wrapped as one Keras model.

    Args:
        ch: Number of convolution filters.
        kernelsz: Convolution kernel size.
        strides: Convolution stride.
        padding: Convolution padding mode.
    """

    def __init__(self, ch, kernelsz=3, strides=1, padding='same'):
        super().__init__()

        stack = [
            keras.layers.Conv2D(ch, kernelsz, strides=strides, padding=padding),
            keras.layers.BatchNormalization(),
            keras.layers.ReLU(),
        ]
        self.model = keras.models.Sequential(stack)

    def call(self, x, training=None):
        """Apply the stack to ``x``, forwarding the ``training`` flag."""
        return self.model(x, training=training)
    
        
        


# In[5]:


class InceptionBlk(keras.Model):
    """Four parallel branches whose outputs are concatenated on axis 3.

    Branches: a single ConvBNRelu, a second single ConvBNRelu, two chained
    ConvBNRelu units, and a 3x3 max-pool followed by a ConvBNRelu.

    Args:
        ch: Number of filters produced by every ConvBNRelu in the block.
        strides: Stride of the first convolution in each branch.
    """

    def __init__(self, ch, strides=1):
        super().__init__()

        self.ch = ch
        self.strides = strides

        # Sub-models are created in a fixed order: branch 1, branch 2,
        # the two-step branch 3, then the pooling branch.
        self.conv1 = ConvBNRelu(ch, strides=strides)
        self.conv2 = ConvBNRelu(ch, kernelsz=3, strides=strides)
        self.conv3_1 = ConvBNRelu(ch, kernelsz=3, strides=strides)
        self.conv3_2 = ConvBNRelu(ch, kernelsz=3, strides=1)

        # The pooling itself uses stride 1; the requested stride is applied
        # by the convolution that follows it.
        self.pool = keras.layers.MaxPooling2D(3, strides=1, padding='same')
        self.pool_conv = ConvBNRelu(ch, strides=strides)

    def call(self, x, training=None):
        """Run every branch on ``x`` and concatenate the results on axis 3."""
        branch_a = self.conv1(x, training=training)
        branch_b = self.conv2(x, training=training)
        branch_c = self.conv3_2(self.conv3_1(x, training=training), training=training)
        branch_d = self.pool_conv(self.pool(x), training=training)

        # concat along axis=channel
        return tf.concat([branch_a, branch_b, branch_c, branch_d], axis=3)


# In[6]:


class Inception(keras.Model):
    """Stem ConvBNRelu, stacked InceptionBlk pairs, global pooling, Dense head.

    Args:
        num_layers: Number of block pairs; each pair is a stride-2
            InceptionBlk followed by a stride-1 InceptionBlk.
        num_classes: Number of units in the final Dense layer.
        init_ch: Filter count of the stem and of the first pair; it is
            doubled after every pair.
        **kwargs: Forwarded to ``keras.Model``.
    """

    def __init__(self, num_layers, num_classes, init_ch=16, **kwargs):
        super().__init__(**kwargs)

        self.in_channels = init_ch
        self.out_channels = init_ch
        self.num_layers = num_layers
        self.init_ch = init_ch

        self.conv1 = ConvBNRelu(init_ch)

        self.blocks = keras.models.Sequential(name='dynamic-blocks')
        for _ in range(num_layers):
            # First block of the pair uses stride 2, the second stride 1.
            for stride in (2, 1):
                self.blocks.add(InceptionBlk(self.out_channels, strides=stride))
            # Double the filter count for the next pair.
            self.out_channels *= 2

        self.avg_pool = keras.layers.GlobalAveragePooling2D()
        self.fc = keras.layers.Dense(num_classes)

    def call(self, x, training=None):
        """Return the Dense layer's output for input ``x``."""
        features = self.conv1(x, training=training)
        features = self.blocks(features, training=training)
        pooled = self.avg_pool(features)
        return self.fc(pooled)
            
        


# In[7]:


# build model and optimizer
# NOTE: batch_size is not read anywhere in this section; db_train and db_test
# are iterated exactly as they were batched when they were created.
batch_size = 32
epochs = 100
model = Inception(2, 10)
# derive input shape for every layers.
model.build(input_shape=(None, 28, 28, 1))
model.summary()

optimizer = keras.optimizers.Adam(learning_rate=1e-3)
# The model's Dense head has no activation, so the loss consumes raw logits.
criteon = keras.losses.CategoricalCrossentropy(from_logits=True)

acc_meter = keras.metrics.Accuracy()


# Honour the configured epoch count instead of a second hard-coded 100.
for epoch in range(epochs):

    for step, (x, y) in enumerate(db_train):

        with tf.GradientTape() as tape:
            # training=True makes BatchNormalization normalise with the
            # current batch statistics and update its moving averages;
            # without the flag the layers are not run in training mode.
            # [b, 10]
            logits = model(x, training=True)
            # one-hot labels [b, 10] vs logits [b, 10]
            loss = criteon(tf.one_hot(y, depth=10), logits)

        grads = tape.gradient(loss, model.trainable_variables)
        optimizer.apply_gradients(zip(grads, model.trainable_variables))

        if step % 10 == 0:
            print(epoch, step, 'loss:', loss.numpy())

    # Evaluate on the test set after every epoch, starting from a clean metric.
    acc_meter.reset_states()
    for x, y in db_test:
        # [b, 10]
        logits = model(x, training=False)
        # [b, 10] => [b]
        pred = tf.argmax(logits, axis=1)
        # labels [b] vs predictions [b]
        acc_meter.update_state(y, pred)

    print(epoch, 'evaluation acc:', acc_meter.result().numpy())