## ResNet18

In [6]:
import tensorflow as tf
from tensorflow.keras import layers, Sequential, datasets, optimizers
from tensorflow import keras
import os
from tensorflow.compat.v1 import ConfigProto
from tensorflow.compat.v1 import InteractiveSession

In [8]:
config = ConfigProto()
config.gpu_options.allow_growth = True
session = InteractiveSession(config=config)

os.environ['TF_CPP_MIN_LOG_LEVEL'] = '2'
tf.random.set_seed(2345)

In [12]:
# 定义 BasicBlock类（残差基本单元）
class BasicBlock(layers.Layer):
    # 其中 filter_num 是该卷积核通道的数量，是把输入进该卷积核的channel转换为数量为filter_num大小
    def __init__(self, filter_num, stride=1):
        super(BasicBlock, self).__init__()
        # 定义BasicBlock中的第一个卷积单元核，包括3*3的卷积、BachNom、ReLU。
        # 这个卷积核接受一个输入数据 [b, h, w, c],
        self.conv1 = layers.Conv2D(filter_num, kernel_size=[3, 3], strides=stride, padding='same')
        self.bn1 = layers.BatchNormalization()
        self.relu1 = layers.Activation('relu')
        # 定义BasicBlock中的第二个卷积单元变量
        # 其中strides 设置为1，表示为不间隔进行卷积操作，不减少featureMap
        self.conv2 = layers.Conv2D(filter_num, kernel_size=[3, 3], strides=1, padding='same')
        self.bn2 = layers.BatchNormalization()
        self.relu2 = layers.Activation('relu')
        # 定义残差网络中，短接的那一部分；分为stride为1和不为1两种情况，满足最后加和条件
        if stride != 1:
            self.downsample = Sequential()
            self.downsample.add(layers.Conv2D(filter_num, kernel_size=[1, 1], strides=stride))
        else:
            self.downsample = lambda x : x
            
    # call() 在调用该类时调用这个call() 方法，实现前向传播，在调用BasicBlock类对象时，执行该方法，
    # 第一个参数是输入该神经网络的数据 [b, h, w, c]
    def call(self, inputs, training=None):
        # inputs [b, h, w, c]
        out1 = self.conv1(inputs)
        out1 = self.bn1(out1)
        out1 = self.relu1(out1)
        
        out2 = self.conv2(out1)
        out2 = self.bn2(out2)
        
        identity_out = self.downsample(inputs)
        output = layers.add([out2, identity_out])    # 调用layers.add() 实现对应元素相加
        output = self.relu2(output)
        
        return output
    
# 定义RetNet 类（包括定义残差块build_RestBlock，包括两个基本的残差基本单元）
class RetNet(keras.Model):
    # 定义初始化函数
    def __init__(self, layer_dims, num_classes=100):
        super(RetNet, self).__init__()
        # 定义预处理的一个卷积层
        self.stem = Sequential([
            layers.Conv2D(64, kernel_size=[3, 3], strides=1),
            layers.BatchNormalization(),
            layers.Activation('relu'),
            layers.MaxPool2D(pool_size=(2, 2), strides=1, padding='same')
        ])
        # 创建4个RestBlock，指定每个的channel（filter数量），
        # 其中从第二个起设置stride=2，以达到减小featureMap的目的
        self.layer1 = self.build_RestBlock(64, layer_dims[0])
        self.layer2 = self.build_RestBlock(128, layer_dims[1], stride=2)
        self.layer3 = self.build_RestBlock(256, layer_dims[2], stride=2)
        self.layer4 = self.build_RestBlock(512, layer_dims[3], stride=2)
        # 考虑到全连接层输入决定于前面层的输出 [b, h, w, 512]，需要降维打平操作
        # 调用layers.GlobalAveragePooling2D()方法，简称GAP，全局池化层
        # 具体可参考：https://www.cnblogs.com/hutao722/p/10008581.html
        self.avgpool = layers.GlobalAveragePooling2D()
        # 创建全连接层，用来分类,参数是输出类别数
        self.fc = layers.Dense(num_classes)
    # 前向传播    
    def call(self, inputs, training=None):
        # 预处理卷积层
        x = self.stem(inputs)
        # 四个残差块，包括8个残差单元，一个经过16个卷积核
        x = self.layer1(x)
        x = self.layer2(x)
        x = self.layer3(x)
        x = self.layer4(x)
        # 全池化层 [b, c]
        x = self.avgpool(x)
        # 全连接层 [b, 100]
        x = self.fc(x)
        
        return x
    
    # 定义残差块，包括两个残差基本单元，filter_num该残差块中通道的数量，blocks表示包含的残差基本单元数
    def build_RestBlock(self, filter_num, blocks, stride=1):
        # 定义神经网络容器Sequential
        res_blocks = Sequential()
        # 在res_blocks添加层（残差基本单元BasicBlock），可能发生下采样
        res_blocks.add(BasicBlock(filter_num, stride))
        # 
        for _ in range(1, blocks):    # 设置为从1到 blocks，其中包括1，但不包括 blocks
            # 不允许下采样，因为在残差单元中要保存featureMap不变，便于与identity进行加和操作
            res_blocks.add(BasicBlock(filter_num, stride=1))
            
        return res_blocks
    
# 定义ResNet18 [2, 2, 2, 2]
def resnet18():
    return RetNet([2, 2, 2, 2])
    
# 定义ResNet34 [3, 4, 6, 3]  
def resnet34():
    return RetNet([3, 4, 6, 3])

In [13]:
# 定义预处理函数
def preprocess(x, y):
    # [-0.5 ~ 0.5]
    x = tf.cast(x, dtype=tf.float32) / 255. - 0.5
    y = tf.cast(y, dtype=tf.int32)
    return x, y

# 加载数据集和数据集预处理
(x, y), (x_test, y_test) = datasets.cifar100.load_data()
y = tf.squeeze(y, axis=1)
y_test = tf.squeeze(y_test, axis=1)

train_db = tf.data.Dataset.from_tensor_slices((x, y))
train_db = train_db.shuffle(10000).map(preprocess).batch(256)
test_db = tf.data.Dataset.from_tensor_slices((x_test, y_test))
test_db = test_db.map(preprocess).batch(256)

# 查看数据集切片
sample = next(iter(train_db))
print('sample_train: ', sample[0].shape, sample[1].shape, tf.reduce_min(sample[0]),
     tf.reduce_max(sample[1]))


sample_train:  (256, 32, 32, 3) (256,) tf.Tensor(-0.5, shape=(), dtype=float32) tf.Tensor(99, shape=(), dtype=int32)


In [15]:
# 生成ResNet18网络
model = resnet18()
model.build(input_shape=(None, 32, 32, 3))
model.summary()

# 定义优化器
optimizer = optimizers.Adam(lr=1e-3)

Model: "ret_net_2"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
sequential_10 (Sequential)   multiple                  2048      
_________________________________________________________________
sequential_11 (Sequential)   multiple                  148736    
_________________________________________________________________
sequential_12 (Sequential)   multiple                  526976    
_________________________________________________________________
sequential_14 (Sequential)   multiple                  2102528   
_________________________________________________________________
sequential_16 (Sequential)   multiple                  8399360   
_________________________________________________________________
global_average_pooling2d_1 ( multiple                  0         
_________________________________________________________________
dense_1 (Dense)              multiple                  51

In [17]:

for epoch in range(100):
    for step, (x, y) in enumerate(train_db):
        with tf.GradientTape() as tape:   
            logits = model(x)   
            y_onehot = tf.one_hot(y, depth=500)
            # compute loss
            loss = tf.losses.categorical_crossentropy(y_onehot, logits, from_logits=True)
            loss = tf.reduce_mean(loss)
        
        # compute grades    
        grads = tape.gradient(loss, model.trainable_variables)
        # update variables 
        optimizer.apply_gradients(zip(grads, model.trainable_variables))
        
        # show loss
        if step % 100 == 0:
            print(epoch, step, 'loss: ', float(loss))
            
    # test    
    total_num = 0
    total_correct = 0
    for x, y in test_db:
        logits = model(x)
        prob = tf.nn.softmax(logits, axis=1)
        preb = tf.argmax(prob, axis=1)    # int64
        preb = tf.cast(preb, dtype=tf.int32)
        
        correct = tf.cast(tf.equal(preb, y), dtype=tf.int32)
        correct = tf.reduce_sum(correct)
        
        total_num += x.shape[0]
        total_correct += int(correct)
        
    acc = total_correct / total_num
    print(epoch, 'acc: ', acc)
    
    

0 0 loss:  4.603561878204346
0 100 loss:  4.31700325012207
0 acc:  0.0589
1 0 loss:  4.096081733703613
1 100 loss:  3.809342384338379
1 acc:  0.1451
2 0 loss:  3.621166944503784
2 100 loss:  3.3795464038848877
2 acc:  0.2145
3 0 loss:  3.22817325592041
3 100 loss:  2.904437780380249
3 acc:  0.2602
4 0 loss:  2.857774257659912
4 100 loss:  2.6984262466430664
4 acc:  0.287
5 0 loss:  2.5338616371154785
5 100 loss:  2.5153427124023438
5 acc:  0.3092
6 0 loss:  2.3365795612335205
6 100 loss:  2.3313212394714355
6 acc:  0.3099
7 0 loss:  2.1136186122894287
7 100 loss:  2.0633597373962402
7 acc:  0.317
8 0 loss:  1.8272517919540405
8 100 loss:  1.9870266914367676
8 acc:  0.3022
9 0 loss:  1.8123266696929932
9 100 loss:  1.8604434728622437
9 acc:  0.2904
10 0 loss:  1.7031092643737793
10 100 loss:  1.7336666584014893
10 acc:  0.2994
11 0 loss:  1.4488496780395508
11 100 loss:  1.583518385887146
11 acc:  0.3027
12 0 loss:  1.4021451473236084
12 100 loss:  1.482235074043274
12 acc:  0.2768
13 0