In [1]:
import os
import numpy as np
from PIL import Image
from tqdm import tqdm
import tensorflow as tf
from tensorflow.keras import layers, models

2024-07-17 16:39:39.120392: I tensorflow/core/util/util.cc:169] oneDNN custom operations are on. You may see slightly different numerical results due to floating-point round-off errors from different computation orders. To turn them off, set the environment variable `TF_ENABLE_ONEDNN_OPTS=0`.


In [2]:
train_dataset = tf.keras.preprocessing.image_dataset_from_directory(
    'resized_train_cifar100',
    label_mode='int',  # 根据您的标签类型调整，可能需要额外处理标签文件
    batch_size=128,
    image_size=(224, 224))

test_dataset = tf.keras.preprocessing.image_dataset_from_directory(
    'resized_test_cifar100',
    label_mode='int',
        batch_size=128,
    image_size=(224, 224))

Found 50000 files belonging to 100 classes.


2024-07-17 16:39:42.066011: I tensorflow/core/platform/cpu_feature_guard.cc:193] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations:  AVX2 AVX512F AVX512_VNNI FMA
To enable them in other operations, rebuild TensorFlow with the appropriate compiler flags.
2024-07-17 16:39:42.652215: I tensorflow/core/common_runtime/gpu/gpu_device.cc:1532] Created device /job:localhost/replica:0/task:0/device:GPU:0 with 78924 MB memory:  -> device: 0, name: NVIDIA A800 80GB PCIe, pci bus id: 0000:52:00.0, compute capability: 8.0


Found 10000 files belonging to 100 classes.


In [3]:
class SparseConv2D(layers.Layer):
    def __init__(self, filters, kernel_size, p, **kwargs):
        super(SparseConv2D, self).__init__(**kwargs)
        self.filters = filters
        self.kernel_size = kernel_size
        self.p = p

    def build(self, input_shape):
        self.kernel = self.add_weight(name='kernel',
                                      shape=(self.kernel_size, self.kernel_size, input_shape[-1], self.filters),
                                      initializer='glorot_uniform',
                                      trainable=True)
        self.bias = self.add_weight(name='bias',
                                    shape=(self.filters,),
                                    initializer='zeros',
                                    trainable=True)

    def call(self, inputs, training=None):
        if training:
            mask = tf.random.uniform(shape=(self.filters,), minval=0, maxval=1)
            mask = tf.cast(mask < self.p, dtype=tf.float32)
            mask = tf.reshape(mask, [1, 1, 1, self.filters])
        else:
            mask = tf.ones([1, 1, 1, self.filters]) * self.p
    
        sparse_kernel = self.kernel * mask
        conv = tf.nn.conv2d(inputs, sparse_kernel, strides=[1, 1, 1, 1], padding='SAME')
        return tf.nn.bias_add(conv, self.bias)

    def update_p(self, new_p):
        self.p = new_p

In [4]:
# 创建模型使用稀疏卷积层
inputs = tf.keras.Input(shape=(224, 224, 3))  # 输入层，形状为 (224, 224, 3)
x = SparseConv2D(filters=64, kernel_size=3, p=1, name='sparse_conv2d_1.1')(inputs)  # 第一个稀疏卷积层
x = layers.Activation('relu')(x)  # 激活函数
x = SparseConv2D(filters=64, kernel_size=3, p=1, name='sparse_conv2d_1.2')(x)  # 第一个稀疏卷积层
x = layers.Activation('relu')(x)
x = layers.MaxPooling2D(pool_size=(2, 2))(x)  # 池化层

# 添加更多的卷积层
x = SparseConv2D(filters=128, kernel_size=3, p=1, name='sparse_conv2d_2.1')(x)
x = layers.Activation('relu')(x)
x = SparseConv2D(filters=128, kernel_size=3, p=1, name='sparse_conv2d_2.2')(x)
x = layers.Activation('relu')(x)
x = layers.MaxPooling2D(pool_size=(2, 2))(x)

x = SparseConv2D(filters=256, kernel_size=3, p=1, name='sparse_conv2d_3.1')(x)
x = layers.Activation('relu')(x)
x = SparseConv2D(filters=256, kernel_size=3, p=1, name='sparse_conv2d_3.2')(x)
x = layers.Activation('relu')(x)
x = SparseConv2D(filters=256, kernel_size=3, p=1, name='sparse_conv2d_3.3')(x)
x = layers.Activation('relu')(x)
x = layers.MaxPooling2D(pool_size=(2, 2))(x)

x = SparseConv2D(filters=512, kernel_size=3, p=1, name='sparse_conv2d_4.1')(x)
x = layers.Activation('relu')(x)
x = SparseConv2D(filters=512, kernel_size=3, p=1, name='sparse_conv2d_4.2')(x)
x = layers.Activation('relu')(x)
x = SparseConv2D(filters=512, kernel_size=3, p=1, name='sparse_conv2d_4.3')(x)
x = layers.Activation('relu')(x)
x = layers.MaxPooling2D(pool_size=(2, 2))(x)

x = SparseConv2D(filters=512, kernel_size=3, p=1, name='sparse_conv2d_5.1')(x)
x = layers.Activation('relu')(x)
x = SparseConv2D(filters=512, kernel_size=3, p=1, name='sparse_conv2d_5.2')(x)
x = layers.Activation('relu')(x)
x = SparseConv2D(filters=512, kernel_size=3, p=1.0, name='sparse_conv2d_5.3')(x)
x = layers.Activation('relu')(x)
x = layers.Flatten()(x)  # 全局最大池化层

# 添加更多的全连接层
x = layers.Dense(4096, activation='relu')(x)
x = layers.Dense(4096, activation='relu')(x)
x = layers.Dense(1000, activation='relu')(x)
outputs = layers.Dense(100, activation='softmax')(x)  # 输出层，假设输出为100个类别

model = models.Model(inputs=inputs, outputs=outputs)
model.summary()

Model: "model"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 input_1 (InputLayer)        [(None, 224, 224, 3)]     0         
                                                                 
 sparse_conv2d_1.1 (SparseCo  (None, 224, 224, 64)     1792      
 nv2D)                                                           
                                                                 
 activation (Activation)     (None, 224, 224, 64)      0         
                                                                 
 sparse_conv2d_1.2 (SparseCo  (None, 224, 224, 64)     36928     
 nv2D)                                                           
                                                                 
 activation_1 (Activation)   (None, 224, 224, 64)      0         
                                                                 
 max_pooling2d (MaxPooling2D  (None, 112, 112, 64)     0     

In [5]:
class UpdatePSparsity(tf.keras.callbacks.Callback):
    def __init__(self, model, sparsity_schedule):
        super(UpdatePSparsity, self).__init__()
        self.model = model
        self.sparsity_schedule = sparsity_schedule

    def on_epoch_end(self, epoch, logs=None):
        for layer_name, new_p in self.sparsity_schedule.items():
            layer = self.model.get_layer(name=layer_name)
            if epoch < len(new_p):
                p_value = new_p[epoch]
            else:
                p_value = new_p[-1]  # Use the last value for epochs beyond the predefined ones
            layer.update_p(p_value)
            #print(f"\nEpoch {epoch + 1}: Updated layer {layer_name} sparsity p to {p_value}")

sparsity_schedule = {
    'sparse_conv2d_5.1': [1.0],
    'sparse_conv2d_5.2': [1.0],
    'sparse_conv2d_5.3': [1.0]
}

In [6]:
# 使用较小的学习率和 Adam 优化器
optimizer = tf.keras.optimizers.Adam(learning_rate=0.0001)

# 编译模型
model.compile(optimizer=optimizer,
              loss='sparse_categorical_crossentropy',
              metrics=['accuracy'])

In [7]:
# 训练模型，并在验证集上验证
model.fit(
    train_dataset,
    epochs=40,
    validation_data=test_dataset,
    callbacks=[UpdatePSparsity(model, sparsity_schedule)]  # 确保这个回调函数是正确配置的
)

Epoch 1/40


2024-07-17 16:39:46.175740: I tensorflow/stream_executor/cuda/cuda_dnn.cc:384] Loaded cuDNN version 8101
2024-07-17 16:39:48.781093: I tensorflow/stream_executor/cuda/cuda_blas.cc:1786] TensorFloat-32 will be used for the matrix multiplication. This will only be logged once.


Epoch 2/40
Epoch 3/40
Epoch 4/40
Epoch 5/40
Epoch 6/40
Epoch 7/40
Epoch 8/40
Epoch 9/40
Epoch 10/40
Epoch 11/40
Epoch 12/40
Epoch 13/40
Epoch 14/40
Epoch 15/40
Epoch 16/40
Epoch 17/40
Epoch 18/40
Epoch 19/40
Epoch 20/40
Epoch 21/40
Epoch 22/40
Epoch 23/40
Epoch 24/40
Epoch 25/40
Epoch 26/40
Epoch 27/40
Epoch 28/40
Epoch 29/40
Epoch 30/40
Epoch 31/40
Epoch 32/40
Epoch 33/40
Epoch 34/40
Epoch 35/40
Epoch 36/40
Epoch 37/40
Epoch 38/40
Epoch 39/40
Epoch 40/40


<keras.callbacks.History at 0x7fcaf389ba00>