In [1]:
import os
import numpy as np
from PIL import Image
from tqdm import tqdm
import tensorflow as tf
from tensorflow.keras import layers, models

2024-07-17 19:27:03.621396: I tensorflow/core/util/util.cc:169] oneDNN custom operations are on. You may see slightly different numerical results due to floating-point round-off errors from different computation orders. To turn them off, set the environment variable `TF_ENABLE_ONEDNN_OPTS=0`.


In [2]:
# Training split. label_mode='int' yields integer class ids; adjust it to match
# your label type (a separate label file may need extra handling).
train_dataset = tf.keras.preprocessing.image_dataset_from_directory(
    directory='resized_train_cifar100',
    label_mode='int',
    image_size=(224, 224),
    batch_size=128,
)

# Held-out split, loaded with the same image size and batch size.
test_dataset = tf.keras.preprocessing.image_dataset_from_directory(
    directory='resized_test_cifar100',
    label_mode='int',
    image_size=(224, 224),
    batch_size=128,
)

Found 50000 files belonging to 100 classes.


2024-07-17 19:27:06.473703: I tensorflow/core/platform/cpu_feature_guard.cc:193] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations:  AVX2 AVX512F AVX512_VNNI FMA
To enable them in other operations, rebuild TensorFlow with the appropriate compiler flags.
2024-07-17 19:27:07.045112: I tensorflow/core/common_runtime/gpu/gpu_device.cc:1532] Created device /job:localhost/replica:0/task:0/device:GPU:0 with 78924 MB memory:  -> device: 0, name: NVIDIA A800 80GB PCIe, pci bus id: 0000:56:00.0, compute capability: 8.0


Found 10000 files belonging to 100 classes.


In [3]:
class SparseConv2D(layers.Layer):
    """Stride-1, SAME-padded 2-D convolution with random per-filter masking.

    While training, one uniform random number is drawn per output filter on
    every call; a filter is kept when its draw is below ``p`` and its kernel
    slice is zeroed otherwise (the bias is always added). The mask is shared by
    all samples in the batch. Outside of training the whole kernel is scaled
    by ``p`` instead.

    ``p`` is mirrored into a non-trainable scalar weight. A plain Python
    attribute would be captured as a constant when Keras traces the training
    step, so later calls to ``update_p`` would be silently ignored; reading a
    variable inside ``call`` makes the update visible to the compiled graph.
    NOTE: this adds one non-trainable weight to the layer, so weight lists
    saved from a version without it will not match.

    Args:
        filters: Number of output filters.
        kernel_size: Integer side length of the square kernel.
        p: Probability of keeping each filter during training.
        **kwargs: Forwarded to ``layers.Layer`` (e.g. ``name``).
    """

    def __init__(self, filters, kernel_size, p, **kwargs):
        super(SparseConv2D, self).__init__(**kwargs)
        self.filters = filters
        self.kernel_size = kernel_size
        # Python-side copy of the keep probability (used for config and as the
        # initial value of the graph-visible weight created in build()).
        self.p = p
        self._p_var = None

    def build(self, input_shape):
        """Create the kernel, the bias and the scalar weight that stores ``p``."""
        self.kernel = self.add_weight(name='kernel',
                                      shape=(self.kernel_size, self.kernel_size, input_shape[-1], self.filters),
                                      initializer='glorot_uniform',
                                      trainable=True)
        self.bias = self.add_weight(name='bias',
                                    shape=(self.filters,),
                                    initializer='zeros',
                                    trainable=True)
        self._p_var = self.add_weight(name='p',
                                      shape=(),
                                      initializer=tf.keras.initializers.Constant(float(self.p)),
                                      trainable=False)

    def call(self, inputs, training=None):
        """Convolve ``inputs`` with the masked (training) or scaled (inference) kernel."""
        # Read p from the weight so that update_p() is honoured after tracing.
        keep_prob = tf.cast(self._p_var, tf.float32)

        if training:
            draws = tf.random.uniform(shape=(self.filters,), minval=0, maxval=1)
            mask = tf.cast(draws < keep_prob, dtype=tf.float32)
            mask = tf.reshape(mask, [1, 1, 1, self.filters])
        else:
            # No sampling at inference: scale every filter by p.
            mask = tf.ones([1, 1, 1, self.filters]) * keep_prob

        sparse_kernel = self.kernel * mask
        conv = tf.nn.conv2d(inputs, sparse_kernel, strides=[1, 1, 1, 1], padding='SAME')
        return tf.nn.bias_add(conv, self.bias)

    def update_p(self, new_p):
        """Set a new keep probability; safe to call before or after build()."""
        self.p = new_p
        if self._p_var is not None:
            self._p_var.assign(float(new_p))

    def get_config(self):
        """Return the constructor arguments so the layer can be re-created."""
        config = super(SparseConv2D, self).get_config()
        config.update({
            'filters': self.filters,
            'kernel_size': self.kernel_size,
            'p': float(self.p),
        })
        return config

In [4]:
import tensorflow as tf
from tensorflow.keras.layers import Conv2D, MaxPooling2D, Flatten, Dense, Dropout, BatchNormalization
from tensorflow.keras.models import Sequential

# Use the Adam optimizer with a small learning rate.
LEARNING_RATE = 1e-4
optimizer = tf.keras.optimizers.Adam(learning_rate=LEARNING_RATE)

def alexnet_model(input_shape=(224, 224, 3), num_classes=100):
    """Assemble an AlexNet-style Sequential classifier.

    The convolutional stack uses standard Conv2D layers except for the last
    convolution, which is a SparseConv2D named 'sparse_conv2d' (p=1) followed
    by a separate ReLU activation. Two 4096-unit dense layers with dropout feed
    a softmax output of size ``num_classes``.

    Args:
        input_shape: Shape of one input image, passed to the first Conv2D.
        num_classes: Number of units in the final softmax layer.

    Returns:
        The uncompiled Sequential model.
    """
    # Convolutional feature extractor.
    feature_layers = [
        Conv2D(96, kernel_size=11, strides=4, padding='valid', activation='relu', input_shape=input_shape),
        BatchNormalization(),
        MaxPooling2D(pool_size=3, strides=2),
        Conv2D(256, kernel_size=5, padding='same', activation='relu'),
        BatchNormalization(),
        MaxPooling2D(pool_size=3, strides=2),
        Conv2D(384, kernel_size=3, padding='same', activation='relu'),
        Conv2D(384, kernel_size=3, padding='same', activation='relu'),
        SparseConv2D(filters=256, kernel_size=3, p=1, name='sparse_conv2d'),
        layers.Activation('relu'),
        MaxPooling2D(pool_size=3, strides=2),
    ]

    # Fully connected classifier head.
    classifier_layers = [
        Flatten(),
        Dense(4096, activation='relu'),
        Dropout(0.5),
        Dense(4096, activation='relu'),
        Dropout(0.5),
        Dense(num_classes, activation='softmax'),
    ]

    return Sequential(feature_layers + classifier_layers)

# Create the model with the default input shape and class count.
model = alexnet_model()

# Compile the model; integer labels pair with the sparse cross-entropy loss.
model.compile(
    loss='sparse_categorical_crossentropy',
    optimizer=optimizer,
    metrics=['accuracy'],
)

# Print the model summary.
model.summary()

Model: "sequential"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 conv2d (Conv2D)             (None, 54, 54, 96)        34944     
                                                                 
 batch_normalization (BatchN  (None, 54, 54, 96)       384       
 ormalization)                                                   
                                                                 
 max_pooling2d (MaxPooling2D  (None, 26, 26, 96)       0         
 )                                                               
                                                                 
 conv2d_1 (Conv2D)           (None, 26, 26, 256)       614656    
                                                                 
 batch_normalization_1 (Batc  (None, 26, 26, 256)      1024      
 hNormalization)                                                 
                                                        

In [5]:
class UpdatePSparsity(tf.keras.callbacks.Callback):
    """Callback that updates the ``p`` of selected layers after every epoch.

    Args:
        model: Model whose layers are looked up by name. Registered through
            ``set_model`` rather than by assigning ``self.model``, because
            ``model`` is a read-only property on some Keras versions.
        sparsity_schedule: Mapping of layer name -> non-empty sequence of p
            values indexed by the 0-based epoch number. Each named layer must
            expose an ``update_p(value)`` method.

    Raises:
        ValueError: If any schedule in ``sparsity_schedule`` is empty.

    Note:
        The value at index ``epoch`` is applied in ``on_epoch_end``, i.e. after
        that epoch has finished, so it is the setting in force for the epoch
        that follows. Once the schedule is exhausted its last value is reused.
    """

    def __init__(self, model, sparsity_schedule):
        super(UpdatePSparsity, self).__init__()
        # Fail fast: an empty schedule would otherwise only surface as an
        # IndexError after the first epoch has already been trained.
        for layer_name, values in sparsity_schedule.items():
            if len(values) == 0:
                raise ValueError(
                    f"sparsity schedule for layer {layer_name!r} is empty")
        self.set_model(model)
        self.sparsity_schedule = sparsity_schedule

    def on_epoch_end(self, epoch, logs=None):
        """Push the scheduled p for ``epoch`` into every layer in the schedule."""
        for layer_name, new_p in self.sparsity_schedule.items():
            # Clamp to the last entry for epochs beyond the predefined ones.
            p_value = new_p[min(epoch, len(new_p) - 1)]
            layer = self.model.get_layer(name=layer_name)
            layer.update_p(p_value)

# Schedule passed to UpdatePSparsity: layer name -> list of p values indexed
# by epoch. The single entry here keeps p at 1 for every epoch.
sparsity_schedule = dict(sparse_conv2d=[1])

In [6]:
# 训练模型，并在验证集上验证
model.fit(
    train_dataset,
    epochs=40,
    validation_data=test_dataset,
    callbacks=[UpdatePSparsity(model, sparsity_schedule)]
)

Epoch 1/40


2024-07-17 19:27:09.953280: I tensorflow/stream_executor/cuda/cuda_dnn.cc:384] Loaded cuDNN version 8101
2024-07-17 19:27:11.216181: I tensorflow/stream_executor/cuda/cuda_blas.cc:1786] TensorFloat-32 will be used for the matrix multiplication. This will only be logged once.


Epoch 2/40
Epoch 3/40
Epoch 4/40
Epoch 5/40
Epoch 6/40
Epoch 7/40
Epoch 8/40
Epoch 9/40
Epoch 10/40
Epoch 11/40