In [1]:
# 基于tensorflow的AlexNet的实现
# 设置GPU进行训练
# 模型编译（总共有11层，5层卷积，3层池化，3层全连接
#     1. 卷积层（filters=96， kernel_size=11，strides=4, activation='ReLu'
#     2. 最大池化层（pool_size=3, stirdes=2)
#     3. 卷积层(filters=256, kernel_size=5, padding='same', activation='ReLu')
#     4. 最大池化层(pool_size=3, strides=2)
#     5. 卷积层(filters=384, kernel_size=3, padding='same', activation='ReLu')
#     6. 卷积层(filters=384, kernel_size=3, padding='same', activation='ReLu')
#     7. 卷积层(filters=256, kernel_size=3, padding='same', activation='ReLu')
#     8. 最大池化层（pool_size=3, stirdes=2)
#     9. 摊平
#     10. 全连接层(units=4096, activation='ReLu')
#     11. dropout(0.5)
#     12. 全连接层(units=4096, activation='ReLu')
#     13. dropout(0.5)
#     14. 全连接层(units=10, activation='sigmoid')
# 数据处理
#     直接用ImageNet训练时间太久了，将fashion_mnist扩充 
#     利用tf.image.resize_with_pad(image, target_height, target_width, 其他默认参数就好)
# 优化器 tf.keras.optimizes.SGD(learning_rate=0.01, momentum=0.0, nesterov=False)
# 模型编译，loss=sparse_categorical_crossentropy
# 模型训练 net.fit(X,Y, validation_split=0.1)

import numpy as np
import tensorflow as tf

In [2]:
# 使用GPU训练
for gpu in tf.config.experimental.list_physical_devices('GPU'):
    tf.config.experimental.set_memory_growth(gpu, True)

In [3]:
# AlexNet 模型
net = tf.keras.models.Sequential([
    tf.keras.layers.Conv2D(filters=96, kernel_size=11, strides=4, activation='relu'),
    tf.keras.layers.MaxPool2D(pool_size=3, strides=2),
    tf.keras.layers.Conv2D(filters=256, kernel_size=5, padding='same', activation='relu'),
    tf.keras.layers.MaxPool2D(pool_size=3, strides=2),
    tf.keras.layers.Conv2D(filters=384, kernel_size=3, padding='same', activation='relu'),
    tf.keras.layers.Conv2D(filters=384, kernel_size=3, padding='same', activation='relu'),
    tf.keras.layers.Conv2D(filters=256, kernel_size=3, padding='same', activation='relu'),
    tf.keras.layers.MaxPool2D(pool_size=3, strides=2),
    tf.keras.layers.Flatten(),
    tf.keras.layers.Dense(units=4096, activation='relu'),
    tf.keras.layers.Dropout(0.5),
    tf.keras.layers.Dense(units=4096, activation='relu'),
    tf.keras.layers.Dropout(0.5),
    tf.keras.layers.Dense(units=10, activation='sigmoid')
])

In [4]:
# 数据处理
fashion_mnist = tf.keras.datasets.fashion_mnist
class DataLoad():
    def __init__(self):
        (self.train_images, self.train_labels), (self.test_images, self.test_labels) = fashion_mnist.load_data()
        self.train_images = np.expand_dims(self.train_images.astype(np.float32) / 255.0, axis=-1)
        self.test_images = np.expand_dims(self.test_images.astype(np.float32) / 255.0, axis=-1)
        self.train_labels = self.train_labels.astype(np.int32)
        self.test_labels = self.test_labels.astype(np.int32)
        self.train_num, self.test_num = self.train_images.shape[0], self.test_images.shape[0]
    
    def get_train_batch(self, batch_size):
        index = np.random.randint(0, self.train_num, batch_size)
        train_batch = tf.image.resize_with_pad(self.train_images[index], 244, 244)
        return train_batch, self.train_labels[index]
    
    def get_test_batch(self, batch_size):
        index = np.random.randint(0, self.test_num, batch_size)
        test_batch = tf.image.resize_with_pad(self.test_images[index], 244, 244)
        return test_batch, self.test_labels[index]

batch_size = 2000
data_load = DataLoad()

In [5]:
# 模型训练
def train_AlexNet():
    epochs = 5
    for _ in range(epochs):
        num_iter = data_load.train_num // batch_size
        for t in range(num_iter):
            X_train, Y_train = data_load.get_train_batch(batch_size)
            net.fit(X_train, Y_train)
            if t % 20 == 0:
                net.save_weights('alexnet_weights.h5')

optimizer = tf.keras.optimizers.SGD(learning_rate=0.01, momentum=0.0, nesterov=False)
net.compile(optimizer=optimizer,
           loss= 'sparse_categorical_crossentropy',
           metrics=['accuracy'])
train_AlexNet()
# 显存不够算不完



ResourceExhaustedError: OOM when allocating tensor with shape[2000,244,244,1] and type float on /job:localhost/replica:0/task:0/device:GPU:0 by allocator GPU_0_bfc [Op:ResizeBilinear]

In [6]:
# 显存不够
net.load_weights('alexnet_weights.h5')
test_images, test_labels = data_load.get_test_batch(batch_size)
net.evaluate(test_images, test_labels, verbose)

ResourceExhaustedError: OOM when allocating tensor with shape[2000,244,244,1] and type float on /job:localhost/replica:0/task:0/device:GPU:0 by allocator GPU_0_bfc [Op:ResizeBilinear]