In [1]:
import tensorflow as tf
import tensorflow.keras as keras
print(tf.__version__)

2.0.0-rc1


In [13]:
class VGG16(keras.models.Model):
    def __init__(self, input_shape):
        """
        :param input_shape: [32, 32, 3]
        """
        super(VGG16, self).__init__()
        weight_decay = 0.000
        self.num_classes = 10

        model = keras.Sequential()
        model.add(keras.layers.Conv2D(64, (3, 3), padding='same', input_shape=input_shape, kernel_regularizer=keras.regularizers.l2(weight_decay)))
        model.add(keras.layers.Activation('relu'))
        model.add(keras.layers.BatchNormalization())
        model.add(keras.layers.Dropout(0.3))

        model.add(keras.layers.Conv2D(64, (3, 3), padding='same',kernel_regularizer=keras.regularizers.l2(weight_decay)))
        model.add(keras.layers.Activation('relu'))
        model.add(keras.layers.BatchNormalization())

        model.add(keras.layers.MaxPooling2D(pool_size=(2, 2)))

        model.add(keras.layers.Conv2D(128, (3, 3), padding='same',kernel_regularizer=keras.regularizers.l2(weight_decay)))
        model.add(keras.layers.Activation('relu'))
        model.add(keras.layers.BatchNormalization())
        model.add(keras.layers.Dropout(0.4))

        model.add(keras.layers.Conv2D(128, (3, 3), padding='same',kernel_regularizer=keras.regularizers.l2(weight_decay)))
        model.add(keras.layers.Activation('relu'))
        model.add(keras.layers.BatchNormalization())

        model.add(keras.layers.MaxPooling2D(pool_size=(2, 2)))

        model.add(keras.layers.Conv2D(256, (3, 3), padding='same',kernel_regularizer=keras.regularizers.l2(weight_decay)))
        model.add(keras.layers.Activation('relu'))
        model.add(keras.layers.BatchNormalization())
        model.add(keras.layers.Dropout(0.4))

        model.add(keras.layers.Conv2D(256, (3, 3), padding='same',kernel_regularizer=keras.regularizers.l2(weight_decay)))
        model.add(keras.layers.Activation('relu'))
        model.add(keras.layers.BatchNormalization())
        model.add(keras.layers.Dropout(0.4))

        model.add(keras.layers.Conv2D(256, (3, 3), padding='same',kernel_regularizer=keras.regularizers.l2(weight_decay)))
        model.add(keras.layers.Activation('relu'))
        model.add(keras.layers.BatchNormalization())

        model.add(keras.layers.MaxPooling2D(pool_size=(2, 2)))

        model.add(keras.layers.Conv2D(512, (3, 3), padding='same',kernel_regularizer=keras.regularizers.l2(weight_decay)))
        model.add(keras.layers.Activation('relu'))
        model.add(keras.layers.BatchNormalization())
        model.add(keras.layers.Dropout(0.4))

        model.add(keras.layers.Conv2D(512, (3, 3), padding='same',kernel_regularizer=keras.regularizers.l2(weight_decay)))
        model.add(keras.layers.Activation('relu'))
        model.add(keras.layers.BatchNormalization())
        model.add(keras.layers.Dropout(0.4))

        model.add(keras.layers.Conv2D(512, (3, 3), padding='same',kernel_regularizer=keras.regularizers.l2(weight_decay)))
        model.add(keras.layers.Activation('relu'))
        model.add(keras.layers.BatchNormalization())

        model.add(keras.layers.MaxPooling2D(pool_size=(2, 2)))

        model.add(keras.layers.Conv2D(512, (3, 3), padding='same',kernel_regularizer=keras.regularizers.l2(weight_decay)))
        model.add(keras.layers.Activation('relu'))
        model.add(keras.layers.BatchNormalization())
        model.add(keras.layers.Dropout(0.4))

        model.add(keras.layers.Conv2D(512, (3, 3), padding='same',kernel_regularizer=keras.regularizers.l2(weight_decay)))
        model.add(keras.layers.Activation('relu'))
        model.add(keras.layers.BatchNormalization())
        model.add(keras.layers.Dropout(0.4))

        model.add(keras.layers.Conv2D(512, (3, 3), padding='same',kernel_regularizer=keras.regularizers.l2(weight_decay)))
        model.add(keras.layers.Activation('relu'))
        model.add(keras.layers.BatchNormalization())

        model.add(keras.layers.MaxPooling2D(pool_size=(2, 2)))
        model.add(keras.layers.Dropout(0.5))

        model.add(keras.layers.Flatten())
        model.add(keras.layers.Dense(512,kernel_regularizer=keras.regularizers.l2(weight_decay)))
        model.add(keras.layers.Activation('relu'))
        model.add(keras.layers.BatchNormalization())

        model.add(keras.layers.Dropout(0.5))
        model.add(keras.layers.Dense(self.num_classes))
        # model.add(layers.Activation('softmax'))

        self.model = model

    def call(self, x):
        return self.model(x)

In [9]:
(x_train,y_train), (x_test, y_test) = keras.datasets.cifar10.load_data()

In [7]:
print(x_train.shape, y_train.shape, x_test.shape, y_test.shape)

(50000, 32, 32, 3) (50000, 1) (10000, 32, 32, 3) (10000, 1)


In [10]:
def prepare_features_and_labels(x, y):
    x = tf.cast(x, tf.float32) / 255.0
    y = tf.cast(y, tf.int64)
    return x, y
def make_dataset(data,label):
    dataset=tf.data.Dataset.from_tensor_slices((data, label))
    dataset = dataset.map(prepare_features_and_labels)
    dataset = dataset.shuffle(10000).batch(100)
    return dataset

dataset_train=make_dataset(x_train,y_train)
dataset_test=make_dataset(x_test, y_test)

In [11]:
def compute_loss(logits, labels):
    return tf.reduce_mean(tf.nn.sparse_softmax_cross_entropy_with_logits(logits=logits, labels=labels))

In [14]:
model = VGG16([32, 32, 3])
# must specify from_logits=True!
criteon = keras.losses.CategoricalCrossentropy(from_logits=True)
metric = keras.metrics.CategoricalAccuracy()
optimizer = keras.optimizers.Adam(learning_rate=0.0001)

In [None]:
for epoch in range(5):
    for step, (x, y) in enumerate(dataset_train):
        # [b, 1] => [b]
        y = tf.squeeze(y, axis=1)
        # [b, 10]
        y = tf.one_hot(y, depth=10)

        with tf.GradientTape() as tape:
            logits = model(x)
            loss = criteon(y, logits)
            # loss2 = compute_loss(logits, tf.argmax(y, axis=1))
            # mse_loss = tf.reduce_sum(tf.square(y-logits))
            # print(y.shape, logits.shape)
            metric.update_state(y, logits)

        grads = tape.gradient(loss, model.trainable_variables)
        # MUST clip gradient here or it will disconverge!
        grads = [ tf.clip_by_norm(g, 15) for g in grads]
        optimizer.apply_gradients(zip(grads, model.trainable_variables))

        if step % 40 == 0:
            max_grad=-1e-6
            for g in grads:
                tmp_grad=tf.norm(g).numpy()
                if tmp_grad > max_grad: max_grad=tmp_grad
                #print(tf.norm(g).numpy())
                
            print("epoch=", epoch, "step=", step, 'loss:', float(loss), 'acc:', metric.result().numpy(), "max_grad:", max_grad)
            metric.reset_states()


    if epoch % 1 == 0:

        test_acc = keras.metrics.CategoricalAccuracy()
        test_loss = 0.0
        for x, y in dataset_test:
            # [b, 1] => [b]
            y = tf.squeeze(y, axis=1)
            # [b, 10]
            y = tf.one_hot(y, depth=10)
            logits = model.predict(x)
            # be careful, these functions can accept y as [b] without warnning.
            test_acc.update_state(y, logits)
            test_loss += criteon(y, logits)
        print('test acc:', test_acc.result().numpy(), "test loss:", float(test_loss))
        metric.reset_states()

epoch= 0 step= 0 loss: 2.192420482635498 acc: 0.20333333 max_grad: 6.5817246
epoch= 0 step= 40 loss: 2.043679714202881 acc: 0.1985 max_grad: 2.9994366
epoch= 0 step= 80 loss: 2.0044972896575928 acc: 0.217 max_grad: 7.845404
epoch= 0 step= 120 loss: 1.944870114326477 acc: 0.22875 max_grad: 2.7888246
epoch= 0 step= 160 loss: 1.908591628074646 acc: 0.25375 max_grad: 15.0
epoch= 0 step= 200 loss: 1.8541368246078491 acc: 0.27925 max_grad: 5.510813
epoch= 0 step= 240 loss: 1.7668964862823486 acc: 0.32575 max_grad: 10.063973
epoch= 0 step= 280 loss: 1.7065242528915405 acc: 0.31875 max_grad: 15.0
epoch= 0 step= 320 loss: 1.5661728382110596 acc: 0.32375 max_grad: 1.6568742
epoch= 0 step= 440 loss: 1.452691912651062 acc: 0.41525 max_grad: 10.038286
epoch= 0 step= 480 loss: 1.642988681793213 acc: 0.4055 max_grad: 10.916514
test acc: 0.4179 test loss: 153.21775817871094
epoch= 1 step= 0 loss: 1.478299617767334 acc: 0.42 max_grad: 12.54433
epoch= 1 step= 40 loss: 1.4729194641113281 acc: 0.42875 max