## VGG network (Visual Geometry Group)
### very deep convolutional networks for large-scale image recognition

In [1]:
import os
import numpy as np
import tensorflow as tf
import matplotlib.pyplot as plt
from tensorflow.keras import datasets, layers, models, optimizers, metrics
from tensorflow.keras import regularizers

In [6]:
def normalize(X_train, X_test):
    X_train = X_train / 255.
    X_test = X_test / 255.
    mean = np.mean(X_train, axis=(0,1,2,3))
    std = np.std(X_train, axis=(0,1,2,3))
    print("mean:{:.4f} std:{:.4f}".format(mean,std))
    X_train = (X_train-mean) / (std+1e-7)
    X_test = (X_test-mean) / (std+1e-7)
    return X_train, X_test

def prepare_cifar(x,y):
    x = tf.cast(x, tf.float32)
    y = tf.cast(y, tf.float32)
    return x, y

def data_scale_down(X, Y, X_test, Y_test):
    num_train = len(X) // 10
    num_test = len(X_test) // 10
    X = X[:num_train]
    Y = Y[:num_train]
    X_test = X_test[:num_test]
    Y_test = Y_test[:num_test]
    return X, Y, X_test, Y_test

def cifar10_dataset():
    (x,y), (x_test,y_test) = datasets.cifar10.load_data()
    print("x.shape",x.shape)
    print("x_test.shape",x_test.shape)
    print("y.shape",y.shape)
    print("y_test.shape",y_test.shape)
    print("\n----Scale Down----")
    x, y, x_test, y_test = data_scale_down(x, y, x_test, y_test)
    x, x_test = normalize(x, x_test)
    print("x.shape",x.shape)
    print("x_test.shape",x_test.shape)
    print("y.shape",y.shape)
    print("y_test.shape",y_test.shape)
    print("\n----Scale Down----")
    
    # [b, 1] => [b]
    y = tf.squeeze(y, axis=1)
    # [b, 10]
    y = tf.one_hot(y, depth=10)
    y_test = tf.squeeze(y_test, axis=1)
    y_test = tf.one_hot(y_test, depth=10)
    
    train_ds = tf.data.Dataset.from_tensor_slices((x,y))
    train_ds = train_ds.map(prepare_cifar).shuffle(50000).batch(256)
    
    test_ds = tf.data.Dataset.from_tensor_slices((x_test,y_test))
    test_ds = test_ds.map(prepare_cifar).shuffle(10000).batch(256)
    return train_ds, test_ds

train_ds, test_ds = cifar10_dataset()    

mean:0.4734 std:0.2516
x.shape (50000, 32, 32, 3)
x_test.shape (10000, 32, 32, 3)
y.shape (50000, 1)
y_test.shape (10000, 1)


In [7]:
#del VGG16
class VGG16(models.Model):
    def __init__(self, input_shape):
        super(VGG16, self).__init__()
        # input_shape => [32, 32, 3]
        self.weight_decay = 0.000
        self.input_shape_ = input_shape
        self.num_classes = 10
        self.model = self.create_model()
        
    def call(self, x):        
        return self.model(x)
    def create_model(self):
        model = models.Sequential()
        model.add(layers.Conv2D(filters=64, kernel_size=(3,3), padding="same", activation="relu",
                                kernel_regularizer=regularizers.l2(self.weight_decay), input_shape=self.input_shape_))
        model.add(layers.BatchNormalization())
        model.add(layers.Dropout(rate=0.3))
        model.add(layers.Conv2D(filters=64, kernel_size=(3,3), padding="same", activation="relu",
                                kernel_regularizer=regularizers.l2(self.weight_decay)))
        model.add(layers.BatchNormalization())
        model.add(layers.MaxPooling2D(pool_size=(2,2)))
        
        model.add(layers.Conv2D(filters=128, kernel_size=(3,3), padding="same", activation="relu",
                                kernel_regularizer=regularizers.l2(self.weight_decay)))
        model.add(layers.BatchNormalization())
        model.add(layers.Dropout(rate=0.3))
        model.add(layers.Conv2D(filters=128, kernel_size=(3,3), padding="same", activation="relu",
                                kernel_regularizer=regularizers.l2(self.weight_decay)))
        model.add(layers.BatchNormalization())
        model.add(layers.MaxPooling2D(pool_size=(2,2)))
        
        model.add(layers.Conv2D(filters=256, kernel_size=(3,3), padding="same", activation="relu",
                                kernel_regularizer=regularizers.l2(self.weight_decay)))
        model.add(layers.BatchNormalization())
        model.add(layers.Dropout(rate=0.3))
        model.add(layers.Conv2D(filters=256, kernel_size=(3,3), padding="same", activation="relu",
                                kernel_regularizer=regularizers.l2(self.weight_decay)))
        model.add(layers.BatchNormalization())
        model.add(layers.MaxPooling2D(pool_size=(2,2)))
        
        model.add(layers.Conv2D(filters=512, kernel_size=(3,3), padding="same", activation="relu",
                                kernel_regularizer=regularizers.l2(self.weight_decay)))                
        model.add(layers.BatchNormalization())
        model.add(layers.Dropout(rate=0.3))
        model.add(layers.Conv2D(filters=512, kernel_size=(3,3), padding="same", activation="relu",
                                kernel_regularizer=regularizers.l2(self.weight_decay)))
        model.add(layers.BatchNormalization())
        model.add(layers.MaxPooling2D(pool_size=(2,2)))
        
        model.add(layers.Conv2D(filters=512, kernel_size=(3,3), padding="same", activation="relu",
                                kernel_regularizer=regularizers.l2(self.weight_decay)))
        model.add(layers.BatchNormalization())
        model.add(layers.Dropout(rate=0.3))
        model.add(layers.Conv2D(filters=512, kernel_size=(3,3), padding="same", activation="relu",
                                kernel_regularizer=regularizers.l2(self.weight_decay)))
        model.add(layers.BatchNormalization())
        model.add(layers.MaxPooling2D(pool_size=(2,2)))
        
        model.add(layers.Dropout(rate=0.3))
        model.add(layers.Flatten())
        
        model.add(layers.Dense(units=512, activation="relu", kernel_regularizer=regularizers.l2(self.weight_decay)))
        model.add(layers.BatchNormalization())
        model.add(layers.Dropout(rate=0.3))
        model.add(layers.Dense(units=self.num_classes, activation="softmax"))
        
        return model

In [8]:
#del model
model = VGG16([32, 32, 3])

In [13]:
def train(epochs):
    criteon = tf.keras.losses.CategoricalCrossentropy(from_logits=True)
    _metrics = metrics.CategoricalAccuracy()
    optimizer = optimizers.Adam(learning_rate=0.01)
    
    for epoch in range(epochs):
        for step, (x,y) in enumerate(train_ds):            
            with tf.GradientTape() as tape:
                logits = model(x)
                loss = criteon(y, logits)
                # loss2 = compute_loss(logits, tf.argmax(y, axis=1))
                # mse_loss = tf.reduce_sum(tf.square(y-logits))
                # print(y.shape, logits.shape)
                _metrics.update_state(y, logits)
                
            grads = tape.gradient(loss, model.trainable_variables)
            ## MUST clip gradient here or it will disconverge!
            #grads = [tf.clip_by_norm(g, 15) for g in grads]
            grads = [tf.clip_by_norm(g, 12) for g in grads]
            optimizer.apply_gradients(zip(grads, model.trainable_variables))
            
            if step % 40 == 0:
                # for g in grads:
                #     print(tf.norm(g).numpy())
                print(epoch, step,
                      'loss:{:.4f}'.format(float(loss)),
                      'acc:{:.4f}'.format(_metrics.result().numpy()))
                _metrics.reset_states()
                
        if epoch % 1 == 0:
            metric = metrics.CategoricalAccuracy()
            for x,y in test_ds:                
                logits = model.predict(x)
                ## be careful, these functions can accept y as [b] without warnning.
                metric.update_state(y, logits)
            print("epoch",epoch,"test acc:", metric.result().numpy())
            metric.reset_states()
    print("finish training")     
    
train(epochs=3) 

0 0 loss: 2.1611337661743164 acc: 0.29296875
0 40 loss: 2.343963384628296 acc: 0.09648438
0 80 loss: 2.355682134628296 acc: 0.10205078
0 120 loss: 2.375213384628296 acc: 0.103710935
0 160 loss: 2.343963384628296 acc: 0.099609375
epoch 0 test acc: 0.1
1 0 loss: 2.316619634628296 acc: 0.098230086
1 40 loss: 2.367400884628296 acc: 0.09746094
1 80 loss: 2.367400884628296 acc: 0.09511719
1 120 loss: 2.379119634628296 acc: 0.099804685
1 160 loss: 2.383025884628296 acc: 0.1046875
epoch 1 test acc: 0.1
2 0 loss: 2.351775884628296 acc: 0.10232301
2 40 loss: 2.390838384628296 acc: 0.099121094
2 80 loss: 2.394744634628296 acc: 0.10195313
2 120 loss: 2.347869634628296 acc: 0.102832034
2 160 loss: 2.351775884628296 acc: 0.09423828
epoch 2 test acc: 0.1
finish training


In [26]:
#export_path = "./models/vgg16s"
#model.save(export_path)
#tf.saved_model.save(model, export_path)
## Saving the model to HDF5 format requires the model to be a Functional model or a Sequential model. 
## It does not work for subclassed models

print("vgg16 model saved")

INFO:tensorflow:Assets written to: ./models/vgg16s\assets
vgg16s model saved
