# Topic
* Case1: First, I study from [VGG in TensorFlow](https://www.cs.toronto.edu/~frossard/post/vgg16/) and implement it in [vgg16_pretrained_predict.ipynb](../vgg16_pretrained_predict.ipynb).
  * Download the pretrained parameters and using the VGG16 model to predict, it works.
* Case2: Then, I study from [Ashing00/Tensorflow_VGG](https://github.com/Ashing00/Tensorflow_VGG) and implement it in [vgg_cifar10_train.ipynb](../vgg16_cifar10_train/vgg_cifar10_train.ipynb)
  * We using VGG16 modes (but the model has little different with Case1) to do training with CIFAR-10 dataset, it works.
* There are some little different in the two VGG16 model 
  * Case 1 
    * Do pre-process in data input, minus the mean value of RGB
    * Pooling kernel size = 2x2, stride = 2
  * Case 2
    * Didn't the pre-process in data input. But it do batch normalization in each hiddle layer.
    * Pooling kernel size = 2x2, stride = 2, except The Pool_1 (kernel size = 3x3, stride = 1)
* Issue:
  The training is failed, we can't get good accuracy.

In [1]:
%load_ext autoreload
%autoreload 2
%matplotlib inline
import tensorflow as tf
import numpy as np
import os
from matplotlib import pyplot as plt
from vgg16_cifar10 import vgg16

data_dir = "../../../data/"
extract_folder = 'cifar-10-batches-bin'

In [2]:
def encode_labels(y, k):
    """
    Encode labels into one-hot representation
    y: the items.
    k: the class number.
    """
    onehot = np.zeros((y.shape[0], k))
    for idx, val in enumerate(y):
        onehot[idx,val] = 1.0  ##idx=0~xxxxx，if val =3 ,表示欄位3要設成1.0
    return onehot

In [3]:
# Load image data from binary files. (train/test)

def load_train_data(n):    # n=1,2..5, data_batch_1.bin ~ data_batch_5.bin
    """Load Cifar10 data from `path`"""
    images_path = os.path.join(data_dir, extract_folder, 'data_batch_{}.bin'.format(n)) 
    return _load_binary_data(images_path)

def load_test_data():      # test_batch
    """Load Cifar10 test data from `path`"""
    test_path = os.path.join(data_dir, extract_folder, 'test_batch.bin') 
    return _load_binary_data(test_path)

def _load_binary_data(path):
    with open(path, 'rb') as img_file:
        images = np.fromfile(img_file, dtype=np.uint8)
    return images    

In [4]:
# Parameters

MODEL_SAVE_PATH = "./vgg/"
MODEL_NAME = "vgg_cifar_model"
learning_rate = 0.001
BATCH_SIZE = 120
display_step = 100
TRAINING_STEPS = 6000

# Network Parameters
n_input = 3072 # cifar data input (img shape: 32x32x3)
n_classes = 10 # cifar10 total classes (0-9 )
# Ryan mark
# dropout = 0.60 # Dropout, probability to keep units

In [5]:
# Training

def train(X_train, y_train_lable):
    shuffle = True
    batch_idx = 0
    batch_len = int( X_train.shape[0] / BATCH_SIZE)
    print("batch_len=", batch_len) # 50000/120 = 416.6 => 416
    train_loss = []
    train_acc = []
    train_idx = np.random.permutation(batch_len)    # 打散資料順序

    # tf Graph input
    x_ = tf.placeholder(tf.float32, [None, n_input])
    y = tf.placeholder(tf.float32, [None, n_classes])
    # Ryan mark
    # keep_prob = tf.placeholder(tf.float32)    # dropout (keep probability)
    x = tf.reshape(x_, shape=[-1, 32, 32, 3])

    # Construct model
    mean = list(np.mean(np.reshape(X_train, (-1, 32, 32, 3)) , axis=(0, 1, 2)))
    print("mean:", mean)
    vgg = vgg16(x, mean=mean)
    pred = vgg.fc3l

    # Define loss and optimizer
    cost = tf.reduce_mean(tf.nn.softmax_cross_entropy_with_logits(logits=pred, labels=y))
    optimizer = tf.train.AdamOptimizer(learning_rate=learning_rate).minimize(cost)
    #optimizer = tf.train.GradientDescentOptimizer(learning_rate=learning_rate).minimize(cost)
    #GradientDescentOptimizer
    # Evaluate model
    correct_pred = tf.equal(tf.argmax(pred, 1), tf.argmax(y, 1))
    accuracy = tf.reduce_mean(tf.cast(correct_pred, tf.float32))

    # 初始化TensorFlow持久化類。
    saver = tf.train.Saver()
    # Initializing the variables
    init = tf.global_variables_initializer()

    # Launch the graph
    with tf.Session() as sess:
        sess.run(init)
        step = 1
        print ("Start  training!")
        # Keep training until reach max iterations:
        while step < TRAINING_STEPS:
            #batch_xs, batch_ys = mnist.train.next_batch(BATCH_SIZE)
            if shuffle==True:
                batch_shuffle_idx=train_idx[batch_idx]
                batch_xs=X_train[batch_shuffle_idx*BATCH_SIZE:batch_shuffle_idx*BATCH_SIZE+BATCH_SIZE]
                batch_ys=y_train_lable[batch_shuffle_idx*BATCH_SIZE:batch_shuffle_idx*BATCH_SIZE+BATCH_SIZE]
            else:
                batch_xs=X_train[batch_idx*BATCH_SIZE:batch_idx*BATCH_SIZE+BATCH_SIZE]
                batch_ys=y_train_lable[batch_idx*BATCH_SIZE:batch_idx*BATCH_SIZE+BATCH_SIZE]

            if batch_idx<batch_len:
                batch_idx+=1
                if batch_idx==batch_len:
                    batch_idx=0
            else:
                batch_idx=0
            # Ryan: maybe we don't need the reshape? No, We need it, because of the input feature in conv1_1 is 3
            reshaped_xs = np.reshape(batch_xs, (BATCH_SIZE, 32, 32, 3))
            # Run optimization op (backprop)
            sess.run(optimizer, feed_dict={x: reshaped_xs, y: batch_ys})
            # Calculate batch loss and accuracy
            loss, acc = sess.run([cost, accuracy], feed_dict={x: reshaped_xs, y: batch_ys})
            train_loss.append(loss)
            train_acc.append(acc)
            if step % display_step == 0:
                print("Step: " + str(step) + ", Minibatch Loss= " + \
                    "{:.6f}".format(loss) + ", Training Accuracy= " + \
                    "{:.5f}".format(acc))
            step += 1
        print("Optimization Finished!")
        print("Save model...")
        saver.save(sess, os.path.join(MODEL_SAVE_PATH, MODEL_NAME))

#         plt.subplot(1,2,1)
#         plt.plot(train_loss)
#         plt.xlabel('Iter')
#         plt.ylabel('loss')
#         plt.title('lr=%f, ti=%d, bs=%d' % (learning_rate, TRAINING_STEPS, BATCH_SIZE))
#         #plt.tight_layout()

#         plt.subplot(1,2,2)
#         plt.plot(train_acc)
#         plt.xlabel('Iter')
#         plt.ylabel('accuracy')
#         plt.title('lr=%f, ti=%d, bs=%d' % (learning_rate, TRAINING_STEPS, BATCH_SIZE))
#         #plt.tight_layout()
#         plt.savefig('vgg_cifar10_acc.jpg', dpi=200)
#         plt.show()

In [6]:
def vgg_train(argv=None):
    ##Load Cifar-10 train image and label
    X_train_image1 = load_train_data(1)    # load data_batch_1.bin
    X_train_image2 = load_train_data(2)    # load data_batch_2.bin
    X_train_image3 = load_train_data(3)    # load data_batch_3.bin
    X_train_image4 = load_train_data(4)    # load data_batch_4.bin
    X_train_image5 = load_train_data(5)    # load data_batch_5.bin
    print(X_train_image1.shape)            # (30730000,)

    X_train_image=np.concatenate((X_train_image1,X_train_image2,X_train_image3,X_train_image4,X_train_image5),axis=0)
    print(X_train_image.shape)             # (153650000,)

    # reshape to (50000,3073)
    # in one Row ,the 1st byte is the label,other 3072byte =1024 Red +1024 green +1024 blue ch data
    X_train_image = X_train_image.reshape(-1, 3073)
    tempA = X_train_image.copy()
    X_train_image = np.delete(X_train_image, 0, 1)           # delete 1st column data. (obj=0, axis=1)
    X_train_image = X_train_image.reshape(-1, 3, 32, 32)     # reshape to (50000,3,32,32)
    X_train_image = X_train_image.transpose([0, 2, 3, 1])    # transfer to (50000,32,32,3)
    X_train_image = X_train_image.reshape(-1, 3072)          # (50000, 3072)

    # split to 3073 col,the first column is the label.
    tempA = np.hsplit(tempA, 3073)
    X_train_label = np.asarray(tempA[0])
    X_train_label = X_train_label.reshape([50000,])         # (50000,)

    print("X_train_image.shape =", X_train_image.shape)
    X_train_label = encode_labels(X_train_label, 10)
    print("X_train_lable.shape =", X_train_label.shape)
    # print(X_train_label[0:50])
    ##============================

    train(X_train_image, X_train_label)

vgg_train()

(30730000,)
(153650000,)
X_train_image.shape = (50000, 3072)
X_train_lable.shape = (50000, 10)
batch_len= 416
mean: [125.306918046875, 122.950394140625, 113.86538318359375]
Instructions for updating:

Future major versions of TensorFlow will allow gradients to flow
into the labels input on backprop by default.

See `tf.nn.softmax_cross_entropy_with_logits_v2`.

Start  training!
Step: 100, Minibatch Loss= 4177.041016, Training Accuracy= 0.16667
Step: 200, Minibatch Loss= 2121.913330, Training Accuracy= 0.20833
Step: 300, Minibatch Loss= 1248.554688, Training Accuracy= 0.15000
Step: 400, Minibatch Loss= 898.462891, Training Accuracy= 0.13333
Step: 500, Minibatch Loss= 620.257080, Training Accuracy= 0.22500
Step: 600, Minibatch Loss= 314.033478, Training Accuracy= 0.15833
Step: 700, Minibatch Loss= 12.636030, Training Accuracy= 0.15000
Step: 800, Minibatch Loss= 3.156986, Training Accuracy= 0.09167
Step: 900, Minibatch Loss= 3.656419, Training Accuracy= 0.18333
Step: 1000, Minibatch Loss=