# Topic
### Step1 
* Case1: First, I study from [VGG in TensorFlow](https://www.cs.toronto.edu/~frossard/post/vgg16/) and implement it in [vgg16_pretrained_predict.ipynb](../vgg16_pretrained_predict.ipynb).
  * Download the pretrained parameters and use the VGG16 model to predict; it works.
* Case2: Then, I study from [Ashing00/Tensorflow_VGG](https://github.com/Ashing00/Tensorflow_VGG) and implement it in [vgg_cifar10_train.ipynb](../vgg16_cifar10_train/vgg_cifar10_train.ipynb)
  * We use a VGG16 model (slightly different from the one in Case1) to train on the CIFAR-10 dataset; it works.
* There are some small differences between the two VGG16 models:
  * Case 1 
    * Pre-processes the input data by subtracting the mean RGB value.
    * Pooling kernel size = 2x2, stride = 2
  * Case 2
    * Does not pre-process the input data, but applies batch normalization in each hidden layer.
    * Pooling kernel size = 2x2, stride = 2, except Pool_1 (kernel size = 3x3, stride = 1)
* Issue: (Fixed by Step2)
  Training fails: we can't get good accuracy.  
  
### Step2
* I add batch normalization in each hidden layer. Now we can get good accuracy.
* But why?
  * Previously, I thought the original VGG16 model from "VGG in TensorFlow" should be good enough to train on the CIFAR-10 dataset, but it isn't. Did I miss something?

In [1]:
%load_ext autoreload
%autoreload 2
%matplotlib inline
import tensorflow as tf
import numpy as np
import os
from matplotlib import pyplot as plt
from vgg16_cifar10 import vgg16

# Relative path of the directory that contains the extracted CIFAR-10 folder.
data_dir = "../../../data/"
# Sub-folder of `data_dir` holding the CIFAR-10 binary batch files (*.bin).
extract_folder = 'cifar-10-batches-bin'

In [2]:
def encode_labels(y, k):
    """
    Encode integer class labels into a one-hot matrix.

    y: array-like of integer labels, each in the range [0, k).
    k: the number of classes.

    Returns a float array of shape (len(y), k) in which row i holds 1.0 in
    column y[i] and 0.0 everywhere else.
    """
    labels = np.asarray(y)
    n_rows = labels.shape[0]
    onehot = np.zeros((n_rows, k))
    # Row indices shaped to broadcast against `labels`, so that row i is
    # paired with labels[i] (e.g. a label value of 3 sets column 3 to 1.0).
    rows = np.arange(n_rows).reshape((-1,) + (1,) * (labels.ndim - 1))
    onehot[rows, labels] = 1.0
    return onehot

In [3]:
# Load image data from binary files. (train/test)

def load_train_data(n):
    """
    Load CIFAR-10 training batch number `n` (1..5, i.e. data_batch_1.bin ~
    data_batch_5.bin) from `data_dir`/`extract_folder`.

    Returns whatever `_load_binary_data` returns for that file.
    """
    file_name = 'data_batch_{}.bin'.format(n)
    batch_path = os.path.join(data_dir, extract_folder, file_name)
    return _load_binary_data(batch_path)

def load_test_data():
    """
    Load the CIFAR-10 test batch (test_batch.bin) from
    `data_dir`/`extract_folder`.

    Returns whatever `_load_binary_data` returns for that file.
    """
    batch_path = os.path.join(data_dir, extract_folder, 'test_batch.bin')
    return _load_binary_data(batch_path)

def _load_binary_data(path):
    """Read the whole file at `path` and return its bytes as a 1-D uint8 array."""
    return np.fromfile(path, dtype=np.uint8)

In [4]:
# Parameters

# Directory and file prefix used for the saved model checkpoint.
MODEL_SAVE_PATH = "./vgg/"
MODEL_NAME = "vgg_cifar_model"
# Learning rate passed to the optimizer.
learning_rate = 0.001
# Number of samples per minibatch (used for both training and evaluation).
BATCH_SIZE = 120
# Training progress is printed every `display_step` steps.
display_step = 100
# Upper bound on the training step counter.
TRAINING_STEPS = 6000

# Network Parameters
n_input = 3072 # CIFAR data input (img shape: 32x32x3, flattened)
n_classes = 10 # CIFAR-10 total classes (0-9)
# Dropout is disabled in this notebook; the original setting is kept for reference:
# dropout = 0.60 # Dropout, probability to keep units

In [5]:
def get_mean():
    """
    Compute the per-channel mean pixel value over the five CIFAR-10
    training batches.

    Returns a list of three floats, one per colour plane, in the order the
    planes are stored in each record (red, green, blue).
    """
    # data_batch_1.bin ~ data_batch_5.bin
    batches = [load_train_data(n) for n in range(1, 6)]
    print(batches[0].shape)                # (30730000,)

    raw = np.concatenate(batches, axis=0)
    print(raw.shape)                       # (153650000,)

    # Each record is 3073 bytes: the 1st byte is the label, the other
    # 3072 bytes are 1024 red + 1024 green + 1024 blue channel values.
    records = raw.reshape(-1, 3073)
    # Drop the label byte and expose the colour planes: (N, 3, 32, 32).
    planes = records[:, 1:].reshape(-1, 3, 32, 32)
    # Averaging over samples, rows and columns leaves one mean per plane.
    return list(np.mean(planes, axis=(0, 2, 3)))
    
# Per-channel training-set mean, computed once and passed to both the
# training and the evaluation entry points below.
MEAN = get_mean()
print("Mean:", MEAN)

(30730000,)
(153650000,)
Mean: [125.306918046875, 122.950394140625, 113.86538318359375]


In [6]:
# Training

def train(X_train, y_train_lable, mean):
    """
    Train the VGG16 network and save the resulting checkpoint to
    MODEL_SAVE_PATH/MODEL_NAME.

    X_train: array whose rows are flattened 32x32x3 images (n_input values each).
    y_train_lable: one-hot labels, one row of n_classes values per sample.
    mean: forwarded to `vgg16(x, mean=mean)`.

    Runs TRAINING_STEPS optimisation steps on minibatches of BATCH_SIZE
    samples. Samples beyond the last full minibatch are never used.

    Returns (train_loss, train_acc): two lists with the minibatch loss and
    accuracy measured after every step.

    Raises ValueError if X_train holds fewer than BATCH_SIZE samples.
    """
    shuffle = True
    batch_idx = 0
    batch_len = X_train.shape[0] // BATCH_SIZE
    print("batch_len=", batch_len) # 50000/120 = 416.6 => 416
    if batch_len == 0:
        raise ValueError("X_train must contain at least BATCH_SIZE samples")
    train_loss = []
    train_acc = []
    # Random visiting order of the minibatches (the samples inside each
    # minibatch keep their original order).
    train_idx = np.random.permutation(batch_len)

    # Build the model in its own graph, as evaluate() does, so that calling
    # train() again does not add a second, renamed copy of every variable
    # to the default graph (which would break restoring the checkpoint).
    with tf.Graph().as_default():
        # tf Graph input
        x_ = tf.placeholder(tf.float32, [None, n_input])
        y = tf.placeholder(tf.float32, [None, n_classes])
        # The first convolution expects 3 input channels, so the flat rows
        # are reshaped to images inside the graph.
        x = tf.reshape(x_, shape=[-1, 32, 32, 3])

        # Construct model
        vgg = vgg16(x, mean=mean)
        pred = vgg.fc3l

        # Define loss and optimizer. The labels are fed through a
        # placeholder, so the _v2 op computes the same loss without the
        # deprecation warning.
        cost = tf.reduce_mean(
            tf.nn.softmax_cross_entropy_with_logits_v2(logits=pred, labels=y))
        optimizer = tf.train.AdamOptimizer(learning_rate=learning_rate).minimize(cost)

        # Evaluate model
        correct_pred = tf.equal(tf.argmax(pred, 1), tf.argmax(y, 1))
        accuracy = tf.reduce_mean(tf.cast(correct_pred, tf.float32))

        # Saver used to persist the trained variables.
        saver = tf.train.Saver()
        # Initializing the variables
        init = tf.global_variables_initializer()

        # Launch the graph
        with tf.Session() as sess:
            sess.run(init)
            print ("Start  training!")
            # Run exactly TRAINING_STEPS optimisation steps.
            for step in range(1, TRAINING_STEPS + 1):
                batch_no = train_idx[batch_idx] if shuffle else batch_idx
                start = batch_no * BATCH_SIZE
                batch_xs = X_train[start:start + BATCH_SIZE]
                batch_ys = y_train_lable[start:start + BATCH_SIZE]
                # Advance to the next minibatch, wrapping at the end of an epoch.
                batch_idx = (batch_idx + 1) % batch_len

                feed = {x_: batch_xs, y: batch_ys}
                # Run optimization op (backprop)
                sess.run(optimizer, feed_dict=feed)
                # Calculate batch loss and accuracy after the update
                loss, acc = sess.run([cost, accuracy], feed_dict=feed)
                train_loss.append(loss)
                train_acc.append(acc)
                if step % display_step == 0:
                    print("Step: {}, Minibatch Loss= {:.6f}, Training Accuracy= {:.5f}"
                          .format(step, loss, acc))
            print("Optimization Finished!")
            print("Save model...")
            # Make sure the target directory exists so the finished run is
            # not lost to a failed save.
            os.makedirs(MODEL_SAVE_PATH, exist_ok=True)
            saver.save(sess, os.path.join(MODEL_SAVE_PATH, MODEL_NAME))

    return train_loss, train_acc

In [7]:
def vgg_train(mean, argv=None):
    """
    Load the five CIFAR-10 training batches, decode them into flattened
    images and one-hot labels, and run `train` on the result.

    mean: forwarded unchanged to `train`.
    argv: unused; kept so existing calls that pass it keep working.
    """
    ## Load Cifar-10 train image and label
    # data_batch_1.bin ~ data_batch_5.bin
    batches = [load_train_data(n) for n in range(1, 6)]
    print(batches[0].shape)            # (30730000,)

    raw = np.concatenate(batches, axis=0)
    print(raw.shape)                   # (153650000,)

    # Each record is 3073 bytes: the 1st byte is the label, the other
    # 3072 bytes are 1024 red + 1024 green + 1024 blue channel values.
    records = raw.reshape(-1, 3073)                          # (50000, 3073)
    X_train_label = records[:, 0]                            # (50000,)
    X_train_image = records[:, 1:].reshape(-1, 3, 32, 32)    # (50000, 3, 32, 32)
    X_train_image = X_train_image.transpose([0, 2, 3, 1])    # (50000, 32, 32, 3)
    X_train_image = X_train_image.reshape(-1, 3072)          # (50000, 3072)

    print("X_train_image.shape =", X_train_image.shape)
    X_train_label = encode_labels(X_train_label, n_classes)
    print("X_train_lable.shape =", X_train_label.shape)

    train(X_train_image, X_train_label, mean)

# Run the training cell with the per-channel mean computed above.
vgg_train(MEAN)

(30730000,)
(153650000,)
X_train_image.shape = (50000, 3072)
X_train_lable.shape = (50000, 10)
batch_len= 416
Instructions for updating:

Future major versions of TensorFlow will allow gradients to flow
into the labels input on backprop by default.

See `tf.nn.softmax_cross_entropy_with_logits_v2`.

Start  training!
Step: 100, Minibatch Loss= 1.616541, Training Accuracy= 0.47500
Step: 200, Minibatch Loss= 1.260319, Training Accuracy= 0.55000
Step: 300, Minibatch Loss= 1.051058, Training Accuracy= 0.64167
Step: 400, Minibatch Loss= 1.136269, Training Accuracy= 0.60833
Step: 500, Minibatch Loss= 0.936667, Training Accuracy= 0.61667
Step: 600, Minibatch Loss= 0.791617, Training Accuracy= 0.71667
Step: 700, Minibatch Loss= 0.825008, Training Accuracy= 0.72500
Step: 800, Minibatch Loss= 0.796007, Training Accuracy= 0.71667
Step: 900, Minibatch Loss= 0.782628, Training Accuracy= 0.75000
Step: 1000, Minibatch Loss= 0.641943, Training Accuracy= 0.80000
Step: 1100, Minibatch Loss= 0.991825, Tra

# Validate with full test dataset
上例的 accuracy 是由 train dataset 計算出來的，不客觀。
以下則是以 test dataset 為 input 計算其 accuracy。

In [8]:
def evaluate(X_test,y_test_lable, mean):
    """
    Restore the checkpoint at MODEL_SAVE_PATH/MODEL_NAME and measure
    classification accuracy on the given data.

    X_test: array holding one flattened 32x32x3 image (n_input values) per sample.
    y_test_lable: one-hot labels, one row of n_classes values per sample.
    mean: forwarded to `vgg16(x, mean)`.

    Every sample is evaluated, including a final minibatch smaller than
    BATCH_SIZE. The accuracy of each minibatch is printed, followed by the
    accuracy over all samples.

    Returns the overall accuracy as a float (nan when there are no samples).
    """
    num_samples = X_test.shape[0]
    test_xs = np.reshape(X_test, (num_samples, n_input))

    with tf.Graph().as_default():
        # Flat input placeholder, reshaped to a 4-D image tensor for the model.
        x_ = tf.placeholder(tf.float32, [None, n_input])
        x = tf.reshape(x_, shape=[-1, 32, 32, 3])
        y = tf.placeholder(tf.float32, [None, n_classes])

        # Construct model
        vgg = vgg16(x, mean)
        pred = vgg.fc3l

        # Evaluate model
        correct_pred = tf.equal(tf.argmax(pred, 1), tf.argmax(y, 1))
        correct_float = tf.cast(correct_pred, tf.float32)
        accuracy = tf.reduce_mean(correct_float)
        # Number of hits in the minibatch, so a smaller final minibatch is
        # weighted correctly in the overall figure.
        correct_count = tf.reduce_sum(correct_float)

        # 'Saver' op to save and restore all the variables
        saver = tf.train.Saver()
        with tf.Session() as sess:
            saver.restore(sess, os.path.join(MODEL_SAVE_PATH, MODEL_NAME))

            total_correct = 0.0
            # NOTE(review): assumes vgg16 accepts any batch size (the input
            # placeholder has a None batch dimension) - confirm.
            for i, start in enumerate(range(0, num_samples, BATCH_SIZE)):
                stop = start + BATCH_SIZE
                temp_acc, hits = sess.run(
                    [accuracy, correct_count],
                    feed_dict={x_: test_xs[start:stop], y: y_test_lable[start:stop]})
                total_correct += float(hits)
                print ("Test  batch ",i,":Testing Accuracy:",temp_acc)

            average_acc = total_correct / num_samples if num_samples else float("nan")
            print("Average Testing Accuracy=",average_acc)
            return average_acc

        
def vgg_eval(mean, argv=None):
    """
    Load the CIFAR-10 test batch, decode it into flattened images and
    one-hot labels, and run `evaluate` on the result.

    mean: forwarded unchanged to `evaluate`.
    argv: unused; kept so existing calls that pass it keep working.
    """
    ## Load Cifar-10 test image and label
    raw = load_test_data()    # test_batch.bin

    # Each record is 3073 bytes: the 1st byte is the label, the other
    # 3072 bytes are 1024 red + 1024 green + 1024 blue channel values.
    records = raw.reshape(-1, 3073)                        # (10000, 3073)
    X_test_label = records[:, 0]                           # (10000,)
    X_test_image = records[:, 1:].reshape(-1, 3, 32, 32)   # (10000, 3, 32, 32)
    X_test_image = X_test_image.transpose([0, 2, 3, 1])    # (10000, 32, 32, 3)
    X_test_image = X_test_image.reshape(-1, 3072)          # (10000, 3072)

    X_test_label = encode_labels(X_test_label, n_classes)
    print("X_test_image.shape=",X_test_image.shape)
    print("X_test_label.shape=",X_test_label.shape)
    print(X_test_label[0:5])
    evaluate(X_test_image,X_test_label, mean)

# Evaluate the saved model on the test batch, using the same mean as training.
vgg_eval(mean=MEAN)

X_test_image.shape= (10000, 3072)
X_test_label.shape= (10000, 10)
[[0. 0. 0. 1. 0. 0. 0. 0. 0. 0.]
 [0. 0. 0. 0. 0. 0. 0. 0. 1. 0.]
 [0. 0. 0. 0. 0. 0. 0. 0. 1. 0.]
 [1. 0. 0. 0. 0. 0. 0. 0. 0. 0.]
 [0. 0. 0. 0. 0. 0. 1. 0. 0. 0.]]
INFO:tensorflow:Restoring parameters from ./vgg/vgg_cifar_model
Test  batch  0 :Testing Accuracy: 0.78333336
Test  batch  1 :Testing Accuracy: 0.825
Test  batch  2 :Testing Accuracy: 0.78333336
Test  batch  3 :Testing Accuracy: 0.7416667
Test  batch  4 :Testing Accuracy: 0.8666667
Test  batch  5 :Testing Accuracy: 0.85833335
Test  batch  6 :Testing Accuracy: 0.775
Test  batch  7 :Testing Accuracy: 0.825
Test  batch  8 :Testing Accuracy: 0.7583333
Test  batch  9 :Testing Accuracy: 0.81666666
Test  batch  10 :Testing Accuracy: 0.7583333
Test  batch  11 :Testing Accuracy: 0.8
Test  batch  12 :Testing Accuracy: 0.71666664
Test  batch  13 :Testing Accuracy: 0.78333336
Test  batch  14 :Testing Accuracy: 0.71666664
Test  batch  15 :Testing Accuracy: 0.7583333
Test 