# cnn funetuning with pre-training vgg net

In [1]:
import os
import numpy as np
import tensorflow as tf
import matplotlib.pyplot as plt
import scipy.misc 
import scipy.io
from tensorflow.examples.tutorials.mnist import input_data
%matplotlib inline  
print ("Packages loaded.")

Packages loaded.


## load data

In [2]:
cwd = os.getcwd()
last_dir = os.path.abspath(os.path.join(cwd, os.path.pardir))
load_path = last_dir + "/data/data4vgg.npz"
data = np.load(load_path)

vgg_path = last_dir + "/model/imagenet-vgg-verydeep-19.mat"

# show file
print(data.files)

['trainlabel', 'trainimg', 'testimg', 'testlabel']


## parse data

In [3]:
train_img = data['trainimg']
train_label = data['trainlabel']
test_img = data['testimg']
test_label = data['testlabel']

n_train     = train_img.shape[0]
n_class     = train_label.shape[1]
dim        = train_img.shape[1]
n_test      = test_img.shape[0]

print("shape of 'train image' is %s" %(train_img.shape, ))  #shape[0]数量， shape[1]维度
print("shape of 'test image' is %s" %(test_img.shape, ))
print("num of class is %d" %(train_label.shape[1]))

shape of 'train image' is (69, 37632)
shape of 'test image' is (18, 37632)
num of class is 2


## generate tensors for training and testing

In [7]:
train_img_tensor = np.ndarray((n_train, 112, 112, 3))
for i in range(n_train):
    curr_img = train_img[i, :]
    curr_img = np.reshape(curr_img, [112, 112, 3])
    train_img_tensor[i, :, :, :] = curr_img
print("shape of trainimg_tensor is %s" %(train_img_tensor.shape, ))

test_img_tensor = np.ndarray((n_test, 112, 112, 3))
for i in range(n_test):
    curr_img = test_img[i, :]
    curr_img = np.reshape(curr_img, [112, 112, 3])
    test_img_tensor[i, :, :, :] = curr_img
print("shape of testimg_tensor is %s" %(test_img_tensor.shape, ))

shape of trainimg_tensor is (69, 112, 112, 3)
shape of testimg_tensor is (18, 112, 112, 3)


## define a function for vgg network

In [8]:
def net(data_path, input_image):
    # layers
    layers = (
        'conv1_1', 'relu1_1', 'conv1_2', 'relu1_2', 'pool1',
        'conv2_1', 'relu2_1', 'conv2_2', 'relu2_2', 'pool2',
        'conv3_1', 'relu3_1', 'conv3_2', 'relu3_2', 'conv3_3',
        'relu3_3', 'conv3_4', 'relu3_4', 'pool3',
        'conv4_1', 'relu4_1', 'conv4_2', 'relu4_2', 'conv4_3',
        'relu4_3', 'conv4_4', 'relu4_4', 'pool4',
        'conv5_1', 'relu5_1', 'conv5_2', 'relu5_2', 'conv5_3',
        'relu5_3', 'conv5_4', 'relu5_4'
    )
    
    # data
    data = scipy.io.loadmat(data_path)
    mean = data['normalization'][0][0][0]
    mean_pixel = np.mean(mean, axis=(0, 1))
    weights = data['layers'][0]
    
    net = {}
    current = input_image
    for i, name in enumerate(layers):
        kind = name[:4]
        if kind == 'conv':
            kernels, bias = weights[i][0][0][0][0]
            # matconvnet: weights are [width, height, in_channels, out_channels]
            # tensorflow: weights are [height, width, in_channels, out_channels]
            kernels = np.transpose(kernels, (1, 0, 2, 3))
            bias = bias.reshape(-1)
            current = _conv_layer(current, kernels, bias)
        elif kind == 'relu':
            current = tf.nn.relu(current)
        elif kind == 'pool':
            current = _pool_layer(current)
        net[name] = current

    assert len(net) == len(layers)
    return net, mean_pixel

def _conv_layer(input, weights, bias):
    conv = tf.nn.conv2d(input, tf.constant(weights), strides=(1, 1, 1, 1),
            padding='SAME')
    return tf.nn.bias_add(conv, bias)
def _pool_layer(input):
    return tf.nn.max_pool(input, ksize=(1, 2, 2, 1), strides=(1, 2, 2, 1),
            padding='SAME')
def preprocess(image, mean_pixel):
    return image - mean_pixel
def unprocess(image, mean_pixel):
    return image + mean_pixel

## extract features from the vgg network

In [10]:
with tf.Graph().as_default(), tf.Session() as sess:
    with tf.device("cpu:0"):
        img_placeholder = tf.placeholder("float", shape=(None, 112, 112, 3))
        net_val, mean_pixel = net(vgg_path, img_placeholder)
        train_features = net_val['relu5_4'].eval(feed_dict={img_placeholder: train_img_tensor})
        test_features = net_val['relu5_4'].eval(feed_dict={img_placeholder: test_img_tensor})
print("type of 'train_features' is %s " %(type(train_features)))
print("shape of of  'train_featrues' is %s " %(train_features.shape, ))
print("type of 'test_features' is %s " %(type(test_features)))
print("shape of of  'test_featrues' is %s " %(test_features.shape, ))
print("preprocess done")

type of 'train_features' is <class 'numpy.ndarray'> 
shape of of  'train_featrues' is (69, 7, 7, 512) 
type of 'test_features' is <class 'numpy.ndarray'> 
shape of of  'test_featrues' is (18, 7, 7, 512) 
preprocess done


## vectorize cnn features

In [12]:
train_vectorized = np.ndarray((n_train, 7 * 7 * 512))
test_vectorized = np.ndarray((n_test, 7 * 7 * 512))
for i in range(n_train):
    curr_feat = train_features[i, :, :, :]
    curr_feat_vec = np.reshape(curr_feat, (1, -1))
    train_vectorized[i, :] = curr_feat_vec
    
for i in range(n_test):
    curr_feat = test_features[i, :, :, :]
    curr_feat_vec = np.reshape(curr_feat, (1, -1))
    test_vectorized[i, :] = curr_feat_vec
    
print("shape of train_vectorized is %s" %(train_vectorized.shape, ))
print("shape of test_vectorized is %s" %(test_vectorized.shape, ))

shape of train_vectorized is (69, 25088)
shape of test_vectorized is (18, 25088)


## define networks and functions(add 2layer MLP)

In [14]:
# parameters
learning_rate = 0.0001
training_epochs = 100
batch_size = 100
display_step = 10

# network
with tf.device("/cpu:0"):
    n_input = dim
    n_output = n_class
    weights = {
        'wd1': tf.Variable(tf.random_normal([7*7*512, 1024], stddev=0.1)),
        'wd2': tf.Variable(tf.random_normal([1024, n_output], stddev=0.1))
    }
    biases = {
        'bd1': tf.Variable(tf.random_normal([1024], stddev=0.1)),
        'bd2': tf.Variable(tf.random_normal([n_output], stddev=0.1))
    }
    def conv_basic(_input, _w, _b, _keep_prob):
        # input
        _input_r = _input 
        # vectorize
        _dense1 = tf.reshape(_input_r, [-1, _w['wd1'].get_shape().as_list()[0]])
        # fc1
        _fc1 = tf.nn.relu(tf.add(tf.matmul(_dense1, _w['wd1']), _b['bd1']))
        _dropout = tf.nn.dropout(_fc1, _keep_prob)
        # fc2
        _out = tf.add(tf.matmul(_dropout, _w['wd2']), _b['bd2'])
        # return 
        out = {
            'input_r': _input_r, 'dense1': _dense1, 'fc1': _fc1,
            'dropout': _dropout, 'out': _out
        }
        return out

# tf graph input
x = tf.placeholder("float", [None, 7*7*512])
y = tf.placeholder("float", [None, n_output])
keep_prob = tf.placeholder("float")

# function
with tf.device("/cpu:0"):
    _pred = conv_basic(x, weights, biases, keep_prob)['out']
    loss = tf.reduce_mean(tf.nn.softmax_cross_entropy_with_logits(logits=_pred, labels=y))
    optimizer = tf.train.AdamOptimizer(learning_rate).minimize(loss)
    _corr = tf.equal(tf.argmax(_pred, 1), tf.argmax(y, 1))
    accuracy = tf.reduce_mean(tf.cast(_corr, "float"))
    init = tf.global_variables_initializer()
    
print("network ready to go")        

Instructions for updating:
Please use `rate` instead of `keep_prob`. Rate should be set to `rate = 1 - keep_prob`.
Instructions for updating:

Future major versions of TensorFlow will allow gradients to flow
into the labels input on backprop by default.

See `tf.nn.softmax_cross_entropy_with_logits_v2`.

network ready to go


## cnn finetuning

In [15]:
sess = tf.Session()
sess.run(init)

# training cycle
for epoch in range(training_epochs):
    avg_loss = 0
    n_batch = int(n_train / batch_size) + 1
    # loop over all batches
    for i in range(n_batch):
        rand_idx = np.random.randint(n_train, size=batch_size)
        batch_xs = train_vectorized[rand_idx, :]
        batch_ys = train_label[rand_idx, :]
        train_feed = {x: batch_xs, y: batch_ys, keep_prob: 0.7}
        sess.run(optimizer, feed_dict=train_feed)
        feed = {x: batch_xs, y: batch_ys, keep_prob: 1.}
        avg_loss += sess.run(loss, feed_dict=feed)
    avg_loss = avg_loss/n_batch
    
    # display
    if (epoch + 1) % display_step == 0:
        train_accur = sess.run(accuracy, feed_dict=feed)
        test_feed = {x: test_vectorized, y: test_label, keep_prob: 1.}
        test_accur = sess.run(accuracy, feed_dict=test_feed)
        print("Epoch: [%03d/%03d]  " "avg loss: %.9f  " "train accuracy: %.3f" "test accuracy: %.3f" 
              %(epoch, training_epochs, avg_loss, train_accur, test_accur))
print("optimize done")

Epoch: [009/100]  avg loss: 2.903176785  train accuracy: 0.770test accuracy: 0.444
Epoch: [019/100]  avg loss: 0.018299764  train accuracy: 1.000test accuracy: 0.889
Epoch: [029/100]  avg loss: 0.025710233  train accuracy: 0.990test accuracy: 0.722
Epoch: [039/100]  avg loss: 0.000000000  train accuracy: 1.000test accuracy: 0.889
Epoch: [049/100]  avg loss: 0.000000000  train accuracy: 1.000test accuracy: 0.833
Epoch: [059/100]  avg loss: 0.000000000  train accuracy: 1.000test accuracy: 0.833
Epoch: [069/100]  avg loss: 0.000000000  train accuracy: 1.000test accuracy: 0.833
Epoch: [079/100]  avg loss: 0.000000000  train accuracy: 1.000test accuracy: 0.889
Epoch: [089/100]  avg loss: 0.000000000  train accuracy: 1.000test accuracy: 0.889
Epoch: [099/100]  avg loss: 0.000000000  train accuracy: 1.000test accuracy: 0.833
optimize done
