In [27]:
import numpy as np
import tensorflow as tf
import PIL.Image as PI
from pylab import *
# Padding mode used by the conv / pooling helpers when the caller does not pass one.
DEFAULT_PADDING = "SAME"

In [38]:
# Training label file. The loading cell splits each line on whitespace, joins the
# first three tokens (with single spaces) into an image path and takes the fourth
# token as the label.
train_path = "/home/ghr/campaign/baidu big data/训练数据/data_label.txt"
# Validation label file — presumably the same line format as train_path; TODO confirm
# (it is not read anywhere in the visible cells).
val_path = "/home/ghr/campaign/baidu big data/训练数据/val_label.txt"
# Pretrained VGG16 parameters stored as a .npy file.
model_path = "/home/ghr/NoteBook/tensorflow-vgg-master/vgg16.npy"

&emsp;load用于读取预训练的权重并赋值给图中同名的变量;load_with_skip的作用相同,但会跳过skip_layer中列出的层

In [13]:
def load(data_path, session):
    """Copy every pretrained parameter stored in ``data_path`` into the graph.

    The file is expected to be a ``.npy`` file holding a pickled dict that maps
    a variable-scope name to a sequence ``(weights, biases)``. For each scope
    the already-created variables ``<scope>/weights`` and ``<scope>/biases``
    are looked up (``reuse=True``) and assigned the stored values.

    Args:
        data_path: Path of the ``.npy`` parameter file.
        session: ``tf.Session`` in which the assign ops are run.

    Raises:
        ValueError: From ``tf.get_variable`` if a scope in the file has no
            matching variable in the current graph.
    """
    # allow_pickle=True is required by NumPy >= 1.16.3 to load an object array
    # (the dict). Unpickling can execute arbitrary code: only load trusted files.
    data_dict = np.load(data_path, encoding='latin1', allow_pickle=True).item()
    for key in data_dict:
        with tf.variable_scope(key, reuse=True):
            # Stored order is (weights, biases); zip pairs each with its name.
            for subkey, data in zip(('weights', 'biases'), data_dict[key]):
                session.run(tf.get_variable(subkey).assign(data))

def load_with_skip(data_path, session, skip_layer):
    """Copy pretrained parameters into the graph, leaving some layers untouched.

    Works like ``load`` but ignores every top-level key of the parameter dict
    that is contained in ``skip_layer``, so those layers keep whatever values
    they currently have.

    Args:
        data_path: Path of the ``.npy`` parameter file (a pickled dict mapping
            scope name to a ``(weights, biases)`` sequence).
        session: ``tf.Session`` in which the assign ops are run.
        skip_layer: Container of scope names to skip (tested with ``in``).

    Raises:
        ValueError: From ``tf.get_variable`` if a non-skipped scope in the file
            has no matching variable in the current graph.
    """
    # allow_pickle=True is required by NumPy >= 1.16.3 to load an object array
    # (the dict). Unpickling can execute arbitrary code: only load trusted files.
    data_dict = np.load(data_path, encoding='latin1', allow_pickle=True).item()
    for key in data_dict:
        if key in skip_layer:
            continue
        with tf.variable_scope(key, reuse=True):
            # Stored order is (weights, biases); zip pairs each with its name.
            for subkey, data in zip(('weights', 'biases'), data_dict[key]):
                session.run(tf.get_variable(subkey).assign(data))

&emsp;make_var用于在当前变量作用域中创建(或获取)变量;concat用于沿指定的轴连接多个tensor,例如把分组卷积中各个group的输出拼接起来

In [14]:
def make_var(name, shape):
    """Return the variable ``name`` with the given ``shape`` from the current variable scope."""
    variable = tf.get_variable(name, shape=shape)
    return variable
    
def concat(inputs, axis, name):
    """Join the tensors in ``inputs`` along ``axis`` and name the resulting op ``name``."""
    joined = tf.concat(inputs, axis=axis, name=name)
    return joined

&emsp;k_h, k_w为kernel的尺寸;s_h, s_w为strides的尺寸;c_o为卷积核数(即输出通道数);c_i为输入通道数,由input的最后一维得到;name为层名;group > 1时将输入通道分为多组分别卷积

In [28]:
def conv(input, k_h, k_w, c_o, s_h, s_w, name, relu = True, padding = DEFAULT_PADDING, group = 1):
    """Build a 2-D convolution layer (optionally grouped) with bias and ReLU.

    Variables ``weights`` and ``biases`` are created inside the variable scope
    ``name``.

    Args:
        input: Input tensor; its last dimension is taken as the channel count
            and must be statically known.
        k_h, k_w: Kernel height and width.
        c_o: Number of output channels (number of filters).
        s_h, s_w: Stride along height and width.
        name: Variable-scope name of the layer.
        relu: If true, apply ReLU after the bias.
        padding: Padding mode passed to ``tf.nn.conv2d``.
        group: Number of groups. With ``group > 1`` the input channels and the
            kernel's output channels are each split into ``group`` parts that
            are convolved pairwise and concatenated on the channel axis.

    Returns:
        The output tensor of the layer.

    Raises:
        AssertionError: If the input or output channel count is not divisible
            by ``group``.
    """
    c_i = input.get_shape()[-1].value
    assert c_i % group == 0
    assert c_o % group == 0
    convolve = lambda i, k: tf.nn.conv2d(i, k, [1, s_h, s_w, 1], padding = padding)
    with tf.variable_scope(name) as scope:
        # Floor division keeps the shape entry an int under Python 3
        # (true division would produce a float dimension).
        kernel = make_var('weights', shape = [k_h, k_w, c_i // group, c_o])
        biases = make_var('biases', [c_o])
        if group == 1:
            output = convolve(input, kernel)
        else:
            # Split the 4th dimension (channels) of the input into `group` parts,
            # and the kernel's output channels likewise.
            input_groups = tf.split(input, group, 3)
            kernel_groups = tf.split(kernel, group, 3)
            output_groups = [convolve(i, k) for i, k in zip(input_groups, kernel_groups)]
            output = tf.concat(output_groups, 3)
        # Static output shape with the batch dimension left free for the reshape.
        output_shape = output.get_shape().as_list()
        output_shape[0] = -1
        if relu:
            biased = tf.reshape(tf.nn.bias_add(output, biases), output_shape)
            return tf.nn.relu(biased, name = scope.name)
    # Kept outside the `with` block, as in the original, so the op name of the
    # non-ReLU output is unchanged.
    return tf.reshape(tf.nn.bias_add(output, biases), output_shape, name = scope.name)

&emsp;定义池化方案

In [29]:
def max_pool(input, k_h, k_w, s_h, s_w, name, padding = DEFAULT_PADDING):
    """Max-pool ``input`` with a ``k_h`` x ``k_w`` window and ``s_h`` x ``s_w`` stride."""
    window = [1, k_h, k_w, 1]
    stride = [1, s_h, s_w, 1]
    return tf.nn.max_pool(input, ksize = window, strides = stride, padding = padding, name = name)

def avg_pool(input, k_h, k_w, s_h, s_w, name, padding = DEFAULT_PADDING):
    """Average-pool ``input`` with a ``k_h`` x ``k_w`` window and ``s_h`` x ``s_w`` stride."""
    window = [1, k_h, k_w, 1]
    stride = [1, s_h, s_w, 1]
    return tf.nn.avg_pool(input, ksize = window, strides = stride, padding = padding, name = name)

&emsp;定义全连接(fully connected)层、softmax层和dropout层

In [36]:
def fc(input, num_in, num_out, name, relu=True):
    """Build a fully connected layer ``input @ weights + biases``.

    Variables ``weights`` (``[num_in, num_out]``) and ``biases`` (``[num_out]``)
    are created inside the variable scope ``name``. ReLU is applied to the
    result when ``relu`` is true.
    """
    with tf.variable_scope(name) as scope:
        weights = make_var('weights', shape = [num_in, num_out])
        biases = make_var('biases', [num_out])
        if relu:
            return tf.nn.relu_layer(input, weights, biases, name = scope.name)
        return tf.nn.xw_plus_b(input, weights, biases, name = scope.name)

def softmax(input, name):
    """Apply ``tf.nn.softmax`` to ``input`` and name the op ``name``."""
    probabilities = tf.nn.softmax(input, name = name)
    return probabilities

def dropout(input, keep_prob):
    """Apply ``tf.nn.dropout`` to ``input``, passing ``keep_prob`` as its second argument."""
    dropped = tf.nn.dropout(input, keep_prob)
    return dropped

&emsp;定义VGG16网络结构

In [23]:
# Graph inputs: a batch of 224x224 3-channel images and the matching one-hot
# labels over 133 classes; the batch dimension is left unspecified.
xs = tf.placeholder(tf.float32, [None, 224, 224, 3])
ys = tf.placeholder(tf.float32, [None, 133])

In [30]:
# Block 1: two 3x3 convolutions with 64 filters, then 2x2 max pooling.
conv1_1 = conv(xs, 3, 3, 64, 1, 1, name = "conv1_1", relu = True, padding = "SAME", group = 1)
conv1_2 = conv(conv1_1, 3, 3, 64, 1, 1, name = "conv1_2", relu = True, padding = "SAME", group = 1)
max_pool1_1 = max_pool(conv1_2, 2, 2, 2, 2, name = "max_pool1_1" , padding = "SAME")

# Block 2: two 3x3 convolutions with 128 filters, then 2x2 max pooling.
conv2_1 = conv(max_pool1_1, 3, 3, 128, 1, 1, name = "conv2_1", relu = True, padding = "SAME", group = 1)
conv2_2 = conv(conv2_1, 3, 3, 128, 1, 1, name = "conv2_2", relu = True, padding = "SAME", group = 1)
# Pool the output of conv2_2 (pooling conv2_1 here would leave conv2_2 unused).
max_pool2_1 = max_pool(conv2_2, 2, 2, 2, 2, name = "max_pool2_1" , padding = "SAME")

# Block 3: three 3x3 convolutions with 256 filters, then 2x2 max pooling.
conv3_1 = conv(max_pool2_1, 3, 3, 256, 1, 1, name = "conv3_1", relu = True, padding = "SAME", group = 1)
conv3_2 = conv(conv3_1, 3, 3, 256, 1, 1, name = "conv3_2", relu = True, padding = "SAME", group = 1)
conv3_3 = conv(conv3_2, 3, 3, 256, 1, 1, name = "conv3_3", relu = True, padding = "SAME", group = 1)
max_pool3_1 = max_pool(conv3_3, 2, 2, 2, 2, name = "max_pool3_1", padding = "SAME")

# Block 4: three 3x3 convolutions with 512 filters, then 2x2 max pooling.
conv4_1 = conv(max_pool3_1, 3, 3, 512, 1, 1, name = "conv4_1", relu = True, padding = "SAME", group = 1)
conv4_2 = conv(conv4_1, 3, 3, 512, 1, 1, name = "conv4_2", relu = True, padding = "SAME", group = 1)
conv4_3 = conv(conv4_2, 3, 3, 512, 1, 1, name = "conv4_3", relu = True, padding = "SAME", group = 1)
max_pool4_1 = max_pool(conv4_3, 2, 2, 2, 2, name = "max_pool4_1", padding = "SAME")

# Block 5: three 3x3 convolutions with 512 filters, then 2x2 max pooling.
conv5_1 = conv(max_pool4_1, 3, 3, 512, 1, 1, name = "conv5_1", relu = True, padding = "SAME", group = 1)
conv5_2 = conv(conv5_1, 3, 3, 512, 1, 1, name = "conv5_2", relu = True, padding = "SAME", group = 1)
conv5_3 = conv(conv5_2, 3, 3, 512, 1, 1, name = "conv5_3", relu = True, padding = "SAME", group = 1)
max_pool5_1 = max_pool(conv5_3, 2, 2, 2, 2, name = "max_pool5_1", padding = "SAME")

# Flatten the last pooling output to [batch, height * width * channels] for the
# fully connected layers.
wh = max_pool5_1.get_shape().as_list()
fc_in_num = wh[1] * wh[2] * wh[3]
max_pool5_1 = tf.reshape(max_pool5_1, [-1, fc_in_num])
fc6 = fc(max_pool5_1, fc_in_num, 4096, name = "fc6", relu = True)
# NOTE(review): keep_prob is the constant 0.5, so dropout is also active when
# the graph is used for evaluation.
dropout6 = dropout(fc6, 0.5)

fc7 = fc(dropout6, 4096, 4096, name = "fc7", relu = True)
dropout7 = dropout(fc7, 0.5)

# Final layer: 133 outputs without ReLU (raw logits for the softmax).
fc8 = fc(dropout7, 4096, 133, name = "fc8", relu = False)

In [37]:
# Class probabilities, used for the accuracy metric.
pred = softmax(fc8, name = "softmax")
# Cross-entropy averaged over every (example, class) entry. log_softmax works on
# the logits directly, so a probability that underflows to 0 no longer yields
# log(0) = -inf and a NaN loss; the value is otherwise the same as
# -mean(ys * log(softmax(fc8))).
cross_entropy = -tf.reduce_mean(ys * tf.nn.log_softmax(fc8))
train = tf.train.AdamOptimizer(0.001).minimize(cross_entropy)
# An example counts as correct when the arg-max class matches the one-hot label.
correct_prediction = tf.equal(tf.argmax(pred, 1), tf.argmax(ys, 1))
accuracy = tf.reduce_mean(tf.cast(correct_prediction, "float"))

In [34]:
# Read the training label file. Each non-blank line is split on whitespace: the
# first three tokens, joined by single spaces, form the image path (the path
# itself contains spaces) and the fourth token is the class label.
train_index = []
train_label_index = []
# `with` guarantees the file is closed even if reading fails.
with open(train_path) as train_file:
    lines = train_file.readlines()
for line in lines:
    items = line.split()
    if not items:
        # Skip blank lines (e.g. a trailing newline at the end of the file).
        continue
    train_index.append(items[0] + " " + items[1] + " " + items[2])
    train_label_index.append(items[3])

In [None]:
# Mini-batch training: 2000 steps of up to `batch_size` images each, printing
# the accuracy on the batch that was just trained on.
batch_size = 50
train_ptr = 0
train_label_ptr = 0  # not used below; kept so the cell defines the same names
num_train = len(train_index)
if num_train == 0:
    raise ValueError("train_index is empty; nothing to train on")
with tf.Session() as sess:
    sess.run(tf.global_variables_initializer())
    # NOTE(review): load / load_with_skip are not called here, so training
    # starts from the variables' initial values — confirm this is intended.
    for iteration in range(2000):
        # Start over from the first sample once the data is exhausted instead
        # of slicing past the end (which produced empty batches).
        if train_ptr >= num_train:
            train_ptr = 0
        batch_train_index = train_index[train_ptr:train_ptr + batch_size]
        batch_train_label_index = train_label_index[train_ptr:train_ptr + batch_size]
        train_ptr = train_ptr + batch_size
        # The last batch of a pass may hold fewer than batch_size samples.
        current_batch = len(batch_train_index)
        images = np.empty([current_batch, 224, 224, 3], dtype = np.float32)
        one_hot_labels = np.zeros((current_batch, 133), dtype = np.float32)
        for step in range(current_batch):
            with PI.open(batch_train_index[step]) as image_file:
                # convert("RGB") forces 3 channels so grayscale / RGBA / palette
                # images still fit the (224, 224, 3) slot.
                img = np.asarray(image_file.convert("RGB").resize((224, 224)))
            images[step] = img
            one_hot_labels[step][int(batch_train_label_index[step])] = 1
        feed = {xs: images, ys: one_hot_labels}
        sess.run(train, feed_dict = feed)
        print(sess.run(accuracy, feed_dict = feed))

In [4]:
# Load the pretrained parameter dict from model_path (the same file the original
# hard-coded path pointed to). allow_pickle=True is required by NumPy >= 1.16.3
# for object arrays; unpickling can execute arbitrary code, so only load trusted files.
vgg16 = np.load(model_path, encoding='latin1', allow_pickle=True).item()