In [1]:
import os
import math
import numpy as np
import tensorflow as tf
from PIL import Image #图像处理库
import time

In [2]:
import tensorflow.compat.v1 as tf
tf.disable_v2_behavior() 

Instructions for updating:
non-resource variables are not supported in the long term


In [3]:
#VGG中自带常量，VGG有将图片进行预处理，其中一个步骤是normalization:
#减去image_net的RGB通道的各个均值
VGG_MEAN = [103.939, 116.779, 123.68] #在vggnet的code中

In [4]:
class VGGNet:
    """Builds VGG-16 net structure,
        load parameters from pre-trained models.
    """
    def __init__(self, data_dict):
        self.data_dict = data_dict
        
    def get_conv_filter(self, name): #抽取卷积参数
        """eg. conv1_1 = data_dict['conv1_1']"""
        #tf.constant() #因为模型是预处理好的，所以我们不会改变参数，所以定义为常量。
        #另一个方法：可以设置成trainable = False
        return tf.constant(self.data_dict[name][0], name = 'conv') #这里应该是w,b中的w
    
    def get_fc_weight(self, name):
        return tf.constant(self.data_dict[name][0], name = 'fc') #这里应该是w,b中的w
    
    def get_bias(self, name):
        return tf.constant(self.data_dict[name][1], name = 'bias') #这里应该是w,b中的b
    
    #创建卷积层，池化层，全连接层
    def conv_layer(self, x, name):
        """Builds convolution layer."""
        with tf.name_scope(name):
            #加上name_scope是命名规范：
            #1. 防止命名冲突
            #2. tensorboard打印名字更加清晰规范
            conv_w = self.get_conv_filter(name)
            conv_b = self.get_bias(name)
            
            #现在不再使用tf.layers.conv2d(),因为我们已经有了pre-trained的参数
            #现在使用更基础的api: tf.nn.conv2d()
            h = tf.nn.conv2d(x, conv_w, [1,1,1,1], padding = 'same') #x是input，[1,1,1,1]是strides步长，因为这里x是四维，所以我们输入四个数
            h = tf.nn.bias_add(h, conv_b)
            
            #激活函数
            h = tf.nn.relu(h)
            return h
        
    #也是使用tf.nn.max_pooling()而不是tf.layers.max_pooling2d
    def pooling_layer(self, x, name):
        """Builds pooling layer."""
        return tf.nn.max_pooling(x,
                                 ksize = [1,2,2,1], #因为是按照长和宽来池化，所以是中间两个维度是2，其余维度是1
                                 strides = [1,2,2,1], 
                                 padding = 'same', 
                                 name = name) 
    
    
    def fc_layer(self, x, name, activation = tf.nn.relu):
        """Builds fully-connected layer."""
        with tf.name_scope(name):
            fc_w = self.get_fc_weight(name)
            fc_b = self.get_bias(name)
            h = tf.matmul(x, fc_w) #让输入x与w进行操作
            h = tf.nn.bias_add(h, fc_b)
            if activation == None:
                return h
            else:
                return activation(h)
            
    
    #创建展平功能，展平后输入给全连接层：做的是reshape操作，我们需要知道reshape之后的size有多大
    #展平之后，需要的长宽厚的乘积
    def flatten_layer(self, x, name):
        """Builds flatten layer."""
        with tf.name_scope(name):
            #[batch_size, img_width, img_height, channel]
            x_shape = x.get_shape().as_list()
            dim = 1
            for d in x_shape[1:]:
                dim *= d
            x = tf.reshape(x, [-1, dim]) #这里的-1，是reshape推断出来的，也是我们的batch_size, 你也可以写成[x_shape[0], dim]
            return x
       
    #建立vgg
    #我们现在就要做图像的风格转换，需要的图片只有一个，所以第一个维度是1
    #vggnet的设置中，图像大小是224*224
    def build(self, x_rgb):
        """BUild VGG16 network structure.
        Args:
        - x_rgb: eg. [1, 224, 224, 3]
        """
        start_time = time.time()
        print("Building model...")
        
        #每个通道减去均值VGG_MEAN，先拆分通道
        #复习：tf.split() 之前用于：深度可分离卷积，数据增强
        r, g, b = tf.split(x_rgb, [1,1,1], axis = 3) #切分成三通道：[1,1,1]
        
        #去除均值后，需要合并。这里注意vggnet输入的通道顺序是BGR
        #意味着之前写的VGG_MEAN的三个数分别是 BGR 的均值
        x_bgr = tf.concat([b - VGG_MEAN[0], 
                           g - VGG_MEAN[1],
                           r - VGG_MEAN[2]],
                          axis = 3) #在第四个维度，channel上合并
        
        #预处理之后，判断一下我们的维度是 224*224*3
        assert x_bgr.get_shape().as_list()[1:] == [224,224,3]
        
        #构建前两个卷积层：
        #vgg16：
        #第一个结构(stage)：两个卷积层 -> 一个池化层
        #第二个结构：两个卷积层 -> 一个池化层
        #第3个结构：3个卷积层 -> 一个池化层
        #第4个结构：3个卷积层 -> 一个池化层
        #第5个结构：3个卷积层 -> 一个池化层
        #第6个结构：3个全连接层
        #2*2 + 3*3 + 3 = 4 + 9 + 3 = 16, 也就是vgg16 
        
        ##注意：self.conv_layer(xx,yy)第二个参数的名字必须是data_dict.keys()中的
        #dict_keys(['conv5_1', 'fc6', 'conv5_3', 'conv5_2', 'fc8', 'fc7', 'conv4_1', 'conv4_2', 'conv4_3', 'conv3_3', 'conv3_2', 'conv3_1', 'conv1_1', 'conv1_2', 'conv2_2', 'conv2_1'])
        #我们将每一个层，设置成了成员变量, eg. self.conv1_1, 可能会用其中的某一层计算风格损失或者内容损失，设置成成员变量我们以后可以方便使用
        self.conv1_1 = self.conv_layer(x_bgr, 'conv1_1')
        self.conv1_2 = self.conv_layer(self.conv1_1, 'conv1_2')
        self.pool1 = self.pooling_layer(self.conv1_2, 'pool1') #pool1因为不是data_dict里面预处理好的，所以我们可以随意命名, 例如pool1。并且可以不用将它设置成成员函数，只不过这里为了统一起见
        
        self.conv2_1 = self.conv_layer(self.pool1, 'conv2_1')
        self.conv2_2 = self.conv_layer(self.conv2_1, 'conv2_2')
        self.pool2 = self.pooling_layer(self.conv2_2, 'pool2')
        
        self.conv3_1 = self.conv_layer(self.pool2, 'conv3_1')
        self.conv3_2 = self.conv_layer(self.conv3_1, 'conv3_2')
        self.conv3_3 = self.conv_layer(self.conv3_2, 'conv3_3')
        self.pool3 = self.pooling_layer(self.conv3_3, 'pool3')
        
        self.conv4_1 = self.conv_layer(self.pool3, 'conv4_1')
        self.conv4_2 = self.conv_layer(self.conv4_1, 'conv4_2')
        self.conv4_3 = self.conv_layer(self.conv4_2, 'conv4_3')
        self.pool4 = self.pooling_layer(self.conv4_3, 'pool4')
        
        self.conv5_1 = self.conv_layer(self.pool4, 'conv5_1')
        self.conv5_2 = self.conv_layer(self.conv5_1, 'conv5_2')
        self.conv5_3 = self.conv_layer(self.conv5_2, 'conv5_3')
        self.pool5 = self.pooling_layer(self.conv5_3, 'pool5')
        
        #展开 -> 全连接
        self.flatten5 = self.flatten_layer(self.pool5, 'flatten')
        self.fc6 = self.fc_layer(self.flatten, 'fc6')
        self.fc7 = self.fc_layer(self.fc6, 'fc7')
        
        #最后的fc8输出1k个值，给softmax()去计算概率分布
        #所以fc8不需要activation
        self.fc8 = self.fc_layer(self.fc7, 'fc8', activation = None)
        
        #计算softmax
        self.prob = tf.nn.softmax(self.fc8, name = 'prob')
        
        print("Building model finished: %4ds" % (time.time() - start_time))
        

In [5]:
#加载vgg16
vgg16_npy_path = '../../../other_datasets/vgg16.npy'
data_dict = np.load(vgg16_npy_path, encoding = 'latin1').item() #加item()是为了创建成字典

vgg16_for_result = VGGNet(data_dict)
content = tf.placeholder(tf.float32, shape = [1,224,224,3])
vgg16_for_result.build(content)

Building model...


ValueError: Attr 'padding' of 'Conv2D' Op passed string 'same' not in: "SAME", "VALID", "EXPLICIT".