Style transfer 只是把全图的纹理信息整合到了 content img 上。但是画画时，在每一种风格下，不同的物体有不同的边缘画法，会拉出不一样长度的线条，这些才是关键的画师技巧；简单地将一张图片上的纹理强行烙印在另一张图片上并不是创作。比如 style 图中房子这一物体的画法，其纹理会被直接复制到 content img 上，即使 content img 上对应的部分并不是一个房子。

In [1]:
import os
from PIL import Image, ImageOps
import numpy as np
import scipy.misc
from six.moves import urllib

In [2]:
def download(download_link, file_name, expected_bytes):
    """Download the pre-trained VGG-19 model unless it is already on disk.

    Args:
        download_link: URL the model is fetched from.
        file_name: Local path the model is stored at.
        expected_bytes: Size in bytes the downloaded file must have.

    Raises:
        Exception: If the size of the downloaded file differs from
            ``expected_bytes``.
    """
    # An existing file is accepted as-is; its size is not re-checked.
    # NOTE(review): a file left behind by a failed size check below will
    # therefore be reported as "ready" on the next call.
    if os.path.exists(file_name):
        print("VGG-19 pre-trained model is ready")
        return
    print("Downloading the VGG pre-trained model.This might take a while ...")
    file_name, _ = urllib.request.urlretrieve(download_link, file_name)
    actual_bytes = os.stat(file_name).st_size
    if actual_bytes != expected_bytes:
        # The original message glued "File" directly onto the file name.
        raise Exception("File " + file_name + " might be corrupted. You should try downloading it with a browser.")
    print("Successfully downloaded VGG-19 pre-trained model", file_name)


In [3]:
def get_resized_image(img_path, width, height, save = True):
    """Load an image, fit it to ``width`` x ``height`` and return it as a batch.

    Args:
        img_path: Path of the image file to open.
        width: Target width in pixels.
        height: Target height in pixels.
        save: When true, also write the resized image next to the original
            as ``'resized ' + <original file name>`` unless that file
            already exists.

    Returns:
        A float32 numpy array with a leading batch axis of size 1 added in
        front of the image array.
    """
    image = Image.open(img_path)
    # Image.ANTIALIAS no longer exists in Pillow 10; Image.LANCZOS names the
    # same resampling filter and is available in old and new releases.
    image = ImageOps.fit(image, (width, height), Image.LANCZOS)
    if save:
        directory, base_name = os.path.split(img_path)
        out_path = os.path.join(directory, 'resized ' + base_name)
        if not os.path.exists(out_path):
            image.save(out_path)
    # The rest of the file hard-codes three colour channels (the noise image
    # shape and the per-channel mean), so grayscale / RGBA inputs are
    # normalised to RGB before conversion to an array.
    image = np.asarray(image.convert('RGB'), np.float32)
    return np.expand_dims(image, 0)


In [4]:
def generate_noise_image(content_image, width, height, noise_ratio=0.6):
    """Blend uniform random noise with the content image.

    Args:
        content_image: Array the noise is mixed with; it must broadcast
            against shape ``(1, height, width, 3)``.
        width: Width of the generated noise.
        height: Height of the generated noise.
        noise_ratio: Weight of the noise; the content image is weighted
            with ``1 - noise_ratio``.

    Returns:
        ``noise * noise_ratio + content_image * (1 - noise_ratio)`` where
        the noise is drawn uniformly from [-20, 20) as float32.
    """
    noise_shape = (1, height, width, 3)
    noise = np.random.uniform(low=-20, high=20, size=noise_shape).astype(np.float32)
    content_share = 1 - noise_ratio
    return noise * noise_ratio + content_image * content_share


In [5]:
def save_image(path, image):
    """Write the first image of a batch to ``path`` as 8-bit pixels.

    Args:
        path: Destination file; the format follows from its extension.
        image: Batch of images; only ``image[0]`` is written. Values are
            clipped to [0, 255] and cast to uint8 first.
    """
    pixels = np.clip(image[0], 0, 255).astype('uint8')
    # scipy.misc.imsave was removed in SciPy 1.2, so the array is written
    # with PIL (already imported by this file) instead.
    Image.fromarray(pixels).save(path)
    

In [6]:
def safe_mkdir(path):
    """Create directory ``path``; do nothing if it already exists.

    Args:
        path: Directory to create (only the last path component is
            created, as with ``os.mkdir``).

    Raises:
        OSError: If the directory could not be created and does not exist
            afterwards (e.g. missing parent, no permission, or ``path`` is
            an existing regular file).
    """
    try:
        os.mkdir(path)
    except OSError:
        # Only "the directory is already there" is a benign failure; any
        # other error would otherwise surface later as a confusing write
        # failure, so it is re-raised here.
        if not os.path.isdir(path):
            raise
    

load_vgg_solution

In [7]:
import numpy as np
import scipy.io
import tensorflow as tf

In [8]:
# URL the pre-trained VGG-19 weights (.mat file) are downloaded from.
VGG_DOWNLOAD_LINK = 'http://www.vlfeat.org/matconvnet/models/imagenet-vgg-verydeep-19.mat'
# Local file name the weights are stored under and loaded from.
VGG_FILENAME = 'imagenet-vgg-verydeep-19.mat'
# Size in bytes that a freshly downloaded weights file is checked against.
EXPECTED_BYTES = 534904783

self.vgg_layers[0][layer_idx]的结构
conv->e.g: layers{1,1}:{name,type,weights,size,pad,stride,precious,dilate,opts}
relu->e.g: layers{1,2}:{name,type,leak,weights,precious}
pool->e.g: layers{1,5}:{name,type,method,pool,stride,pad,weights,precious,opts}
layers{1,1}:[[{A1,A2,A3,A4,A5,A6,A7,A8,A9}]]其中A3=[[{A3_1,A3_2}]]对应w,b
所以[0][0][2][0][0]对应W,[0][0][2][0][1]对应b

[[(array(['conv1_1'], dtype='<U7'), array(['conv'], dtype='<U4'), array([[array([[[[ 0.39416704, -0.08419707, -0.03631314, ..., -0.10720515,
           -0.03804016,  0.04690642],
         [ 0.46418372,  0.03355668,  0.10245045, ..., -0.06945956,
          -0.04020201,  0.04048637],
         [ 0.34119523,  0.09563112,  0.0177449 , ..., -0.11436455,
          -0.05099866, -0.00299793]],

        [[ 0.37740308, -0.07876257, -0.04775979, ..., -0.11827433,
          -0.19008617, -0.01889699],
         [ 0.41810837,  0.05260524,  0.09755926, ..., -0.09385028,
          -0.20492788, -0.0573062 ],
         [ 0.33999205,  0.13363543,  0.02129423, ..., -0.13025227,
          -0.16508926, -0.06969624]],

        [[-0.04594866, -0.11583115, -0.14462094, ..., -0.12290562,
          -0.35782176, -0.27979308],
         [-0.04806903, -0.00658076, -0.02234544, ..., -0.0878844 ,
          -0.3915486 , -0.34632796],
         [-0.04484424,  0.06471398, -0.07631404, ..., -0.12629718,
          -0.29905206, -0.2825364 ]]],


       [[[ 0.2671299 , -0.07969447,  0.05988706, ..., -0.09225675,
           0.31764674,  0.42209673],
         [ 0.30511212,  0.05677647,  0.21688674, ..., -0.06828708,
           0.3440761 ,  0.44033417],
         [ 0.23215917,  0.133657  ,  0.12134422, ..., -0.1063385 ,
           0.28406844,  0.3594997 ]],

        [[ 0.09986369, -0.06240906,  0.07442063, ..., -0.02214639,
           0.25912452,  0.423499  ],
         [ 0.10385381,  0.08851637,  0.2392226 , ..., -0.01210995,
           0.27064082,  0.40848857],
         [ 0.08978214,  0.18505956,  0.15264879, ..., -0.04266965,
           0.25779948,  0.35873157]],

        [[-0.34100872, -0.13399366, -0.11510294, ..., -0.11911335,
          -0.23109646, -0.19202407],
         [-0.37314063, -0.00698938,  0.02153259, ..., -0.09827439,
          -0.2535741 , -0.25541356],
         [-0.30331427,  0.08002605, -0.03926321, ..., -0.12958746,
          -0.19778992, -0.21510386]]],


       [[[-0.07573577, -0.07806503, -0.03540679, ..., -0.1208065 ,
           0.20088433,  0.09790061],
         [-0.07646758,  0.03879711,  0.09974211, ..., -0.08732687,
           0.2247974 ,  0.10158388],
         [-0.07260918,  0.10084777,  0.01313597, ..., -0.12594968,
           0.1464741 ,  0.05009392]],

        [[-0.2803425 , -0.07094654, -0.0387974 , ..., -0.08843154,
           0.18996507,  0.07766484],
         [-0.3107071 ,  0.06031388,  0.10412455, ..., -0.06832542,
           0.20279962,  0.05222717],
         [-0.246675  ,  0.1414054 ,  0.02605635, ..., -0.10128672,
           0.16340195,  0.02832468]],

        [[-0.41602272, -0.11491341, -0.14672887, ..., -0.13079506,
          -0.1379628 , -0.2658845 ],
         [-0.46453714, -0.00576723, -0.02660675, ..., -0.10017379,
          -0.15603794, -0.32566148],
         [-0.33683276,  0.06601517, -0.08144748, ..., -0.13460518,
          -0.1342358 , -0.27096185]]]], dtype=float32),
        array([[ 0.7301776 ],
       [ 0.06493629],
       [ 0.03428847],
       [ 0.8260386 ],
       [ 0.2578029 ],
       [ 0.54867655],
       [-0.01243854],
       [ 0.34789944],
       [ 0.5510871 ],
       [ 0.06297145],
       [ 0.6069906 ],
       [ 0.26703122],
       [ 0.649414  ],
       [ 0.17073655],
       [ 0.4772309 ],
       [ 0.38250586],
       [ 0.46373144],
       [ 0.21496128],
       [ 0.46911287],
       [ 0.23825859],
       [ 0.4751922 ],
       [ 0.70606434],
       [ 0.27007523],
       [ 0.6855273 ],
       [ 0.03216552],
       [ 0.6025288 ],
       [ 0.3503486 ],
       [ 0.446798  ],
       [ 0.7732652 ],
       [ 0.58191687],
       [ 0.39083108],
       [ 1.7519354 ],
       [ 0.66117406],
       [ 0.30213955],
       [ 0.53059655],
       [ 0.6773747 ],
       [ 0.33273223],
       [ 0.49127793],
       [ 0.26548928],
       [ 0.18805602],
       [ 0.07412001],
       [ 1.1081088 ],
       [ 0.28224325],
       [ 0.86755145],
       [ 0.19422948],
       [ 0.810332  ],
       [ 0.36062282],
       [ 0.5072004 ],
       [ 0.42472315],
       [ 0.49632648],
       [ 0.15117475],
       [ 0.79454446],
       [ 0.33494323],
       [ 0.47283995],
       [ 0.41552398],
       [ 0.08496041],
       [ 0.37947032],
       [ 0.6006739 ],
       [ 0.47174454],
       [ 0.8130921 ],
       [ 0.45521152],
       [ 1.0892007 ],
       [ 0.47757268],
       [ 0.4072122 ]], dtype=float32)]], dtype=object), array([[ 3,  3,  3, 64]], dtype=uint8), array([[1, 1, 1, 1]], dtype=uint8), array([[1, 1]], dtype=uint8), array([[0]], dtype=uint8), array([[1]], dtype=uint8), array([], shape=(0, 0), dtype=object))]]

In [9]:
class VGG(object):
    """Convolutional part of VGG-19 built from a pre-trained .mat weights file.

    Every layer added by :meth:`conv2d_relu` or :meth:`avgpool` is stored on
    the instance under its layer name (e.g. ``self.conv1_1``,
    ``self.avgpool1``).
    """

    def __init__(self, input_img):
        """Fetch the weights file if necessary and load its layer table.

        Args:
            input_img: Tensor the network is applied to.
        """
        download(VGG_DOWNLOAD_LINK, VGG_FILENAME, EXPECTED_BYTES)
        # Raw 'layers' entry of the .mat file; indexed by _weights().
        self.vgg_layers = scipy.io.loadmat(VGG_FILENAME)['layers']
        # Image the network is computed on.
        self.input_img = input_img
        # Per-channel means used for mean-centering, shaped (1, 1, 1, 3) so
        # they broadcast over a batch of 3-channel images.
        self.mean_pixels = np.array([123.68, 116.779, 103.939]).reshape((1,1,1,3))

    def _weights(self, layer_idx, expected_layer_name):
        """Return the pre-trained kernel and bias of layer ``layer_idx``.

        Args:
            layer_idx: Position of the layer in the loaded layer table.
            expected_layer_name: Name the layer at that position must have.

        Returns:
            ``(W, b)`` where ``b`` is flattened to one dimension.

        Raises:
            ValueError: If the stored layer name differs from
                ``expected_layer_name``.
        """
        layer = self.vgg_layers[0][layer_idx][0][0]
        layer_name = layer[0][0]
        # An explicit raise (rather than assert) keeps the check active when
        # Python runs with -O; it also runs before the weights are indexed so
        # a wrong layer index reports the name mismatch.
        if layer_name != expected_layer_name:
            raise ValueError(
                "Layer %d is named %r, expected %r"
                % (layer_idx, str(layer_name), expected_layer_name))
        # Field 2 of a conv layer holds the pair (weights, bias).
        W = layer[2][0][0]
        b = layer[2][0][1]
        return W, b.reshape(b.size)

    def conv2d_relu(self, prev_layer, layer_idx, layer_name):
        """Apply the pre-trained convolution ``layer_idx`` followed by ReLU.

        The result is stored as ``self.<layer_name>``.

        Args:
            prev_layer: Input tensor of the convolution.
            layer_idx: Position of the conv layer in the layer table.
            layer_name: Expected name of that layer; also the attribute and
                variable-scope name used for the output.
        """
        with tf.variable_scope(layer_name):
            W, b = self._weights(layer_idx, layer_name)
            # The weights are constants: they are never trained.
            W = tf.constant(W, name = 'weights')
            b = tf.constant(b, name = 'bias')
            conv2d = tf.nn.conv2d(prev_layer, filter = W, strides = [1,1,1,1], padding = 'SAME')
            out = tf.nn.relu(conv2d + b)
        setattr(self, layer_name, out)

    def avgpool(self, prev_layer, layer_name):
        """Apply 2x2 average pooling with stride 2 and store it as ``self.<layer_name>``.

        Args:
            prev_layer: Input tensor of the pooling layer.
            layer_name: Attribute and variable-scope name of the output.
        """
        with tf.variable_scope(layer_name):
            out = tf.nn.avg_pool(prev_layer,
                                ksize = [1,2,2,1],
                                strides = [1,2,2,1],
                                padding = 'SAME')
        setattr(self, layer_name, out)

    def load(self):
        """Build all five conv blocks on top of ``self.input_img``.

        The layer indices skip the positions between the conv layers of the
        loaded layer table; each block ends with an average-pooling layer.
        """
        self.conv2d_relu(self.input_img, 0, 'conv1_1')
        self.conv2d_relu(self.conv1_1, 2, 'conv1_2')
        self.avgpool(self.conv1_2, 'avgpool1')
        self.conv2d_relu(self.avgpool1, 5, 'conv2_1')
        self.conv2d_relu(self.conv2_1, 7, 'conv2_2')
        self.avgpool(self.conv2_2, 'avgpool2')
        self.conv2d_relu(self.avgpool2, 10, 'conv3_1')
        self.conv2d_relu(self.conv3_1, 12, 'conv3_2')
        self.conv2d_relu(self.conv3_2, 14, 'conv3_3')
        self.conv2d_relu(self.conv3_3, 16, 'conv3_4')
        self.avgpool(self.conv3_4, 'avgpool3')
        self.conv2d_relu(self.avgpool3, 19, 'conv4_1')
        self.conv2d_relu(self.conv4_1, 21, 'conv4_2')
        self.conv2d_relu(self.conv4_2, 23, 'conv4_3')
        self.conv2d_relu(self.conv4_3, 25, 'conv4_4')
        self.avgpool(self.conv4_4, 'avgpool4')
        self.conv2d_relu(self.avgpool4, 28, 'conv5_1')
        self.conv2d_relu(self.conv5_1, 30, 'conv5_2')
        self.conv2d_relu(self.conv5_2, 32, 'conv5_3')
        self.conv2d_relu(self.conv5_3, 34, 'conv5_4')
        self.avgpool(self.conv5_4, 'avgpool5')

## For testing only: build a VGG on a zero-initialised float32 variable of
## shape [1,333,250,3] and construct just its first layer, conv1_1.
vgg = VGG(tf.get_variable('img_in',shape=([1,333,250,3]),dtype = tf.float32, initializer = tf.zeros_initializer()))
vgg.conv2d_relu(vgg.input_img, 0, 'conv1_1')

In [None]:
import os
import time
import numpy as np
import tensorflow as tf

def setup():
    """Make sure the 'checkpoints' and 'outputs' directories exist."""
    for directory in ('checkpoints', 'outputs'):
        safe_mkdir(directory)

class StyleTransfer(object):
    """Optimise an image so that its VGG features match the content of one
    image and the style of another.

    Typical use: construct, call :meth:`build` once, then :meth:`train`.
    """

    def __init__(self, content_img, style_img, img_width, img_height):
        """Load both images and set the hyper-parameters.

        Args:
            content_img: Path of the content image.
            style_img: Path of the style image.
            img_width: Width both images are resized to.
            img_height: Height both images are resized to.
        """
        self.img_width = img_width
        self.img_height = img_height
        # The helper defined in this file is get_resized_image; the previous
        # spelling get_resized_img does not exist and raised NameError.
        self.content_img = get_resized_image(content_img, img_width, img_height)
        self.style_img = get_resized_image(style_img, img_width, img_height)
        self.initial_img = generate_noise_image(self.content_img, img_width, img_height)
        # Global hyper-parameters and other values used while the model runs.
        # Layer the content error is measured on.
        self.content_layer = 'conv4_2'
        # Layers the style error is measured on.
        self.style_layers = ['conv1_1','conv2_1','conv3_1','conv4_1','conv5_1']
        self.content_w = 0.01
        self.style_w = 1
        # One weight per entry of style_layers.
        self.style_layer_w = [0.5,1.0,1.5,3.0,4.0]
        # Tracks training progress; incremented by the optimizer.
        self.gstep = tf.Variable(0,dtype = tf.int32,
                                trainable = False, name = 'global_step')
        self.lr = 2.0

    def create_input(self):
        """Create the single image variable that is fed through the network.

        The same variable is assigned the content image, the style image and
        finally the generated image, so all three share one graph.
        """
        with tf.variable_scope('input'):
            self.input_img = tf.get_variable('in_img',shape = ([1,self.img_height,self.img_width,3]),
                                            dtype = tf.float32,
                                            initializer = tf.zeros_initializer())

    def load_vgg(self):
        """Build VGG-19 on the input variable and mean-center both images."""
        self.vgg = VGG(self.input_img)
        self.vgg.load()
        self.content_img -= self.vgg.mean_pixels
        self.style_img -= self.vgg.mean_pixels

    def _content_loss(self, P, F):
        """Define ``self.content_loss``.

        Args:
            P: Content representation of the content image (numpy array).
            F: Content representation of the generated image (tensor).

        The coefficient 1/2 of the squared-error loss is replaced by
        1/(4s), where s is the number of elements of P (for P of shape
        [5, 5, 3], s = 5 * 5 * 3 = 75), because the plain 1/2 version was
        found to converge very slowly in practice.
        """
        # The original line had an unbalanced closing parenthesis.
        self.content_loss = tf.reduce_sum((F - P) ** 2) / (4.0 * P.size)

    def _gram_matrix(self, F, N, M):
        """Return the N x N matrix of inner products between the feature maps of F.

        Args:
            F: Layer activations; reshaped to (M, N).
            N: Number of feature maps (depth).
            M: Number of positions per feature map (height * width).
        """
        F = tf.reshape(F,(M,N))
        return tf.matmul(tf.transpose(F),F)

    def _single_style_loss(self, a, g):
        """Return the style loss of one layer.

        Args:
            a: Activations of the style image at this layer.
            g: Activations of the generated image at this layer.
        """
        # N = depth, M = height * width
        N = a.shape[3]
        M = a.shape[1]*a.shape[2]
        A = self._gram_matrix(a, N, M)
        G = self._gram_matrix(g, N, M)
        # NOTE(review): reduce_mean divides by the N*N entries of the Gram
        # matrix on top of (2*N*M)**2; a plain sum of squared differences
        # would be the more common form — confirm this scaling is intended.
        return tf.reduce_mean((G-A)**2)/((2*N*M)**2)

    def _style_loss(self,A):
        """Define ``self.style_loss`` as the weighted sum of per-layer losses.

        Args:
            A: Style-image activations, one entry per layer in
                ``self.style_layers`` and in the same order.
        """
        layer_losses = [
            self._single_style_loss(style_act, getattr(self.vgg, layer_name))
            for style_act, layer_name in zip(A, self.style_layers)
        ]
        self.style_loss = sum(weight * loss
                              for weight, loss in zip(self.style_layer_w, layer_losses))

    def losses(self):
        """Define content, style and total loss on the graph."""
        with tf.variable_scope('losses'):
            # Feed the content image through the network in a temporary
            # session to obtain its (fixed) content representation.
            with tf.Session() as sess:
                sess.run(self.input_img.assign(self.content_img))
                # Tensor of the content layer; evaluated now for the content
                # image, and reused below as the generated image's features.
                gen_img_content = getattr(self.vgg, self.content_layer)
                content_img_content = sess.run(gen_img_content)
            self._content_loss(content_img_content,gen_img_content)

            # Same procedure for the style image on all style layers.
            with tf.Session() as sess:
                sess.run(self.input_img.assign(self.style_img))
                style_layers = sess.run([getattr(self.vgg,layer) for layer in self.style_layers])
            self._style_loss(style_layers)
            self.total_loss = self.content_w * self.content_loss + self.style_w * self.style_loss

    def optimize(self):
        """Define the training op that minimises the total loss."""
        self.opt = tf.train.AdamOptimizer(self.lr).minimize(self.total_loss,global_step = self.gstep)

    def create_summary(self):
        """Define scalar summaries for the three losses."""
        with tf.name_scope('summaries'):
            tf.summary.scalar('content loss', self.content_loss)
            tf.summary.scalar('style loss', self.style_loss)
            tf.summary.scalar('total loss', self.total_loss)
            self.summary_op = tf.summary.merge_all()

    def build(self):
        """Build the whole graph; must be called before :meth:`train`."""
        self.create_input()
        self.load_vgg()
        self.losses()
        self.optimize()
        self.create_summary()

    def train(self, n_iters):
        """Run the optimisation, periodically saving images and checkpoints.

        Args:
            n_iters: Step index at which training stops; training resumes
                from the global step stored in the latest checkpoint, if any.
        """
        skip_step = 1
        with tf.Session() as sess:
            sess.run(tf.global_variables_initializer())
            writer = tf.summary.FileWriter('graphs/style_transfer', sess.graph)
            try:
                # Start from the noisy version of the content image.
                sess.run(self.input_img.assign(self.initial_img))

                saver = tf.train.Saver()
                ckpt = tf.train.get_checkpoint_state(os.path.dirname('checkpoints/style_transfer/checkpoint'))
                # Resume the previous run when a checkpoint exists.
                if ckpt and ckpt.model_checkpoint_path:
                    saver.restore(sess, ckpt.model_checkpoint_path)

                initial_step = self.gstep.eval()

                start_time = time.time()
                for index in range(initial_step, n_iters):
                    # Report less often as training progresses.
                    if index >= 5 and index <20:
                        skip_step = 10
                    elif index >= 20:
                        skip_step = 20
                    sess.run(self.opt)
                    if (index + 1) % skip_step == 0:
                        gen_image, total_loss, summary = sess.run([self.input_img,
                                                                  self.total_loss,
                                                                  self.summary_op])
                        # Undo the mean-centering before saving the image.
                        gen_image = gen_image + self.vgg.mean_pixels
                        writer.add_summary(summary, global_step=index)
                        print('Step {}\n Sum: {:5.1f}'.format(index+1,np.sum(gen_image)))
                        print('    Loss: {:5.1f}'.format(total_loss))
                        print('    Took: {} seconds'.format(time.time() - start_time))

                        start_time = time.time()
                        filename = 'outputs/%d.png' % (index)
                        save_image(filename, gen_image)

                        if (index + 1) % 20 == 0 :
                            saver.save(sess,'checkpoints/style_transfer/style_transfer',index)
            finally:
                # Flush and release the event file even if training fails.
                writer.close()

In [None]:
# Create the 'checkpoints' and 'outputs' directories first: training writes
# its images to 'outputs/' and fails if that directory is missing.
setup()
machine = StyleTransfer('content/deadpool.jpg', 'styles/guernica.jpg', 333, 250)
machine.build()
machine.train(300)