In [5]:
import tensorflow as tf
import numpy as np
import scipy.io
import scipy.misc
import os
import time

In [14]:
def the_current_time():
    """Print the current local time formatted as ``YYYY-MM-DD HH:MM:SS``."""
    # Truncate to whole seconds before converting to a local struct_time.
    whole_seconds = int(time.time())
    stamp = time.strftime('%Y-%m-%d %H:%M:%S', time.localtime(whole_seconds))
    print(stamp)

In [6]:
# --- Configuration: paths, image geometry, loss weights --------------------
CONTENT_IMG='content.jpg'
STYLE_IMG='style.jpg'
OUTPUT_DIR='neural_style_transfer_tensorflow/'

# Create the output directory if it is missing. The previous check
# (`if not OUTPUT_DIR`) tested the truthiness of the non-empty path string,
# so it was always False and the directory was never created.
os.makedirs(OUTPUT_DIR, exist_ok=True)

# Size every image is resized to, and the number of colour channels.
IMAGE_H=800
IMAGE_W=600
COLOR_C=3

# Fraction of random noise mixed into the content image for the initial guess.
NOISE_RATIO=0.7
# Weights applied to the content loss (BETA) and the style loss (ALPHA).
BETA=5
ALPHA=100

VGG_MODEL='imagenet-vgg-verydeep-19.mat'
# Per-channel means subtracted from the raw 0-255 pixel values as a
# normalisation step (and added back when saving). Shaped (1,1,1,3) so it
# broadcasts over a (batch, height, width, channel) array.
# NOTE(review): presumably the VGG training-set means in RGB order - confirm.
MEAN_VALUES=np.array([123.68, 116.779, 103.939]).reshape((1,1,1,3))


In [13]:
def load_image(path):
    """Read an image file and prepare it as a mean-subtracted batch of one.

    Args:
        path: Path of the image file to read.

    Returns:
        The image resized to ``(IMAGE_H, IMAGE_W)``, with a leading axis of
        length 1 prepended and ``MEAN_VALUES`` subtracted.

    NOTE(review): ``scipy.misc.imread`` / ``imresize`` were deprecated in
    SciPy 1.0 and removed in later releases, so this needs an old SciPy.
    """
    # Read from disk, then force the configured height and width.
    resized = scipy.misc.imresize(scipy.misc.imread(path), (IMAGE_H, IMAGE_W))
    # Prepend a batch axis: shape (...) becomes (1, ...).
    batch = np.reshape(resized, (1,) + resized.shape)
    return batch - MEAN_VALUES

In [9]:
def load_vgg_model(path):
    """Build the convolutional part of the VGG network as a TensorFlow graph.

    The weights are read from a ``.mat`` file with ``scipy.io.loadmat`` and
    frozen as ``tf.constant``; the only variable is ``graph['input']``, the
    image itself. Pooling steps use average pooling.

    Args:
        path: Path to the ``.mat`` weights file (see ``VGG_MODEL``).

    Returns:
        dict mapping layer names (``'input'``, ``'conv1_1'`` ... ``'avgpool5'``)
        to the corresponding TensorFlow tensors, in network order.

    Raises:
        AssertionError: if the layer name stored at an index of the ``.mat``
            file does not match the name expected at that position.
    """
    vgg=scipy.io.loadmat(path)
    vgg_layers=vgg['layers']

    def _weights(layer,expected_layer_name):
        """Return ``(W, b)`` stored at index ``layer`` of the weights file."""
        W=vgg_layers[0][layer][0][0][2][0][0]
        b=vgg_layers[0][layer][0][0][2][0][1]
        layer_name=vgg_layers[0][layer][0][0][0][0]
        # Sanity check: make sure the index really points at the wanted layer.
        assert layer_name == expected_layer_name
        return W,b

    def _conv2d_relu(prev_layer,layer,layer_name):
        """Apply the stored convolution (stride 1, SAME padding), bias and ReLU."""
        W,b=_weights(layer,layer_name)   # parameters of this specific layer
        W=tf.constant(W)
        b=tf.constant(np.reshape(b,(b.size)))
        # strides has four elements; the first and last must be 1 and the
        # middle two are the step sizes along the two spatial axes.
        return tf.nn.relu(tf.nn.conv2d(prev_layer,filter=W,strides=[1,1,1,1],
                                       padding='SAME')+b)

    def _avgpool(prev_layer):
        """2x2 average pooling with stride 2 and SAME padding."""
        return tf.nn.avg_pool(prev_layer,ksize=[1,2,2,1],strides=[1,2,2,1],padding='SAME')

    # (graph key, index into the weights file); index None marks a pooling step.
    layer_plan=[
        ('conv1_1',0),('conv1_2',2),('avgpool1',None),
        ('conv2_1',5),('conv2_2',7),('avgpool2',None),
        ('conv3_1',10),('conv3_2',12),('conv3_3',14),('conv3_4',16),('avgpool3',None),
        ('conv4_1',19),('conv4_2',21),('conv4_3',23),('conv4_4',25),('avgpool4',None),
        ('conv5_1',28),('conv5_2',30),('conv5_3',32),('conv5_4',34),('avgpool5',None),
    ]

    graph={}
    graph['input']=tf.Variable(np.zeros((1,IMAGE_H,IMAGE_W,COLOR_C)),dtype='float32')
    # Chain every layer onto the previous one. The original spelled each line
    # out by hand and the 'conv1_2' line contained a typo (`_conv2d_r+elu`)
    # that raised NameError.
    prev_layer=graph['input']
    for layer_name,layer_index in layer_plan:
        if layer_index is None:
            prev_layer=_avgpool(prev_layer)
        else:
            prev_layer=_conv2d_relu(prev_layer,layer_index,layer_name)
        graph[layer_name]=prev_layer
    return graph

In [8]:
def generate_noise_image(content_image,noise_ratio=NOISE_RATIO):
    """Blend uniform random noise with the content image.

    Args:
        content_image: Array broadcastable against
            ``(1, IMAGE_H, IMAGE_W, COLOR_C)``.
        noise_ratio: Weight of the noise; the content image gets
            ``1 - noise_ratio``.

    Returns:
        ``noise * noise_ratio + content_image * (1 - noise_ratio)``.
    """
    shape = (1, IMAGE_H, IMAGE_W, COLOR_C)
    # Uniform samples drawn from the half-open interval [-20, 20).
    noise = np.random.uniform(low=-20, high=20, size=shape).astype('float32')
    content_weight = 1 - noise_ratio
    return noise * noise_ratio + content_image * content_weight

In [7]:
def save_image(path,image):
    """Undo the mean subtraction and write the first image of a batch to disk.

    Args:
        path: Destination file path.
        image: Mean-subtracted array whose first axis is the batch axis.

    NOTE(review): ``scipy.misc.imsave`` was deprecated in SciPy 1.0 and
    removed in later releases, so this needs an old SciPy.
    """
    # Add the channel means back, then drop the batch axis.
    restored = (image + MEAN_VALUES)[0]
    # Clamp to the valid pixel range before the lossy cast to 8-bit.
    pixels = np.clip(restored, 0, 255).astype('uint8')
    scipy.misc.imsave(path, pixels)

In [10]:
def content_loss_func(sess,model):
    """Build the content-loss tensor from the ``'conv4_2'`` layer.

    The layer is evaluated once with ``sess.run`` to obtain a fixed target
    (whatever image is currently assigned to the model input); the returned
    tensor compares the still-symbolic layer against that target.

    Args:
        sess: Session used to evaluate the target activations.
        model: Layer dict as returned by ``load_vgg_model``.

    Returns:
        Tensor ``(1 / (4*M*N)) * sum((x - p)**2)`` where ``N`` is the size of
        axis 3 of the target and ``M`` the product of axes 1 and 2.
    """
    layer = model['conv4_2']
    # First operand is fixed (a numpy snapshot); the second one, `layer`,
    # keeps changing as the input variable is optimised.
    target = sess.run(layer)
    n_channels = target.shape[3]
    n_positions = target.shape[1]*target.shape[2]
    squared_error = tf.reduce_sum(tf.pow(layer-target,2))
    return (1/(4*n_positions*n_channels))*squared_error

In [11]:
# (layer name, weight) pairs that contribute to the style loss.
STYLE_LAYERS=[('conv1_1',0.5),('conv2_1',1.0),('conv3_1',1.5),('conv4_1',3.0),('conv5_1',4.0)]

def style_loss_func(sess,model):
    """Build the weighted style-loss tensor over ``STYLE_LAYERS``.

    For each listed layer the activations are evaluated once with
    ``sess.run`` (fixed target, from whatever image is currently assigned to
    the model input) and compared, via Gram matrices, with the symbolic layer.

    Args:
        sess: Session used to evaluate the target activations.
        model: Layer dict as returned by ``load_vgg_model``.

    Returns:
        Sum over layers of ``w * (1 / (4*N^2*M^2)) * sum((G - A)**2)``.
    """
    def _gram_matrix(F,N,M):
        """Gram matrix (N x N) of activations ``F`` flattened to (M, N)."""
        # Bug fix: the target shape was written `(F, M)`, i.e. the tensor
        # itself was passed as a dimension. Flatten the spatial positions
        # into M rows with N channel columns instead.
        Ft=tf.reshape(F,(M,N))
        return tf.matmul(tf.transpose(Ft),Ft)
    def _style_loss(a,x):
        """Style loss of one layer; ``a`` is the fixed target, ``x`` symbolic."""
        N=a.shape[3]                 # number of channels
        M=a.shape[1]*a.shape[2]      # number of spatial positions
        A=_gram_matrix(a,N,M)
        G=_gram_matrix(x,N,M)
        # Gram-matrix losses are normalised by 4*N^2*M^2 (Gatys et al.);
        # the previous 4*M*N factor was the content-loss normalisation.
        return (1/(4*N**2*M**2))*tf.reduce_sum(tf.pow(G-A,2))
    return sum([_style_loss(sess.run(model[layer_name]),model[layer_name])*w
                for layer_name,w in STYLE_LAYERS])

In [15]:
# Print a timestamp before starting the optimisation cell below.
the_current_time()

2020-03-01 10:56:08


In [None]:
# Run the style transfer: build both loss terms against fixed targets, then
# optimise the model's input image with Adam and save a snapshot every 100
# iterations.
with tf.Session() as sess:
    # Bug fix: the constant is named CONTENT_IMG (CONTENT_IMAGE is undefined).
    content_image=load_image(CONTENT_IMG)
    style_image=load_image(STYLE_IMG)
    model=load_vgg_model(VGG_MODEL)

    input_image=generate_noise_image(content_image)
    # Bug fix: the initializer op must be created by calling the function.
    sess.run(tf.global_variables_initializer())

    # Feed the content image so the content target is captured from it.
    # Bug fix: content_loss was used below but never computed.
    sess.run(model['input'].assign(content_image))
    content_loss=content_loss_func(sess, model)

    # Feed the style image so the style targets are captured from it.
    sess.run(model['input'].assign(style_image))
    style_loss=style_loss_func(sess, model)

    total_loss=BETA*content_loss+ALPHA*style_loss
    optimizer=tf.train.AdamOptimizer(2.0)
    train=optimizer.minimize(total_loss)

    # Re-initialise (this also covers the optimizer's own variables), then
    # start the optimisation from the noisy content image.
    sess.run(tf.global_variables_initializer())
    sess.run(model['input'].assign(input_image))

    # Make sure the snapshot directory exists before the first save.
    os.makedirs(OUTPUT_DIR, exist_ok=True)

    ITERATIONS=2000
    for i in range(ITERATIONS):
        sess.run(train)
        if i%100==0:
            output_image=sess.run(model['input'])
            # Bug fix: the function was referenced but not called.
            the_current_time()
            print('ITERATIONS %d' %i)
            print('Cost: ', sess.run(total_loss))

            save_image(os.path.join(OUTPUT_DIR, 'output_%d.jpg' % i), output_image)

In [4]:
# Scratch check: `+` concatenates tuples, which is how load_image prepends
# a leading axis of length 1 to the image shape.
(1,)+(2,3)

(1, 2, 3)