In [1]:
import os

import imageio
import tensorflow as tf

from nst_utils import *

In [2]:
# Read the style picture from disk and pass it straight through the
# reshape_and_normalize_image helper; the result is what gets handed to
# load_vgg_model in the next cell.
style_image = reshape_and_normalize_image(imageio.imread("images/monet.jpg"))

In [3]:
# Model for the style image. The returned object is indexed by layer name
# further down (e.g. model_S['conv1_1'] inside compute_style_cost).
# NOTE(review): load_vgg_model is not defined in this notebook; presumably it
# comes from nst_utils and reads the .mat weights on every call -- confirm.
model_S = load_vgg_model(style_image, "pretrained-model/imagenet-vgg-verydeep-19.mat")

In [4]:
# Read the content picture from disk and pass it straight through the
# reshape_and_normalize_image helper; the result feeds both load_vgg_model
# and generate_noise_image below.
content_image = reshape_and_normalize_image(imageio.imread("images/louvre_small.jpg"))

In [5]:
# Model for the content image. Its 'conv4_2' entry is the content target
# used by compute_content_cost later in the notebook.
model_C = load_vgg_model(content_image, "pretrained-model/imagenet-vgg-verydeep-19.mat")

In [6]:
# Starting point of the optimisation, derived from the content image.
# NOTE(review): generate_noise_image is not defined here (presumably from
# nst_utils); if it draws random noise, no seed is set in this notebook, so
# runs may not be reproducible -- confirm.
generated_image = generate_noise_image(content_image)

In [7]:
# Model for the initial generated image. It is only used by the sanity-check
# cells below; model_nn builds its own model_G on every epoch.
model_G = load_vgg_model(generated_image, "pretrained-model/imagenet-vgg-verydeep-19.mat")

In [8]:
def compute_content_cost(a_C, a_G):
    """
    Compute the content cost between two activation tensors.

    Arguments:
    a_C -- activations of the content image at the chosen layer
    a_G -- activations of the generated image at the same layer; must have a
           fully defined static 4-D shape (m, n_H, n_W, n_C)

    Returns:
    J_content -- scalar tensor equal to
                 sum((a_G - a_C)**2) / (4 * n_H * n_W * n_C)
    """

    # Only the spatial and channel sizes enter the normalisation constant.
    _, n_H, n_W, n_C = a_G.get_shape().as_list()

    # The cost is a sum over every element of the squared difference, so it
    # does not depend on the order of the axes. The tensors are therefore
    # compared directly instead of being transposed first.
    squared_error = tf.reduce_sum(tf.pow(a_G - a_C, 2))

    J_content = (1 / (4 * n_H * n_W * n_C)) * squared_error

    return J_content

In [9]:
# Sanity check: content cost between the content image and the initial
# generated image, both taken at layer conv4_2.
a_C, a_G = model_C['conv4_2'], model_G['conv4_2']
J_content = compute_content_cost(a_C, a_G)
print(J_content)

tf.Tensor(7671.205, shape=(), dtype=float32)


In [10]:
# Style (Gram) matrix
def gram_matrix(A):
    """
    Return the Gram matrix of A, i.e. the matrix product of A with its own
    transpose.

    Arguments:
    A -- tensor accepted by tf.transpose / tf.matmul; the caller in this
         notebook passes a 2-D (n_C, n_H*n_W) matrix

    Returns:
    The product A @ transpose(A).
    """
    A_transposed = tf.transpose(A)
    return tf.matmul(A, A_transposed)

In [11]:
# Style cost of a single layer
def compute_layer_style_cost(a_S, a_G):
    """
    Style cost between the style and generated activations of one layer.

    Arguments:
    a_S -- activations of the style image; reshaped with the dimensions read
           from a_G, so it must hold exactly n_H * n_W * n_C elements
    a_G -- activations of the generated image with a static 4-D shape
           (m, n_H, n_W, n_C)

    Returns:
    Scalar tensor: sum((GS - GG)**2) / (4 * n_C**2 * (n_H * n_W)**2), where
    GS and GG are the Gram matrices of the two activations.
    """
    _, height, width, channels = a_G.get_shape().as_list()
    positions = height * width

    # Flatten the spatial axes, then transpose so rows are channels:
    # each matrix becomes (channels, positions).
    style_matrix = tf.transpose(tf.reshape(a_S, [positions, channels]))
    generated_matrix = tf.transpose(tf.reshape(a_G, [positions, channels]))

    # Channel-by-channel correlation matrices.
    gram_style = gram_matrix(style_matrix)
    gram_generated = gram_matrix(generated_matrix)

    scale = 1. / (4 * channels**2 * positions**2)
    gram_gap = gram_style - gram_generated

    return scale * tf.reduce_sum(tf.pow(gram_gap, 2))

In [13]:
# (layer name, coefficient) pairs consumed by compute_style_cost.
# Every listed layer gets the same coefficient.
STYLE_LAYERS = [
    (layer_name, 0.2)
    for layer_name in ('conv1_1', 'conv2_1', 'conv3_1', 'conv4_1', 'conv5_1')
]

In [14]:
# Custom normalisation helper (an addition by the notebook author)
def normalize(data):
    """
    Centre a tensor on its mean and scale it by its value range.

    Arguments:
    data -- floating-point tensor (tf.math.divide_no_nan does not accept
            integer tensors)

    Returns:
    (data - mean(data)) / (max(data) - min(data)), with the statistics taken
    over all elements. When every element is equal the range is 0; the result
    is then all zeros instead of NaN.
    """
    mean = tf.reduce_mean(data)
    value_range = tf.reduce_max(data) - tf.reduce_min(data)

    # A plain division would produce 0/0 = NaN for a constant tensor and
    # poison the loss and gradients. divide_no_nan yields 0 where the
    # denominator is 0 and is an ordinary division otherwise.
    return tf.math.divide_no_nan(data - mean, value_range)


In [15]:
def compute_style_cost(model_S, model_G, STYLE_LAYERS):
    """
    Weighted sum of the style costs of several layers.

    Arguments:
    model_S -- mapping from layer name to the style image's activations
    model_G -- mapping from layer name to the generated image's activations
    STYLE_LAYERS -- iterable of (layer_name, coefficient) pairs

    Returns:
    J_style -- scalar float32 tensor: the sum over the listed layers of
               coefficient * layer style cost
    """
    total = tf.constant(0.0, dtype=tf.float32)

    for name, weight in STYLE_LAYERS:
        # Look up the stored activations of this layer in both models.
        style_raw, generated_raw = model_S[name], model_G[name]

        # Both activations are normalised before being compared.
        style_act = normalize(style_raw)
        generated_act = normalize(generated_raw)

        # Add this layer's weighted contribution.
        total += weight * compute_layer_style_cost(style_act, generated_act)

    return total

In [16]:
# Sanity check: style cost between the style image and the initial generated
# image over all layers in STYLE_LAYERS.
J_style = compute_style_cost(model_S, model_G, STYLE_LAYERS)
print(J_style)

tf.Tensor(5.411967e-07, shape=(), dtype=float32)


In [20]:
# Full loss function
def total_cost(J_content, J_style, alpha = 1, beta = 2e+11):
    """
    Combine the content and style costs into one objective.

    Arguments:
    J_content -- content cost
    J_style -- style cost
    alpha -- weight applied to the content cost
    beta -- weight applied to the style cost

    Returns:
    alpha * J_content + beta * J_style
    """
    weighted_content = alpha * J_content
    weighted_style = beta * J_style
    return weighted_content + weighted_style

In [21]:
# Sanity check: total objective for the initial generated image, using the
# same alpha/beta values that are passed inside model_nn.
J = total_cost(J_content, J_style, alpha = 1, beta = 2e+11)
print(J)

tf.Tensor(115910.54, shape=(), dtype=float32)


In [22]:
# Module-level optimizer; model_nn reads this global when applying gradients.
optimizer = tf.keras.optimizers.Adam(learning_rate=2.0)

In [23]:
def model_nn(model_C, model_S, generated_image, STYLE_LAYERS, num_epochs = 200,
             alpha = 1, beta = 2e+11):
    """
    Optimise the generated image so that it matches the content of one image
    and the style of another.

    Arguments:
    model_C -- model of the content image; only model_C['conv4_2'] is read
    model_S -- model of the style image, passed to compute_style_cost
    generated_image -- initial image handed to load_vgg_model
    STYLE_LAYERS -- (layer_name, coefficient) pairs for the style cost
    num_epochs -- the loop runs for epochs 0..num_epochs inclusive, i.e.
                  num_epochs + 1 update steps
    alpha -- weight of the content cost (default matches the previous
             hard-coded value)
    beta -- weight of the style cost (default matches the previous
            hard-coded value)

    Returns:
    The value of model_G['input'] after the last update step.

    Side effects:
    Uses the module-level `optimizer`. Writes output/<epoch>.png every 20
    epochs and output/generated_image.jpg at the end.
    """

    # save_image is given paths under output/; create the directory up front
    # so a missing directory is not discovered mid-run.
    os.makedirs("output", exist_ok=True)

    for epoch in range(num_epochs+1):
        with tf.GradientTape() as tape:
            # The forward pass is rebuilt inside the tape on every epoch so
            # the cost can be differentiated with respect to the image.
            # NOTE(review): this calls load_vgg_model (and presumably re-reads
            # the .mat weights) once per epoch -- confirm whether the helper
            # can cache them.
            model_G = load_vgg_model(generated_image, "pretrained-model/imagenet-vgg-verydeep-19.mat")
            J_content = compute_content_cost(model_C['conv4_2'], model_G['conv4_2'])
            J_style = compute_style_cost(model_S, model_G, STYLE_LAYERS)
            J = total_cost(J_content, J_style, alpha = alpha, beta = beta)

        # NOTE(review): if load_vgg_model creates a fresh variable each call,
        # the optimizer sees a new variable on every step and its moment
        # estimates may not carry over between epochs -- confirm.
        image_variable = model_G['input']
        grads = tape.gradient(J, [image_variable])
        optimizer.apply_gradients(zip(grads, [image_variable]))
        generated_image = image_variable

        if epoch%20 == 0:
            # The costs printed here were computed before this epoch's update;
            # the image saved below is the one after the update.
            tf.print('After epoch:', epoch)
            tf.print('total cost = ', J)
            # Weighted terms use the same alpha/beta as the objective, so the
            # log always adds up to the total.
            tf.print('content cost = ', J_content*alpha)
            tf.print('style cost = ', J_style*beta)

            save_image("output/" + str(epoch) + ".png", generated_image)

    # After training, save the final generated image.
    save_image('output/generated_image.jpg', generated_image)

    return generated_image

In [25]:
# Short 20-epoch run of the style-transfer loop.
# NOTE(review): the call is the last expression of the cell, so the returned
# image tensor is echoed in full as cell output; consider binding it to a
# name (or ending the line with ';') to keep the output readable.
model_nn(model_C, model_S, generated_image, STYLE_LAYERS, num_epochs = 20)

After epoch: 0
total cost =  115910.539
content cost =  7671.20508
style cost =  108239.336
After epoch: 20
total cost =  45191.3828
content cost =  7487.06
style cost =  37704.3242


<tf.Variable 'Variable:0' shape=(1, 300, 400, 3) dtype=float32, numpy=
array([[[[-46.54687   , -14.251657  ,  10.505059  ],
         [-28.419853  , -12.724619  ,   1.6518912 ],
         [-44.472576  , -14.788467  ,   7.0801272 ],
         ...,
         [-30.25907   ,  -1.5379884 , -34.888832  ],
         [-38.846283  , -18.535849  , -30.539268  ],
         [-38.392197  ,  -6.954854  ,  -7.6627355 ]],

        [[-12.997549  , -18.848644  , -22.0408    ],
         [-34.579197  , -37.296764  , -10.941256  ],
         [-28.486677  , -22.586496  ,   5.022859  ],
         ...,
         [-14.788296  , -19.345076  , -10.3291    ],
         [-32.38979   ,  -7.7613444 , -18.652178  ],
         [-25.397675  ,   2.7211418 , -23.743399  ]],

        [[-42.048943  , -28.77278   , -17.590612  ],
         [-42.43826   , -24.82773   ,   0.4477296 ],
         [-31.090736  , -24.140501  ,   6.21704   ],
         ...,
         [-23.098715  ,  -5.9190526 , -26.699     ],
         [-30.139278  , -17.209526 