In [1]:
!pip install tensorflow==1.13.1

Collecting tensorflow==1.13.1
  Downloading tensorflow-1.13.1-cp37-cp37m-manylinux1_x86_64.whl (92.6 MB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m92.6/92.6 MB[0m [31m10.9 MB/s[0m eta [36m0:00:00[0m
[?25hCollecting tensorflow-estimator<1.14.0rc0,>=1.13.0
  Downloading tensorflow_estimator-1.13.0-py2.py3-none-any.whl (367 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m367.6/367.6 kB[0m [31m27.8 MB/s[0m eta [36m0:00:00[0m
Collecting keras-applications>=1.0.6
  Downloading Keras_Applications-1.0.8-py3-none-any.whl (50 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m50.7/50.7 kB[0m [31m4.9 MB/s[0m eta [36m0:00:00[0m
Collecting astor>=0.6.0
  Downloading astor-0.8.1-py2.py3-none-any.whl (27 kB)
Collecting tensorboard<1.14.0,>=1.13.0
  Downloading tensorboard-1.13.1-py3-none-any.whl (3.2 MB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m3.2/3.2 MB[0m [31m28.8 MB/s[0m eta [36m0:00:0

In [2]:
import os
import math
import numpy as np
import tensorflow as tf
from PIL import Image
import time
print(tf.__version__)

  _np_qint8 = np.dtype([("qint8", np.int8, 1)])
  _np_quint8 = np.dtype([("quint8", np.uint8, 1)])
  _np_qint16 = np.dtype([("qint16", np.int16, 1)])
  _np_quint16 = np.dtype([("quint16", np.uint16, 1)])
  _np_qint32 = np.dtype([("qint32", np.int32, 1)])
  np_resource = np.dtype([("resource", np.ubyte, 1)])


1.13.1


In [3]:
# Per-channel means in B, G, R order: VGGNet.build subtracts them from the
# blue, green and red channels respectively.
VGG_MEAN = [103.939, 116.779, 123.68]


class VGGNet:
    """Builds the VGG-16 net structure from pre-trained parameters.

    All parameters are read from ``data_dict`` and embedded in the graph as
    ``tf.constant`` tensors, so the network itself creates no variables.
    """

    def __init__(self, data_dict):
        """Stores the weights of the pre-trained model.

        Args:
            data_dict: mapping from layer name (e.g. ``'conv1_1'``) to a
                sequence whose item 0 holds the layer's weights and whose
                item 1 holds its bias.
        """
        self.data_dict = data_dict

    def get_conv_filter(self, name):
        """Returns the convolution kernel of layer ``name`` as a constant."""
        return tf.constant(self.data_dict[name][0], name='conv')

    def get_fc_weight(self, name):
        """Returns the weight matrix of fully-connected layer ``name``."""
        return tf.constant(self.data_dict[name][0], name='fc')

    def get_bias(self, name):
        """Returns the bias of layer ``name`` as a constant."""
        return tf.constant(self.data_dict[name][1], name='bias')

    def conv_layer(self, x, name):
        """Builds a convolution layer: conv2d + bias + ReLU.

        Args:
            x: input tensor.
            name: layer name; selects the parameters in ``data_dict`` and is
                used as the name scope.

        Returns:
            The activated output tensor.
        """
        with tf.name_scope(name):
            conv_w = self.get_conv_filter(name)
            conv_b = self.get_bias(name)
            # tf.nn.conv2d(input, filter, strides, padding) where filter is
            # [filter_height, filter_width, in_channels, out_channels] and
            # [1, 1, 1, 1] is the stride along each input dimension.
            conv = tf.nn.conv2d(x, conv_w, [1, 1, 1, 1], padding='SAME')
            conv = tf.nn.bias_add(conv, conv_b)
            conv = tf.nn.relu(conv)
            return conv

    def pooling_layer(self, x, name):
        """Builds a 2x2 max-pooling layer with stride 2."""
        return tf.nn.max_pool(x, ksize=[1, 2, 2, 1],
                              strides=[1, 2, 2, 1],
                              padding='SAME',
                              name=name)

    def flatten_layer(self, x, name):
        """Builds a flatten layer.

        Args:
            x: input tensor whose non-batch dimensions are statically known.
            name: name scope of the layer.

        Returns:
            A 2-D tensor of shape [batch_size, product of the other dims].
        """
        with tf.name_scope(name):
            x_shape = x.get_shape().as_list()
            # Multiply all non-batch dimensions together to get the
            # flattened feature size.
            dim = 1
            for d in x_shape[1:]:
                dim *= d
            # -1 lets TensorFlow infer the batch dimension.
            return tf.reshape(x, [-1, dim])

    # Original (misspelled) method name, kept so existing callers keep working.
    flattrn_layer = flatten_layer

    def fully_connected_layers(self, x, name, activation=tf.nn.relu):
        """Builds a fully-connected layer.

        Args:
            x: 2-D input tensor.
            name: layer name; selects the parameters in ``data_dict`` and is
                used as the name scope.
            activation: callable applied to the affine output, or ``None``
                to return the affine output unchanged.

        Returns:
            The output tensor of the layer.
        """
        with tf.name_scope(name):
            fc_w = self.get_fc_weight(name)
            fc_b = self.get_bias(name)
            fc = tf.matmul(x, fc_w)
            fc = tf.nn.bias_add(fc, fc_b)
            if activation is None:
                return fc
            return activation(fc)

    def build(self, x_rgb, include_top=False):
        """Builds the VGG-16 network structure on top of ``x_rgb``.

        The output of every layer is stored on the instance (``conv1_1`` ...
        ``pool5``, plus ``flatten5``, ``fc6``, ``fc7``, ``fc8`` and ``prob``
        when ``include_top`` is true).

        Args:
            x_rgb: image tensor of shape [batch, 224, 224, 3] with the
                channels in R, G, B order.
            include_top: when true, also build the fully-connected
                classifier head. It is off by default because the
                fully-connected layers are slow and memory hungry and are
                not needed to extract convolutional features.
        """
        start_time = time.time()
        print('building model...')
        r, g, b = tf.split(x_rgb, [1, 1, 1], axis=3)
        # Subtract the mean from every channel and merge them back together
        # in B, G, R order.
        x_bgr = tf.concat([b - VGG_MEAN[0],
                           g - VGG_MEAN[1],
                           r - VGG_MEAN[2]], axis=3)
        assert x_bgr.get_shape().as_list()[1:] == [224, 224, 3]

        self.conv1_1 = self.conv_layer(x_bgr, 'conv1_1')
        self.conv1_2 = self.conv_layer(self.conv1_1, 'conv1_2')
        self.pool1 = self.pooling_layer(self.conv1_2, 'pool1')

        self.conv2_1 = self.conv_layer(self.pool1, 'conv2_1')
        self.conv2_2 = self.conv_layer(self.conv2_1, 'conv2_2')
        self.pool2 = self.pooling_layer(self.conv2_2, 'pool2')

        self.conv3_1 = self.conv_layer(self.pool2, 'conv3_1')
        self.conv3_2 = self.conv_layer(self.conv3_1, 'conv3_2')
        self.conv3_3 = self.conv_layer(self.conv3_2, 'conv3_3')
        self.pool3 = self.pooling_layer(self.conv3_3, 'pool3')

        self.conv4_1 = self.conv_layer(self.pool3, 'conv4_1')
        self.conv4_2 = self.conv_layer(self.conv4_1, 'conv4_2')
        self.conv4_3 = self.conv_layer(self.conv4_2, 'conv4_3')
        self.pool4 = self.pooling_layer(self.conv4_3, 'pool4')

        self.conv5_1 = self.conv_layer(self.pool4, 'conv5_1')
        self.conv5_2 = self.conv_layer(self.conv5_1, 'conv5_2')
        self.conv5_3 = self.conv_layer(self.conv5_2, 'conv5_3')
        self.pool5 = self.pooling_layer(self.conv5_3, 'pool5')

        if include_top:
            self.flatten5 = self.flatten_layer(self.pool5, 'flatten')
            self.fc6 = self.fully_connected_layers(self.flatten5, 'fc6')
            self.fc7 = self.fully_connected_layers(self.fc6, 'fc7')
            # fc8 yields the logits that feed the softmax, so no ReLU here.
            self.fc8 = self.fully_connected_layers(self.fc7, 'fc8',
                                                   activation=None)
            self.prob = tf.nn.softmax(self.fc8, name='prob')

        print('building model finished: %4ds' % (time.time() - start_time))

In [4]:
# Smoke test: check that the model builds and how long that takes.
vgg16_npy_path = '../input/vgg16model/vgg16.npy'
# NOTE(review): allow_pickle=True unpickles arbitrary Python objects; only
# load weight files that come from a trusted source.
data_dict = np.load(vgg16_npy_path, encoding='latin1', allow_pickle=True).item()
vgg16_for_result = VGGNet(data_dict)
# Build inside a scratch graph so this test does not leave an unused copy of
# the VGG constants in the default graph that is used for training later on.
with tf.Graph().as_default():
    content = tf.placeholder(tf.float32, shape=[1, 224, 224, 3])
    vgg16_for_result.build(content)

building model...
building model finished:    0s


In [5]:
# Delete the output directory left over from any previous run.
!rm -rf /kaggle/working/run_ImageStyleTransfer

In [6]:
vgg16_npy_path = '/kaggle/input/vgg16model/vgg16.npy'
content_img_path = '/kaggle/input/imagestyletransfer/gugong.jpg'  # path of the content image
style_img_path = '/kaggle/input/imagestyletransfer/xingkong.jpeg'  # path of the style image

num_steps = 100  # number of training steps
learning_rate = 10
learining_rate = learning_rate  # original misspelled name, still referenced when the optimiser is created
lambda_content = 0.1  # weight of the content loss (the content loss is usually large)
lambda_style = 500  # weight of the style loss (the style loss is usually small)
output_dir = '/kaggle/working/run_ImageStyleTransfer'
# exist_ok=True makes this a no-op when the directory is already there and
# avoids the check-then-create race of os.path.exists() + os.mkdir().
os.makedirs(output_dir, exist_ok=True)

In [7]:
# Initialise the image that will be optimised.
def initial_result(shape, mean, stddev, seed=None):
    """Creates the variable holding the generated image.

    The variable is initialised with values drawn from a truncated normal
    distribution.

    Args:
        shape: shape of the variable.
        mean: mean of the distribution.
        stddev: standard deviation of the distribution.
        seed: optional integer forwarded to ``tf.truncated_normal`` as its
            ``seed`` argument. The default ``None`` keeps the initialisation
            unseeded, as before.

    Returns:
        A ``tf.Variable`` with the requested shape.
    """
    initial = tf.truncated_normal(shape, mean=mean, stddev=stddev, seed=seed)
    return tf.Variable(initial)


def read_image(image_name):
    """Loads an image file as a batch containing one RGB image.

    Args:
        image_name: path of the image file.

    Returns:
        An int32 numpy array of shape (1, height, width, 3).
    """
    # The context manager closes the underlying file once the pixels have
    # been copied; convert('RGB') guarantees three channels even for
    # grayscale, palette or RGBA files.
    with Image.open(image_name) as img:
        np_img = np.array(img.convert('RGB'))  # (height, width, 3)
    return np.asarray([np_img], dtype=np.int32)  # (1, height, width, 3)


# The Gram matrix holds the inner product between the feature map of every
# channel i and the feature map of every channel j; each entry can be read as
# how strongly the two channels are correlated.
def gram_matrix(x):
    """Calculates the Gram matrix of a feature tensor.

    Args:
        x: 4-D feature tensor with a fully defined static shape, laid out
            as [batch, height, width, channels].

    Returns:
        A tensor of shape [batch, channels, channels].
    """
    # Static size of every dimension: batch, height, width, channels.
    batch, height, width, channels = x.get_shape().as_list()
    flat = tf.reshape(x, [batch, height * width, channels])
    # adjoint_a=True transposes the first operand, so the product is
    # [channels, h*w] x [h*w, channels] -> [channels, channels]: the
    # similarity between any two feature columns.
    products = tf.matmul(flat, flat, adjoint_a=True)
    # Divide by a constant so the resulting values do not become too large.
    return products / tf.constant(height * width * channels, tf.float32)

In [8]:
result = initial_result((1, 224, 224, 3), 127.5, 20)
content_value = read_image(content_img_path)
style_value = read_image(style_img_path)
# Fail early with a readable message instead of at the first session run:
# both images are fed into placeholders of shape [1, 224, 224, 3].
for _image_name, _image_value in (('content', content_value),
                                  ('style', style_value)):
    assert _image_value.shape == (1, 224, 224, 3), (
        '%s image has shape %s, expected (1, 224, 224, 3)'
        % (_image_name, _image_value.shape))
# Needed for version 1.0
content = tf.placeholder(tf.float32, shape=[1, 224, 224, 3])
style = tf.placeholder(tf.float32, shape=[1, 224, 224, 3])
# NOTE(review): allow_pickle=True unpickles arbitrary Python objects; only
# load weight files that come from a trusted source.
data_dict = np.load(vgg16_npy_path, encoding='latin1', allow_pickle=True).item()
# Create three VGGNets: one per input (content image, style image, result).
vgg_for_content = VGGNet(data_dict)
vgg_for_style = VGGNet(data_dict)
vgg_for_result = VGGNet(data_dict)
vgg_for_content.build(content)
vgg_for_style.build(style)
vgg_for_result.build(result)
# Layer hyper-parameters; using several layers tends to give better results.
# Candidates: 'conv1_2', 'conv2_2', 'conv3_3', 'conv4_3', 'conv5_3'.
content_layers = ['conv1_2']
style_layers = ['conv4_3']
# The result network must be compared on exactly the same layers as the
# content / style networks, so both sides are derived from one list of names.
content_features = [getattr(vgg_for_content, layer) for layer in content_layers]
result_content_features = [getattr(vgg_for_result, layer) for layer in content_layers]
style_features = [getattr(vgg_for_style, layer) for layer in style_layers]
result_style_features = [getattr(vgg_for_result, layer) for layer in style_layers]

building model...
building model finished:    0s
building model...
building model finished:    0s
building model...
building model finished:    0s


In [9]:
# Gram matrices of the style image; a Gram matrix holds the similarity
# between every pair of channels.
style_gram = list(map(gram_matrix, style_features))
# Gram matrices of the result image.
result_style_gram = list(map(gram_matrix, result_style_features))

In [10]:
def _sum_of_layer_mse(targets, outputs, axes):
    """Sums, over all layer pairs, the mean squared difference reduced over ``axes``."""
    total = tf.zeros(1, tf.float32)
    # One loss term per layer.
    for target, output in zip(targets, outputs):
        total = total + tf.reduce_mean((target - output) ** 2, axes)
    return total


# Feature maps have shape [1, width, height, channel]: reduce over all
# non-batch axes.
content_loss = _sum_of_layer_mse(content_features, result_content_features, [1, 2, 3])
# Gram matrices are 3-D: reduce over the two channel axes.
style_loss = _sum_of_layer_mse(style_gram, result_style_gram, [1, 2])
loss = lambda_content * content_loss + lambda_style * style_loss
optimizer = tf.train.AdamOptimizer(learining_rate)
train_op = optimizer.minimize(loss)

In [11]:
init_op = tf.global_variables_initializer()
fetches = [loss, content_loss, style_loss, train_op]
feed = {content: content_value, style: style_value}
with tf.Session() as se:
    se.run(init_op)
    for step in range(num_steps):
        # One optimisation step; the losses are fetched in the same run.
        loss_value, content_loss_value, style_loss_value, _ = se.run(
            fetches, feed_dict=feed)
        print('step: %d, loss_value: %8.4f, content_loss: %8.4f, style_loss: %8.4f'
              % (step + 1, loss_value[0], content_loss_value[0], style_loss_value[0]))
        # Store the result image of every step.
        result_img_path = os.path.join(output_dir, 'result_%05d.jpg' % (step + 1))
        # Read the updated image, clamp it to the valid pixel range and
        # cast it to 8-bit before handing it to PIL.
        result_value = np.clip(result.eval(session=se)[0], 0, 255)
        img_arr = result_value.astype(np.uint8)
        img = Image.fromarray(img_arr)
        img.save(result_img_path)


2022-07-30 11:57:19.279252: I tensorflow/core/platform/cpu_feature_guard.cc:141] Your CPU supports instructions that this TensorFlow binary was not compiled to use: AVX2 AVX512F FMA
2022-07-30 11:57:19.283735: I tensorflow/core/platform/profile_utils/cpu_utils.cc:94] CPU Frequency: 2000194999 Hz
2022-07-30 11:57:19.283977: I tensorflow/compiler/xla/service/service.cc:150] XLA service 0x5640056a39f0 executing computations on platform Host. Devices:
2022-07-30 11:57:19.284004: I tensorflow/compiler/xla/service/service.cc:158]   StreamExecutor device (0): <undefined>, <undefined>


step: 1, loss_value: 14285.2324, content_loss: 60667.9180, style_loss:  16.4369
step: 2, loss_value: 11850.0840, content_loss: 46002.8164, style_loss:  14.4996
step: 3, loss_value: 8979.9717, content_loss: 37214.1484, style_loss:  10.5171
step: 4, loss_value: 7265.5693, content_loss: 32360.7480, style_loss:   8.0590
step: 5, loss_value: 6518.0308, content_loss: 29499.8984, style_loss:   7.1361
step: 6, loss_value: 6145.0835, content_loss: 27918.6113, style_loss:   6.7064
step: 7, loss_value: 5186.3701, content_loss: 26989.9336, style_loss:   4.9748
step: 8, loss_value: 5144.0249, content_loss: 26547.0156, style_loss:   4.9786
step: 9, loss_value: 4565.4263, content_loss: 26261.2812, style_loss:   3.8786
step: 10, loss_value: 4495.4644, content_loss: 26083.2070, style_loss:   3.7743
step: 11, loss_value: 4245.8828, content_loss: 25916.8730, style_loss:   3.3084
step: 12, loss_value: 4188.7603, content_loss: 25682.7246, style_loss:   3.2410
step: 13, loss_value: 3941.8713, content_loss: 