### 第11天：TensorFlow2项目实战—快速风格迁移

In [4]:
import tensorflow as tf
from tensorflow.keras.layers import Conv2D, ReLU, UpSampling2D
# 需要根据python版本，tf版本安装对应的tensorflow_addons
from tensorflow_addons.layers import InstanceNormalization

# 计算损失的python文件
from lossnetwork import loadimg, grammatrix, stylelossfunc, contentlossfunc,totalvariationloss

import matplotlib.pyplot as plt
from PIL import Image
import numpy as np
import sys
import os
import shutil
plt.rcParams['font.family'] = 'SimHei'  # use the SimHei font so Chinese text shows up in plots
plt.rcParams['axes.unicode_minus']=False  # let plots display the minus sign

# Report the interpreter version, the TensorFlow version and the number of visible GPUs.
print('python版本：', sys.version)
print('tensorlfow版本:',tf.__version__)
print('可用GPU数量:', len(tf.config.experimental.list_physical_devices('GPU')))
print('GPU显卡信息:')
# A v1 session is created with log_device_placement=True; the recorded cell
# output shows a device mapping being printed at this point.
sess = tf.compat.v1.Session(config=tf.compat.v1.ConfigProto(log_device_placement=True))

python版本： 3.7.9 (default, Aug 31 2020, 17:10:11) [MSC v.1916 64 bit (AMD64)]
tensorlfow版本: 2.1.0
可用GPU数量: 1
GPU显卡信息:
Device mapping:
/job:localhost/replica:0/task:0/device:GPU:0 -> device: 0, name: GeForce RTX 2080 Ti, pci bus id: 0000:01:00.0, compute capability: 7.5



基于迭代生成风格迁移图片，是将迁移图片作为参数，通过模型的训练不断更新参数，这是一个模型训练的过程，因此该方法速度缓慢。试想如果可以构建一个带有图像风格的转换网络，将内容图像作为这个网络的输入，输出的是风格迁移后图像，这就会很快。以上就是快速风格迁移的思路，参考论文[Perceptual Losses for Real-Time Style Transfer and Super-Resolution](https://arxiv.org/pdf/1603.08155v1.pdf)以及[补充材料](https://static-content.springer.com/esm/chp%3A10.1007%2F978-3-319-46475-6_43/MediaObjects/419974_1_En_43_MOESM1_ESM.pdf)。

### 1、快速风格迁移原理


原理：对于给定的一个风格图片，将大量的内容图片作为输入数据集，并按照基于迭代的风格迁移的方式计算损失，更新模型参数训练模型。模型训练完毕后，将任意一个内容图像输入进这个训练好的模型后，就可以直接输出带有给定风格的风格迁移后的图像。 

快速风格迁移的网络结构包含两部分：Image Transform Net（图像转换模型）和Loss Network（计算损失网络），如下图所示：

![11.1.png](11.1.png)

其中上图中的图像转换模型就是一个需要训练的模型，该模型训练完成后，就可以对于任意给定的图像，直接输出带有某种风格的图像；计算损失网络，其实也就是特征提取网络，通过计算图像转换网络的输出图像和给定的风格图片之间的风格损失、和输入图片之间的内容损失，从而为图像转换网络的更新参数提供梯度，一般选择已经训练好的模型。
![11.2.png](11.2.png)


#### 1.1 图像转换模型(Image Transform Net)

该模型一般使用下面的深度残差神经网络：

![11.3.png](11.3.png)

图像转换模型包括1个反射填充层，3个下采样卷积层、5个残差块、3个上采样卷积层。除了最末的输出层以外，所有的卷积层(残差块内除外)后都连接一个Batch Normalization和Relu层。每个残差块的结构如下：

![11.4.png](11.4.png)

#### 1.2 计算损失网络(Loss Network)

和迭代方法的计算是一样的，本文以VGG16为例。直接运行下面的语句下载较慢。模型百度网盘链接，密码。下载完成后，在文件夹地址栏输入%userprofile%并运行，将下载的*.h5文件存放到该文件夹下的.keras/models文件夹中，再次运行下面的下载语句即可。 

In [5]:
# Downloading the weights directly through this statement is rather slow
VGG16model = tf.keras.applications.VGG16(include_top=False, weights='imagenet') # pretrained model without the fully connected layers

In [6]:
# Inspect the model structure
VGG16model.summary()

Model: "vgg16"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
input_1 (InputLayer)         [(None, None, None, 3)]   0         
_________________________________________________________________
block1_conv1 (Conv2D)        (None, None, None, 64)    1792      
_________________________________________________________________
block1_conv2 (Conv2D)        (None, None, None, 64)    36928     
_________________________________________________________________
block1_pool (MaxPooling2D)   (None, None, None, 64)    0         
_________________________________________________________________
block2_conv1 (Conv2D)        (None, None, None, 128)   73856     
_________________________________________________________________
block2_conv2 (Conv2D)        (None, None, None, 128)   147584    
_________________________________________________________________
block2_pool (MaxPooling2D)   (None, None, None, 128)   0     

### 2、COCO数据集

因为是训练图像转换模型，任意的图片数据集都是可以的，本文中用的是[COCO验证数据集](http://images.cocodataset.org/zips/val2014.zip)，大小6.2GB，共包括40504张图片。

### 3、运行配置

激活虚拟环境后，安装对应版本的tensorflow-addons。
```
pip install tensorflow-addons==0.9.1
```

+ VGG16模型计算风格损失的层以及权重

In [7]:
# Names of the VGG16 layers whose outputs are used for the style loss
StyleLossLayer =  ['block1_conv2', 'block2_conv2', 'block3_conv3', 'block4_conv3']
# Multiplier applied to the style loss during training
StyleWeight = 10

+ VGG16模型计算内容损失的层以及权重

In [8]:
# Names of the VGG16 layers whose outputs are used for the content loss
ContentLossLayer = ['block3_conv3']
# Multiplier applied to the content loss during training
ContentWeight = 12

+ 总变分损失权重

In [9]:
# Multiplier applied to the total variation loss during training
TotalWeight = 1e-5

+ COCO数据集路径

In [10]:
# Folder holding the COCO images that serve as training content images
COCODataset = r'C:\Users\Administrator\Desktop\28tensorflow\val2014'

+ 存储训练的图像转换模型

In [11]:
# Folder that receives the transformation-model checkpoints and the loss log files
SaveModelPath = r'C:\Users\Administrator\Desktop\28tensorflow\style_model'

### 4、构建图像转换模型

提升风格迁移图片质量的方法：
 + 用 Instance Normalization来代替通常的Batch Normalization，可以改善风格迁移的质量；风格转移的过程，就是要把风格图像的对比度转移到内容图像上，因此需要先去除内容图像本身的对比度。归一化操作其实就是在去除这种对比度，scale+shift则是将风格图像的对比度附加给内容图像。但是如果采用BN，计算出来的平均值和方差是整个batch内所有图像的均值和方差，而对于单张图像而言，其本身的均值和方差才是最能反映它的对比度的，因此BN会导致对比度去除得不彻底。因此Instance Normalization更加有效。

In [12]:
# Reflection padding
class ReflectionPadding2D(tf.keras.layers.Layer):
    """Pad axes 1 and 2 of a 4-D tensor by reflection.

    The same amount ``padding`` is added on both sides of axis 1 and of
    axis 2; axes 0 and 3 are left untouched.

    Args:
        padding: number of elements added on each side of axes 1 and 2.
        **kwargs: forwarded to ``tf.keras.layers.Layer``.
    """

    def __init__(self, padding=1, **kwargs):
        super(ReflectionPadding2D, self).__init__(**kwargs)
        self.padding = padding

    def compute_output_shape(self, s):
        """Return the padded shape; unknown (None) sizes stay unknown."""
        grow = 2 * self.padding

        def padded(dim):
            # ``None + int`` would raise TypeError for dynamically sized inputs
            return None if dim is None else dim + grow

        return s[0], padded(s[1]), padded(s[2]), s[3]

    def get_config(self):
        """Return the layer configuration including ``padding``."""
        config = super(ReflectionPadding2D, self).get_config()
        config['padding'] = self.padding
        return config

    def call(self, x):
        """Apply ``tf.pad`` in 'REFLECT' mode to axes 1 and 2 of ``x``."""
        return tf.pad(x,[[0, 0],[self.padding, self.padding],[self.padding, self.padding],[0, 0],],'REFLECT')

# Convolution preceded by reflection padding
class ConvLayer(tf.keras.layers.Layer):
    """Reflection-pad the input by ``kernel_size // 2`` and then convolve it.

    Args:
        channels: number of filters of the ``Conv2D`` layer.
        kernel_size: kernel size passed to ``Conv2D``; also determines the padding.
        strides: strides passed to ``Conv2D``.
    """

    def __init__(self, channels, kernel_size=3, strides=1):
        super().__init__()
        self.reflection_pad = ReflectionPadding2D(kernel_size // 2)
        self.conv2d = Conv2D(channels, kernel_size, strides=strides)

    def call(self, x):
        """Return ``conv2d(reflection_pad(x))``."""
        return self.conv2d(self.reflection_pad(x))

# Upsampling followed by a reflection-padded convolution
class UpsampleConvLayer(tf.keras.layers.Layer):
    """Upsample, reflection-pad by ``kernel_size // 2`` and then convolve.

    Args:
        channels: number of filters of the ``Conv2D`` layer.
        kernel_size: kernel size passed to ``Conv2D``; also determines the padding.
        strides: strides passed to ``Conv2D``.
        upsample: ``size`` argument passed to ``UpSampling2D``.
    """

    def __init__(self, channels, kernel_size=3, strides=1, upsample=2):
        super().__init__()
        self.reflection_pad = ReflectionPadding2D(kernel_size // 2)
        self.conv2d = Conv2D(channels, kernel_size, strides=strides)
        self.up2d = UpSampling2D(size=upsample)

    def call(self, x):
        """Return ``conv2d(reflection_pad(up2d(x)))``."""
        enlarged = self.up2d(x)
        return self.conv2d(self.reflection_pad(enlarged))

# Residual block
class ResidualBlock(tf.keras.Model):
    """Two 3x3 ``ConvLayer`` + ``InstanceNormalization`` stages with a skip connection.

    A ReLU follows the first stage only; the block input is added to the
    output of the second stage.

    Args:
        channels: number of filters of both convolutions.
        strides: strides passed to both convolutions.
    """

    def __init__(self, channels, strides=1):
        super().__init__()
        self.conv1 = ConvLayer(channels, kernel_size=3, strides=strides)
        self.in1 = InstanceNormalization()
        self.conv2 = ConvLayer(channels, kernel_size=3, strides=strides)
        self.in2 = InstanceNormalization()

    def call(self, inputs):
        """Return the second-stage output plus ``inputs``."""
        hidden = tf.nn.relu(self.in1(self.conv1(inputs)))
        hidden = self.in2(self.conv2(hidden))
        return hidden + inputs

### 4.1 按照1.1节中给出的模型结构图构建模型

In [13]:
# Image transformation model
class TransformerNet(tf.keras.Model):
    """Image transformation network.

    Three convolution stages (32, 64 and 128 filters, the last two with
    stride 2), five residual blocks with 128 filters, two upsampling
    convolutions (64 and 32 filters) and a final 9x9 convolution with 3
    filters.  Every convolution outside the residual blocks is followed by
    instance normalization; all of them except the last are also followed by ReLU.
    """

    def __init__(self):
        super().__init__()

        # Initial convolution stages
        self.conv1 = ConvLayer(32, kernel_size=9, strides=1)
        self.in1 = InstanceNormalization()
        self.conv2 = ConvLayer(64, kernel_size=3, strides=2)
        self.in2 = InstanceNormalization()
        self.conv3 = ConvLayer(128, kernel_size=3, strides=2)
        self.in3 = InstanceNormalization()

        # Residual blocks
        self.res1 = ResidualBlock(128)
        self.res2 = ResidualBlock(128)
        self.res3 = ResidualBlock(128)
        self.res4 = ResidualBlock(128)
        self.res5 = ResidualBlock(128)

        # Upsampling stages and output convolution
        self.deconv1 = UpsampleConvLayer(64, kernel_size=3, strides=1, upsample=2)
        self.in4 = InstanceNormalization()
        self.deconv2 = UpsampleConvLayer(32, kernel_size=3, strides=1, upsample=2)
        self.in5 = InstanceNormalization()
        self.deconv3 = ConvLayer(3, kernel_size=9, strides=1)
        self.in6 = InstanceNormalization()

        self.relu = ReLU()

    def call(self, x):
        """Run ``x`` through the network and map the result into [0, 255]."""
        head = ((self.conv1, self.in1), (self.conv2, self.in2), (self.conv3, self.in3))
        for conv, norm in head:
            x = self.relu(norm(conv(x)))

        for block in (self.res1, self.res2, self.res3, self.res4, self.res5):
            x = block(x)

        tail = ((self.deconv1, self.in4), (self.deconv2, self.in5))
        for conv, norm in tail:
            x = self.relu(norm(conv(x)))

        x = self.in6(self.deconv3(x))
        # tanh lies in [-1, 1], so this keeps the output within [0, 255]
        return (tf.nn.tanh(x)+ 1) * 255 / 2

### 4.2 选择VGG16预训练模型作为计算损失的网络

In [14]:
class StyleContentModel(tf.keras.models.Model):
    """Expose selected layer outputs of a VGG model as style and content features.

    Args:
        style_layers: names of the layers whose outputs form the style features.
        content_layers: names of the layers whose outputs form the content features.
        vgg: the model the layers are looked up in; it is marked non-trainable.
    """

    def __init__(self, style_layers, content_layers, vgg):
        super().__init__()
        vgg.trainable = False

        def outputs_of(layer_names):
            # Symbolic outputs of the named layers, in the given order
            return [vgg.get_layer(layer_name).output for layer_name in layer_names]

        style_outputs = outputs_of(style_layers)
        content_outputs = outputs_of(content_layers)

        self.vgg = tf.keras.Model([vgg.input], [style_outputs, content_outputs])
        self.vgg.trainable = False

    def call(self, inputs):
        """Return ``(style_outputs, content_outputs)`` for ``inputs``.

        ``inputs`` first goes through ``vgg16.preprocess_input``.
        """
        style_outputs, content_outputs = self.vgg(
            tf.keras.applications.vgg16.preprocess_input(inputs)
        )
        return style_outputs, content_outputs

### 5、构建图像数据集

In [15]:
def buildfigdataset(figpath, figsize, batchsize):
    """Build a batched ``tf.data`` dataset from the image files in a folder.

    Args:
        figpath: folder containing the images.
        figsize: every image is resized to ``figsize`` x ``figsize``.
        batchsize: number of images per batch.

    Returns:
        A dataset yielding batches of decoded, resized 3-channel images.

    Raises:
        ValueError: if ``figpath`` contains no regular files.
    """
    # Join the folder with every entry; keep regular files only, because
    # os.listdir also returns sub-directories, which cannot be read as images.
    # Sorting gives the same order on every run.
    img_list = sorted(
        path
        for path in (os.path.join(figpath, img_name) for img_name in os.listdir(figpath))
        if os.path.isfile(path)
    )
    if not img_list:
        # An empty list would otherwise only fail later with an unrelated dtype error
        raise ValueError('no image files found in %r' % (figpath,))

    ds = tf.data.Dataset.from_tensor_slices(img_list)

    def load_and_preprocess(path):
        image = tf.io.read_file(path)  # read the file content
        # Always decode to 3 channels
        image = tf.image.decode_jpeg(image, channels=3)
        # Resize to the requested square size
        image = tf.image.resize(image, [figsize, figsize])
        return image

    image_ds = ds.map(load_and_preprocess)
    # Group the images into batches
    batch_ds = image_ds.batch(batchsize)
    return batch_ds

### 6、模型训练与保存

模型的唯一性由风格图片名称以及风格、内容、总变分损失权重确定，对于同样的参数设置可实现继续训练。

In [16]:
class FASTSTMODEL:
    """Train and checkpoint an image transformation network for one style image.

    The checkpoint folder name is built from the style image's base name and
    the three loss weights, so a later run with the same settings finds the
    latest checkpoint and restores it before training.  The total loss of
    every training step is appended to a text file inside ``modelsavepath``.
    """

    def __init__(self, stylefig, modelsavepath=SaveModelPath, model=VGG16model, 
                 contentfigpath=COCODataset, stylelayer=StyleLossLayer, contentlayer=ContentLossLayer,
                 styleweight=StyleWeight, contentweight=ContentWeight, totalweight=TotalWeight,
                 figsize=256, lr=0.003, epochs=10, batchs=4):
        """Store the settings, build the content dataset and load the style image.

        Args:
            stylefig: path of the style image.
            modelsavepath: folder for checkpoints and the loss log.
            model: pretrained network used as the loss network.
            contentfigpath: folder with the content (training) images.
            stylelayer: loss-network layer names used for the style loss.
            contentlayer: loss-network layer names used for the content loss.
            styleweight: multiplier of the style loss.
            contentweight: multiplier of the content loss.
            totalweight: multiplier of the total variation loss.
            figsize: training images are resized to ``figsize`` x ``figsize``.
            lr: learning rate of the Adam optimizer.
            epochs: number of passes over the content dataset.
            batchs: batch size.
        """
        # Style image path
        self.stylefig = stylefig

        # Folder the model is stored in
        self.modelsavepath = modelsavepath

        # Pretrained loss network
        self.model = model
        # Content image folder
        self.contentfigpath =contentfigpath

        # Training parameters
        self.figsize =figsize
        self.lr= lr
        self.epochs = epochs
        self.batchs =batchs

        # Layers used by the loss functions
        self.stylelayer = stylelayer
        self.contentlayer = contentlayer

        # Loss weights
        self.styleweight = styleweight
        self.contentweight = contentweight
        self.totalweight = totalweight

        # Content image dataset
        self.figds = buildfigdataset(self.contentfigpath, self.figsize, self.batchs)
        # Style image, loaded through the external lossnetwork.loadimg helper
        self.style_image = loadimg(self.stylefig)

        # Weighted style / content loss of every training step
        self.styleloss = []
        self.contentloss = []

        # Derive the style image's base name
        self.getname()

        # Name of the text file holding the total loss of every step
        self.savetotalloss = '%s_savetotalloss.txt' % self.fignameckpt

    # Derive the style image name
    def getname(self):
        """Set ``fignameckpt`` to the style file's base name up to its first dot."""
        self.fignameckpt = os.path.basename(self.stylefig).split('.')[0]

    def _losspath(self):
        """Return the path of the total-loss text file."""
        return r'%s/%s' % (self.modelsavepath, self.savetotalloss)

    # Read the last total loss recorded by a previous run
    def getlastloss(self):
        """Return the last recorded total loss as a float.

        Returns ``False`` when the loss file is empty or does not exist.
        """
        try:
            with open(self._losspath(), 'r') as d:
                lines = d.readlines()
        except FileNotFoundError:
            # A checkpoint may exist although the loss file was removed
            return False
        if lines:
            return float(lines[-1])
        return False

    # Record a total loss
    def savelastloss(self, loss):
        """Append ``loss`` as one line to the loss file."""
        with open(self._losspath(), 'a') as d:
            d.write(str(loss)+'\n')

    # Train and save the model
    def train_fast_style_model(self):
        """Train the transformation network, checkpointing after every step.

        Restores the latest checkpoint for the current style image and loss
        weights when one exists; otherwise starts with an empty loss file.
        """
        # Checkpoints and the loss file live here; create the folder so the
        # first run does not fail on a missing directory
        os.makedirs(self.modelsavepath, exist_ok=True)

        # Loss network exposing the style / content layer outputs
        extractor = StyleContentModel(self.stylelayer, self.contentlayer, self.model)

        # Image transformation model
        transformer = TransformerNet()

        # Gram matrices of the style image's features (external grammatrix helper)
        style_features, _ = extractor(self.style_image)
        gram_style = [grammatrix(x) for x in style_features]

        # Optimizer
        optimizer = tf.optimizers.Adam(learning_rate=self.lr)
        # Objects tracked by the checkpoint
        ckpt = tf.train.Checkpoint(step=tf.Variable(1), optimizer=optimizer, transformer=transformer)
        # Checkpoint manager; the folder name encodes style name and loss weights
        log_dir = os.path.join(self.modelsavepath, 'figname=%s_sw=%s_cw=%s_tw=%s'% 
                               (self.fignameckpt, self.styleweight, self.contentweight, self.totalweight))
        manager = tf.train.CheckpointManager(ckpt, log_dir, max_to_keep=1)

        if manager.latest_checkpoint:
            # Restore the existing model
            ckpt.restore(manager.latest_checkpoint)
        else:
            # First training run: start with an empty loss file
            with open(self._losspath(), 'w') as f:
                f.write('')

        def train_step(images):
            with tf.GradientTape() as tape:
                # Output of the transformation model for the content batch
                transformed_images = transformer(images)

                # Content features of the original batch
                _, content_features = extractor(images)

                # Style and content features of the transformed batch
                style_transformed, content_transformed = extractor(transformed_images)

                # Style loss: style image vs. transformed batch
                style_loss = self.styleweight * stylelossfunc(gram_style, style_transformed)
                # Content loss: original batch vs. transformed batch
                content_loss = self.contentweight * contentlossfunc(content_features, content_transformed)
                # Total variation loss of the transformed batch
                total_variation_loss = self.totalweight * totalvariationloss(transformed_images)
                # Total loss
                loss = style_loss + content_loss + total_variation_loss

            # Gradients with respect to the transformation model only
            gradients = tape.gradient(loss, transformer.trainable_variables)
            # Parameter update
            optimizer.apply_gradients(zip(gradients, transformer.trainable_variables))

            # Keep the per-step losses
            self.styleloss.append(style_loss.numpy())
            self.contentloss.append(content_loss.numpy())
            return loss.numpy()

        # Whether a loss has been recorded already.  Read once instead of
        # re-reading the whole, ever-growing loss file on every step.
        has_history = self.getlastloss() is not False

        # Training loop
        for epoch in range(self.epochs):
            for images in self.figds:
                sumloss = train_step(images)
                ckpt.step.assign_add(1)
                step = int(ckpt.step)

                # The progress line is skipped only while no loss has been recorded
                if has_history:
                    print('代数%s-%s'%(self.epochs, epoch), 'step数%s'%step, '总损失：', sumloss, )
                print('保存模型', manager.save())
                # Record the loss
                self.savelastloss(sumloss)
                has_history = True

针对不同的风格图片建立不同的模型。

In [None]:
StyleFig1 = r'C:\Users\Administrator\Desktop\28tensorflow\style1.jpg'
# Train the image transformation model for this style image
fase_trans_model = FASTSTMODEL(StyleFig1, epochs=3)
fase_trans_model.train_fast_style_model()

In [None]:
StyleFig2 = r'C:\Users\Administrator\Desktop\28tensorflow\style2.jpg'
# Train the image transformation model for this style image
fase_trans_model = FASTSTMODEL(StyleFig2, epochs=3)
fase_trans_model.train_fast_style_model()

In [None]:
StyleFig3 = r'C:\Users\Administrator\Desktop\28tensorflow\style3.jpg'
# Train the image transformation model for this style image
fase_trans_model = FASTSTMODEL(StyleFig3, epochs=3)
fase_trans_model.train_fast_style_model()

In [None]:
StyleFig4 = r'C:\Users\Administrator\Desktop\28tensorflow\style4.jpg'
# Train the image transformation model for this style image
fase_trans_model = FASTSTMODEL(StyleFig4, epochs=3)
fase_trans_model.train_fast_style_model()

### 7、转换图片

+ 需要转换的图片的存储路径

In [21]:
# Folder holding the images that are to be converted
InFigPath = r'C:\Users\Administrator\Desktop\28tensorflow\infig'

In [22]:
class FIGFST(FASTSTMODEL):
    """Stylize every image of a folder with the trained transformation model.

    Results are written to a sibling folder named
    ``<transfigpath>_<style image base name>``, which is emptied (or created)
    when the object is constructed.
    """

    # Initialisation
    def __init__(self, stylefig, transfigpath):
        """Set up the input and output folders.

        Args:
            stylefig: path of the style image whose trained model is used.
            transfigpath: folder with the images to convert.
        """
        super(FIGFST, self).__init__(stylefig)
        self.tsfp = transfigpath
        self.otfp = self.tsfp + '_' +self.fignameckpt

        # Upper bound for the longer image side, in pixels
        self.maxpixel = 800

        self.checkoutpath()

    # Empty the output folder when it exists, create it otherwise
    def checkoutpath(self):
        """Leave ``otfp`` as an existing, empty folder."""
        if os.path.exists(self.otfp):
            shutil.rmtree(self.otfp)
        os.mkdir(self.otfp)

    # Read the image data
    def loadimgplus(self,figpath):
        """Load an image as a float32 tensor with a leading batch axis.

        When height or width exceeds ``maxpixel`` the image is scaled down
        proportionally so that its longer side becomes ``maxpixel``.
        """
        img = tf.io.read_file(figpath)
        img = tf.image.decode_image(img, channels=3)
        img = tf.cast(img, tf.float32)

        height, width, _ = img.shape

        maxh = max(height, width)
        if height > self.maxpixel or width > self.maxpixel:
            # Proportional down-scaling
            img = tf.image.resize(img, [int(self.maxpixel/maxh*height), int(self.maxpixel/maxh*width)]) 
        imglast = img[tf.newaxis, :]
        return imglast

    def _loadtransformer(self):
        """Build the transformation model and restore its latest checkpoint."""
        ckpapath = os.path.join(self.modelsavepath,  'figname=%s_sw=%s_cw=%s_tw=%s'% 
                                (self.fignameckpt, self.styleweight, self.contentweight, self.totalweight))
        latest = tf.train.latest_checkpoint(ckpapath)
        if latest is None:
            # Without a checkpoint the network would run with untrained weights
            raise FileNotFoundError('no trained checkpoint found in %s' % ckpapath)

        transformer = TransformerNet()
        ckpt = tf.train.Checkpoint(transformer=transformer)
        # Only the transformer is restored; step and optimizer stored in the
        # checkpoint are deliberately left unused
        ckpt.restore(latest).expect_partial()
        return transformer

    # Convert with the latest model
    def faststyletransfer(self):
        """Stylize every file in ``tsfp`` and save it as ``trans_<name>`` in ``otfp``.

        Raises:
            FileNotFoundError: if no checkpoint exists for the current style
                image and loss weights.
        """
        # The model does not depend on the image, so it is loaded once
        transformer = self._loadtransformer()

        for imagefp in os.listdir(self.tsfp):
            figpath = os.path.join(self.tsfp, imagefp)
            image = self.loadimgplus(figpath)

            transformed_image = transformer(image)

            # Drop only the batch axis added by loadimgplus
            transformed_image = tf.cast(tf.squeeze(transformed_image, axis=0), tf.uint8).numpy()

            img = Image.fromarray(transformed_image, mode='RGB')

            img.save(r'%s/trans_%s' % (self.otfp, imagefp))
        print('转换完毕')
    

进行图片的转换

In [26]:
# Convert every image in InFigPath using the model stored for StyleFig4
figstyles = FIGFST(StyleFig4, InFigPath)
figstyles.faststyletransfer()

转换完毕


### 8、动图、视频转换

将本地视频文件或者动图按照帧转化为图片，然后再将风格迁移后的图片连接成动图。

In [28]:
from PIL import Image, ImageSequence  # 动图提取图片
import cv2  # 视频转成图片
import imageio # 图片链接成gif
import shutil # 清空文件夹

class FASTST:
    """Stylize a video or an animated GIF frame by frame.

    The source is split into image files inside ``infigpath``, the frames are
    stylized through ``FIGFST`` into ``<infigpath>_<style image base name>``,
    and ``create_gif`` joins the stylized frames into a GIF.
    """

    def __init__(self, stylefig, videogifpath, infigpath):
        """Extract and stylize the frames right away.

        Args:
            stylefig: path of the style image whose trained model is used.
            videogifpath: path of the video or GIF file to convert.
            infigpath: folder that receives the extracted frames.
        """
        self.styf = stylefig

        self.vidp = videogifpath
        self.infp = infigpath
        self.oufp = self.infp  + '_' + os.path.basename(self.styf).split('.')[0]

        # Image converter
        self.figmodel = FIGFST(self.styf, self.infp)

        # Run the conversion
        self.judgevideogif()

    @staticmethod
    def _resetdir(path):
        """Leave ``path`` as an existing, empty folder."""
        # rmtree alone raises when the folder has not been created yet
        if os.path.exists(path):
            shutil.rmtree(path)
        os.mkdir(path)

    @staticmethod
    def _frameorder(filename):
        """Sort key ordering frame files by the number after the last underscore."""
        stem = os.path.splitext(filename)[0]
        tail = stem.rsplit('_', 1)[-1]
        if tail.isdecimal():
            return (0, int(tail), filename)
        # Names without a numeric suffix go last, in name order
        return (1, 0, filename)

    # Choose between video and GIF handling
    def judgevideogif(self):
        """Dispatch on the file extension: ``gif`` (any case) or anything else."""
        name = self.vidp.split('.')[-1]
        if name.lower() == 'gif':
            self.parsegif()
        else:
            self.video2figure()

    # Convert a video into images
    def video2figure(self, time_interval=10):
        """Save every ``time_interval``-th frame of the video and stylize the frames.

        Frames are written as ``fig_<frame index>.jpg``, starting with the
        first frame of the video.
        """
        # Empty the frame folder
        self._resetdir(self.infp)

        vidcap = cv2.VideoCapture(self.vidp)
        try:
            count = 0
            while True:
                success, image = vidcap.read()
                if not success or image is None:
                    break
                if count % time_interval == 0:
                    # Encoded in memory and written via tofile, as in the original
                    # code; presumably to cope with non-ASCII paths (unconfirmed)
                    cv2.imencode('.jpg', image)[1].tofile('%s/fig_%d.jpg' % (self.infp, count))
                count += 1
        finally:
            # Always free the capture handle
            vidcap.release()
        print('视频转换为图片，开始转换')

        # Empty the output folder
        self._resetdir(self.oufp)

        # Stylize the frames
        self.figmodel.faststyletransfer()
        print('转换完成')

    # Split a GIF into images
    def parsegif(self):
        """Save every GIF frame as ``fig_<n>.png`` (n starts at 1) and stylize the frames."""
        # Empty the frame folder
        self._resetdir(self.infp)

        # Open the GIF and walk through its frames; the file is closed afterwards
        with Image.open(self.vidp) as im:
            for count, frame in enumerate(ImageSequence.Iterator(im), start=1):
                frame.save('%s/fig_%d.png' % (self.infp, count))
        print('动图转换为图片，开始转换')

        # Empty the output folder
        self._resetdir(self.oufp)

        # Stylize the frames
        self.figmodel.faststyletransfer()
        print('转换完成')

    # Join the images of the output folder into a GIF
    def create_gif(self, duration=0.1):
        """Write ``<style image base name>.gif`` from the stylized frames.

        Args:
            duration: value passed to ``imageio.mimsave`` as ``duration``.
        """
        name = os.path.basename(self.styf).split('.')[0] + '.gif'
        # os.listdir gives no numeric order (fig_10 may precede fig_2), so the
        # frames are sorted by their frame number before being joined
        ordered = sorted(os.listdir(self.oufp), key=self._frameorder)
        frames = [imageio.imread('%s/%s' % (self.oufp, image_name)) for image_name in ordered]
        imageio.mimsave(name, frames, 'GIF', duration=duration)
        print('动图生成完成')

In [36]:
# Path of the video / GIF to convert, and the folder that receives its frames
videopath = r'C:\Users\Administrator\Desktop\28tensorflow\3.gif'
videoinpath = r'C:\Users\Administrator\Desktop\28tensorflow\videoin'
videost = FASTST(StyleFig4, videopath, videoinpath)
videost.create_gif()

动图转换为图片，开始转换
转换完毕
转换完成
动图生成完成
