In [25]:
import torch 
from PIL import Image
import torchvision.transforms as transforms
from torch.autograd import Variable
from torchvision import models
import torch.nn as nn


# 加载数据

In [12]:
imsize=512
is_cuda=torch.cuda.is_available()

#转换图片使其适于VGG模型的训练，图片预处理
prep=transforms.Compose([transforms.Resize(imsize),
                         transforms.ToTensor(),
                         transforms.Lambda(lambda x: x[torch.LongTensor([2,1,0])]),  #变成BGR
                         transforms.Normalize(mean=[0.40760392,0.45795686,0.48501961],std=[1,1,1]),
                         transforms.Lambda(lambda x: x.mul_(1./255)),
                        ])

#将生成的图片转换回可以呈现的格式，模型的输出需要反归一化到初始值
postpa=transforms.Compose([transforms.Lambda(lambda x: x.mul_(1./255)),
                           transforms.Normalize(mean=[-0.40760392,-0.45795686,-0.48501961],std=[1,1,1]),
                           transforms.Lambda(lambda x: x[torch.LongTensor([2,1,0])])
                          ])
postpb=transforms.Compose([transforms.ToPILImage()])

#确保图片数据在[0,1]的范围，因为生成模型可能有超出可接受范围的值
def postp(tensor):
    t=postpa(tensor)
    t[t>1]=1
    t[t<0]=0
    img=postpb(t)
    return img

#加载图片，对图片预处理转换并存入变量
def image_loader(image_name):
    image=Image.open(image_name)
    image=Variable(prep(image))
    image=image.unsqueeze(0)
    return image

In [13]:
style_img=image_loader('C:/Users/user/Desktop/star.jpg')
content_img=image_loader('C:/Users/user/Desktop/house.jpeg')

In [14]:
#创建内容图片,后续将使用优化器优化opt_img
opt_img=Variable(content_img.data.clone(),requires_grad=True)

# 加载与训练模型提取特征

In [16]:
vgg=models.vgg16(pretrained=True).features
for param in vgg.parameters():
    param.requires_grad=False

# 内容损失:在特定层的输出上计算均方误差

In [22]:
#通过传入图片返回特定层的输出

In [26]:
target_layer=dummy_fn(content_img)
noise_layer=dummy_fn(noise_img)

criterion=nn.MSELoss()
content_loss=criterion(target_layer,noise_layer)

NameError: name 'dummy_fn' is not defined

# 风格损失：在每个特征平面生成的gram矩阵的均方误差，gram矩阵表示特征之间的相关性度量

In [27]:
#对每一通道，把所有值扁平化成一个向量或张量
class GramMatrix(nn.Module):
    def forward(self,input):
        b,c,h,w=input.size()  #批，通道，高，宽
        features=input.view(b,c,h*w)  
        gram_matrix=torch.bmm(features,features.transpose(1,2))   #批矩阵相乘torch.bmm
        gram_matrix.div_(h*w)
        return gram_matrix

In [53]:
class StyleLoss(nn.Module):
    def forward(self,inputs,targets):
        out=nn.MSELoss()(GramMatrix()(inputs),targets)
        return out

# 提取损失

In [54]:
class LayerActiavtions():
    features=[]
    def __init__(self,model,layer_nums):
        self.hooks=[]
        for layer_num in layer_nums:
            self.hooks.append(model[layer_num].register_forward_hook(self.hook_fn))  #对层进行迭代并对拉取输出所需要的forward hook进行注册
            
    def hook_fn(self,module,input,output):
        self.features.append(output) #捕捉输出并存到features数组
        
    def remove(self): #不需要捕捉输出之后，需要调用remove函数，防止内存溢出异常
        for hook in self.hooks:
            hook.remove()

In [55]:
#用于风格和内容图片提取输出
def extract_layers(layers,img,model=None):
    la=LayerActiavtions(model,layers)
    #清空缓存
    la.features=[]
    out=model(img)
    la.remove()
    return la.features

In [56]:
#提取风格和内容图片的对象
content_targets=extract_layers(content_layers,content_img,model=vgg)
style_targets=extract_layers(style_layers,style_img,model=vgg)

In [57]:
#提取对象后，需要将输出和创建它们的图片解绑
ontent_targets=[t.detach() for t in content_targets]
style_targets=[GramMatrix()(t).detach() for t in style_targets]
#解绑后，把所有对象加入到一个列表
targets=style_targets+content_targets

In [58]:
#需要提取的层
style_layers=[1,6,11,20,25]
content_layers=[21]
loss_layers=style_layers+content_layers

#使用到的权重
style_weights=[1e3/n**2 for n in [64,128,256,512,512]]
content_weights=[1e0]
weights=style_weights+content_weights

# 创建loss层

In [59]:
loss_fns=[StyleLoss()]*len(style_layers)+[nn.MSELoss()]*len(content_layers)

# 创建优化器

In [60]:
optimizer=torch.optim.LBFGS([opt_img])

# 训练模型

In [62]:
max_iter=500
show_iter=50
n_iter=[0]

while n_iter[0]<=max_iter:
    def closure():
        optimizer.zero_grad()
        out=extract_layers(loss_layers,opt_img,model=vgg)
        layer_losses=[weights[a] * loss_fns[a](A,targets[a]) for a,A in enumerate(out)]
        loss=sum(layer_losses)
        loss.backward()
        n_iter[0]+=1
        #打印损失值
        if n_iter[0]%show_iter == (show_iter-1):
            print('Iteration: %d ,loss: %f' % (n_iter[0]+1,loss.data[0]))
        return loss
    optimizer.step(closure)

RuntimeError: The size of tensor a (44) must match the size of tensor b (88) at non-singleton dimension 3

In [68]:
use_cuda=torch.cuda.is_available()
# desired size of the output image
imsize = 512 if use_cuda else 128  # use small size if no gpu

loader = transforms.Compose([
    transforms.Resize(imsize),  # scale imported image
    transforms.ToTensor()])  # transform it into a torch tensor

loader_new = transforms.Compose([ # 通过loader_new 可以将任意大小图像剪裁到相同大小
    transforms.Resize(imsize),
    transforms.RandomCrop(imsize),
    transforms.ToTensor()])


def image_loader(image_name):
    image = Image.open(image_name)
    image = Variable(loader(image))
    # fake batch dimension required to fit network's input dimensions
    image = image.unsqueeze(0)
    return image

style_img=image_loader('C:/Users/user/Desktop/star.jpg')
content_img=image_loader('C:/Users/user/Desktop/house.jpeg')

assert style_img.size() == content_img.size(), \
    "we need to import style and content images of the same size"

AssertionError: we need to import style and content images of the same size