# Morphing with Generator and Bottleneck

In [2]:
import torch
from torch import nn
import torchvision.utils as vutils
from torch.optim import Adam
from torch.utils.data import DataLoader,Dataset
import glob
from torchvision import transforms, datasets
from torchvision.utils import save_image
import os
import random
from PIL import Image
import itertools
import matplotlib.pyplot as plt
from torchvision.models import vgg19, resnet18
from torch.autograd import Variable
import numpy as np

In [3]:
class Discriminator_patch(nn.Module):
    def __init__(self, input_nc):
        super(Discriminator_patch, self).__init__()

        model = [
            nn.Conv2d(input_nc, 64, kernel_size=4, stride=2, padding=1),
            nn.LeakyReLU(0.2, inplace=True)
        ]

        model += [
            nn.Conv2d(64, 128, kernel_size=4, stride=2, padding=1, bias=False),
            nn.InstanceNorm2d(128),
            nn.LeakyReLU(0.2, inplace=True)
        ]

        model += [
            nn.Conv2d(128, 256, kernel_size=4, stride=2, padding=1, bias=False),
            nn.InstanceNorm2d(256),
            nn.LeakyReLU(0.2, inplace=True)
        ]

        model += [
            nn.Conv2d(256, 512, kernel_size=4, padding=1, bias=False),
            nn.InstanceNorm2d(512),
            nn.LeakyReLU(0.2, inplace=True)
        ]

        model += [nn.Conv2d(512, 1, kernel_size=4, padding=1)]

        self.model = nn.Sequential(*model)

    def forward(self, x):
        return self.model(x)
    
class Discriminator_classify(nn.Module):
    def __init__(self, input_nc):
        super(Discriminator_classify, self).__init__()

        # 之前的卷积层保持不变
        self.model = nn.Sequential(
            nn.Conv2d(input_nc, 64, kernel_size=4, stride=2, padding=1),
            nn.LeakyReLU(0.2, inplace=True),
            nn.Conv2d(64, 128, kernel_size=4, stride=2, padding=1, bias=False),
            nn.InstanceNorm2d(128),
            nn.LeakyReLU(0.2, inplace=True),
            nn.Conv2d(128, 256, kernel_size=4, stride=2, padding=1, bias=False),
            nn.InstanceNorm2d(256),
            nn.LeakyReLU(0.2, inplace=True),
            nn.Conv2d(256, 512, kernel_size=4, stride=1, padding=1, bias=False),
            nn.InstanceNorm2d(512),
            nn.LeakyReLU(0.2, inplace=True)
        )

        # 添加一个全局平均池化层
        self.global_avg_pool = nn.AdaptiveAvgPool2d((1, 1))

        # 最后一个卷积层，将特征图压缩为1个值
        self.final_conv = nn.Conv2d(512, 1, kernel_size=1)

        # 选择性添加，如果您需要输出概率
        self.sigmoid = nn.Sigmoid()

    def forward(self, x):
        x = self.model(x)
        x = self.global_avg_pool(x)
        x = self.final_conv(x)
        x = self.sigmoid(x)  # 如果您需要输出概率
        return x.view(-1)  # 改变输出形状以匹配期望的输出形

In [4]:
transform = transforms.Compose([
    transforms.Resize(int(128*1.12), Image.BICUBIC),
    transforms.RandomCrop(128),
    transforms.RandomHorizontalFlip(),
    transforms.ToTensor(),
    transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225])
])


In [5]:
transform2 = transforms.Compose([
    transforms.Resize((256, 256)),  # 根据你的模型调整尺寸
    transforms.ToTensor(),
    transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]),
])


In [6]:
class ImageDataset(Dataset):
    def __init__(self, root='data', transforms_=None, unaligned=False, mode="train"):          ## (root = "./datasets/facades", unaligned=True:非对其数据)
        self.transform = transforms_                             ## transform变为tensor数据
        self.unaligned = unaligned

        self.files_A = sorted(glob.glob(os.path.join(root, "%sA" % mode) + "/*.*"))     ## "./datasets/facades/trainA/*.*"
        self.files_B = sorted(glob.glob(os.path.join(root, "%sB" % mode) + "/*.*"))     ## "./datasets/facades/trainB/*.*"

    def __getitem__(self, index):
        image_A = Image.open(self.files_A[index % len(self.files_A)])                   ## 在A中取一张照片

        if self.unaligned:                                                              ## 如果采用非配对数据，在B中随机取一张
            image_B = Image.open(self.files_B[random.randint(0, len(self.files_B) - 1)])
        else:
            image_B = Image.open(self.files_B[index % len(self.files_B)])

        # # 如果是灰度图，把灰度图转换为RGB图
        # if image_A.mode != "RGB":
        #     image_A = to_rgb(image_A)
        # if image_B.mode != "RGB":
        #     image_B = to_rgb(image_B)
        
        # 把RGB图像转换为tensor图, 方便计算，返回字典数据
        item_A = self.transform(image_A)
        item_B = self.transform(image_B)
        return item_A, item_B

    ## 获取A,B数据的长度
    def __len__(self):
        return max(len(self.files_A), len(self.files_B))


In [7]:
class ImageDataset(Dataset):
    def __init__(self, root='data', transforms_=None, unaligned=False, mode="train"):          ## (root = "./datasets/facades", unaligned=True:非对其数据)
        self.transform = transforms_                             ## transform变为tensor数据
        self.unaligned = unaligned

        self.files_A = sorted(glob.glob(os.path.join(root, mode) + "/*.*"))     ## "./datasets/facades/trainA/*.*"
        self.files_B = sorted(glob.glob(os.path.join(root, '%sB' % mode) + "/*.*"))     ## "./datasets/facades/trainB/*.*"

    def __getitem__(self, index):
        image_A = Image.open(self.files_A[index % len(self.files_A)])                   ## 在A中取一张照片

        # if self.unaligned:                                                              ## 如果采用非配对数据，在B中随机取一张
        #     image_B = Image.open(self.files_B[random.randint(0, len(self.files_B) - 1)])
        # else:
        #     image_B = Image.open(self.files_B[index % len(self.files_B)])

        # # 如果是灰度图，把灰度图转换为RGB图
        # if image_A.mode != "RGB":
        #     image_A = to_rgb(image_A)
        # if image_B.mode != "RGB":
        #     image_B = to_rgb(image_B)
        
        # 把RGB图像转换为tensor图, 方便计算，返回字典数据
        item_A = self.transform(image_A)
        # item_B = self.transform(image_B)
        return item_A

    ## 获取A,B数据的长度
    def __len__(self):
        return max(len(self.files_A), len(self.files_B))


In [8]:
BATCH_SIZE = 32
train_dataloader = DataLoader(        ## 改成自己存放文件的目录
    ImageDataset("human_dog", transforms_=transform, unaligned=True, mode="combined"),  ## "./datasets/facades" , unaligned:设置非对其数据
    batch_size=BATCH_SIZE,                                                                  ## batch_size = 1
    shuffle=True,
)
test_dataloader = DataLoader(        ## 改成自己存放文件的目录
    ImageDataset("human_dog", transforms_=transform2, unaligned=True, mode="test"),  ## "./datasets/facades" , unaligned:设置非对其数据
    batch_size=BATCH_SIZE,                                                                  ## batch_size = 1
    shuffle=False,
)


In [9]:
import torch
import torch.nn as nn

class ResidualBlock(nn.Module):
    def __init__(self, in_features):
        super(ResidualBlock, self).__init__()
        self.conv1 = nn.Conv2d(in_features, in_features, kernel_size=3, padding=1, bias=False)
        self.norm1 = nn.InstanceNorm2d(in_features)
        self.relu = nn.ReLU(inplace=True)
        self.conv2 = nn.Conv2d(in_features, in_features, kernel_size=3, padding=1, bias=False)
        self.norm2 = nn.InstanceNorm2d(in_features)

    def forward(self, x):
        identity = x
        out = self.conv1(x)
        out = self.norm1(out)
        out = self.relu(out)
        out = self.conv2(out)
        out = self.norm2(out)
        return identity + out

class DownsampleBlock(nn.Module):
    def __init__(self, in_features, out_features):
        super(DownsampleBlock, self).__init__()
        self.conv = nn.Conv2d(in_features, out_features, kernel_size=3, stride=2, padding=1, bias=False)
        self.norm = nn.InstanceNorm2d(out_features)
        self.relu = nn.ReLU(inplace=True)

    def forward(self, x):
        x = self.conv(x)
        x = self.norm(x)
        x = self.relu(x)
        return x

class UpsampleBlock(nn.Module):
    def __init__(self, in_features, out_features):
        super(UpsampleBlock, self).__init__()
        self.conv = nn.ConvTranspose2d(in_features, out_features, kernel_size=3, stride=2, padding=1, output_padding=1, bias=False)
        self.norm = nn.InstanceNorm2d(out_features)
        self.relu = nn.ReLU(inplace=True)

    def forward(self, x):
        x = self.conv(x)
        x = self.norm(x)
        x = self.relu(x)
        return x

class Generator(nn.Module):
    def __init__(self, input_nc, output_nc, n_residual_blocks=6):
        super(Generator, self).__init__()
        self.initial_conv = nn.Sequential(
            nn.Conv2d(input_nc, 64, kernel_size=7, padding=3, bias=False),
            nn.InstanceNorm2d(64),
            nn.ReLU(inplace=True)
        )
        self.downsample_blocks = nn.ModuleList([
            DownsampleBlock(64, 32),
            # DownsampleBlock(32, 16),
            DownsampleBlock(32, 2)
        ])
        self.residual_blocks = nn.Sequential(
            *[ResidualBlock(2) for _ in range(n_residual_blocks)]
        )
        self.upsample_blocks = nn.ModuleList([
            UpsampleBlock(2, 32),
            # UpsampleBlock(16, 32),
            UpsampleBlock(32, 64)
        ])
        self.output_conv = nn.Sequential(
            nn.Conv2d(64, output_nc, kernel_size=7, padding=3),
            nn.Tanh()
        )

    def forward(self, x):
        x = self.initial_conv(x)
        # print("1",x.shape)
        for down_block in self.downsample_blocks:
            x = down_block(x)
        # print("2",x.shape)
        x = self.residual_blocks(x)
        # print("3",x.shape)
        for up_block in self.upsample_blocks:
            x = up_block(x)
        # print("4",x.shape)
        x = self.output_conv(x)
        # print("5",x.shape)
        return x


In [10]:
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

G_AB = Generator(input_nc=3, output_nc=3).to(device)
G_BA = Generator(input_nc=3, output_nc=3).to(device)
D_A = Discriminator_classify(input_nc=3).to(device)
D_B = Discriminator_classify(input_nc=3).to(device)

D_A_P = Discriminator_patch(input_nc=3).to(device)
D_B_P = Discriminator_patch(input_nc=3).to(device)

# Optimizers
optimizer_G = Adam(G_AB.parameters(), lr=0.0001, betas=(0.5, 0.999))
optimizer_D_A = Adam(D_A.parameters(), lr=0.0004, betas=(0.5, 0.999))
optimizer_D_B = Adam(D_B.parameters(), lr=0.0004, betas=(0.5, 0.999))

optimizer_D_A_P = Adam(D_A_P.parameters(), lr=0.0004, betas=(0.5, 0.999))
optimizer_D_B_P = Adam(D_B_P.parameters(), lr=0.0004, betas=(0.5, 0.999))

G_AB.train()
G_BA.train()
D_A.train()
D_B.train()
D_A_P.train()
D_B_P.train()

# Losses
criterion_GAN = nn.MSELoss()
criterion_cycle = nn.L1Loss()
criterion_identity = nn.L1Loss()


output_dir = './cyclegan_images'
os.makedirs(output_dir, exist_ok=True)

In [11]:
# 加载ResNet模型
resnet = resnet18(pretrained=True)

# 获取全连接层之前的特征提取部分
features = nn.Sequential(*list(resnet.children())[:-1])

# 定义新的全连接层和ReLU激活函数
num_ftrs = resnet.fc.in_features
fc_layer = nn.Linear(num_ftrs, 256)
relu = nn.ReLU(inplace=True)

# 定义模型结构
class CustomResNet(nn.Module):
    def __init__(self, features, fc_layer, relu, num_classes=1000):
        super(CustomResNet, self).__init__()
        self.features = features
        self.fc_layer = fc_layer
        self.relu = relu
        self.fc_out = nn.Linear(256, num_classes)  # 输出层

    def forward(self, x):
        x = self.features(x)
        x = x.view(x.size(0), -1)
        x = self.fc_layer(x)
        x = self.relu(x)
        x = self.fc_out(x)
        return x



In [12]:
resnet = resnet18(pretrained=True)
# num_ftrs = resnet.fc.in_features
# resnet.fc = nn.Linear(num_ftrs, 256)
# resnet.fc = nn.Linear(32168, 65536)
# resnet = torch.load("models\\animal_rec.pth")
# resnet.fc_out = torch.nn.Identity()
# resnet.relu = torch.nn.Identity()
resnet.fc = torch.nn.Identity()
resnet = resnet.to(device)
resnet.eval()
print(resnet)

ResNet(
  (conv1): Conv2d(3, 64, kernel_size=(7, 7), stride=(2, 2), padding=(3, 3), bias=False)
  (bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
  (relu): ReLU(inplace=True)
  (maxpool): MaxPool2d(kernel_size=3, stride=2, padding=1, dilation=1, ceil_mode=False)
  (layer1): Sequential(
    (0): BasicBlock(
      (conv1): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
      (bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (relu): ReLU(inplace=True)
      (conv2): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
      (bn2): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    )
    (1): BasicBlock(
      (conv1): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
      (bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (relu): ReLU(inplace=True)
  

In [13]:


class PerceptualLoss(nn.Module):
    def __init__(self):
        super(PerceptualLoss, self).__init__()
        # self.vgg = vgg19(pretrained=True).features[:21]  # 只使用到第三个池化层之前的部分
        self.resnet = resnet
        self.loss = nn.MSELoss()

        # 冻结VGG参数
        for param in self.resnet.parameters():
            param.requires_grad = False

    def forward(self, generated, target):

        self.resnet = self.resnet.to(device)
        gen_features = self.resnet(generated)

        target_features = self.resnet(target)

        return self.loss(gen_features, target_features)

# 实例化感知损失
perceptual_loss = PerceptualLoss()


In [14]:
# resnet1 = torch.load("models\\my_animal_rec.pth")
# resnet1 = resnet1.to(device)
# resnet1.fc_out = torch.nn.Identity()
# resnet1.flatten = torch.nn.Identity()
# resnet1.fc_outpout = torch.nn.Identity()
# resnet1.average_pool1 = torch.nn.Identity()
# resnet1.average_pool2 = torch.nn.Identity()

# resnet1.eval()
# print(resnet1)

In [189]:
G_AB.train()
for epoch in range(1):
    for i, (real_A) in enumerate(train_dataloader):
        # print(real_A.shape)
        real_A = real_A.to(device)

        # 训练生成器 G_A 和 G_B
        optimizer_G.zero_grad()
        
        # 对抗性损失
        fake_A = G_AB(real_A)
        pred_fake = D_A(fake_A)
        pred_fake_patch = D_A_P(fake_A)
        loss_G_S = criterion_GAN(real_A,fake_A)
        loss_G_P = perceptual_loss(real_A,fake_A)
        # loss_A = criterion_GAN(pred_fake, torch.ones_like(pred_fake))
        # loss_A_P = criterion_GAN(pred_fake_patch, torch.ones_like(pred_fake_patch))
        


        # 总损失
        # loss_G = loss_G_S+loss_G_P+loss_A+loss_A_P
        loss_G = loss_G_S+loss_G_P
        loss_G.backward()
        optimizer_G.step()

        # 训练判别器 D_A
        optimizer_D_A.zero_grad()

        pred_real = D_A(real_A)
        loss_D_real = criterion_GAN(pred_real, torch.ones_like(pred_real))

        pred_fake = D_A(fake_A.detach())
        loss_D_fake = criterion_GAN(pred_fake, torch.zeros_like(pred_fake))

        # 总损失
        loss_D_A = (loss_D_real + loss_D_fake) * 0.5
        loss_D_A.backward()
        optimizer_D_A.step()

        
        # 训练判别器 D_B
        optimizer_D_A_P.zero_grad()

        pred_real = D_A_P(real_A)
        loss_D_real = criterion_GAN(pred_real, torch.ones_like(pred_real))

        pred_fake = D_A_P(fake_A.detach())
        loss_D_fake = criterion_GAN(pred_fake, torch.zeros_like(pred_fake))

        # 总损失
        loss_D_A_P = (loss_D_real + loss_D_fake) * 0.5
        loss_D_A_P.backward()
        optimizer_D_A_P.step()
        
        if i % 10 == 0:
            print(f'Epoch: {epoch}, Batch: {i}, Sample: {i*BATCH_SIZE}, Loss_G: {loss_G.item()}, Loss_D_A: {loss_D_A.item()}')

    # with torch.no_grad():
    #     # 使用测试集中的数据生成图像
    #     for i, (real_A, real_B) in enumerate(test_dataloader):
    #         real_A = real_A.to(device)
    #         fake_B = G_AB(real_A)
    #         vutils.save_image(fake_B, f'{output_dir}/fake_B_epoch_{epoch}_batch_{i}.png', normalize=True)
        


Epoch: 0, Batch: 0, Sample: 0, Loss_G: 2.5264790058135986, Loss_D_A: 0.001318928087130189
Epoch: 0, Batch: 10, Sample: 320, Loss_G: 2.287508249282837, Loss_D_A: 0.0007821845938451588
Epoch: 0, Batch: 20, Sample: 640, Loss_G: 2.728877067565918, Loss_D_A: 0.001232016016729176
Epoch: 0, Batch: 30, Sample: 960, Loss_G: 2.860361099243164, Loss_D_A: 0.0008277383749373257
Epoch: 0, Batch: 40, Sample: 1280, Loss_G: 2.3529469966888428, Loss_D_A: 0.0007132506580092013
Epoch: 0, Batch: 50, Sample: 1600, Loss_G: 2.566781997680664, Loss_D_A: 0.0007759677828289568
Epoch: 0, Batch: 60, Sample: 1920, Loss_G: 2.197385787963867, Loss_D_A: 0.0007282739970833063
Epoch: 0, Batch: 70, Sample: 2240, Loss_G: 2.3435182571411133, Loss_D_A: 0.0008177623385563493
Epoch: 0, Batch: 80, Sample: 2560, Loss_G: 2.4956488609313965, Loss_D_A: 0.0006511450046673417
Epoch: 0, Batch: 90, Sample: 2880, Loss_G: 2.633295774459839, Loss_D_A: 0.0008009803132154047
Epoch: 0, Batch: 100, Sample: 3200, Loss_G: 2.741966724395752, Lo

In [122]:
save_model_path='models'
checkpoint_path = os.path.join(save_model_path, "Simple_CNN2.ckpt")
# checkpoint_path = os.path.join(save_model_path, "Cycle_GAN_Monet2Photo_PerceptualLoss2.ckpt")
torch.save(G_AB.state_dict(), checkpoint_path)
print("Model saved at %s" % checkpoint_path)

Model saved at models\Simple_CNN.ckpt


In [None]:
G_AB = Generator(input_nc=3, output_nc=3).to(device)

In [200]:
G_AB.load_state_dict(torch.load("models\Cycle_GAN_Human2Dog_PerceptualLoss2_id.ckpt"))
G_AB.eval()

RuntimeError: Error(s) in loading state_dict for Generator:
	Unexpected key(s) in state_dict: "residual_blocks.6.conv1.weight", "residual_blocks.6.conv2.weight", "residual_blocks.7.conv1.weight", "residual_blocks.7.conv2.weight", "residual_blocks.8.conv1.weight", "residual_blocks.8.conv2.weight". 
	size mismatch for downsample_blocks.0.conv.weight: copying a param with shape torch.Size([128, 64, 3, 3]) from checkpoint, the shape in current model is torch.Size([16, 64, 3, 3]).
	size mismatch for downsample_blocks.1.conv.weight: copying a param with shape torch.Size([256, 128, 3, 3]) from checkpoint, the shape in current model is torch.Size([2, 16, 3, 3]).
	size mismatch for residual_blocks.0.conv1.weight: copying a param with shape torch.Size([256, 256, 3, 3]) from checkpoint, the shape in current model is torch.Size([2, 2, 3, 3]).
	size mismatch for residual_blocks.0.conv2.weight: copying a param with shape torch.Size([256, 256, 3, 3]) from checkpoint, the shape in current model is torch.Size([2, 2, 3, 3]).
	size mismatch for residual_blocks.1.conv1.weight: copying a param with shape torch.Size([256, 256, 3, 3]) from checkpoint, the shape in current model is torch.Size([2, 2, 3, 3]).
	size mismatch for residual_blocks.1.conv2.weight: copying a param with shape torch.Size([256, 256, 3, 3]) from checkpoint, the shape in current model is torch.Size([2, 2, 3, 3]).
	size mismatch for residual_blocks.2.conv1.weight: copying a param with shape torch.Size([256, 256, 3, 3]) from checkpoint, the shape in current model is torch.Size([2, 2, 3, 3]).
	size mismatch for residual_blocks.2.conv2.weight: copying a param with shape torch.Size([256, 256, 3, 3]) from checkpoint, the shape in current model is torch.Size([2, 2, 3, 3]).
	size mismatch for residual_blocks.3.conv1.weight: copying a param with shape torch.Size([256, 256, 3, 3]) from checkpoint, the shape in current model is torch.Size([2, 2, 3, 3]).
	size mismatch for residual_blocks.3.conv2.weight: copying a param with shape torch.Size([256, 256, 3, 3]) from checkpoint, the shape in current model is torch.Size([2, 2, 3, 3]).
	size mismatch for residual_blocks.4.conv1.weight: copying a param with shape torch.Size([256, 256, 3, 3]) from checkpoint, the shape in current model is torch.Size([2, 2, 3, 3]).
	size mismatch for residual_blocks.4.conv2.weight: copying a param with shape torch.Size([256, 256, 3, 3]) from checkpoint, the shape in current model is torch.Size([2, 2, 3, 3]).
	size mismatch for residual_blocks.5.conv1.weight: copying a param with shape torch.Size([256, 256, 3, 3]) from checkpoint, the shape in current model is torch.Size([2, 2, 3, 3]).
	size mismatch for residual_blocks.5.conv2.weight: copying a param with shape torch.Size([256, 256, 3, 3]) from checkpoint, the shape in current model is torch.Size([2, 2, 3, 3]).
	size mismatch for upsample_blocks.0.conv.weight: copying a param with shape torch.Size([256, 128, 3, 3]) from checkpoint, the shape in current model is torch.Size([2, 16, 3, 3]).
	size mismatch for upsample_blocks.1.conv.weight: copying a param with shape torch.Size([128, 64, 3, 3]) from checkpoint, the shape in current model is torch.Size([16, 64, 3, 3]).

In [190]:
G_AB.eval()

Generator(
  (initial_conv): Sequential(
    (0): Conv2d(3, 64, kernel_size=(7, 7), stride=(1, 1), padding=(3, 3), bias=False)
    (1): InstanceNorm2d(64, eps=1e-05, momentum=0.1, affine=False, track_running_stats=False)
    (2): ReLU(inplace=True)
  )
  (downsample_blocks): ModuleList(
    (0): DownsampleBlock(
      (conv): Conv2d(64, 16, kernel_size=(3, 3), stride=(2, 2), padding=(1, 1), bias=False)
      (norm): InstanceNorm2d(16, eps=1e-05, momentum=0.1, affine=False, track_running_stats=False)
      (relu): ReLU(inplace=True)
    )
    (1): DownsampleBlock(
      (conv): Conv2d(16, 3, kernel_size=(3, 3), stride=(2, 2), padding=(1, 1), bias=False)
      (norm): InstanceNorm2d(3, eps=1e-05, momentum=0.1, affine=False, track_running_stats=False)
      (relu): ReLU(inplace=True)
    )
  )
  (residual_blocks): Sequential(
    (0): ResidualBlock(
      (conv1): Conv2d(3, 3, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
      (norm1): InstanceNorm2d(3, eps=1e-05, moment

In [227]:
def load_image(image_path):
    image = Image.open(image_path).convert('RGB')
    transform = transforms.Compose([
        transforms.Resize((256, 256)),  # 根据你的模型调整尺寸
        transforms.ToTensor(),
        transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]),
    ])
    image = transform(image).unsqueeze(0)  # 添加批次维度
    return image

In [228]:
def predict(model, image1,image2=None):
    # image1 = load_image(image_path1).to(device)
    # image2 = load_image(image_path2).to(device)

    model= model.to(device)
    for i in range(11):
        with torch.no_grad():  # 不计算梯度
            image=image1*(i/10)+image2*(1-i/10)
            output = model(image)
            # vutils.save_image(output, f'test18_3 {i}.png', normalize=True)
            
            output = output.cpu().detach()  # 将图像转移到CPU并脱离计算图
            
            output = (output + 1) / 2  # 将 [-1, 1] 范围调整为 [0, 1]
            output = output.squeeze(0)
            
            image = output.permute(1, 2, 0)  # CHW -> HWC
            image = (image.numpy() * 255).astype(np.uint8)  # 转换为0-255范围的整数
            image_pil = Image.fromarray(image)
            
            # 保存图像
            image_filename = f'test19_3 {i}.png'
            image_pil.save(image_filename)
    return output

In [229]:
image1 = load_image('human_dog\\testA\\200600.jpg').to(device)
image2 = load_image('human_dog\\testB\\flickr_dog_000043.jpg').to(device)

# image1_feature = resnet1(image1)
# image1_feature = image1_feature.unsqueeze(1).unsqueeze(1).expand(-1,-1,256,256)
# image1_with_feature = torch.cat([image1,image1_feature],dim=1)

# image2_feature = resnet1(image2)
# image2_feature = image2_feature.unsqueeze(1).unsqueeze(1).expand(-1,-1,256,256)
# image2_with_feature = torch.cat([image2,image2_feature],dim=1)

output_image = predict(G_AB,image1 ,image2)

# output_image = output_image - output_image.min()
# output_image = output_image / output_image.max()

# output_image = output_image.squeeze()  # 假设输出是图像格式，调整通道
# output_image = output_image.permute(1,2,0)
# output_image=output_image.to('cpu')
# # 步骤 5: 可视化输出图像
# plt.imshow(output_image.numpy())
# plt.title('Output Image')
# plt.show()

In [15]:
G_AB.load_state_dict(torch.load("models\Simple_CNN.ckpt"))
G_AB.eval()

Generator(
  (initial_conv): Sequential(
    (0): Conv2d(3, 64, kernel_size=(7, 7), stride=(1, 1), padding=(3, 3), bias=False)
    (1): InstanceNorm2d(64, eps=1e-05, momentum=0.1, affine=False, track_running_stats=False)
    (2): ReLU(inplace=True)
  )
  (downsample_blocks): ModuleList(
    (0): DownsampleBlock(
      (conv): Conv2d(64, 32, kernel_size=(3, 3), stride=(2, 2), padding=(1, 1), bias=False)
      (norm): InstanceNorm2d(32, eps=1e-05, momentum=0.1, affine=False, track_running_stats=False)
      (relu): ReLU(inplace=True)
    )
    (1): DownsampleBlock(
      (conv): Conv2d(32, 2, kernel_size=(3, 3), stride=(2, 2), padding=(1, 1), bias=False)
      (norm): InstanceNorm2d(2, eps=1e-05, momentum=0.1, affine=False, track_running_stats=False)
      (relu): ReLU(inplace=True)
    )
  )
  (residual_blocks): Sequential(
    (0): ResidualBlock(
      (conv1): Conv2d(2, 2, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
      (norm1): InstanceNorm2d(2, eps=1e-05, moment

In [16]:
def load_image(image_path):
    image = Image.open(image_path).convert('RGB')
    transform = transforms.Compose([
        transforms.Resize((256, 256)),  # 根据你的模型调整尺寸
        transforms.ToTensor(),
        transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]),
    ])
    image = transform(image).unsqueeze(0)  # 添加批次维度
    return image

In [17]:
import torch
from torchvision.models import vgg19
from torch.nn.functional import mse_loss


image1 = load_image('human_dog\\testA\\200601.jpg').to(device)
image2 = load_image('human_dog\\testB\\flickr_dog_000043.jpg').to(device)
perceptual_distance = PerceptualLoss()

def calculate_ppl(model, device, image1,image2, steps=10):
    distances = []

    model= model.to(device)

    path_length = 0
    for t in torch.linspace(0, 1, steps=steps):
        with torch.no_grad():  # 不计算梯度
            z_t = (1 - t) * image1 + t * image2
            
            img_t = model(z_t)
            img_t_plus_1 = model((1 - t + 1/steps) * image1 + (t + 1/steps) * image2)

            d = perceptual_distance(img_t, img_t_plus_1)
            
            path_length += d.item()

    distances.append(path_length / (steps - 1))

    ppl = torch.tensor(distances).mean().item()
    return ppl


In [18]:
calculate_ppl(G_AB,device,image1 ,image2 )

0.016179127618670464