In [20]:
import torch
import torch.nn as nn
import torch.optim as optim
from torchvision import datasets, transforms
from torch.utils.data import DataLoader
import torch
from torch import nn
import torchvision.utils as vutils
from torch.optim import Adam
from torch.utils.data import DataLoader,Dataset
import glob
from torchvision import transforms, datasets
from torchvision.utils import save_image
import os
import random
from PIL import Image
import itertools
import matplotlib.pyplot as plt
from torchvision.models import vgg19, resnet18

In [21]:
# Run on the GPU when one is available, otherwise fall back to the CPU so the
# notebook still executes end-to-end on machines without CUDA.
device = 'cuda' if torch.cuda.is_available() else 'cpu'

In [22]:
# Preprocessing / augmentation pipeline applied to every loaded PIL image.
transform = transforms.Compose(
    [
        # Resize to int(64 * 1.12) == 71 with bicubic resampling, leaving a
        # small margin for the random crop that follows.
        transforms.Resize(size=int(64 * 1.12), interpolation=Image.BICUBIC),
        # Random 64x64 window plus random mirroring as light augmentation.
        transforms.RandomCrop(size=64),
        transforms.RandomHorizontalFlip(),
        # PIL image -> float tensor, then (v - 0.5) / 0.5 on each of 3 channels.
        transforms.ToTensor(),
        transforms.Normalize(mean=(0.5,) * 3, std=(0.5,) * 3),
    ]
)

In [23]:
# class ImageDataset(Dataset):
#     def __init__(self, root='data', transforms_=None, unaligned=False, mode="train"):          ## (root = "./datasets/facades", unaligned=True:非对其数据)
#         self.transform = transforms_                             ## transform变为tensor数据
#         self.unaligned = unaligned

#         self.files_A = sorted(glob.glob(os.path.join(root, '%sA' % mode) + "/*.*"))     ## "./datasets/facades/trainA/*.*"
#         self.files_B = sorted(glob.glob(os.path.join(root, '%sB' % mode) + "/*.*"))     ## "./datasets/facades/trainB/*.*"

#     def __getitem__(self, index):
#         image_A = Image.open(self.files_A[index % len(self.files_A)])                   ## 在A中取一张照片

#         if self.unaligned:                                                              ## 如果采用非配对数据，在B中随机取一张
#             image_B = Image.open(self.files_B[random.randint(0, len(self.files_B) - 1)])
#         else:
#             image_B = Image.open(self.files_B[index % len(self.files_B)])

#         # # 如果是灰度图，把灰度图转换为RGB图
#         # if image_A.mode != "RGB":
#         #     image_A = to_rgb(image_A)
#         # if image_B.mode != "RGB":
#         #     image_B = to_rgb(image_B)
        
#         # 把RGB图像转换为tensor图, 方便计算，返回字典数据
#         item_A = self.transform(image_A)
#         item_B = self.transform(image_B)
#         return item_A, item_B, "human", "dog"

#     ## 获取A,B数据的长度
#     def __len__(self):
#         return max(len(self.files_A), len(self.files_B))


class ImageDataset(Dataset):
    """Face/dog image dataset that yields ``(image, label)`` pairs.

    Faces are read from ``<root_dir>/<mode>A`` (label 0) and dogs from
    ``<root_dir>/<mode>B`` (label 1). Every ``__getitem__`` call flips a fair
    coin (via ``random.random()``) to decide which folder the sample is taken
    from, so the item returned for a given index is intentionally random.
    """

    # File extensions (compared case-insensitively) that count as images.
    IMAGE_EXTENSIONS = ('.png', '.jpg', '.jpeg')

    def __init__(self, root_dir, transform=None, mode='train'):
        """
        Args:
            root_dir: directory containing the ``{mode}A`` and ``{mode}B`` folders.
            transform: optional callable applied to each loaded PIL image.
            mode: folder-name prefix, e.g. ``'train'`` or ``'test'``.

        Raises:
            FileNotFoundError: if a folder is missing or contains no image files.
        """
        self.root_dir = root_dir
        self.transform = transform
        self.face_images = self._list_images(os.path.join(root_dir, f'{mode}A'))
        self.dog_images = self._list_images(os.path.join(root_dir, f'{mode}B'))

    @classmethod
    def _list_images(cls, directory):
        """Return the sorted image paths found directly inside ``directory``."""
        # os.listdir returns entries in arbitrary order; sorting makes the
        # index -> file mapping reproducible across runs and platforms.
        paths = sorted(
            os.path.join(directory, name)
            for name in os.listdir(directory)
            if name.lower().endswith(cls.IMAGE_EXTENSIONS)
        )
        if not paths:
            # Fail early: an empty list would otherwise surface later as a
            # ZeroDivisionError from ``idx % len(...)`` in __getitem__.
            raise FileNotFoundError(f'No image files found in {directory!r}')
        return paths

    def __len__(self):
        # Length of the larger folder; the smaller one wraps around via modulo.
        return max(len(self.face_images), len(self.dog_images))

    def __getitem__(self, idx):
        """Load one randomly chosen face (label 0) or dog (label 1) image."""
        if random.random() > 0.5:
            img_path = self.face_images[idx % len(self.face_images)]
            label = 0
        else:
            img_path = self.dog_images[idx % len(self.dog_images)]
            label = 1

        # Force 3 channels so grayscale / RGBA files match the RGB pipeline.
        image = Image.open(img_path).convert('RGB')

        if self.transform:
            image = self.transform(image)

        # Return the label as a tensor as well so batches collate cleanly.
        label = torch.tensor(label)

        return image, label

In [24]:
BATCH_SIZE = 32

# Training batches: reshuffled every epoch.
# Point root_dir at the folder that holds your own trainA/trainB images.
train_dataloader = DataLoader(
    dataset=ImageDataset(root_dir="human_dog_colab", transform=transform, mode="train"),
    batch_size=BATCH_SIZE,
    shuffle=True,
)

# Held-out batches: same root and transform, read from testA/testB in fixed order.
test_dataloader = DataLoader(
    dataset=ImageDataset(root_dir="human_dog_colab", transform=transform, mode="test"),
    batch_size=BATCH_SIZE,
    shuffle=False,
)


In [25]:
import torch
import torch.nn as nn

class ResidualBlock(nn.Module):
    """Residual unit: conv-norm-ReLU-conv-norm with an identity skip connection.

    Both convolutions are 3x3 with padding 1 and keep ``in_features`` channels,
    so the output has exactly the same shape as the input.
    """

    def __init__(self, in_features):
        super().__init__()
        channels = in_features

        def conv_norm():
            # Bias is omitted on the convolution that feeds InstanceNorm.
            return [
                nn.Conv2d(channels, channels, kernel_size=3, padding=1, bias=False),
                nn.InstanceNorm2d(channels),
            ]

        layers = conv_norm() + [nn.ReLU(inplace=True)] + conv_norm()
        self.block = nn.Sequential(*layers)

    def forward(self, x):
        """Return ``x`` plus the learned residual computed from ``x``."""
        residual = self.block(x)
        return x + residual

class Generator(nn.Module):
    """Label-conditioned, fully convolutional image-to-image generator.

    The class label is embedded, broadcast over the spatial grid and
    concatenated to the input as extra channels. The network is then:
    7x7 stem -> two stride-2 downsampling convs -> residual blocks ->
    two stride-2 transposed convs -> 7x7 output conv with Tanh.

    Args:
        input_nc: number of channels of the input tensor.
        output_nc: number of channels of the generated image.
        n_residual_blocks: how many ``ResidualBlock`` modules sit at the bottleneck.
        num_classes: number of distinct labels the embedding can represent.
        label_dim: width of the label embedding, i.e. how many conditioning
            channels are appended to the input (default 50, as before).
    """

    def __init__(self, input_nc=3, output_nc=3, n_residual_blocks=6, num_classes=2, label_dim=50):
        super().__init__()
        # Embedding layer for the conditional input.
        self.label_emb = nn.Embedding(num_classes, label_dim)

        # Stem: consumes the image channels plus the broadcast label channels.
        # Its input width is derived from label_dim so the two always agree.
        self.initial = nn.Sequential(
            nn.Conv2d(input_nc + label_dim, 64, kernel_size=7, padding=3, bias=False),
            nn.InstanceNorm2d(64),
            nn.ReLU(inplace=True)
        )

        # Downsampling: 64 -> 128 -> 256 channels, halving H and W each time.
        model = []
        in_features = 64
        out_features = in_features * 2
        for _ in range(2):
            model += [
                nn.Conv2d(in_features, out_features, kernel_size=3, stride=2, padding=1, bias=False),
                nn.InstanceNorm2d(out_features),
                nn.ReLU(inplace=True)
            ]
            in_features = out_features
            out_features = in_features * 2

        # Residual blocks at the bottleneck resolution.
        for _ in range(n_residual_blocks):
            model += [ResidualBlock(in_features)]

        # Upsampling: 256 -> 128 -> 64 channels, doubling H and W each time.
        out_features = in_features // 2
        for _ in range(2):
            model += [
                nn.ConvTranspose2d(in_features, out_features, kernel_size=3, stride=2, padding=1, output_padding=1, bias=False),
                nn.InstanceNorm2d(out_features),
                nn.ReLU(inplace=True)
            ]
            in_features = out_features
            out_features = in_features // 2

        # Output layer: use the tracked channel count (64 here) instead of a
        # second hard-coded literal; Tanh bounds the result to [-1, 1].
        model += [nn.Conv2d(in_features, output_nc, kernel_size=7, padding=3), nn.Tanh()]

        self.model = nn.Sequential(*model)

    def forward(self, x, labels):
        """Generate an image from ``x`` conditioned on ``labels``.

        Args:
            x: tensor of shape ``(N, input_nc, H, W)``.
            labels: 1-D integer tensor of shape ``(N,)`` with class indices.

        Returns:
            Tensor with ``output_nc`` channels and values in ``[-1, 1]``. The
            spatial size equals ``(H, W)`` when both are divisible by 4.
        """
        # Embed the label and broadcast it to the spatial size of the input.
        label_embedding = self.label_emb(labels).unsqueeze(2).unsqueeze(3)
        label_embedding = label_embedding.expand(-1, -1, x.size(2), x.size(3))
        # Concatenate label channels and image along the channel dimension.
        x = torch.cat((x, label_embedding), 1)
        x = self.initial(x)
        return self.model(x)


In [26]:
class Discriminator(nn.Module):
    """Label-conditioned convolutional discriminator.

    The class label is embedded into ``input_nc`` channels, broadcast over the
    image grid and concatenated to the image. Four stride-2 convolutions and a
    final 4x4 convolution produce a single-channel patch map, which is pooled
    to 5x5, flattened and reduced to one probability per sample.

    The conv stack yields a 5x5 map for 128x128 inputs (pooling is then a
    no-op) and a 1x1 map for 64x64 inputs. Without the pooling step the fixed
    ``nn.Linear(25, 1)`` head only accepted 128x128 images and raised a shape
    error on the 64x64 images used elsewhere in this notebook. Inputs smaller
    than 64x64 are not supported because the last 4x4 convolution has no padding.

    Args:
        input_nc: number of image channels (also the label-embedding width).
        num_classes: number of distinct labels the embedding can represent.
    """

    def __init__(self, input_nc=3, num_classes=2):
        super().__init__()
        self.label_emb = nn.Embedding(num_classes, input_nc)

        self.model = nn.Sequential(
            # Input layer: image channels + label channels, no normalisation.
            nn.Conv2d(input_nc + input_nc, 64, kernel_size=4, stride=2, padding=1),
            nn.LeakyReLU(0.2, inplace=True),

            # First downsampling stage.
            nn.Conv2d(64, 128, kernel_size=4, stride=2, padding=1, bias=False),
            nn.InstanceNorm2d(128),
            nn.LeakyReLU(0.2, inplace=True),

            # Second downsampling stage.
            nn.Conv2d(128, 256, kernel_size=4, stride=2, padding=1, bias=False),
            nn.InstanceNorm2d(256),
            nn.LeakyReLU(0.2, inplace=True),

            # Third downsampling stage.
            nn.Conv2d(256, 512, kernel_size=4, stride=2, padding=1, bias=False),
            nn.InstanceNorm2d(512),
            nn.LeakyReLU(0.2, inplace=True),

            # Final layer: single-channel patch scores, no normalisation.
            nn.Conv2d(512, 1, kernel_size=4, stride=1, padding=0)
        )
        # Parameter-free pooling that brings any patch map to the 5x5 grid the
        # linear head expects, making the head independent of the image size.
        self.pool = nn.AdaptiveAvgPool2d(5)
        self.linear = nn.Linear(25, 1)
        self.sigmoid = nn.Sigmoid()

    def forward(self, img, labels):
        """Score how real ``img`` looks given ``labels``.

        Args:
            img: tensor of shape ``(N, input_nc, H, W)`` with ``H, W >= 64``.
            labels: 1-D integer tensor of shape ``(N,)`` with class indices.

        Returns:
            Tensor of shape ``(N, 1)`` with probabilities in ``[0, 1]``.
        """
        # Embed the labels and broadcast them to the image's spatial size.
        label_embedding = self.label_emb(labels).unsqueeze(2).unsqueeze(3)
        label_embedding = label_embedding.expand(-1, -1, img.size(2), img.size(3))
        # Append the label channels to the image channels.
        img = torch.cat((img, label_embedding), 1)
        img = self.model(img)
        img = self.pool(img)
        img = torch.flatten(img, start_dim=1)
        img = self.linear(img)
        img = self.sigmoid(img)
        return img

In [27]:
# Build both networks (generator first, then discriminator) on the target device.
generator, discriminator = Generator().to(device), Discriminator().to(device)

# One Adam optimiser per network, with identical hyper-parameters.
optimizer_G, optimizer_D = (
    optim.Adam(net.parameters(), lr=0.0002, betas=(0.5, 0.999))
    for net in (generator, discriminator)
)

# Binary cross-entropy on the discriminator's sigmoid output.
adversarial_loss = nn.BCELoss()


In [28]:
# Adversarial training: one discriminator update followed by one generator
# update for every batch.
for epoch in range(20):
    for i, (imgsA, labelsA) in enumerate(train_dataloader):
        imgsA = imgsA.to(device)
        labelsA = labelsA.to(device)

        # BCE targets, allocated directly on the training device.
        valid = torch.ones(imgsA.size(0), 1, device=device)
        fake = torch.zeros(imgsA.size(0), 1, device=device)

        # Train the discriminator.
        optimizer_D.zero_grad()
        real_loss = adversarial_loss(discriminator(imgsA, labelsA), valid)
        # Generator input: Gaussian noise with the same shape as the real
        # batch, so the fake images follow the size of the real ones instead
        # of a separately hard-coded 3x64x64.
        noise = torch.randn_like(imgsA)
        gen_imgs = generator(noise, labelsA)
        # detach(): the discriminator step must not backpropagate into the generator.
        fake_loss = adversarial_loss(discriminator(gen_imgs.detach(), labelsA), fake)
        d_loss = (real_loss + fake_loss) / 2
        d_loss.backward()
        optimizer_D.step()

        # Train the generator: it is rewarded when its fakes are scored as real.
        optimizer_G.zero_grad()
        g_loss = adversarial_loss(discriminator(gen_imgs, labelsA), valid)
        g_loss.backward()
        optimizer_G.step()

        if i % 10 == 0:
            print(f'Epoch: {epoch}, Batch: {i}, Sample: {i*BATCH_SIZE}, Loss_G: {g_loss.item()}, d_loss: {d_loss.item()}')


Epoch: 0, Batch: 0, Sample: 0, Loss_G: 2.0980844497680664, d_loss: 0.7068885564804077
Epoch: 0, Batch: 10, Sample: 160, Loss_G: 3.3038153648376465, d_loss: 0.5538271069526672
Epoch: 0, Batch: 20, Sample: 320, Loss_G: 3.8670549392700195, d_loss: 0.506575345993042
Epoch: 0, Batch: 30, Sample: 480, Loss_G: 3.6204676628112793, d_loss: 1.0642335414886475
Epoch: 0, Batch: 40, Sample: 640, Loss_G: 2.079232931137085, d_loss: 0.6405194401741028
Epoch: 0, Batch: 50, Sample: 800, Loss_G: 2.1151273250579834, d_loss: 0.8064865469932556
Epoch: 0, Batch: 60, Sample: 960, Loss_G: 2.91995906829834, d_loss: 0.5950760841369629
Epoch: 0, Batch: 70, Sample: 1120, Loss_G: 2.012485980987549, d_loss: 0.5202176570892334
Epoch: 0, Batch: 80, Sample: 1280, Loss_G: 1.9626606702804565, d_loss: 0.7711570858955383
Epoch: 0, Batch: 90, Sample: 1440, Loss_G: 1.8230597972869873, d_loss: 0.5065494775772095
Epoch: 0, Batch: 100, Sample: 1600, Loss_G: 2.388469696044922, d_loss: 0.6479138731956482
Epoch: 0, Batch: 110, Sam

In [31]:
# Generate one sample for class 0 from 128x128 Gaussian noise and save it.
x = torch.randn(1, 3, 128, 128, device=device)
labels = torch.tensor([0], device=device)
# Inference only: skip autograd bookkeeping to save memory and time.
with torch.no_grad():
    output = generator(x, labels)
# NOTE(review): `i` is the batch index left over from the training loop, so this
# cell raises NameError unless the training cell has been run first.
vutils.save_image(output, f'test8 {i}.png', normalize=True)