In [10]:
import torch
import torch.nn as nn
import torch.optim as optim
import torchvision.datasets as dset
import torchvision.transforms as transforms
import torchvision.utils as vutils
import numpy as np
import matplotlib.pyplot as plt
import os

In [None]:

# --- 1. setup parameters ---

# Seed torch's global RNG so repeated runs start from the same random state.
manualSeed = 999
torch.manual_seed(manualSeed)

# Root folder handed to ImageFolder; images must live in sub-folders of it.
dataroot = "archive (17)/" 
# Number of DataLoader worker processes.
workers = 2
# Mini-batch size used during training.
batch_size = 128
# Training resolution (although anime faces are 96x96, 64x64 is used to speed up training).
image_size = 64 
# Number of channels in the training images; 3 for color images.
nc = 3
# Size of the latent vector z (i.e. the generator's input channels).
nz = 100
# Base feature-map width of the generator.
ngf = 64
# Base feature-map width of the discriminator.
ndf = 64

# Number of passes over the dataset.
num_epochs = 50 
# Learning rate shared by both optimizers.
lr = 0.0002
# beta1 hyper-parameter of the Adam optimizers (used for both G and D).
beta1 = 0.5
# Number of GPUs to use; 0 forces CPU even when CUDA is available.
ngpu = 1

# Make sure the output folder exists. exist_ok=True avoids the
# check-then-create race of a separate os.path.exists() test and is a no-op
# when the folder is already there.
os.makedirs('dcgan_anime_results', exist_ok=True)

In [None]:
# --- 2. dataset, data loader and device ---
# ImageFolder is torchvision's generic image dataset: it picks up every image
# stored in the sub-folders of the given root directory.

# Preprocessing applied to each image, in order: resize, center-crop to
# image_size, convert to a tensor, then normalize the three channels with
# mean 0.5 and std 0.5.
transform = transforms.Compose([
    transforms.Resize(size=image_size),
    transforms.CenterCrop(size=image_size),
    transforms.ToTensor(),
    transforms.Normalize(mean=(0.5,) * 3, std=(0.5,) * 3),
])

dataset = dset.ImageFolder(dataroot, transform=transform)
dataloader = torch.utils.data.DataLoader(
    dataset=dataset,
    batch_size=batch_size,
    shuffle=True,
    num_workers=workers,
)

# Use the first CUDA device when one is available and GPUs were requested;
# otherwise fall back to the CPU.
if torch.cuda.is_available() and ngpu > 0:
    device = torch.device("cuda:0")
else:
    device = torch.device("cpu")
print("Using device:", device)


Using device: cuda:0


In [None]:
# --- 3. weight initialization shared by the Generator and Discriminator ---
# The DCGAN paper specifies that all model weights be initialized from a normal distribution with mean 0 and standard deviation 0.02.
def weights_init(m):
    """Initialize one module in place with the DCGAN scheme.

    Intended to be passed to ``nn.Module.apply``, which calls it on every
    submodule. Dispatch is by class name:

    * name contains ``Conv``: weight ~ Normal(mean=0.0, std=0.02)
    * name contains ``BatchNorm``: weight ~ Normal(mean=1.0, std=0.02),
      bias set to 0
    * anything else: left untouched

    Modules that match by name but carry no ``weight``/``bias`` parameter
    (e.g. ``BatchNorm2d(affine=False)``, or a user-defined container whose
    class name happens to contain ``Conv``) are skipped instead of raising
    ``AttributeError``.

    Args:
        m: the module to initialize.
    """
    classname = m.__class__.__name__
    # getattr with a default covers both "attribute missing" and
    # "parameter registered as None".
    weight = getattr(m, 'weight', None)
    bias = getattr(m, 'bias', None)
    if classname.find('Conv') != -1:
        if weight is not None:
            nn.init.normal_(weight.data, 0.0, 0.02)
    elif classname.find('BatchNorm') != -1:
        if weight is not None:
            nn.init.normal_(weight.data, 1.0, 0.02)
        if bias is not None:
            nn.init.constant_(bias.data, 0)

In [None]:
# --- 4. Generator ---
class Generator(nn.Module):
    """DCGAN generator built from a stack of transposed convolutions.

    Reads the module-level sizes ``nz`` (latent channels), ``ngf`` (base
    feature-map width) and ``nc`` (output channels). For an input of shape
    (N, nz, 1, 1) the output has shape (N, nc, 64, 64) with values in
    [-1, 1] (final Tanh).

    Args:
        ngpu: stored on the instance as ``self.ngpu``; not otherwise used
            by this class.
    """

    def __init__(self, ngpu):
        super().__init__()
        self.ngpu = ngpu

        # One row per "ConvTranspose2d -> BatchNorm2d -> ReLU" stage:
        # (in_channels, out_channels, stride, padding). Kernel size is 4 throughout.
        stages = (
            (nz,      ngf * 8, 1, 0),  # Z            -> (ngf*8) x 4 x 4
            (ngf * 8, ngf * 4, 2, 1),  # (ngf*8) x 4  -> (ngf*4) x 8 x 8
            (ngf * 4, ngf * 2, 2, 1),  # (ngf*4) x 8  -> (ngf*2) x 16 x 16
            (ngf * 2, ngf,     2, 1),  # (ngf*2) x 16 -> (ngf) x 32 x 32
        )

        layers = []
        for in_ch, out_ch, stride, pad in stages:
            layers.append(nn.ConvTranspose2d(in_ch, out_ch, 4, stride, pad, bias=False))
            layers.append(nn.BatchNorm2d(out_ch))
            layers.append(nn.ReLU(True))

        # Output stage: (ngf) x 32 x 32 -> (nc) x 64 x 64, squashed by Tanh.
        layers.append(nn.ConvTranspose2d(ngf, nc, 4, 2, 1, bias=False))
        layers.append(nn.Tanh())

        self.main = nn.Sequential(*layers)

    def forward(self, input):
        """Run ``input`` through the layer stack and return the result."""
        return self.main(input)

In [None]:
# --- 5. Discriminator ---
class Discriminator(nn.Module):
    """DCGAN discriminator built from a stack of strided convolutions.

    Reads the module-level sizes ``nc`` (input channels) and ``ndf`` (base
    feature-map width). For an input of shape (N, nc, 64, 64) the output has
    shape (N, 1, 1, 1) with values in [0, 1] (final Sigmoid).

    Args:
        ngpu: stored on the instance as ``self.ngpu``; not otherwise used
            by this class.
    """

    def __init__(self, ngpu):
        super().__init__()
        self.ngpu = ngpu

        # Input stage (no batch norm): (nc) x 64 x 64 -> (ndf) x 32 x 32
        layers = [
            nn.Conv2d(nc, ndf, 4, 2, 1, bias=False),
            nn.LeakyReLU(0.2, inplace=True),
        ]

        # Three downsampling stages, each doubling the channel count and
        # halving the spatial size:
        #   (ndf) x 32 x 32 -> (ndf*2) x 16 x 16 -> (ndf*4) x 8 x 8 -> (ndf*8) x 4 x 4
        for mult in (1, 2, 4):
            layers += [
                nn.Conv2d(ndf * mult, ndf * mult * 2, 4, 2, 1, bias=False),
                nn.BatchNorm2d(ndf * mult * 2),
                nn.LeakyReLU(0.2, inplace=True),
            ]

        # Output stage: (ndf*8) x 4 x 4 -> one value per image, squashed by Sigmoid.
        layers += [
            nn.Conv2d(ndf * 8, 1, 4, 1, 0, bias=False),
            nn.Sigmoid(),
        ]

        self.main = nn.Sequential(*layers)

    def forward(self, input):
        """Run ``input`` through the layer stack and return the result."""
        return self.main(input)

In [None]:
# --- 6. build the networks, the loss function and the optimizers ---

# Target values used with the BCE loss below: 1 for real images, 0 for fakes.
real_label = 1.
fake_label = 0.

# Binary cross-entropy on the discriminator's sigmoid output.
criterion = nn.BCELoss()

# Generator: construct, move to the device, then apply the custom weight init
# (.to() and .apply() both return the module itself).
netG = Generator(ngpu).to(device).apply(weights_init)

# Discriminator: same construction recipe.
netD = Discriminator(ngpu).to(device).apply(weights_init)

# A fixed batch of 64 latent vectors, reused during training so that
# generator progress can be compared on identical inputs.
fixed_noise = torch.randn(64, nz, 1, 1, device=device)

# One Adam optimizer per network, sharing lr and beta1.
optimizerD = optim.Adam(netD.parameters(), lr=lr, betas=(beta1, 0.999))
optimizerG = optim.Adam(netG.parameters(), lr=lr, betas=(beta1, 0.999))


In [None]:
# --- 7. training loop ---
# Each mini-batch performs one discriminator update followed by one generator
# update. Losses are recorded every iteration; a grid of generator samples on
# fixed_noise is saved periodically.
print("Starting Training Loop...")
# Image grids produced from fixed_noise (one per snapshot).
img_list = []
# Per-iteration generator / discriminator loss values.
G_losses = []
D_losses = []
# Global iteration counter; keeps increasing across epochs.
iters = 0

for epoch in range(num_epochs):
    for i, data in enumerate(dataloader, 0):
        
        ############################
        # (1) Update D network: maximize log(D(x)) + log(1 - D(G(z)))
        ###########################
        ## Train with all-real batch
        netD.zero_grad()
        # data[0] is the image tensor of the batch; other elements of `data` are not used.
        real_cpu = data[0].to(device)
        # Take the batch size from the tensor itself rather than assuming batch_size.
        b_size = real_cpu.size(0)
        label = torch.full((b_size,), real_label, dtype=torch.float, device=device)
        # Flatten the discriminator output to 1-D so it lines up with `label`.
        output = netD(real_cpu).view(-1)
        errD_real = criterion(output, label)
        errD_real.backward()
        # Mean discriminator output on the real batch (for logging).
        D_x = output.mean().item()

        ## Train with all-fake batch
        noise = torch.randn(b_size, nz, 1, 1, device=device)
        fake = netG(noise)
        # Reuse the label tensor in place, now filled with the fake target.
        label.fill_(fake_label)
        output = netD(fake.detach()).view(-1) # detach so that no gradients flow back into the generator
        errD_fake = criterion(output, label)
        # Gradients from this backward pass accumulate on top of the real-batch ones.
        errD_fake.backward()
        # Mean discriminator output on fakes before the D update (for logging).
        D_G_z1 = output.mean().item()
        # Combined loss is used only for logging; backward() was already called on each part.
        errD = errD_real + errD_fake
        optimizerD.step()

        ############################
        # (2) Update G network: maximize log(D(G(z)))
        ###########################
        netG.zero_grad()
        label.fill_(real_label)  # fake labels are real for generator cost
        # Second pass through the just-updated D, this time without detach so
        # gradients reach the generator.
        output = netD(fake).view(-1)
        errG = criterion(output, label)
        errG.backward()
        # Mean discriminator output on fakes after the D update (for logging).
        D_G_z2 = output.mean().item()
        optimizerG.step()
        
        # save losses for plotting later
        G_losses.append(errG.item())
        D_losses.append(errD.item())
        
        # print training stats every 50 mini-batches
        if i % 50 == 0:
            print('[%d/%d][%d/%d]\tLoss_D: %.4f\tLoss_G: %.4f\tD(x): %.4f\tD(G(z)): %.4f / %.4f'
                  % (epoch, num_epochs, i, len(dataloader),
                     errD.item(), errG.item(), D_x, D_G_z1, D_G_z2))
        
        # save G's output on fixed_noise every 500 iterations, and once more on
        # the very last batch of the last epoch
        if (iters % 500 == 0) or ((epoch == num_epochs-1) and (i == len(dataloader)-1)):
            # NOTE(review): no netG.eval() call is visible in this cell, so this
            # forward pass appears to run with netG in whatever mode it is already in.
            with torch.no_grad():
                # Rebinds `fake`; the training batch of fakes is no longer needed here.
                fake = netG(fixed_noise).detach().cpu()
            img_list.append(vutils.make_grid(fake, padding=2, normalize=True))
            # The target folder must already exist.
            vutils.save_image(img_list[-1], f"dcgan_anime_results/fake_faces_iter_{iters}.png")
            
        iters += 1


Starting Training Loop...
[0/50][0/497]	Loss_D: 2.1552	Loss_G: 3.6805	D(x): 0.4000	D(G(z)): 0.5920 / 0.0384
[0/50][50/497]	Loss_D: 1.8067	Loss_G: 8.8632	D(x): 0.4744	D(G(z)): 0.0000 / 0.0010
[0/50][100/497]	Loss_D: 0.8310	Loss_G: 8.7369	D(x): 0.8613	D(G(z)): 0.3732 / 0.0004
[0/50][150/497]	Loss_D: 0.6156	Loss_G: 11.5646	D(x): 0.9488	D(G(z)): 0.2882 / 0.0000
[0/50][200/497]	Loss_D: 0.4400	Loss_G: 5.5249	D(x): 0.7943	D(G(z)): 0.0356 / 0.0067
[0/50][250/497]	Loss_D: 0.5004	Loss_G: 7.0600	D(x): 0.8185	D(G(z)): 0.1350 / 0.0047
[0/50][300/497]	Loss_D: 0.5579	Loss_G: 5.2652	D(x): 0.8402	D(G(z)): 0.2383 / 0.0082
[0/50][350/497]	Loss_D: 1.1653	Loss_G: 5.5626	D(x): 0.5234	D(G(z)): 0.0065 / 0.0109
[0/50][400/497]	Loss_D: 2.6974	Loss_G: 11.6198	D(x): 0.9540	D(G(z)): 0.8445 / 0.0000
[0/50][450/497]	Loss_D: 0.8177	Loss_G: 9.4459	D(x): 0.8614	D(G(z)): 0.4040 / 0.0002
[1/50][0/497]	Loss_D: 0.7941	Loss_G: 9.0169	D(x): 0.5975	D(G(z)): 0.0009 / 0.0003
[1/50][50/497]	Loss_D: 0.3997	Loss_G: 5.4198	D(x): 0.

KeyboardInterrupt: 