# Deep Learning

# Tutorial 20: Generative Adversarial Network (GAN) 

In this tutorial, we will cover:

- Creating images with a GAN

Prerequisites:

- Python, PyTorch, Deep Learning Training, Stochastic Gradient Descent

My contact:

- Niklas Beuter (niklas.beuter@th-luebeck.de)

Course:

- Slides and notebooks will be available at https://lernraum.th-luebeck.de/course/view.php?id=5383

## Expected Outcomes
* Understand the architecture of a GAN, which consists of a generator and a discriminator
* Understand the difficulties of training a GAN

In [None]:
!pip install torch torchvision

## Download the Pokémon dataset from https://pokemondb.net/sprites

In [None]:
import os
import requests
from bs4 import BeautifulSoup

# Create the directory that receives the downloaded images
os.makedirs('data/pokemon_images', exist_ok=True)

# URL of the page listing the Pokémon sprites
url = 'https://pokemondb.net/sprites'

# Fetch the overview page. The timeout keeps the cell from hanging forever,
# and raise_for_status() stops early instead of parsing an HTTP error page.
response = requests.get(url, timeout=30)
response.raise_for_status()
soup = BeautifulSoup(response.content, 'html.parser')

# Collect the source URLs of all <img> tags that point at the sprite host
img_tags = soup.find_all('img')
img_urls = [
    img['src']
    for img in img_tags
    if 'src' in img.attrs
    and img['src'].startswith('https://img.pokemondb.net/sprites/')
]

# Download the images one by one
for img_url in img_urls:
    img_name = os.path.basename(img_url)
    img_path = os.path.join('data/pokemon_images', img_name)
    try:
        img_response = requests.get(img_url, timeout=30)
        img_response.raise_for_status()
    except requests.RequestException as err:
        # A single broken sprite should not abort the whole download
        print(f'Skipping {img_url}: {err}')
        continue
    img_data = img_response.content
    # Open the target file only after a successful download, so a failed
    # request never leaves an empty or corrupt .png behind
    with open(img_path, 'wb') as f:
        f.write(img_data)

print('Bilder wurden erfolgreich heruntergeladen.')

## Prepare the data

In [None]:
from PIL import Image
import glob
import numpy as np
import torch
import torchvision.transforms as transforms
from torch.utils.data import DataLoader, Dataset

# Image preprocessing pipeline: resize to 64x64, convert to a tensor,
# then normalize each of the three channels with mean 0.5 and std 0.5
transform = transforms.Compose([
    transforms.Resize(size=(64, 64)),
    transforms.ToTensor(),
    transforms.Normalize(mean=(0.5, 0.5, 0.5), std=(0.5, 0.5, 0.5)),
])

# Load and transform the images
def load_images(image_folder):
    """Load every PNG in *image_folder* as one stacked tensor.

    Each file matching ``<image_folder>/*.png`` is opened, converted to
    RGB, passed through the module-level ``transform`` pipeline, and the
    results are stacked along a new first dimension.

    Args:
        image_folder: Directory searched (non-recursively) for ``.png``
            files.

    Returns:
        The tensor produced by ``torch.stack`` over all transformed images.

    Raises:
        RuntimeError: If the folder contains no ``.png`` files.
    """
    # Sort for a reproducible order; glob gives no ordering guarantee
    image_paths = sorted(glob.glob(f'{image_folder}/*.png'))
    if not image_paths:
        # Fail with a clear message instead of torch.stack's generic
        # complaint about an empty tensor list
        raise RuntimeError(f'No .png images found in {image_folder!r}')

    images = []
    for img_path in image_paths:
        # The context manager closes the file handle once the pixel data
        # has been converted
        with Image.open(img_path) as img:
            # Make sure the image is RGB before transforming it
            images.append(transform(img.convert('RGB')))
    return torch.stack(images)

# Read all Pokémon images from disk into a single tensor
images = load_images('data/pokemon_images')
print(f'{len(images)} Bilder wurden erfolgreich geladen und transformiert.')

# Hyperparameter: number of images per mini-batch
batch_size = 32

# DataLoader that serves the image tensor in reshuffled mini-batches
dataloader = DataLoader(dataset=images, shuffle=True, batch_size=batch_size)

In [None]:
import matplotlib.pyplot as plt
import numpy as np
import torchvision.utils as vutils

# Helper for displaying image tensors
def imshow(img):
    """Display a channel-first image tensor with matplotlib.

    Args:
        img: Image tensor laid out as (channels, height, width). The
            values are plotted as they are, so the caller should pass
            data already scaled for display (the calls in this notebook
            use ``make_grid(..., normalize=True)``).
    """
    # detach() and cpu() make the conversion safe for tensors that track
    # gradients or live on the GPU; for plain CPU tensors both are no-ops
    npimg = img.detach().cpu().numpy()
    # matplotlib expects the channel axis last: (C, H, W) -> (H, W, C)
    plt.imshow(np.transpose(npimg, (1, 2, 0)))
    plt.show()

# Preview one batch of real training images before training starts
real_batch = next(iter(dataloader))
plt.figure(figsize=(8, 8))
plt.title("Training Images")
plt.axis("off")
# Arrange (at most) the first 64 images of the batch in a grid, rescaled
# for display
training_grid = vutils.make_grid(real_batch[:64], padding=2, normalize=True)
imshow(training_grid)

In [None]:
import torch
import torch.nn as nn
# Use the GPU when CUDA is available, otherwise fall back to the CPU
device = torch.device('cuda') if torch.cuda.is_available() else torch.device('cpu')

class Generator(nn.Module):
    """DCGAN-style generator that upsamples a latent vector to an image.

    The network is a stack of transposed convolutions. For a latent input
    of spatial size 1x1 the feature maps grow 4 -> 8 -> 16 -> 32 -> 64, so
    the output has shape (batch, nc, 64, 64).

    Args:
        nz: Number of channels of the latent input.
        ngf: Base number of generator feature maps; the hidden layers use
            ngf*8, ngf*4, ngf*2 and ngf channels.
        nc: Number of channels of the generated image.
    """

    def __init__(self, nz, ngf, nc):
        super().__init__()
        self.main = nn.Sequential(
            # (nz, 1, 1) -> (ngf*8, 4, 4)
            nn.ConvTranspose2d(nz, ngf * 8, 4, 1, 0, bias=False),
            nn.BatchNorm2d(ngf * 8),
            nn.ReLU(True),
            # -> (ngf*4, 8, 8)
            nn.ConvTranspose2d(ngf * 8, ngf * 4, 4, 2, 1, bias=False),
            nn.BatchNorm2d(ngf * 4),
            nn.ReLU(True),
            # -> (ngf*2, 16, 16)
            nn.ConvTranspose2d(ngf * 4, ngf * 2, 4, 2, 1, bias=False),
            nn.BatchNorm2d(ngf * 2),
            nn.ReLU(True),
            # -> (ngf, 32, 32)
            nn.ConvTranspose2d(ngf * 2, ngf, 4, 2, 1, bias=False),
            nn.BatchNorm2d(ngf),
            nn.ReLU(True),
            # -> (nc, 64, 64)
            nn.ConvTranspose2d(ngf, nc, 4, 2, 1, bias=False),
            # Tanh bounds the output to [-1, 1], the range of the training
            # images after Normalize(mean=0.5, std=0.5). It has no
            # parameters, so the state_dict keys stay the same.
            nn.Tanh(),
        )

    def forward(self, input):
        """Map a latent batch of shape (batch, nz, 1, 1) to images in [-1, 1]."""
        return self.main(input)


class Discriminator(nn.Module):
    """Convolutional discriminator that scores an image with a single logit.

    Four stride-2 convolutions halve the spatial size each time while the
    channel count grows from ndf to ndf*8. A final 4x4 convolution reduces
    the result to one channel. No sigmoid is applied, so the output is a
    raw logit.

    Args:
        nc: Number of channels of the input image.
        ndf: Base number of discriminator feature maps.
    """

    def __init__(self, nc, ndf):
        super().__init__()
        # First stage: convolution without batch normalization
        layers = [
            nn.Conv2d(nc, ndf, kernel_size=4, stride=2, padding=1, bias=False),
            nn.LeakyReLU(0.2, inplace=True),
        ]
        # Three further downsampling stages, each doubling the channel count
        for factor in (1, 2, 4):
            width_in = ndf * factor
            width_out = width_in * 2
            layers.extend([
                nn.Conv2d(width_in, width_out, kernel_size=4, stride=2, padding=1, bias=False),
                nn.BatchNorm2d(width_out),
                nn.LeakyReLU(0.2, inplace=True),
            ])
        # Final stage: collapse to a single output channel
        layers.append(
            nn.Conv2d(ndf * 8, 1, kernel_size=4, stride=1, padding=0, bias=False)
        )
        self.main = nn.Sequential(*layers)

    def forward(self, input):
        """Return the discriminator logits for a batch of images."""
        return self.main(input)

## Training

In [None]:
import torch.optim as optim
import torchvision.utils as vutils

# Hyperparameters
lr_D, lr_G = 2e-4, 5e-4  # learning rates of the discriminator / generator optimizer
beta1 = 0.5              # first beta of both Adam optimizers
nz = 50                  # size of the latent vector
ngf, ndf = 64, 64        # base feature-map counts of generator / discriminator
num_epochs = 100

# Build both networks for 3-channel images and move them to the device
netG = Generator(nz=nz, ngf=ngf, nc=3).to(device)
netD = Discriminator(nc=3, ndf=ndf).to(device)

# Initialize the weights
def weights_init(m):
    """Re-initialize a module in place, based on its class name.

    Modules whose class name contains ``'Conv'`` get weights drawn from a
    normal distribution with mean 0.0 and std 0.02. Modules whose class
    name contains ``'BatchNorm'`` get weights drawn with mean 1.0 and std
    0.02 and a bias of 0. All other modules are left untouched.

    Args:
        m: The module to initialize (intended for ``nn.Module.apply``).
    """
    layer_type = type(m).__name__
    if 'Conv' in layer_type:
        nn.init.normal_(m.weight.data, mean=0.0, std=0.02)
        return
    if 'BatchNorm' in layer_type:
        nn.init.normal_(m.weight.data, mean=1.0, std=0.02)
        nn.init.constant_(m.bias.data, val=0)

# Re-initialize all layers of both networks (generator first)
netG.apply(weights_init)
netD.apply(weights_init)

# Loss function: binary cross-entropy computed directly on logits
criterion = nn.BCEWithLogitsLoss()

# One Adam optimizer per network, each with its own learning rate
optimizerG = optim.Adam(params=netG.parameters(), lr=lr_G, betas=(beta1, 0.999))
optimizerD = optim.Adam(params=netD.parameters(), lr=lr_D, betas=(beta1, 0.999))

# Training loop
import os

# Sample images are written to 'results/'. Create the directory up front so
# the first save does not fail because the folder is missing.
os.makedirs('results', exist_ok=True)

# Fixed latent batch, reused for every saved sample grid so the grids are
# comparable across epochs
fixed_noise = torch.randn(64, nz, 1, 1, device=device)

for epoch in range(num_epochs):
    for i, data in enumerate(dataloader, 0):
        # ---- Update the discriminator ----
        netD.zero_grad()
        real_cpu = data.to(device)
        b_size = real_cpu.size(0)
        # Smoothed target 0.9 (instead of 1.0) for real images
        label = torch.full((b_size,), 0.9, dtype=torch.float, device=device)
        output = netD(real_cpu).view(-1)
        errD_real = criterion(output, label)
        errD_real.backward()
        # netD returns logits; apply a sigmoid so the value logged as D(x)
        # is a probability
        D_x = torch.sigmoid(output.detach()).mean().item()

        noise = torch.randn(b_size, nz, 1, 1, device=device)
        fake = netG(noise)
        # Smoothed target 0.1 (instead of 0.0) for generated images
        label.fill_(0.1)
        # detach() keeps this backward pass from reaching the generator
        output = netD(fake.detach()).view(-1)
        errD_fake = criterion(output, label)
        errD_fake.backward()
        D_G_z1 = torch.sigmoid(output.detach()).mean().item()
        errD = errD_real + errD_fake
        optimizerD.step()

        # ---- Update the generator ----
        netG.zero_grad()
        # The generator wants the discriminator to classify the generated
        # images as real
        label.fill_(0.9)
        # Classify the generated images again, now with the updated discriminator
        output = netD(fake).view(-1)
        # If the discriminator still labels the images as fake, the loss is
        # high and the generator's parameters get a large update
        errG = criterion(output, label)
        errG.backward()
        D_G_z2 = torch.sigmoid(output.detach()).mean().item()
        optimizerG.step()

        if i % 50 == 0:
            print(f'[{epoch}/{num_epochs}][{i}/{len(dataloader)}] Loss_D: {errD.item():.4f} Loss_G: {errG.item():.4f} D(x): {D_x:.4f} D(G(z)): {D_G_z1:.4f} / {D_G_z2:.4f}')

    # Save generated images every 100th epoch and after the final epoch
    # (with num_epochs = 100 that is epoch 0 and epoch 99)
    if epoch % 100 == 0 or epoch == num_epochs - 1:
        with torch.no_grad():
            fake = netG(fixed_noise).detach().cpu()
        vutils.save_image(fake, f'results/fake_samples_epoch_{epoch}.png', normalize=True)


In [None]:
# Print the layer structure of the generator, then of the discriminator
print(netG, netD, sep='\n')

## Generate images

In [None]:
# Load the trained model (if it was saved)
#netG.load_state_dict(torch.load('netG.pth'))

# Generate new Pokémon images from 64 freshly sampled latent vectors
noise = torch.randn(64, nz, 1, 1, device=device)
with torch.no_grad():
    fake = netG(noise).detach().cpu()
    plt.figure(figsize=(8, 8))
    # `epoch` is the loop variable left over from the training loop
    plt.title(f"Generated Images at Epoch {epoch}")
    plt.axis("off")
    generated_grid = vutils.make_grid(fake, padding=2, normalize=True)
    imshow(generated_grid)
    plt.show()
#vutils.save_image(fake, 'results/generated_pokemon.png', normalize=True)