This is notebook implementation of the DCGAN for utilizing free GPUs of GoogleColab!

In [1]:
import torch
import torch.nn as nn

class Discriminator(nn.Module):
    def __init__(self, channels_img, features_d):
        super(Discriminator, self).__init__()
        """In the paper they did not use BN in the first layer of Discriminator
           and in the last layer of Generator. This is why we do not use _block
           rightaway!
        """
        self.disc = nn.Sequential(
            # Input: N x channels_img x 64 x64
            nn.Conv2d(in_channels=channels_img,
                      out_channels=features_d,
                      kernel_size=4,
                      stride=2,
                      padding=1),
            nn.LeakyReLU(0.2),

            # Input: N x features_d x 32 x 32
            self._block(in_channels=features_d,
                        out_channels=features_d*2,
                        kernel_size=4,
                        stride=2,
                        padding=1),

            # Input: N x features_d*2 x 16 x 16
            self._block(in_channels=features_d*2,
                        out_channels=features_d*4,
                        kernel_size=4,
                        stride=2,
                        padding=1),

            # Input: N x features_d*4 x 8 x 8
            self._block(in_channels=features_d*4,
                        out_channels=features_d*8,
                        kernel_size=4,
                        stride=2,
                        padding=1),

            # Input: N x features_d*8 x 4 x 4
            nn.Conv2d(in_channels=features_d*8,
                      out_channels=1,
                      kernel_size=4,
                      stride=2,
                      padding=0),
            # Output: N x 1 x 1 x 1

            nn.Sigmoid(),
        )

    def _block(self, in_channels, out_channels, kernel_size, stride, padding):
        return nn.Sequential(
            nn.Conv2d(
                in_channels=in_channels,
                out_channels=out_channels,
                kernel_size=kernel_size,
                stride=stride,
                padding=padding,
                bias=False, # Since we are using BN
            ),
            nn.BatchNorm2d(out_channels),
            nn.LeakyReLU(0.2),
        )

    def forward(self, x):
        return self.disc(x)


class Generator(nn.Module):
    def __init__(self, z_dim, channels_img, features_g):
        super(Generator, self).__init__()

        self.gen = nn.Sequential(
            # Input: N × z_dim x 1 x 1
            self._block(in_channels=z_dim,
                        out_channels=features_g*16,
                        kernel_size=4,
                        stride=1,
                        padding=0),

            # Input: N x features_g*16 × 4 x 4
            self._block(in_channels=features_g*16,
                        out_channels=features_g*8,
                        kernel_size=4,
                        stride=2,
                        padding=1),

            # Input: N x features_g*8 × 8 x 8
            self._block(in_channels=features_g*8,
                        out_channels=features_g*4,
                        kernel_size=4,
                        stride=2,
                        padding=1),

            # Input: N x features_g*4 × 16 x 16
            self._block(in_channels=features_g*4,
                        out_channels=features_g*2,
                        kernel_size=4,
                        stride=2,
                        padding=1),

            # Input: N x features_g*2 × 32 x 32
            nn.ConvTranspose2d(
                in_channels=features_g*2,
                out_channels=channels_img,
                kernel_size=4,
                stride=2,
                padding=1,
            ),

            # Output: N x channels_img × 64 x 64
            nn.Tanh(), # to map the output to [-1, 1]
        )

    def _block(self, in_channels, out_channels, kernel_size, stride, padding):
        return nn.Sequential(
            nn.ConvTranspose2d(
                in_channels=in_channels,
                out_channels=out_channels,
                kernel_size=kernel_size,
                stride=stride,
                padding=padding,
                bias=False, # Since we are using BN
            ),
            nn.BatchNorm2d(out_channels),
            nn.ReLU(),
        )

    def forward(self, x):
        return self.gen(x)

In [2]:
"""initialize_weights function is based on instructions in the original paper"""
def initialize_weights(model):
    for module in model.modules():
        if isinstance(module, (nn.Conv2d, nn.ConvTranspose2d, nn.BatchNorm2d)):
            nn.init.normal_(module.weight.data, 0.0, 0.02)

In [3]:
""" Testing the created architecture """
batch_size, channels_img, heigh, width, z_dim = 8, 3, 64, 64, 100

rand_img_batch = torch.randn((batch_size, channels_img, heigh, width))
rand_z = torch.randn((batch_size, z_dim, 1, 1))

gen = Generator(z_dim=z_dim, channels_img=channels_img, features_g=8)
initialize_weights(gen)
disc =Discriminator(channels_img=channels_img, features_d=8)
initialize_weights(disc)

assert gen(rand_z).shape == (batch_size, channels_img, heigh, width)
assert disc(rand_img_batch).shape == (batch_size, 1, 1, 1)

print("success!")

success!


In [4]:
def save_png(im, file_path):
    rows, cols = 4, 8

    fig, axes = plt.subplots(rows, cols, figsize=(16, 8))
    axes = axes.flatten()

    for i in range(len(axes)):
        axes[i].imshow(im[i], cmap='gray')
        axes[i].axis('off')
    plt.tight_layout()
    plt.savefig(file_path)
    plt.close()

In [5]:
import torch
import torch.nn as nn
import torch.optim as optim
import torchvision
import torchvision.datasets as datasets
import torchvision.transforms as transforms
from torch.utils.data import DataLoader
import numpy as np
from matplotlib import image as mpimg
import matplotlib.pyplot as plt

device = "cuda" if torch.cuda.is_available() else "cpu"
print(device)

cuda


In [6]:
""" Hyperparameters """
batch_size = 128
img_size = 64
channels_img = 1 # for mnist
z_dim = 100
features_disc = 64
features_gen = 64

transforms = transforms.Compose([
    transforms.Resize((img_size, img_size)),
    transforms.ToTensor(),
    transforms.Normalize([0.5] * channels_img, [0.5] * channels_img)
])

In [7]:
dataset = datasets.MNIST(root= "./GenerativeAI/DCGAN/dataset/", train=True, transform=transforms, download=True)
loader = DataLoader(dataset, batch_size=batch_size, shuffle=True)
gen = Generator(z_dim, channels_img, features_gen).to(device)
disc = Discriminator(channels_img, features_disc).to(device)
initialize_weights(gen)
initialize_weights(disc)

lr = 1e-3
opt_gen = optim.Adam(gen.parameters(), lr=lr, betas=(0.5, 0.999))
opt_disc = optim.Adam(disc.parameters(), lr=lr, betas=(0.5, 0.999))
criterion = nn.BCELoss()

fixed_noise = torch.randn(32, z_dim, 1, 1).to(device)

import os
save_path = "./GenerativeAI/DCGAN/logs/fake"
os.makedirs(save_path, exist_ok=True)

Downloading http://yann.lecun.com/exdb/mnist/train-images-idx3-ubyte.gz
Downloading http://yann.lecun.com/exdb/mnist/train-images-idx3-ubyte.gz to ./GenerativeAI/DCGAN/dataset/MNIST/raw/train-images-idx3-ubyte.gz


100%|██████████| 9912422/9912422 [00:00<00:00, 116644814.53it/s]


Extracting ./GenerativeAI/DCGAN/dataset/MNIST/raw/train-images-idx3-ubyte.gz to ./GenerativeAI/DCGAN/dataset/MNIST/raw

Downloading http://yann.lecun.com/exdb/mnist/train-labels-idx1-ubyte.gz
Downloading http://yann.lecun.com/exdb/mnist/train-labels-idx1-ubyte.gz to ./GenerativeAI/DCGAN/dataset/MNIST/raw/train-labels-idx1-ubyte.gz


100%|██████████| 28881/28881 [00:00<00:00, 121135693.82it/s]


Extracting ./GenerativeAI/DCGAN/dataset/MNIST/raw/train-labels-idx1-ubyte.gz to ./GenerativeAI/DCGAN/dataset/MNIST/raw

Downloading http://yann.lecun.com/exdb/mnist/t10k-images-idx3-ubyte.gz
Downloading http://yann.lecun.com/exdb/mnist/t10k-images-idx3-ubyte.gz to ./GenerativeAI/DCGAN/dataset/MNIST/raw/t10k-images-idx3-ubyte.gz


100%|██████████| 1648877/1648877 [00:00<00:00, 49535446.74it/s]

Extracting ./GenerativeAI/DCGAN/dataset/MNIST/raw/t10k-images-idx3-ubyte.gz to ./GenerativeAI/DCGAN/dataset/MNIST/raw






Downloading http://yann.lecun.com/exdb/mnist/t10k-labels-idx1-ubyte.gz
Downloading http://yann.lecun.com/exdb/mnist/t10k-labels-idx1-ubyte.gz to ./GenerativeAI/DCGAN/dataset/MNIST/raw/t10k-labels-idx1-ubyte.gz


100%|██████████| 4542/4542 [00:00<00:00, 23842964.67it/s]


Extracting ./GenerativeAI/DCGAN/dataset/MNIST/raw/t10k-labels-idx1-ubyte.gz to ./GenerativeAI/DCGAN/dataset/MNIST/raw



In [8]:
epochs = 5
step = 0
gen.train()
disc.train()

for epoch in range(epochs):
    for batch_idx, (real, _) in enumerate(loader):
        gen.train()
        disc.train()
        real = real.to(device)
        noise = torch.randn((batch_size, z_dim, 1, 1)).to(device)
        fake = gen(noise)

        # Train Discriminator: max{Log[D(x)] + Log[1 - D(G(Z))]}
        disc_real = disc(real).view(-1)
        loss_disc_real = criterion(disc_real, torch.ones_like(disc_real))
        disc_fake = disc(fake.detach()).view(-1)
        loss_disc_fake = criterion(disc_fake, torch.zeros_like(disc_fake))
        loss_disc = (loss_disc_real + loss_disc_fake) / 2

        disc.zero_grad()
        """ `retain_graph=True` in the next line of code:
            Retains the computational graph for subsequent backward passes
        """
        loss_disc.backward(retain_graph=True)
        opt_disc.step()

        # Train Generator: min{Log[1 - D(G(Z))]} or equivalently max{Log[D(G(Z))]}
        disc_fake = disc(fake).view(-1)
        loss_gen = criterion(disc_fake, torch.ones_like(disc_fake))

        gen.zero_grad()
        loss_gen.backward()
        opt_gen.step()

        if batch_idx % 50 == 0:
            print(
                f"Epoch [{epoch+1}/{epochs}] Batch {batch_idx}/{len(loader)} "
                f"Loss D: {loss_disc:.4f}, Loss G: {loss_gen:.4f}"
            )

            gen.eval()
            with torch.no_grad():
                fake = gen(fixed_noise)
                img_grid_fake = torchvision.utils.make_grid(fake[:32,0,:,:], normalize=True)
                file_path = f"{save_path}/{str(step)}.png"
                im = img_grid_fake.cpu().numpy()
                save_png(im, file_path)

            step += 1


Epoch [1/5] Batch 0/469 Loss D: 0.6967, Loss G: 0.8920
Epoch [1/5] Batch 50/469 Loss D: 0.0094, Loss G: 5.1445
Epoch [1/5] Batch 100/469 Loss D: 1.4640, Loss G: 0.2331
Epoch [1/5] Batch 150/469 Loss D: 0.6866, Loss G: 1.4222
Epoch [1/5] Batch 200/469 Loss D: 0.8595, Loss G: 1.9036
Epoch [1/5] Batch 250/469 Loss D: 0.5050, Loss G: 1.4846
Epoch [1/5] Batch 300/469 Loss D: 0.5714, Loss G: 0.8957
Epoch [1/5] Batch 350/469 Loss D: 0.6296, Loss G: 1.1622
Epoch [1/5] Batch 400/469 Loss D: 0.6477, Loss G: 1.0051
Epoch [1/5] Batch 450/469 Loss D: 0.6515, Loss G: 0.9191
Epoch [2/5] Batch 0/469 Loss D: 0.6293, Loss G: 0.9646
Epoch [2/5] Batch 50/469 Loss D: 0.6937, Loss G: 0.7641
Epoch [2/5] Batch 100/469 Loss D: 0.6266, Loss G: 0.8574
Epoch [2/5] Batch 150/469 Loss D: 0.7695, Loss G: 1.1388
Epoch [2/5] Batch 200/469 Loss D: 0.5710, Loss G: 1.4093
Epoch [2/5] Batch 250/469 Loss D: 0.5734, Loss G: 1.1349
Epoch [2/5] Batch 300/469 Loss D: 0.6286, Loss G: 1.1144
Epoch [2/5] Batch 350/469 Loss D: 0.5

In [9]:
import imageio
from PIL import Image, ImageDraw, ImageFont

image_directory = "./GenerativeAI/DCGAN/logs/fake/"

output_gif_path = "./GenerativeAI/DCGAN/generated_gif.gif"

image_paths = []

for i in range(step):
    image_paths.append(os.path.join(image_directory, f"{i}"))

with imageio.get_writer(output_gif_path, duration=0.5) as gif_writer:
    for i, image_path in enumerate(image_paths):
        image = imageio.imread(f"{image_path}.png")

        step_number_text = f"Step: {i+1}"
        image = Image.fromarray(image)
        draw = ImageDraw.Draw(image)

        font = ImageFont.load_default()

        draw.text((10, 10), step_number_text, font=font, fill=(255, 0, 0))


        gif_writer.append_data(image)

print(f"GIF created and saved at: {output_gif_path}")


  image = imageio.imread(f"{image_path}.png")


GIF created and saved at: ./GenerativeAI/DCGAN/generated_gif.gif
