In [6]:
import numpy as np
import torch
import torch.nn as nn
import torch.optim as optim
import torch.nn.init as init
import torchvision.datasets as dset
import torchvision.transforms as transforms
from torch.utils.data import DataLoader
import matplotlib.pyplot as plt
import os
from torchvision.utils import save_image

In [7]:
# Create a directory if not exists
sample_dir="samples"
if not os.path.exists(sample_dir):
    os.makedirs(sample_dir)
print(sample_dir)

samples


In [8]:
batch_size = 256
learning_rate = 0.001
num_epoch = 10

In [9]:
mnist_train = dset.MNIST("./", train=True, transform=transforms.ToTensor(), target_transform=None, download=True)
mnist_test = dset.MNIST("./", train=False, transform=transforms.ToTensor(), target_transform=None, download=True)

In [10]:
# Device configuration
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

# Hyper-parameters
latent_size = 64
hidden_size = 256
image_size = 784
num_epochs = 200
batch_size = 100


  return torch._C._cuda_getDeviceCount() > 0


In [11]:
train_loader = torch.utils.data.DataLoader(mnist_train,batch_size=batch_size, shuffle=True,num_workers=2,drop_last=True)
test_loader = torch.utils.data.DataLoader(mnist_test,batch_size=batch_size, shuffle=False,num_workers=2,drop_last=True)

In [12]:
# Discriminator
D = nn.Sequential(
    nn.Linear(image_size, hidden_size),
    nn.LeakyReLU(0.2),
    nn.Linear(hidden_size, hidden_size),
    nn.LeakyReLU(0.2),
    nn.Linear(hidden_size, 1),
    nn.Sigmoid())

# Generator 
G = nn.Sequential(
    nn.Linear(latent_size, hidden_size),
    nn.ReLU(),
    nn.Linear(hidden_size, hidden_size),
    nn.ReLU(),
    nn.Linear(hidden_size, image_size),
    nn.Tanh())

In [13]:
# Device setting
D = D.to(device)
G = G.to(device)

# Binary cross entropy loss and optimizer
criterion = nn.BCELoss()
d_optimizer = torch.optim.Adam(D.parameters(), lr=0.0002)
g_optimizer = torch.optim.Adam(G.parameters(), lr=0.0002)

In [14]:
def denorm(x):
    out = (x + 1) / 2
    return out.clamp(0, 1)

def reset_grad():
    d_optimizer.zero_grad()
    g_optimizer.zero_grad()

In [15]:
# Start training
total_step = len(train_loader)
for epoch in range(num_epochs):
    for i, (images, _) in enumerate(train_loader):
        images = images.reshape(batch_size, -1).to(device)
        
        # Create the labels which are later used as input for the BCE loss
        real_labels = torch.ones(batch_size, 1).to(device)
        fake_labels = torch.zeros(batch_size, 1).to(device)

        # ================================================================== #
        #                      Train the discriminator                       #
        # ================================================================== #

        # Compute BCE_Loss using real images where BCE_Loss(x, y): - y * log(D(x)) - (1-y) * log(1 - D(x))
        # Second term of the loss is always zero since real_labels == 1
        outputs = D(images)
        d_loss_real = criterion(outputs, real_labels)
        real_score = outputs
        
        # Compute BCELoss using fake images
        # First term of the loss is always zero since fake_labels == 0
        z = torch.randn(batch_size, latent_size).to(device)
        fake_images = G(z)
        outputs = D(fake_images)
        d_loss_fake = criterion(outputs, fake_labels)
        fake_score = outputs
        
        # Backprop and optimize
        d_loss = d_loss_real + d_loss_fake
        reset_grad()
        d_loss.backward()
        d_optimizer.step()
        
        # ================================================================== #
        #                        Train the generator                         #
        # ================================================================== #

        # Compute loss with fake images
        z = torch.randn(batch_size, latent_size).to(device)
        fake_images = G(z)
        outputs = D(fake_images)
        
        # We train G to maximize log(D(G(z)) instead of minimizing log(1-D(G(z)))
        # For the reason, see the last paragraph of section 3. https://arxiv.org/pdf/1406.2661.pdf
        g_loss = criterion(outputs, real_labels)
        
        # Backprop and optimize
        reset_grad()
        g_loss.backward()
        g_optimizer.step()
        
        if (i+1) % 200 == 0:
            print('Epoch [{}/{}], Step [{}/{}], d_loss: {:.4f}, g_loss: {:.4f}, D(x): {:.2f}, D(G(z)): {:.2f}' 
                  .format(epoch, num_epochs, i+1, total_step, d_loss.item(), g_loss.item(), 
                          real_score.mean().item(), fake_score.mean().item()))
    
    # Save real images
    if (epoch+1) == 1:
        images = images.reshape(images.size(0), 1, 28, 28)
        save_image(denorm(images), os.path.join(sample_dir, 'real_images.png'))
    
    # Save sampled images
    fake_images = fake_images.reshape(fake_images.size(0), 1, 28, 28)
    save_image(denorm(fake_images), os.path.join(sample_dir, 'fake_images-{}.png'.format(epoch+1)))



Epoch [0/200], Step [200/600], d_loss: 0.1372, g_loss: 4.2828, D(x): 0.93, D(G(z)): 0.06
Epoch [0/200], Step [400/600], d_loss: 0.4088, g_loss: 6.5516, D(x): 0.79, D(G(z)): 0.09
Epoch [0/200], Step [600/600], d_loss: 0.3020, g_loss: 3.8171, D(x): 0.85, D(G(z)): 0.11
Epoch [1/200], Step [200/600], d_loss: 0.1990, g_loss: 3.4505, D(x): 0.93, D(G(z)): 0.10
Epoch [1/200], Step [400/600], d_loss: 1.3674, g_loss: 1.6103, D(x): 0.67, D(G(z)): 0.52
Epoch [1/200], Step [600/600], d_loss: 0.6317, g_loss: 2.2950, D(x): 0.80, D(G(z)): 0.28
Epoch [2/200], Step [200/600], d_loss: 0.2201, g_loss: 3.6553, D(x): 0.91, D(G(z)): 0.10
Epoch [2/200], Step [400/600], d_loss: 0.3896, g_loss: 3.7631, D(x): 0.86, D(G(z)): 0.16
Epoch [2/200], Step [600/600], d_loss: 0.8600, g_loss: 2.2243, D(x): 0.70, D(G(z)): 0.20
Epoch [3/200], Step [200/600], d_loss: 1.6610, g_loss: 2.3880, D(x): 0.64, D(G(z)): 0.33
Epoch [3/200], Step [400/600], d_loss: 0.3671, g_loss: 2.6310, D(x): 0.86, D(G(z)): 0.12
Epoch [3/200], Step [

Epoch [30/200], Step [600/600], d_loss: 0.1537, g_loss: 6.1865, D(x): 0.96, D(G(z)): 0.02
Epoch [31/200], Step [200/600], d_loss: 0.0392, g_loss: 9.7471, D(x): 0.98, D(G(z)): 0.00
Epoch [31/200], Step [400/600], d_loss: 0.0921, g_loss: 6.9996, D(x): 0.97, D(G(z)): 0.02
Epoch [31/200], Step [600/600], d_loss: 0.0392, g_loss: 7.8200, D(x): 0.99, D(G(z)): 0.02
Epoch [32/200], Step [200/600], d_loss: 0.0850, g_loss: 7.7059, D(x): 0.97, D(G(z)): 0.02
Epoch [32/200], Step [400/600], d_loss: 0.1155, g_loss: 6.4436, D(x): 0.96, D(G(z)): 0.02
Epoch [32/200], Step [600/600], d_loss: 0.0943, g_loss: 6.0242, D(x): 0.98, D(G(z)): 0.06
Epoch [33/200], Step [200/600], d_loss: 0.0595, g_loss: 7.2054, D(x): 0.98, D(G(z)): 0.02
Epoch [33/200], Step [400/600], d_loss: 0.0541, g_loss: 6.4915, D(x): 0.98, D(G(z)): 0.03
Epoch [33/200], Step [600/600], d_loss: 0.2353, g_loss: 7.0485, D(x): 0.95, D(G(z)): 0.08
Epoch [34/200], Step [200/600], d_loss: 0.0503, g_loss: 6.6958, D(x): 0.99, D(G(z)): 0.03
Epoch [34/

Epoch [61/200], Step [400/600], d_loss: 0.1656, g_loss: 5.6429, D(x): 0.95, D(G(z)): 0.05
Epoch [61/200], Step [600/600], d_loss: 0.0905, g_loss: 5.8591, D(x): 0.96, D(G(z)): 0.03
Epoch [62/200], Step [200/600], d_loss: 0.0741, g_loss: 6.9022, D(x): 0.97, D(G(z)): 0.03
Epoch [62/200], Step [400/600], d_loss: 0.0682, g_loss: 8.0145, D(x): 0.98, D(G(z)): 0.02
Epoch [62/200], Step [600/600], d_loss: 0.1444, g_loss: 6.5980, D(x): 0.97, D(G(z)): 0.03
Epoch [63/200], Step [200/600], d_loss: 0.1480, g_loss: 5.3711, D(x): 0.97, D(G(z)): 0.07
Epoch [63/200], Step [400/600], d_loss: 0.1587, g_loss: 6.5561, D(x): 0.94, D(G(z)): 0.04
Epoch [63/200], Step [600/600], d_loss: 0.1422, g_loss: 6.3673, D(x): 0.97, D(G(z)): 0.05
Epoch [64/200], Step [200/600], d_loss: 0.1497, g_loss: 6.1153, D(x): 0.94, D(G(z)): 0.02
Epoch [64/200], Step [400/600], d_loss: 0.0897, g_loss: 6.2587, D(x): 0.97, D(G(z)): 0.04
Epoch [64/200], Step [600/600], d_loss: 0.2515, g_loss: 7.1060, D(x): 0.90, D(G(z)): 0.02
Epoch [65/

Epoch [92/200], Step [200/600], d_loss: 0.1709, g_loss: 5.4055, D(x): 0.94, D(G(z)): 0.03
Epoch [92/200], Step [400/600], d_loss: 0.1509, g_loss: 5.5863, D(x): 0.95, D(G(z)): 0.05
Epoch [92/200], Step [600/600], d_loss: 0.1582, g_loss: 5.1853, D(x): 0.96, D(G(z)): 0.06
Epoch [93/200], Step [200/600], d_loss: 0.2627, g_loss: 4.9497, D(x): 0.90, D(G(z)): 0.05
Epoch [93/200], Step [400/600], d_loss: 0.2582, g_loss: 4.5210, D(x): 0.93, D(G(z)): 0.08
Epoch [93/200], Step [600/600], d_loss: 0.1739, g_loss: 4.6023, D(x): 0.95, D(G(z)): 0.07
Epoch [94/200], Step [200/600], d_loss: 0.2446, g_loss: 5.1943, D(x): 0.91, D(G(z)): 0.06
Epoch [94/200], Step [400/600], d_loss: 0.1693, g_loss: 5.7962, D(x): 0.92, D(G(z)): 0.05
Epoch [94/200], Step [600/600], d_loss: 0.2135, g_loss: 4.0142, D(x): 0.97, D(G(z)): 0.12
Epoch [95/200], Step [200/600], d_loss: 0.1610, g_loss: 5.1096, D(x): 0.97, D(G(z)): 0.07
Epoch [95/200], Step [400/600], d_loss: 0.2566, g_loss: 4.2509, D(x): 0.93, D(G(z)): 0.10
Epoch [95/

Epoch [122/200], Step [400/600], d_loss: 0.2309, g_loss: 4.3245, D(x): 0.93, D(G(z)): 0.10
Epoch [122/200], Step [600/600], d_loss: 0.1553, g_loss: 5.2852, D(x): 0.93, D(G(z)): 0.05
Epoch [123/200], Step [200/600], d_loss: 0.3372, g_loss: 4.0201, D(x): 0.90, D(G(z)): 0.09
Epoch [123/200], Step [400/600], d_loss: 0.1890, g_loss: 4.2864, D(x): 0.92, D(G(z)): 0.06
Epoch [123/200], Step [600/600], d_loss: 0.1976, g_loss: 4.4548, D(x): 0.97, D(G(z)): 0.12
Epoch [124/200], Step [200/600], d_loss: 0.3246, g_loss: 4.8554, D(x): 0.88, D(G(z)): 0.07
Epoch [124/200], Step [400/600], d_loss: 0.2731, g_loss: 4.0701, D(x): 0.92, D(G(z)): 0.11
Epoch [124/200], Step [600/600], d_loss: 0.2275, g_loss: 4.2774, D(x): 0.94, D(G(z)): 0.09
Epoch [125/200], Step [200/600], d_loss: 0.2238, g_loss: 5.7951, D(x): 0.95, D(G(z)): 0.09
Epoch [125/200], Step [400/600], d_loss: 0.1955, g_loss: 4.4276, D(x): 0.91, D(G(z)): 0.07
Epoch [125/200], Step [600/600], d_loss: 0.4118, g_loss: 3.9257, D(x): 0.92, D(G(z)): 0.13

Epoch [152/200], Step [600/600], d_loss: 0.2027, g_loss: 4.1997, D(x): 0.92, D(G(z)): 0.07
Epoch [153/200], Step [200/600], d_loss: 0.3985, g_loss: 3.8421, D(x): 0.86, D(G(z)): 0.09
Epoch [153/200], Step [400/600], d_loss: 0.2813, g_loss: 4.5155, D(x): 0.91, D(G(z)): 0.09
Epoch [153/200], Step [600/600], d_loss: 0.3863, g_loss: 4.3469, D(x): 0.87, D(G(z)): 0.09
Epoch [154/200], Step [200/600], d_loss: 0.3186, g_loss: 3.4087, D(x): 0.93, D(G(z)): 0.14
Epoch [154/200], Step [400/600], d_loss: 0.2642, g_loss: 4.3835, D(x): 0.93, D(G(z)): 0.10
Epoch [154/200], Step [600/600], d_loss: 0.4701, g_loss: 4.0958, D(x): 0.88, D(G(z)): 0.17
Epoch [155/200], Step [200/600], d_loss: 0.1924, g_loss: 4.6046, D(x): 0.95, D(G(z)): 0.10
Epoch [155/200], Step [400/600], d_loss: 0.2812, g_loss: 3.7789, D(x): 0.91, D(G(z)): 0.11
Epoch [155/200], Step [600/600], d_loss: 0.4607, g_loss: 3.8451, D(x): 0.83, D(G(z)): 0.12
Epoch [156/200], Step [200/600], d_loss: 0.3302, g_loss: 3.3563, D(x): 0.89, D(G(z)): 0.09

Epoch [183/200], Step [200/600], d_loss: 0.3389, g_loss: 4.1294, D(x): 0.89, D(G(z)): 0.10
Epoch [183/200], Step [400/600], d_loss: 0.3838, g_loss: 3.7852, D(x): 0.92, D(G(z)): 0.13
Epoch [183/200], Step [600/600], d_loss: 0.3958, g_loss: 4.1646, D(x): 0.90, D(G(z)): 0.16
Epoch [184/200], Step [200/600], d_loss: 0.2359, g_loss: 5.0579, D(x): 0.90, D(G(z)): 0.06
Epoch [184/200], Step [400/600], d_loss: 0.2704, g_loss: 3.5220, D(x): 0.93, D(G(z)): 0.11
Epoch [184/200], Step [600/600], d_loss: 0.4410, g_loss: 3.8631, D(x): 0.85, D(G(z)): 0.10
Epoch [185/200], Step [200/600], d_loss: 0.4729, g_loss: 4.0361, D(x): 0.86, D(G(z)): 0.14
Epoch [185/200], Step [400/600], d_loss: 0.2837, g_loss: 3.2964, D(x): 0.92, D(G(z)): 0.11
Epoch [185/200], Step [600/600], d_loss: 0.3447, g_loss: 4.2445, D(x): 0.91, D(G(z)): 0.13
Epoch [186/200], Step [200/600], d_loss: 0.2648, g_loss: 3.5786, D(x): 0.91, D(G(z)): 0.09
Epoch [186/200], Step [400/600], d_loss: 0.3670, g_loss: 3.9360, D(x): 0.88, D(G(z)): 0.11

In [16]:
# Save the model checkpoints 
torch.save(G.state_dict(), 'G.ckpt')
torch.save(D.state_dict(), 'D.ckpt')