In [34]:
%matplotlib inline
#TODO: bring all parameters (e.g length of z) into 1 cell + restructure code maybe

import os
import random
import pickle as pkl
import numpy as np
import torch
import torch.nn as nn
import torch.optim as optim
import torch.nn.functional as F
from torch.utils.data import DataLoader
from torchvision.utils import make_grid
import matplotlib.pyplot as plt

from data.stanford_dogs import StanfordDogs

BATCH_SIZE = 128
ngpu = 1
lr = 0.0001

# Set random seed for reproducibility
manualSeed = 999
#manualSeed = random.randint(1, 10000) # use if you want new results
print("Random Seed: ", manualSeed)
random.seed(manualSeed)
torch.manual_seed(manualSeed)
# Decide which device we want to run on
device = torch.device("cuda:0" if (torch.cuda.is_available() and ngpu > 0) else "cpu")

Random Seed:  999


In [35]:
"""
Yarne Hermann YPH2105
"""

train_dataset = StanfordDogs('./images')
train_dataloader = DataLoader(train_dataset, batch_size=32, shuffle=True)


In [36]:
# custom weights initialization called on netG and netD
# (from https://pytorch.org/tutorials/beginner/dcgan_faces_tutorial.html)
def weights_init(m):
    classname = m.__class__.__name__
    if classname.find('Conv') != -1:
        nn.init.normal_(m.weight.data, 0.0, 0.02)
    elif classname.find('BatchNorm') != -1:
        nn.init.normal_(m.weight.data, 1.0, 0.02)
        nn.init.constant_(m.bias.data, 0)

In [37]:
"""
implementation of original generator
"""
"""
J.R. Carneiro JC4896
Yarne Hermann YPH2105
"""

class Generator(nn.Module):
    
    def __init__(self, ngpu):
        super(Generator, self).__init__()
        
        '''
        The following is inspired by 
        https://pytorch.org/tutorials/beginner/dcgan_faces_tutorial.html
        which seemed a bit clearer and from the CAN paper
        '''
        self.ngpu = ngpu
        self.main = nn.Sequential(
            # input is Z, going into a convolution
            nn.ConvTranspose2d( 100, 2048, 4, 1, 0, bias=False),
            nn.BatchNorm2d(2048),
            nn.ReLU(True),
            # state size. 2048 x 4 x 4
            nn.ConvTranspose2d(2048, 1024, 4, 2, 1, bias=False),
            nn.BatchNorm2d(1024),
            nn.ReLU(True),
            # state size. 1024 x 8 x 8
            nn.ConvTranspose2d( 1024, 512, 4, 2, 1, bias=False),
            nn.BatchNorm2d(512),
            nn.ReLU(True),
            # state size. 512 x 16 x 16
            nn.ConvTranspose2d( 512, 256, 4, 2, 1, bias=False),
            nn.BatchNorm2d(256),
            nn.ReLU(True),
            # state size. 256 x 32 x 32
            nn.ConvTranspose2d( 256, 128, 4, 2, 1, bias=False),
            nn.BatchNorm2d(128),
            nn.ReLU(True),
            # state size. 128 x 64 x 64
            nn.ConvTranspose2d( 128, 64, 4, 2, 1, bias=False),
            nn.BatchNorm2d(64),
            nn.ReLU(True),
            # state size. 64 x 128 x 128
            nn.ConvTranspose2d( 64, 3, 4, 2, 1, bias=False),
            nn.Tanh()
            # state size. 3 x 256 x 256
        )
        
    def forward(self, x):
        return self.main(x)

In [38]:
"""
implementation of original discriminator
"""
"""
J.R. Carneiro JC4896
Yarne Hermann YPH2105
"""

class Discriminator(nn.Module):
    
    def __init__(self, ngpu, num_classes=120):
        super(Discriminator, self).__init__()
        
        '''
        The following is inspired by 
        https://pytorch.org/tutorials/beginner/dcgan_faces_tutorial.html
        which seemed a bit clearer and from the CAN paper
        '''
        
        self.ngpu = ngpu
        self.num_classes = num_classes
        # input is 3 x 256 x 256
        self.conv1 = nn.Conv2d(3, 32, 4, 2, 1, bias=False) 
        # state size. 32 x 128 x 128
        self.conv2 = nn.Conv2d(32, 64, 4, 2, 1, bias=False) 
        self.bn2 = nn.BatchNorm2d(64)
        # state size. 64 x 64 x 64
        self.conv3 = nn.Conv2d(64, 128, 4, 2, 1, bias=False)
        self.bn3 = nn.BatchNorm2d(128)
        # state size. 128 x 32 x 32
        self.conv4 = nn.Conv2d(128, 256, 4, 2, 1, bias=False)
        self.bn4 = nn.BatchNorm2d(256)
        # state size. 256 x 16 x 16
        self.conv5 = nn.Conv2d(256, 512, 4, 2, 1, bias=False)
        self.bn5 = nn.BatchNorm2d(512)
        # state size. 512 x 8 x 8
        self.conv6 = nn.Conv2d(512, 512, 4, 2, 1, bias=False)
        self.bn6 = nn.BatchNorm2d(512)
        
        self.real = nn.Linear(512 * 4 * 4, 1)
        
        self.multi1 = nn.Linear(512 * 4 * 4, 1024)
        self.multi2 = nn.Linear(1024, 512)
        self.multi3 = nn.Linear(512, num_classes)
        
    def forward(self, x):
        shared_out = F.leaky_relu(self.conv1(x), 0.2, inplace=True)
        shared_out = F.leaky_relu(self.bn2(self.conv2(shared_out)), 0.2, inplace=True)
        shared_out = F.leaky_relu(self.bn3(self.conv3(shared_out)), 0.2, inplace=True)
        shared_out = F.leaky_relu(self.bn4(self.conv4(shared_out)), 0.2, inplace=True)
        shared_out = F.leaky_relu(self.bn5(self.conv5(shared_out)), 0.2, inplace=True)
        shared_out = F.leaky_relu(self.bn6(self.conv6(shared_out)), 0.2, inplace=True)
        

        shared_out = shared_out.view(-1, 512 * 4 * 4)

        real_output = F.sigmoid(self.real(shared_out))

        multi_output = self.multi1(shared_out)
        multi_output = self.multi2(multi_output)
        multi_output = F.softmax(self.multi3(multi_output))

        return real_output, multi_output

In [39]:
G = Generator(ngpu).to(device)

# Handle multi-gpu if desired
if (device.type == 'cuda') and (ngpu > 1):
    G = nn.DataParallel(G, list(range(ngpu)))

# Apply the weights_init function to randomly initialize all weights
#  to mean=0, stdev=0.2.
G.apply(weights_init)

# Print the model
print(G)


Generator(
  (main): Sequential(
    (0): ConvTranspose2d(100, 2048, kernel_size=(4, 4), stride=(1, 1), bias=False)
    (1): BatchNorm2d(2048, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    (2): ReLU(inplace)
    (3): ConvTranspose2d(2048, 1024, kernel_size=(4, 4), stride=(2, 2), padding=(1, 1), bias=False)
    (4): BatchNorm2d(1024, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    (5): ReLU(inplace)
    (6): ConvTranspose2d(1024, 512, kernel_size=(4, 4), stride=(2, 2), padding=(1, 1), bias=False)
    (7): BatchNorm2d(512, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    (8): ReLU(inplace)
    (9): ConvTranspose2d(512, 256, kernel_size=(4, 4), stride=(2, 2), padding=(1, 1), bias=False)
    (10): BatchNorm2d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    (11): ReLU(inplace)
    (12): ConvTranspose2d(256, 128, kernel_size=(4, 4), stride=(2, 2), padding=(1, 1), bias=False)
    (13): BatchNorm2d(128, eps=1

In [40]:
# D = Discriminator()
# Create the Discriminator
D = Discriminator(ngpu, num_classes=train_dataset.NUM_CLASSES).to(device)

# Handle multi-gpu if desired
if (device.type == 'cuda') and (ngpu > 1):
    D = nn.DataParallel(D, list(range(ngpu)))

# Apply the weights_init function to randomly initialize all weights
#  to mean=0, stdev=0.2.
D.apply(weights_init)

# Print the model
print(D)



Discriminator(
  (conv1): Conv2d(3, 32, kernel_size=(4, 4), stride=(2, 2), padding=(1, 1), bias=False)
  (conv2): Conv2d(32, 64, kernel_size=(4, 4), stride=(2, 2), padding=(1, 1), bias=False)
  (bn2): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
  (conv3): Conv2d(64, 128, kernel_size=(4, 4), stride=(2, 2), padding=(1, 1), bias=False)
  (bn3): BatchNorm2d(128, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
  (conv4): Conv2d(128, 256, kernel_size=(4, 4), stride=(2, 2), padding=(1, 1), bias=False)
  (bn4): BatchNorm2d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
  (conv5): Conv2d(256, 512, kernel_size=(4, 4), stride=(2, 2), padding=(1, 1), bias=False)
  (bn5): BatchNorm2d(512, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
  (conv6): Conv2d(512, 512, kernel_size=(4, 4), stride=(2, 2), padding=(1, 1), bias=False)
  (bn6): BatchNorm2d(512, eps=1e-05, momentum=0.1, affine=True, track_running_stats=Tr

In [41]:
""" 
FROM Udacity DCGAN implementation
"""
# label should be 1 or 0
def real_loss(D_out, label=1, smooth=False):
    batch_size = D_out.size(0)
    # label smoothing
    if smooth:
        # smooth, real labels = 0.9
        if label == 1:
            labels = torch.ones(batch_size)*0.9
        else: # label == 0:
            labels = torch.ones(batch_size)*0.1
    else:
        if label == 1:
            labels = torch.ones(batch_size)
        else: # label == 0:
            labels = torch.zeros(batch_size)
        
    # move labels to GPU if available     
    if train_on_gpu:
        labels = labels.cuda()
    # binary cross entropy with logits loss
    criterion = nn.BCELoss()  # Changed from BCEWithLogitsLoss, because I saw BCEWithLogitsLoss is for if you don't add the sigmoid loss yourself
    # calculate loss
    loss = criterion(D_out.squeeze(), labels)
    return loss

In [42]:
"""
MODIFIED Udacity DCGAN implementation
"""
"""
J.R. Carneiro JC4896
"""
def multi_loss(D_out, labels):
    # batch_size = D_out.size(0)
    # labels = torch.zeros(batch_size) # fake labels = 0
    if train_on_gpu:
        labels = labels.cuda()
    criterion = nn.CrossEntropyLoss() 
    loss = criterion(D_out.squeeze(), labels)
    return loss

In [43]:
"""
Yarne Hermann YPH2105
"""
# Have to make sure to be correct about maximizing or minimizing loss.
# I took the negative of what is mentioned on page 9 in the paper in order to create a loss
# to be minimized. If I'm correct real_loss can be used as it is right now
def entropy_loss(D_out):
    batch_size = D_out.size(0)
    K = train_dataset.NUM_CLASSES
    loss = torch.zeros(batch_size)
    
    # softmaxing
    # e = torch.exp(D_out)
    # s = torch.sum(e, dim=1)
    # probabilities = e / s.view(BATCH_SIZE, 1)
    
    # Just regular normalization
    
    #probabilities = D_out / torch.sum(D_out, dim=1).view(batch_size, 1)
    
    #print(probabilities)
            
    for c in range(K):
        # labels = torch.ones(batch_size) * c
        # if train_on_gpu:
        #     labels = labels.cuda()
        
        # c_loss = 1/K * torch.log(probabilities[:, c]) + (1 - 1/K) * torch.log(torch.ones(batch_size)-probabilities[:, c])         
        c_loss = 1/K * torch.log(D_out[:, c]) + (1 - 1/K) * torch.log(torch.ones(batch_size)-D_out[:, c])         
        
        loss += c_loss
    #print(loss)
    return loss.sum() / batch_size

In [44]:
'''
test entropy loss
''' 
D_out_min_entropy = torch.zeros(BATCH_SIZE, train_dataset.NUM_CLASSES)
for i in range(BATCH_SIZE):
    D_out_min_entropy[i][0] = 1
D_out_random = torch.rand(BATCH_SIZE, train_dataset.NUM_CLASSES)

D_out_max_entropy = torch.ones(BATCH_SIZE, train_dataset.NUM_CLASSES) 

print(entropy_loss(D_out_min_entropy))
print(entropy_loss(D_out_random))
print(entropy_loss(D_out_max_entropy))


tensor(-inf)
tensor(-121.0368)
tensor(-inf)


In [45]:
# Initialize BCELoss function
#real_loss_criterion = nn.BCELoss()

# Create batch of latent vectors that we will use to visualize
#  the progression of the generator
fixed_z = torch.randn(BATCH_SIZE, 100, 1, 1, device=device)

# Establish convention for real and fake labels during training
real_label = 1
fake_label = 0

# Setup Adam optimizers for both G and D
optimizerD = optim.SGD(D.parameters(), lr=lr)
optimizerG = optim.SGD(G.parameters(), lr=lr)


In [46]:
""" 
FROM Udacity DCGAN implementation
"""
train_on_gpu = torch.cuda.is_available()
if train_on_gpu:
    G.cuda()
    D.cuda()
    print('GPU available for training. Models moved to GPU')
else:
    print('Training on CPU.')

Training on CPU.


In [47]:
# Training Loop

# Lists to keep track of progress
img_list = []
G_losses = []
D_losses = []
iters = 0
num_epochs = 5 #50
print_every = 50

print("Starting Training Loop...")
# For each epoch
for epoch in range(num_epochs):
    # For each batch in the dataloader
    for batch_i, (real_images, real_labels) in enumerate(train_dataloader, 0):
        b_size = real_images.size(0)
        optimizerG.zero_grad()
        
        # 3.
        #z = np.random.uniform(-1, 1, size=(BATCH_SIZE, 100)) 
        #z = torch.from_numpy(z).float()
        z = torch.randn(b_size, 100, 1, 1, device=device)
        #if train_on_gpu:
        #    z = z.cuda()
        
        # 4) Generate fake image batch with G
        fake_images = G(z)
        
        if train_on_gpu:
            real_images = real_images.cuda()
        
        # 5) Forward pass real batch through D
        D_real, D_multi = D(real_images) #.view(-1)
        d_real_real_loss = real_loss(D_real, label=1) 
        # 6.
        d_real_multi_loss = multi_loss(D_multi, real_labels)
        # 7.
        D_fake, D_fake_entropy = D(fake_images)
        d_fake_real_loss = real_loss(D_fake, label=0)
        # 8.
        g_fake_entropy_loss = entropy_loss(D_fake_entropy) ##
        
        # 9.
        d_loss= torch.log(d_real_real_loss)+torch.log(d_real_multi_loss)+torch.log(d_fake_real_loss) 
        #torch.log(1-g_fake_real_loss), the 1- is not necessary because computed against label=0 now
        print(d_real_real_loss.data.numpy(), d_real_multi_loss.data.numpy(), d_fake_real_loss.data.numpy(), d_loss.data.numpy())
        
        # 10.
        d_loss.backward(retain_graph=True)
        optimizerD.step()
        
        # 11.
        g_loss=torch.log(d_fake_real_loss)-g_fake_entropy_loss
        print(g_fake_real_loss.data.numpy(), g_fake_entropy_loss.data.numpy(), g_loss.data.numpy())
        
        # 12.
        g_loss.backward()
        optimizerG.step()
        print("-",g_loss.data.numpy(),d_loss.data.numpy())

        
        # Output training stats
        if batch_i % print_every == 0:
            # append discriminator loss and generator loss
            G_losses.append(g_loss.item())
            D_losses.append(d_loss.item())
            
            # print discriminator and generator loss
            print('Epoch [{:5d}/{:5d}] | d_loss: {:6.4f} | g_loss: {:6.4f}'.format(
                    epoch+1, num_epochs, d_loss.item(), g_loss.item()))

    
    ## AFTER EACH EPOCH##    
    # generate and save sample, fake images
    G.eval() # for generating samples
    if train_on_gpu:
        fixed_z = fixed_z.cuda()
    img_z = G(fixed_z).detach().cpu()
    img_list.append(make_grid(img_z, padding=2, normalize=True))
    G.train() # back to training mode    
    
# Save training generator samples
with open('train_samples.pkl', 'wb') as f:
    pkl.dump(img_list, f)
    
torch.save(G, 'G.pt')
torch.save(D, 'D.pt')


### END -   FROM Udacity DCGAN implementation ###

Starting Training Loop...




0.74797565 4.7875123 0.6987721 0.91719544
0.057120737 -5.7929893 5.4345584
- 5.4345584 0.91719544
Epoch [    1/    5] | d_loss: 0.9172 | g_loss: 5.4346
0.74780214 4.7870717 0.69778705 0.9154607
0.057120737 -5.7930737 5.4332323
- 5.4332323 0.9154607
0.77858126 4.7872562 0.5946165 0.7958368
0.057120737 -5.793006 5.273167
- 5.273167 0.7958368
0.8715407 4.7875113 0.49372518 0.72274184
0.057120737 -5.793393 5.087617
- 5.087617 0.72274184
1.01192 4.7875733 0.3340667 0.48145854
0.057120737 -5.793536 4.6971216
- 4.6971216 0.48145854
1.238984 4.787306 0.20066054 0.17411888
0.057120737 -5.792698 4.1865573
- 4.1865573 0.17411888
1.613193 4.7876353 0.08545934 -0.41546273
0.057120737 -5.792964 3.3332493
- 3.3332493 -0.41546273
2.137348 4.7873006 0.03346938 -1.0715919
0.057120737 -5.793118 2.3959937
- 2.3959937 -1.0715919
2.8597589 4.7875566 0.009355629 -2.0550199
0.057120737 -5.792378 1.1206007
- 1.1206007 -2.0550199
3.8075595 4.787935 0.0021973117 -3.217433
0.057120737 -5.7930617 -0.32745886
- -0.

RuntimeError: Assertion `x >= 0. && x <= 1.' failed. input value should be between 0~1, but got nan at /Users/distiller/project/conda/conda-bld/pytorch_1556653464916/work/aten/src/THNN/generic/BCECriterion.c:60