In [2]:
import argparse


def _str2bool(value):
    """Convert a command-line string into a real boolean.

    argparse's ``type=bool`` simply calls ``bool()`` on the raw string, so every
    non-empty value (including "False" and "0") is parsed as True. This converter
    accepts the usual spellings instead and rejects anything else.
    """
    if isinstance(value, bool):
        return value
    text = str(value).strip().lower()
    if text in ('true', 't', 'yes', 'y', '1'):
        return True
    # The empty string stays False, matching what bool('') used to give.
    if text in ('false', 'f', 'no', 'n', '0', ''):
        return False
    raise argparse.ArgumentTypeError(f"expected a boolean value, got {value!r}")


# All tunable hyper-parameters and paths of the notebook live in `param`.
parser = argparse.ArgumentParser()
parser.add_argument('--image_size', type=int, default=64)
parser.add_argument('--batch_size', type=int, default=64) # DCGAN paper original value used 128
parser.add_argument('--n_colors', type=int, default=3)
parser.add_argument('--z_size', type=int, default=100) # DCGAN paper original value
parser.add_argument('--G_h_size', type=int, default=128, help='Number of hidden nodes in the Generator. Too small leads to bad results, too big blows up the GPU RAM.') # DCGAN paper original value
parser.add_argument('--D_h_size', type=int, default=128, help='Number of hidden nodes in the Discriminator. Too small leads to bad results, too big blows up the GPU RAM.') # DCGAN paper original value
parser.add_argument('--lr_D', type=float, default=.00005, help='Discriminator learning rate') # 1/4 of DCGAN paper original value
parser.add_argument('--lr_G', type=float, default=.0002, help='Generator learning rate') # DCGAN paper original value
parser.add_argument('--n_epoch', type=int, default=1000)
parser.add_argument('--beta1', type=float, default=0.5, help='Adam betas[0], DCGAN paper recommends .50 instead of the usual .90')
parser.add_argument('--SELU', type=_str2bool, default=False, help='Using scaled exponential linear units (SELU) which are self-normalizing instead of ReLU with BatchNorm. This improves stability.')
parser.add_argument('--seed', type=int)
parser.add_argument('--input_folder', default='./cats_bigger_than_64x64/', help='input folder')
parser.add_argument('--output_folder', default='WGAN_output', help='output folder')
parser.add_argument('--G_load', default='', help='Full path to Generator model to load (ex: /home/output_folder/run-5/models/G_epoch_11.pth)')
parser.add_argument('--D_load', default='', help='Full path to Discriminator model to load (ex: /home/output_folder/run-5/models/D_epoch_11.pth)')
parser.add_argument('--cuda', type=_str2bool, default=True, help='enables cuda')
parser.add_argument('--n_gpu', type=int, default=1, help='number of GPUs to use')
parser.add_argument('--n_workers', type=int, default=2, help='Number of subprocess to use to load the data. Use at least 2 or the number of cpu cores - 1.')
parser.add_argument('--weight_decay', type=float, default=0, help='L2 regularization weight. Greatly helps convergence but leads to artifacts in images, not recommended.')
parser.add_argument('--gen_extra_images', type=int, default=0, help='Every epoch, generate additional images with "batch_size" random fake cats.')
parser.add_argument('--n_critic', type=int, default=5, help='Number of training with D before training G') # WGAN original value
parser.add_argument('--clip', type=float, default=.01, help='Clipping value') # WGAN original value
# parse_known_args (rather than parse_args) tolerates the extra arguments the
# Jupyter kernel puts on sys.argv.
param, unknown = parser.parse_known_args()

In [3]:
## Imports

import time
start = time.time()  # reference point for the elapsed time printed during training
import os

# Pick the first unused "<output_folder>/run-<N>" directory so that earlier runs
# are never overwritten. The path is built without a trailing slash in every case,
# so derived paths never contain "//".
run = 0
base_dir = f"{param.output_folder}/run-{run}"
while os.path.exists(base_dir):
    run += 1
    base_dir = f"{param.output_folder}/run-{run}"
os.makedirs(base_dir)

# Sub-folders for TensorBoard/text logs, generated samples and model checkpoints.
logs_dir = f"{base_dir}/logs"
os.makedirs(logs_dir)
os.makedirs(f"{base_dir}/images")
os.makedirs(f"{base_dir}/models")
if param.gen_extra_images > 0:
    os.makedirs(f"{base_dir}/images/extra")

# The log file stays open for the whole notebook session (later cells print into it),
# so it is opened line-buffered: each line reaches disk as soon as it is written,
# even if the kernel is interrupted before the file is closed.
log_output = open(f"{logs_dir}/log.txt", 'w', buffering=1)
print(param)
print(param, file=log_output)

import torch
import torch.autograd as autograd
from torch.autograd import Variable

from tensorboard_logger import configure,log_value
configure(logs_dir,flush_secs=5)

import torchvision
import torchvision.datasets as dset
import torchvision.transforms as transf
import torchvision.models as models
import torchvision.utils as vutils
from torch.utils.data import Dataset
from natsort import natsorted
# from PIL import Image
import PIL
import imageio as iio

# Honour --cuda: use the GPU only when it was requested AND is actually available.
use_cuda = bool(param.cuda) and torch.cuda.is_available()
device = torch.device("cuda" if use_cuda else "cpu")

if param.cuda:
    import torch.backends.cudnn as cudnn
    # Let cuDNN auto-tune its convolution algorithms (input sizes are fixed here).
    cudnn.benchmark = True

from IPython.display import Image
to_img = transf.ToPILImage()

import math

import random
# Draw a random seed only when none was given; an explicit seed of 0 is respected.
if param.seed is None:
    param.seed = random.randint(1,10000)
print(f"Random Seed: {param.seed}")
print(f"Random Seed: {param.seed}",file=log_output)
random.seed(param.seed)
torch.manual_seed(param.seed)

if param.cuda:
    torch.cuda.manual_seed_all(param.seed)

## Transforming images
trans = transf.Compose([
    transf.Resize((param.image_size,param.image_size)),
    # This makes it into [0,1]
    transf.ToTensor(),
    # This makes it into [-1,1] so that it matches the range of the generator's tanh output
    transf.Normalize(mean = [0.5, 0.5, 0.5], std = [0.5, 0.5, 0.5])
])

class CustomDataSet(Dataset):
    """Unlabelled image dataset backed by a single flat folder.

    Every regular, non-hidden file directly inside ``main_dir`` is treated as one
    image; items are returned in natural filename order and carry no label.

    Args:
        main_dir: folder that contains the image files.
        transform: callable applied to each RGB ``PIL.Image``; its result is what
            ``__getitem__`` returns.
    """

    def __init__(self, main_dir, transform):
        self.main_dir = main_dir
        self.transform = transform
        # Keep regular files only and skip hidden entries: sub-directories such as
        # ".ipynb_checkpoints" would otherwise be handed to PIL and crash the loader.
        all_imgs = [
            name for name in os.listdir(main_dir)
            if not name.startswith('.') and os.path.isfile(os.path.join(main_dir, name))
        ]
        self.total_imgs = natsorted(all_imgs)

    def __len__(self):
        """Number of image files found in the folder."""
        return len(self.total_imgs)

    def __getitem__(self, idx):
        """Load image number ``idx``, convert it to RGB and apply the transform."""
        img_loc = os.path.join(self.main_dir, self.total_imgs[idx])
        # convert() returns an independent, fully loaded copy, so the file handle
        # can be closed right away instead of leaking until garbage collection.
        with PIL.Image.open(img_loc) as img_file:
            image = img_file.convert("RGB")
        tensor_image = self.transform(image)
        return tensor_image
    
# Wrap the image folder in a batching loader: reshuffled on every pass and read by
# background worker processes. Note that `dataset` is the DataLoader, so
# len(dataset) counts batches, not images.
my_dataset = CustomDataSet(param.input_folder, transform=trans)
loader_options = dict(
    batch_size=param.batch_size,
    shuffle=True,
    num_workers=param.n_workers,
)
dataset = torch.utils.data.DataLoader(my_dataset, **loader_options)

Namespace(D_h_size=128, D_load='', G_h_size=128, G_load='', SELU=False, batch_size=64, beta1=0.5, clip=0.01, cuda=True, gen_extra_images=0, image_size=64, input_folder='./cats_bigger_than_64x64/', lr_D=5e-05, lr_G=0.0002, n_colors=3, n_critic=5, n_epoch=1000, n_gpu=1, n_workers=2, output_folder='WGAN_output', seed=None, weight_decay=0, z_size=100)
Random Seed: 5798


In [3]:
# !pip install tensorboard_logger
# !pip install natsort

In [4]:
## Models
# The number of layers is implicitly determined by the image size
# image_size = (4,8,16,32,64,128,256, 512, 1024) leads to n_layers = (1,2,3,4,5,6,7,8,9)

# The more layers there are, the bigger the network gets, so it's best to decrease G_h_size and D_h_size when the input image is bigger

# DCGAN generator
class DCGAN_G(torch.nn.Module):
    """DCGAN generator: maps a (z_size x 1 x 1) noise vector to an image in [-1, 1].

    The network is a stack of transposed convolutions. Its depth follows from
    ``param.image_size``: the start block produces a 4x4 map, every middle block
    doubles the resolution while halving the channel multiplier, and the end block
    doubles it once more and maps to ``param.n_colors`` channels through a tanh.
    Each hidden block uses either SELU or BatchNorm+ReLU, depending on ``param.SELU``.
    """

    def __init__(self):
        super().__init__()
        layers = torch.nn.Sequential()
        hidden = param.G_h_size

        # Channel multiplier of the first block; it is halved by every middle block.
        mult = param.image_size // 8

        ### Start block: z_size x 1 x 1 -> (G_h_size * mult) x 4 x 4
        layers.add_module(
            'Start-ConvTranspose2d',
            torch.nn.ConvTranspose2d(param.z_size, hidden * mult,
                                     kernel_size=4, stride=1, padding=0, bias=False))
        if not param.SELU:
            layers.add_module('Start-BatchNorm2d', torch.nn.BatchNorm2d(hidden * mult))
            layers.add_module('Start-ReLU', torch.nn.ReLU())
        else:
            layers.add_module('Start-SELU', torch.nn.SELU(inplace=True))

        ### Middle blocks: work out the (in, out) multipliers first, then build them
        halvings = []
        while mult > 1:
            halvings.append((mult, mult // 2))
            mult = mult // 2
        for idx, (mult_in, mult_out) in enumerate(halvings, start=1):
            layers.add_module(
                'Middle-ConvTranspose2d [%d]' % idx,
                torch.nn.ConvTranspose2d(hidden * mult_in, hidden * mult_out,
                                         kernel_size=4, stride=2, padding=1, bias=False))
            if not param.SELU:
                layers.add_module('Middle-BatchNorm2d [%d]' % idx,
                                  torch.nn.BatchNorm2d(hidden * mult_out))
                layers.add_module('Middle-ReLU [%d]' % idx, torch.nn.ReLU(inplace=True))
            else:
                layers.add_module('Middle-SELU [%d]' % idx, torch.nn.SELU(inplace=True))

        ### End block: G_h_size channels -> n_colors x image_size x image_size
        layers.add_module(
            'End-ConvTransposed2d',
            torch.nn.ConvTranspose2d(hidden, param.n_colors,
                                     kernel_size=4, stride=2, padding=1, bias=False))
        layers.add_module('End-Tanh', torch.nn.Tanh())
        self.main = layers

    def forward(self, input):
        """Generate images from a batch of noise vectors."""
        spread_over_gpus = isinstance(input.data, torch.cuda.FloatTensor) and param.n_gpu > 1
        if not spread_over_gpus:
            return self.main(input)
        return torch.nn.parallel.data_parallel(self.main, input, range(param.n_gpu))
# DCGAN discriminator ( using somewhat the reverse of the generator)
class DCGAN_D(torch.nn.Module):
    """DCGAN discriminator: roughly the generator in reverse.

    A stack of stride-2 convolutions halves the resolution until a 4x4 map is left;
    a final 4x4 convolution reduces it to a single value per image. The depth
    follows from ``param.image_size`` and the hidden blocks use either SELU or
    BatchNorm+LeakyReLU, depending on ``param.SELU``.

    ``forward`` returns a 1-D tensor with one score per input image.
    """

    def __init__(self):
        super(DCGAN_D,self).__init__()
        main = torch.nn.Sequential()

        ### Start block
        # Size = n_colors x image_size x image_size
        main.add_module('Start-conv2d',torch.nn.Conv2d(param.n_colors,param.D_h_size,kernel_size=4,stride=2,padding=1,bias=False))
        if param.SELU:
            main.add_module('Start-SELU',torch.nn.SELU(inplace=True))
        else:
            main.add_module('Start-LeakyReLU',torch.nn.LeakyReLU(0.2,inplace=True))
        image_size_new = param.image_size // 2
        # Size = D_h_size x image_size/2 x image_size/2

        ### Middle block (repeated until we reach ? x 4 x 4)
        mult = 1
        i=0
        while image_size_new>4:
            main.add_module('Middle-Conv2d [%d]'%i,torch.nn.Conv2d(param.D_h_size*mult,param.D_h_size*(2*mult),kernel_size=4,stride=2,padding=1,bias=False))
            if param.SELU:
                main.add_module('Middle-SELU [%d]'%i,torch.nn.SELU(inplace=True))
            else:
                main.add_module('Middle-BatchNorm2d [%d]'%i,torch.nn.BatchNorm2d(param.D_h_size*(2*mult)))
                main.add_module('Middle-LeakyReLU [%d]'%i,torch.nn.LeakyReLU(0.2,inplace=True))
            # Each pass doubles the channel count and halves the spatial size
            image_size_new = image_size_new // 2
            mult *=2
            i +=1

        ### End block
        # Size = (D_h_size*mult) x 4 x 4
        main.add_module('End-Conv2d',torch.nn.Conv2d(param.D_h_size*mult,1,kernel_size=4,stride=1,padding=0,bias=False))
        # NOTE(review): the sigmoid bounds the score to (0, 1). That is the DCGAN
        # setup; the WGAN paper uses an unbounded critic without it. Confirm which
        # behaviour is intended before training with the Wasserstein loss.
        main.add_module('Sigmoid',torch.nn.Sigmoid())
        # Size = 1 x 1 x 1 (is a real cat or not)
        self.main = main

    def forward(self,input):
        """Score a batch of images; returns a flat tensor with one value per image."""
        if isinstance(input.data,torch.cuda.FloatTensor) and param.n_gpu>1:
            output = torch.nn.parallel.data_parallel(self.main,input,range(param.n_gpu))
        else:
            output = self.main(input)

        # Flatten batch x 1 x 1 x 1 to batch. The return must sit outside the
        # if/else: inside the else branch the multi-GPU path returned None.
        return output.view(-1)
        
## weights init function, DCGAN use 0.02 std
def weights_init(m):
    """DCGAN-style initialisation, meant to be passed to ``Module.apply``.

    Modules whose class name contains "Conv" get weights drawn from N(0, 0.02).
    Modules whose class name contains "BatchNorm" get scale weights drawn from
    N(1, 0.02) and a zero bias. Every other module is left untouched.
    """
    layer_kind = type(m).__name__
    if 'Conv' in layer_kind:
        m.weight.data.normal_(0.0, 0.02)
        return
    if 'BatchNorm' in layer_kind:
        # Scale starts around 1 and shift at exactly 0.
        m.weight.data.normal_(1.0, 0.02)
        m.bias.data.fill_(0)
        
# Build both networks and apply the DCGAN weight initialisation
G = DCGAN_G()
D = DCGAN_D()

G.apply(weights_init)
D.apply(weights_init)

# Load existing models. map_location remaps the stored tensors onto the device in
# use, so a checkpoint saved on a GPU can also be loaded on a CPU-only machine.
if param.G_load !='':
    G.load_state_dict(torch.load(param.G_load, map_location=device))
if param.D_load !='':
    D.load_state_dict(torch.load(param.D_load, map_location=device))

print(G)
print(G,file=log_output)
print(D)
print(D,file=log_output)

# Fixed noise batch, reused for every saved sample grid so that successive
# snapshots show the same latent points.
z_test = torch.FloatTensor(param.batch_size,param.z_size,1,1).normal_(0,1)
# Constant +1 / -1 tensors, moved to the training device below.
one = torch.FloatTensor([1])
one_neg = one * -1

# Move everything to the training device
G = G.to(device)
D = D.to(device)
z_test = z_test.to(device)
one = one.to(device)
one_neg = one_neg.to(device)

# Optimizers: RMSprop with the learning rates from the command line.
# (--beta1 and --weight_decay are accepted by the parser but are not used here.)
optimizerD = torch.optim.RMSprop(D.parameters(), lr=param.lr_D)
optimizerG = torch.optim.RMSprop(G.parameters(), lr=param.lr_G)

DCGAN_G(
  (main): Sequential(
    (Start-ConvTranspose2d): ConvTranspose2d(100, 1024, kernel_size=(4, 4), stride=(1, 1), bias=False)
    (Start-BatchNorm2d): BatchNorm2d(1024, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    (Start-ReLU): ReLU()
    (Middle-ConvTranspose2d [1]): ConvTranspose2d(1024, 512, kernel_size=(4, 4), stride=(2, 2), padding=(1, 1), bias=False)
    (Middle-BatchNorm2d [1]): BatchNorm2d(512, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    (Middle-ReLU [1]): ReLU(inplace=True)
    (Middle-ConvTranspose2d [2]): ConvTranspose2d(512, 256, kernel_size=(4, 4), stride=(2, 2), padding=(1, 1), bias=False)
    (Middle-BatchNorm2d [2]): BatchNorm2d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    (Middle-ReLU [2]): ReLU(inplace=True)
    (Middle-ConvTranspose2d [3]): ConvTranspose2d(256, 128, kernel_size=(4, 4), stride=(2, 2), padding=(1, 1), bias=False)
    (Middle-BatchNorm2d [3]): BatchNorm2d(128, eps=1e-05, 

In [None]:
## fitting model

def save_fake_samples(snapshot_idx):
    """Save image grids produced by the current generator.

    Writes one grid for the fixed ``z_test`` noise and, when
    ``param.gen_extra_images`` > 0, that many extra grids from fresh random noise.
    ``snapshot_idx`` is the number used in the file names.
    """
    # No gradients are needed for sampling; this saves memory.
    with torch.no_grad():
        fake_test = G(z_test)
        vutils.save_image(fake_test, '%s/run-%d/images/fake_samples_iter%03d.png' % (param.output_folder, run, snapshot_idx), normalize=True)
        for ext in range(param.gen_extra_images):
            # Created on the same device as the models instead of an unconditional .cuda()
            z_extra = torch.randn(param.batch_size, param.z_size, 1, 1).to(device)
            fake_extra = G(z_extra)
            vutils.save_image(fake_extra, '%s/run-%d/images/extra/fake_samples_iter%03d_extra%01d.png' % (param.output_folder, run, snapshot_idx, ext), normalize=True)


gen_iterations = 0
n_batches = len(dataset)  # `dataset` is the DataLoader, so this counts batches per epoch
for epoch in range(param.n_epoch):
    # Setting up iterable
    i = 0
    data_iter = iter(dataset)

    while i < n_batches:
        # Save sample grids every 50 generator iterations (including iteration 0).
        # The check lives inside the iteration loop: at epoch level it would only
        # fire when an epoch happened to start on a multiple of 50.
        if gen_iterations % 50 == 0:
            save_fake_samples(gen_iterations // 50)

        ########################
        # Update D network
        ########################
        # Trick used in the Wasserstein GAN paper for more stable convergence:
        # train the critic much longer at the start and every 500 generator steps.
        if gen_iterations < 25 or gen_iterations % 500 == 0:
            N_critic = 100
        else:
            N_critic = param.n_critic

        # D is trainable during its own updates (it is frozen for the G update below).
        for p in D.parameters():
            p.requires_grad_(True)

        t = 0
        while t < N_critic and i < n_batches:
            D.zero_grad()

            # Clip weights to [-clip, clip]
            for p in D.parameters():
                p.data.clamp_(-param.clip, param.clip)

            # Sample real data. The loader yields plain image batches; a
            # (images, labels) pair is also accepted in case the dataset changes.
            batch = next(data_iter)
            real_images = batch[0] if isinstance(batch, (list, tuple)) else batch
            # The last batch may be smaller than batch_size
            current_batch_size = real_images.size(0)
            real_images = real_images.to(device)

            # Critic score on real data, averaged over the batch
            errD_real = D(real_images).mean()

            # Sample fake data; no gradient is needed through G while training D
            z = torch.randn(current_batch_size, param.z_size, 1, 1).to(device)
            with torch.no_grad():
                x_fake = G(z)
            # Critic score on fake data, averaged over the batch
            errD_fake = D(x_fake).mean()

            # Minimise D(real) - D(fake): same sign convention as before, where the
            # real score received a +1 gradient and the fake score a -1 gradient.
            errD = errD_real - errD_fake
            errD.backward()
            optimizerD.step()

            # Iterate up
            t = t + 1
            i = i + 1

        ########################
        # Update G network
        ########################
        # Freeze D so the generator step does not compute unused critic gradients
        for p in D.parameters():
            p.requires_grad_(False)
        G.zero_grad()

        # Sample fake data
        z = torch.randn(current_batch_size, param.z_size, 1, 1).to(device)
        x_fake = G(z)

        # Generator loss: minimise the critic's mean score on generated images
        errG = D(x_fake).mean()
        errG.backward()
        optimizerG.step()

        # Log results so we can see them in TensorBoard after
        log_value('errD', -errD.item(), gen_iterations)
        log_value('errG', errG.item(), gen_iterations)

        gen_iterations = gen_iterations + 1

        if gen_iterations % 50 == 0:
            end = time.time()
            print('[%d] W_distance: %.4f Loss_G: %.4f time:%.4f' % (gen_iterations, -errD.item(), errG.item(), end - start))
            print('[%d] W_distance: %.4f Loss_G: %.4f time:%.4f' % (gen_iterations, -errD.item(), errG.item(), end - start), file=log_output)
            log_output.flush()
        # Save models
        if gen_iterations % 500 == 0:
            torch.save(G.state_dict(), '%s/run-%d/models/G_%d.pth' % (param.output_folder, run, gen_iterations // 50))
            torch.save(D.state_dict(), '%s/run-%d/models/D_%d.pth' % (param.output_folder, run, gen_iterations // 50))
                

[0/1000][0/146] Loss_D: 1.9076 Loss_G: 3.4784 D(x): 0.4718 D(G(z)): 0.5291/0.0506 time:26.4309
[0/1000][50/146] Loss_D: 2.1708 Loss_G: 11.8502 D(x): 0.6353 D(G(z)): 0.3329/0.0000 time:57.1853
[0/1000][100/146] Loss_D: 2.0977 Loss_G: 9.9908 D(x): 0.4933 D(G(z)): 0.1902/0.0001 time:89.5364
[1/1000][0/146] Loss_D: 0.6042 Loss_G: 9.0297 D(x): 0.8592 D(G(z)): 0.2608/0.0002 time:121.0136
[1/1000][50/146] Loss_D: 2.3298 Loss_G: 2.4580 D(x): 0.5761 D(G(z)): 0.6764/0.1378 time:153.4726
[1/1000][100/146] Loss_D: 1.1678 Loss_G: 1.8780 D(x): 0.6423 D(G(z)): 0.4135/0.1883 time:185.0360
[2/1000][0/146] Loss_D: 1.5122 Loss_G: 1.3121 D(x): 0.4181 D(G(z)): 0.2897/0.3081 time:215.6222
[2/1000][50/146] Loss_D: 1.4271 Loss_G: 1.3759 D(x): 0.5908 D(G(z)): 0.4997/0.3153 time:247.3525
[2/1000][100/146] Loss_D: 1.0629 Loss_G: 1.6211 D(x): 0.7008 D(G(z)): 0.4579/0.2284 time:278.9653
[3/1000][0/146] Loss_D: 1.7254 Loss_G: 0.9119 D(x): 0.3710 D(G(z)): 0.3665/0.4233 time:308.3030
[3/1000][50/146] Loss_D: 1.4987 L

In [10]:
# Number of batches per pass: `dataset` is the DataLoader, so this counts batches, not images.
len(dataset)

1

In [32]:
# Recursively copy the .ipynb_checkpoints folder found inside the image folder into the current directory.
!cp -r cats_bigger_than_64x64/.ipynb_checkpoints .

ls: invalid option -- 'e'
Try 'ls --help' for more information.


In [39]:
# !ls --help

In [45]:
import os

# Print every entry of the image folder whose name does not end in ".jpg".
for file in os.listdir("cats_bigger_than_64x64"):
    if not file.endswith(".jpg"):
        print(file)

In [46]:
# !rm -r cats_bigger_than_64x64/.ipynb_checkpoints