In [3]:
import argparse
parser = argparse.ArgumentParser()
parser.add_argument('--workers',type=int,default=8,help='Number of workers for dataloader')
parser.add_argument('--batch_size',type=int,default=128)
parser.add_argument('--n_class',type=int,default=27,help='Number of styles')
parser.add_argument('--image_size',type=int,default=64,help='Size of traning data')
parser.add_argument('--nc',type=int,default=3,help='Number of channels')
parser.add_argument('--nz',type=int,default=150,help='z latent vector')
parser.add_argument('--G_h_size',type=int,default=64,help='Size of feature maps in generator')
parser.add_argument('--D_h_size',type=int,default=32,help='Size of feature maps in discriminator')
parser.add_argument('--num_epochs',type=int,default=75)
parser.add_argument('--lr',type=float,default=0.0001)
parser.add_argument('--beta1 = 0.5',type=float,default=0.5,help='beta1 for adam')
parser.add_argument('--ngpu',type=int,default=1)
param, unknown = parser.parse_known_args()


In [2]:
# !pip install argparse

In [2]:
import os
import random
import torch
import torch.nn as nn
import torch.nn.parallel
import torch.backends.cudnn as cudnn
import torch.optim as optim
import torch.utils.data
import torchvision.datasets as dset
import torchvision.transforms as transforms
import torchvision.utils as vutils
import numpy as np

device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

def weights_init(m):
    classname = m.__class__.__name__
    if classname.find('Conv') !=-1:
        nn.init.normal_(m.weights.data,0.0,0.02)
    elif classname.find('BatchNorm') !=-1:
        nn.init.normal_(m.weight.data,1.0,0.02)
        nn.init.constant_(m.bias.data,0)
        

        
# DCGAN generator
class DCGAN_G(nn.Module):
    def __init__(self,ngpu):
        super(DCGAN_G,self).__init__()
        self.ngpu = ngpu
        main = torch.nn.Sequential()
        
        
        # We need to know how many layers we will use at the beginning
        mult = param.image_size // 8
        
        
        ### Start block
        # Z_size random numbers
        main.add_module('Start-ConvTranspose2d',torch.nn.ConvTranspose2d(param.nz,param.G_h_size*mult,kernel_size=4,stride=1,padding=0,Bias=False))
        if param.SELU:
            main.add_module('Start-SELU',torch.nn.SELU(inplace=True))
        else:
            main.add_module('Start-BatchNorm2d',torch.nn.Batch2d(param.G_h_size*mult))
            main.add_module('Start-ReLU',torch.nn.ReLU())
        # Size = (G_h_size * mult)x 4 x4
        
        ### Middle block (Done until we reach  ? x image_size/2 x image_size/2)
        i=1
        while mult>1:
            main.add_module('Middle-ConvTranspose2d [%d]'%i, torch.nn.ConvTranspose2d(param.G_h_size*mult,param.G_h_size*(mult//2),kernel_size=4,stride=2,padding=1,bias=False))
            if param.SELU:
                main.add_module('Middle-SELU [%d]'%i,torch.nn.SELU(inplace=True))
            else:
                main.add_module('Middle-BatchNorm2d [%d]'%i,torch.nn.BatchNorm2d(param.G_h_size*(mult//2)))
                main.add_module('Middle_ReLU [$d]'%i,torch.nn.ReLU(inplace=True))
            # Size = (G_h_size * (mult/(2*i)))x8x8
            mult = mult // 2
            i+=1
        
        ### End block
        # Size = G_h_size/2 x image_size/2
        main.add_module('End-ConvTransposed2d', torch.nn.ConvTranspose2d(param.G_h_size,param.n_colors,kernel_size=4,stride=2,padding=1,bias=False))
        main.add_module('End-Tanh',torch.nn.Tanh())
        # Size = n_colors x image_size x image_size
        self.main = main
    
    def forward(self,input):
        if isinstance(input.data,torch.cuda.FloatTensor) and param.n_gpu>1:
            output = torch.nn.parallel.data_parallel(self.main,input,range(param.n_gpu))
        else:
            output = self.main(input)
        return output
    
# DCGAN discriminator ( using somewhat the reverse of the generator)
class DCGAN_D(torch.nn.Module):
    def __init__(self,ngpu):
        super(DCGAN_D,self).__init__()
        main = torch.nn.Sequential()
        self.ngpu = ngpu
        
        ### start block
        # size = n_colors x image_size x image_size
        main.add_module('Start-conv2d',torch.nn.Conv2d(param.n_colors,param.D_h_size,kernel=4,stride=2,padding=1,bias=False))
        if param.SELU:
            main.add_module('Start-SELU',torch.nn.SELU(inplace=True))
        else:
            main.add_module('Start-LeakyReLU',torch.nn.LeakyReLU(0.2,inplace=True))
        image_size_new = para.image_size // 2
        # Size = D_h_size x image_size/2 x image_size/2
        
        ### Middle block (Done until we reach ? x 4 x4)
        mult = 1
        i=0
        while image_size_new>4:
            main.add_module('Middle-Conv2d [%d]'%i,torch.nn.Conv2d(param.D_h_size*mult,param.D_h_size*(2*mult),kernel_size=4,stride=2,padding=1,biase=False))
            if param.SELU:
                main.add_module('Middle-SELU [%d]'%i,torch.nn.SELU(inplace=True))
            else:
                main.add_module('Middle-BatchNorm2d [%d]'%i,torch.nn.BatchNorm2d(param.D_h_size*(2*mult)))
                main.add_module('Middle-LeakyReLU [%d]'%i,torch.nn.LeakyReLU(0.2,inplace=True))
            # Size = (D_h_size*(2*i)) x image_size/(2*i) x image_size/(2*i)
            image_size_new = image_size_new // 2
            mult *=2
            i +=1
            
            ## End Block
            # size = (D_h_size*mult) x 4 x4
            main.add_module('End-Conv2d',torch.nn.Conv2d(param.D_h_size*mult,1,kernel_size=4,stride=1,padding=0,bias=False))
            main.add_module('Sigmoid',torch.nn.Sigmoid())
            # size = 1 x1 x1 (Is a real cat or not)
            self.main = main
        def forward(self,input):
            if isinstance(input.data,torch.cuda.FloatTensor) and param.n_gpu>1:
                output = torch.nn.parallel.data_parallel(self.main,input,range(param.n_gpu))
            else:
                output = self.main(input)
            
            # Convert from 1 x 1 x 1 to 1 so that we can compare to given label (cat or not?)
            return output.view(-1)


In [3]:
# Init
G = DCGAN_G()
D = DCGAN_D()

G.apply(weights_init)
D.apply(weights_init)

# Load existing models
if param.G_load !='':
    G.load_state_dict(torch.load(param.G_load))
if param.D_load !='':
    D.load_state_dict(torch.load(param.D_load))
    
print(G)
print(G,file=log_output)
print(D)
print(D,file=log_output)

# criterion
critirion = torch.nn.BCELoss()

# # soon to be variables
# x = torch.FloatTensor(param.batch_size,param.n_colors,param.image_size,param.image_size)
# y = torch.FloatTensor(param.batch_size)
# z = torch.FloatTensor(param.batch_size,param.z_size,1,1)
# # This is to see during training, size and values won't change
z_test = torch.FloatTensor(param.batch_size,param.z_size,1,1).normal_(0,1)

# Everthing cuda
G = G.to(device)
D = D.to(device)
criterion = criterion.to(device)
x = x.to(device)
y = y.to(device)
z = z.to(device)
z_test = z_test.to(device)
    
# # Now Variables
# x = Variable(x)
# y = Variable(y)
# z = Variable(z)
# z_test = Variable(z_test)

# Based on DCGAN paper, they found using betas[0]=0.50 better.
# betas[0] represent is the weight given to the previous mean of the gradient
# betas[1] is the weight given to the previous variance of the gradient
optimizerD = torch.optim.Adam(D.parameter(),lr=param.lr_D, betas=(param.beta1,0.999),weight_decay=param.weight_decay)
optimizerG = torch.optim.Adam(G.parameter(),lr=param.lr_G, betas=(param.beta1,0.999),weight_decay=param.weight_decay)

## fitting model
for epoch in range(param.n_epoch):
    
    # Fake images saved
    fake_test = G(z_test)
    vutils.save_image(fake_test.data,'%s/run-%d/images/fake_samples_epoch%03d_extra%01d.png'%(param.output_folder,run,epoch,ext),normalize=True)
    for ext in range(param.gen_extra_images):
        z_extra = torch.FloatTensor(param.batch_size,param.z_size,1,1).normal_(0,1)
        if param.cuda:
            z_extra = z_extra.cuda()
        fake_test = G(Variable(z_extra))
        vutils.save_image(fake_test.data, '%s/run-%d/images/extra/fake_samples_epoch%3d_extra%01d.png'%(param.output_folder,run,epoch,ext),normalize=True)
        
    for i, data_batch in enumerate(dataset,0):
        # Update D network
        
        for p in D.parameters():
            p.requires_grad = True
        
        # Train with real data
        D.zero_grad()
        # We can ignore labels since they are all cats
        images,labels = data_batch
        # Mostly necessary for the last one because if the N might not be a multiple of batch_size
        current_batch_size = images.size(0)
        if param.cuda:
            images= images.cuda()
        # Transfer batch of images to x
        x.data.resize_as_(images).copy_(images)
        # y is now a vector of size current_batch_size filled with 1
        y.data.resize_(current_batch_size).fill_(1)
        y_pred = D(x)
        errD_real = criterion(y_pred,y)
        errD_real.backward()
        # Var has data and gradient element, we keep the mean of the data element
        D_real = y_pred.data.mean()
        
        # Train with fake data
        z.data.resize_(current_batch_size,param.z_size,1,1).normal_(0,1)
        x_fake = G(z)
        y.data.resize_(current_batch_size).fill_(0)
        # Detach y_pred from the neural network G and put it inside D
        y_pred_fake = D(x_fake.detach())
        errD_fake = criterion(y_pred_fake,y)
        errD_fake.backward()
        D_fake = y_pred_fake.data.mean()
        errD = errD_real + errD_fake
        optimizerD.step()
        
        # Update G net work
        # make it tiny bit faster
        for p in D.parameters():
            p.requires_grad = False
            
        G.zero_grad()
        # Generator wants to foll discriminaor so it wants to minimize loss of discriminator assuming label is True
        y.data.resize_(current_batch_size).fill_(1)
        y_pred_fake = D(x_fake)
        errG = criterion(y_pred_fake,y)
        errG.backward(retain_graph=True)
        D_G = y_pred_fake.data.mean()
        optimizerG.step()
        
        current_step = i + epoch*len(dataset)
        # Log results so we can see them in TensorBoard after
        log_value('errD',errD.data[0],current_step)
        log_value('errG',errG.data[0],current_step)
        
        if i%50 ==0:
            end = time.time()
            fmt = '[%d/%d][%d/%d] Loss_D: %.4f Loss_G: %.4f D(x): %.4f D(G(z)): %.4f/%.4f time:%.4f'
            s = fmt%(epoch,param.n_epoch,i,len(dataset),errD.data[0],errG.data[0],D_real,D_fake,D_G,end-start)
            print(s)
            print(s,file=log_output)
            
    # save every epoch
    fmt = '%s/run-%d/models/%s_epoch_%d.pth'
    if epoch%25==0:
        torch.save(G.state_dict(),mft%(param.output_folder, run, 'G',epoch))
        torch.save(D.state_dict(), fmt%(param.output_folder, run,'D',epoch))
        

TypeError: __init__() missing 1 required positional argument: 'ngpu'

In [4]:
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

In [5]:
device

device(type='cuda')

In [10]:
!nvidia-smi --list-gpus

GPU 0: Tesla K80 (UUID: GPU-522b75e0-87d4-687a-39ba-4f2cf7774ff8)


In [11]:
!sudo lshw -C video

[sudo] password for jovyan: 


In [12]:
!nvidia-smi --query-gpu=name --format=csv,noheader | wc -l

1


In [13]:
!cat /proc/cpuinfo | grep processor | wc -l

40


In [16]:
# !cat /proc/cpuinfo | grep 'core id'