In [1]:
import numpy as np
import tensorflow as tf
import tensorflow.contrib.slim as slim
import os
import time
from PIL import Image

  from ._conv import register_converters as _register_converters


In [2]:
tf.reset_default_graph()
np.random.seed(1) 
tf.set_random_seed(1)

num_train_imgs = 1500 # number of training images         
original_img_directory = '/home/mele/datasets/plant_deases/color/'
resized_img_directory = 'img_resizesd/'

pre_train_epochs = 1 # num of epochs to pre-train discrimitor
training_epochs = 20001 # num of epochs to train discriminator and generator jointly
batch_size = 64 # batch size for training the model, must be larger than batch_size_sample

# for the generated images
batch_size_sample = 36 # number of images to be generated for viewing the training progress
# the generated images will be concatenated, the product of the following must equal to batch_size_sample
sample_num_rows = 6 # how many rows to have
sample_num_columns = 6 # how many columns to have

leak = 0.2 # degree of leakiness used in leaky ReLU
alpha = 0.0002 # base learning rate
beta1 = 0.5 # the fraction factor used in the first momentum term from Adam optimizer
k = 1 # this is number of times to update generator for every time discriminator is updated in each epoch
logs_path = "./dcgan_leaf_log" # directory to save the training log to
train_sample_directory = './dcgan_leaf/train_sample/' # directory to save the generated images during training
model_directory = './dcgan_leaf/models' # directory to save trained model
sample_directory = './dcgan_leaf/generated_sample/' # directory to save the generated images

In [41]:
# # Preprocess by resizing them.

# # get a list of image filenames
# for a_dir in os.listdir(original_img_directory):
#     file_names = [f for f in os.listdir(original_img_directory+'/'+a_dir)[:100+1] if f.endswith('.JPG')]

#     # get a list of images
#     images = []
#     for f in file_names:
#         img = Image.open(original_img_directory+'/'+a_dir +'/'+f)
#         images.append(img.copy())
#         img.close() 

#     if not os.path.exists(resized_img_directory):
#         os.makedirs(resized_img_directory)

#     # save the resized images
#     for i,img in enumerate(images):
#         img = resizeimage.resize_contain(img, [64, 64]) # resize the image from 178*218 to 64*64
#         img.save(resized_img_directory+'/'+file_names[i], img.format)

In [4]:
# del images # remove images variable to free memory

In [3]:
# Read training images

# get a list of image filenames
for a_dir in os.listdir(original_img_directory):
    file_names = [f for f in os.listdir(original_img_directory+'/'+a_dir)[:1000+1] if f.endswith('.JPG')]

    images = []
    for f in file_names:
        img = Image.open(original_img_directory+'/'+a_dir +'/'+f)
        images.append(img.copy())
        #img.close() 

    # turn image into ndarray
    train_images = np.array([np.asarray(img) for img in images])

In [4]:
# Helper functions

#This function performns a leaky relu activation, which is needed for the discriminator network.
def leaky_relu(x, leak=leak, name="leaky_relu"):
    with tf.variable_scope(name):
        return tf.maximum(leak*x, x)

# get a batch of real images, change size from 28*28 to 32*32, return the processed batch
def get_x(batch_size):
    indices = np.random.randint(num_train_imgs, size=batch_size) # random sample real images
    batch = train_images[indices] 
    batch = 2*(batch/255.)-1 # change range from [0, 255] to [-1, 1]
    return batch

# wrapper function for real images
def save_real_images(images, size, image_path):
    concat_img = concat(images, size) # concatenate individual images to a grid of images
    concat_img = Image.fromarray(concat_img) # convert ndarray to an image object
    concat_img.save(image_path, 'PNG')

# wrapper function for generated images
def save_generated_images(images, size, image_path):
    images = inverse_transform(images)
    concat_img = concat(images, size) # concatenate individual images to a grid of images
    concat_img = Image.fromarray(concat_img) # convert ndarray to an image object
    concat_img.save(image_path, 'PNG')

# change values from [-1, 1] to [0, 255]
def inverse_transform(images):
    return (images+1.)/2. * 255

# concatenate individual images to a grid of images
def concat(images, size):
    # get height and width for a single image generated, e.g. 64 * 64
    height, width = images.shape[1], images.shape[2] 
    
    # placeholder for a concatenated img, which have 
    # img_height = image height * num images vertically, e.g. 64 * 5
    # img_width = image width * num images horizontally, e.g. 64 * 5
    img = np.zeros((height * size[0], width * size[1],3)) 

    # loop through each image
    for index, image in enumerate(images):
        j = index / size[0] # image row index
        i = index % size[1] # image column index
        img[j*height:j*height+height, i*width:i*width+width,:] = image

    return img.astype(np.uint8) # convert ndarray to uint8 type

In [49]:
# if not os.path.exists(train_sample_directory):
#     os.makedirs(train_sample_directory)

# indices = np.random.randint(num_train_imgs, size=batch_size_sample) # random sample real images
# batch = train_images[indices] # with shape [batch,64,64,3]
# save_real_images(images=batch, size=[sample_num_rows,sample_num_columns], image_path=train_sample_directory+'/fig_original.png')


In [5]:
# The generator network takes a noise vector z and return a 64 * 64 image. 
def generator(z, reuse=False, is_train=True):
    
    # Turn z into the first tensor + batch norm + relu
    # Creates a fully connected weight matrix, which is multiplied by the mini-batch of z vectors to 
    # produce a hidden layer with 4*4*1024 hidden nodes
    zP = slim.fully_connected(inputs=z,num_outputs=4*4*1024,normalizer_fn=slim.batch_norm, normalizer_params={"is_training":is_train},\
        reuse=reuse,activation_fn=tf.nn.relu,scope='g_init',weights_initializer=initializer)
    # Transform the flat 4*4*1024 layer into a tensor, whose kernel size is 4*4 and 512 kernels in total
    # -1 stands for the mini-batch size that's to be computed
    zCon = tf.reshape(zP,[-1,4,4,1024])
    
    # Perform fractionally-strided convolution/deconvolution + batch norm + relu
    # num_outputs: number of output filters/activation maps
    # kernel_size: [kernel_height, kernel_width]
    # stride: [stride_height, stride_width], in this case it is generating four pixels out of every pixel, this gives fractionally-strided convolution
    # output size batch_size*8*8*512
    gen1 = slim.convolution2d_transpose(\
        inputs=zCon,num_outputs=512,kernel_size=[5,5],stride=[2,2],\
        padding="SAME",normalizer_fn=slim.batch_norm, normalizer_params={"is_training":is_train},\
        reuse=reuse,activation_fn=tf.nn.relu,scope='g_conv1', weights_initializer=initializer)
    
    # output size batch_size*16*16*256
    gen2 = slim.convolution2d_transpose(\
        inputs=gen1,num_outputs=256,kernel_size=[5,5],stride=[2,2],\
        padding="SAME",normalizer_fn=slim.batch_norm, normalizer_params={"is_training":is_train},\
        reuse=reuse,activation_fn=tf.nn.relu,scope='g_conv2', weights_initializer=initializer)
    
    # output size batch_size*32*32*128
    gen3 = slim.convolution2d_transpose(\
        inputs=gen2,num_outputs=128,kernel_size=[5,5],stride=[2,2],\
        padding="SAME",normalizer_fn=slim.batch_norm, normalizer_params={"is_training":is_train},\
        reuse=reuse,activation_fn=tf.nn.relu,scope='g_conv3', weights_initializer=initializer)
    
    # output size batch_size*64*64*3
    g_out = slim.convolution2d_transpose(\
        inputs=gen3,num_outputs=3,kernel_size=[5,5],stride=[2,2],padding="SAME",\
        biases_initializer=None,activation_fn=tf.nn.tanh,\
        reuse=reuse,scope='g_output', weights_initializer=initializer)
    
    return g_out

In [6]:
# The discriminator network takes a 64*64 image and return a probability of whether it is real or generated
def discriminator(image, reuse=False, is_train=True):
    # Creates 32 4*4 filters to convolve on the mini-batch of 64*64*3 images, also perform batch norm + leaky ReLU activation
    # Note that no pooling is performed
    # stride here calculates one pixel out of every 2*2 pixels, this gives strided convolution that will shrink the image as a substitute for pooling
    # Set reuse=True allows discriminator to evaluate both real samples and generated samples 
    # Outputs size batch_size*32*32*32 
    
    dis1 = slim.convolution2d(inputs=image,num_outputs=32,kernel_size=[4,4],stride=[2,2],padding="SAME",\
        biases_initializer=None,activation_fn=leaky_relu,\
        reuse=reuse,scope='d_conv1',weights_initializer=initializer)
    
    # outputs size batch_size*16*16*64 
    dis2 = slim.convolution2d(inputs=dis1,num_outputs=64,kernel_size=[4,4],stride=[2,2],padding="SAME",\
        normalizer_fn=slim.batch_norm, normalizer_params={"is_training":is_train}, activation_fn=leaky_relu,\
        reuse=reuse,scope='d_conv2', weights_initializer=initializer)
    
    # outputs size batch_size*8*8*128 
    dis3 = slim.convolution2d(inputs=dis2,num_outputs=128,kernel_size=[4,4],stride=[2,2],padding="SAME",\
        normalizer_fn=slim.batch_norm, normalizer_params={"is_training":is_train},activation_fn=leaky_relu,\
        reuse=reuse,scope='d_conv3',weights_initializer=initializer)
    
    # outputs size batch_size*4*4*256 
    dis4 = slim.convolution2d(inputs=dis3,num_outputs=256,kernel_size=[4,4],stride=[2,2],padding="SAME",\
        normalizer_fn=slim.batch_norm, normalizer_params={"is_training":is_train},activation_fn=leaky_relu,\
        reuse=reuse,scope='d_conv4',weights_initializer=initializer)

    # flatten the tensor to [batch_size, 4*4*256]
    dis_flat = slim.flatten(dis4)
    
    # create a fully connect layer with dis_flat and just one node in the output layer
    # note there's no batch normalization at this layer
    # outputs size batch_size*1 
    d_out = slim.fully_connected(inputs=dis_flat,num_outputs=1,\
        activation_fn=tf.nn.sigmoid, reuse=reuse, scope='d_output', weights_initializer=initializer)
    
    return d_out

In [53]:
z_size = 100 # size of initial noise vector that will be used for generator

# initialize all parameters of the networks
# weights were initialized from a zero-centered Normal distribution with standard deviation 0.02
# tf.truncated_normal returns random values from a normal distribution and made sure no value exceeds 2 std

initializer = tf.truncated_normal_initializer(stddev=0.02)

# placeholders for inputs into the generator and discriminator, respectively.
z_vector = tf.placeholder(shape=[batch_size,z_size],dtype=tf.float32, name='z_vectors') 
x_vector = tf.placeholder(shape=[batch_size,64,64,3],dtype=tf.float32, name='real_images') 
whether_is_train = tf.placeholder(tf.bool) # boolean variable indicating if it is in training mode or not

# ---- Pre-training ----

# the discriminator should output probability=1 for all the training images

train_labels=tf.constant(1.0,shape=(batch_size,1), name='pre_train_labels')

# feed images to the discriminator and return the predicted probability
d_pre_output = discriminator(x_vector)                                              
summary_pre_d_x_hist = tf.histogram_summary("pre_train_d_prob_x", d_pre_output)

d_pre_loss=tf.reduce_mean(tf.square(d_pre_output-train_labels))
summary_pre_d_loss = tf.scalar_summary("pre_train_d_loss", d_pre_loss)
# ---- end of Pre-training ----


# ---- DCGAN ----

g_output = tf.cond(whether_is_train, 
                   lambda: generator(z_vector,is_train=True), 
                   lambda: generator(z_vector,reuse=True,is_train=False))# generated mini-batch of images from noisy z vectors 
#g_output = generator(z_vector,is_train) # generated mini-batch of images from noisy z vectors 
    
d_output_x = discriminator(x_vector,reuse=True) # probabilities for real images
d_output_x = tf.maximum(tf.minimum(d_output_x, 0.99), 0.01) # avoid inf and -inf
summary_d_x_hist = tf.histogram_summary("d_prob_x", d_output_x)

d_output_z = discriminator(g_output,reuse=True) # probabilities for generated images
d_output_z = tf.maximum(tf.minimum(d_output_z, 0.99), 0.01) # avoid inf and -inf
# summary_d_z_hist = tf.histogram_summary("d_prob_z", d_output_z)

d_loss = -tf.reduce_mean(tf.log(d_output_x) + tf.log(1-d_output_z)) # loss for discriminator
# summary_d_loss = tf.scalar_summary("d_loss", d_loss)

g_loss = -tf.reduce_mean(tf.log(d_output_z)) # loss for generator
# summary_g_loss = tf.scalar_summary("g_loss", g_loss)

# the following parameter indices may change if the network structure changes
para_d = tf.trainable_variables()[:9] # parameters for discriminator
para_g = tf.trainable_variables()[9:] # parameters for generator

# only update parameters in discriminator during pre-training
pre_optimizer = tf.train.AdamOptimizer(learning_rate=alpha,beta1=beta1).minimize(d_pre_loss,var_list=para_d)
# only update the weights for the discriminator network
optimizer_op_d = tf.train.AdamOptimizer(learning_rate=alpha,beta1=beta1).minimize(d_loss,var_list=para_d)
# only update the weights for the generator network
optimizer_op_g = tf.train.AdamOptimizer(learning_rate=alpha,beta1=beta1).minimize(g_loss,var_list=para_g)

ValueError: Variable d_conv1/weights already exists, disallowed. Did you mean to set reuse=True or reuse=tf.AUTO_REUSE in VarScope? Originally defined at:

  File "/usr/local/lib/python3.5/dist-packages/tensorflow/contrib/framework/python/ops/variables.py", line 246, in variable
    use_resource=use_resource)
  File "/usr/local/lib/python3.5/dist-packages/tensorflow/contrib/framework/python/ops/arg_scope.py", line 183, in func_with_args
    return func(*args, **current_args)
  File "/usr/local/lib/python3.5/dist-packages/tensorflow/contrib/framework/python/ops/variables.py", line 291, in model_variable
    use_resource=use_resource)


In [12]:
# Train the model

# create a log folder and save the graph structure, do this before training
g_writer = tf.train.SummaryWriter(logs_path + '/generator', graph=tf.get_default_graph())
d_writer = tf.train.SummaryWriter(logs_path + '/discriminator')

# saver saves and loads variables of the model to and from checkpoints, 
# which are binary files that maps variable names to tensor values
saver = tf.train.Saver(max_to_keep=20) 


with tf.Session(config=config) as sess:  
   --+ # variables need to be initialized before we can use them
    sess.run(tf.initialize_all_variables())
    #print [v.name for v in tf.trainable_variables()] # print all variable names
    
    
    # -------- pre-train discriminator --------
    start = time.time()
    for epoch in range(pre_train_epochs):
        x = get_x(batch_size) # get a batch of real images, with range [-1 ,1]
        
        d_pre_summary_merge = tf.merge_summary([summary_pre_d_loss, summary_pre_d_x_hist])
        summary_pre_d,_=sess.run([d_pre_summary_merge,pre_optimizer], feed_dict={x_vector: x})
        
        d_writer.add_summary(summary_pre_d, epoch) # add loss summary to tensorboard
        time_lapse = time.time()-start
        start = time.time()
        print "pre-train epoch: ", epoch,", time spent: %.2fs" % time_lapse
   
    print "pre-train done."
    
    
    
    # -------- jointly training discriminator and generator --------
    start = time.time()
    
    # z noise vector that will be used to generate image to check the training progress
    z_sample = np.random.uniform(-1.0,1.0,size=[batch_size,z_size]).astype(np.float32)
    
    for epoch in range(training_epochs):
        # get a batch of real images, with range [-1 ,1]
        x = get_x(batch_size) 
        # mini-batch of noise data from [-1, 1]
        z = np.random.uniform(-1.0,1.0,size=[batch_size,z_size]).astype(np.float32)
        
        
        if epoch <= 200:
            # make a directory for generated images
            if not os.path.exists(train_sample_directory):
+**                os.makedirs(train_sample_directory)
            # get a generated image, with range [-1, 1]
            g_images = sess.run(g_output,feed_dict={z_vector:z_sample}) 
            # substitute 1/5 of the training images to the generated images, to increase discriminator's difficulty 
            substitute_indices = np.random.randint(batch_size, size=batch_size/5) 
            x[substitute_indices] = g_images[substitute_indices]

        
        # Update the discriminator
        d_summary_merge = tf.merge_summary([summary_d_loss, summary_d_x_hist,summary_d_z_hist])
        summary_d,_ = sess.run([d_summary_merge,optimizer_op_d],feed_dict={z_vector:z, x_vector:x}) 
        
        
        if epoch<20:
            # Update the generator for k times in the specified n epoch
            for i in range(k):
                summary_g,_ = sess.run([summary_g_loss,optimizer_op_g],feed_dict={z_vector:z})
        else:
            summary_g,_ = sess.run([summary_g_loss,optimizer_op_g],feed_dict={z_vector:z})
        
        
        # add loss summary to tensorboard
        if epoch % 1 == 0:
            d_writer.add_summary(summary_d, epoch) 
            g_writer.add_summary(summary_g, epoch)
        
        # output generated image
        if epoch % 200 == 0 or epoch in [5,60,100,150]:
            time_lapse = time.time()-start
            start = time.time()
            
            print "DCGAN epoch: ", epoch,", time spent: %.2fs" % time_lapse
            g_images = sess.run(g_output,feed_dict={z_vector:z_sample}) # get a generated image, with range [-1, 1]
            
            # make a directory for generated images
            if not os.path.exists(train_sample_directory):
                os.makedirs(train_sample_directory)
            
            #Save sample generator images for viewing training progress.
            save_generated_images(images = np.reshape(g_images[0:batch_size_sample],[batch_size_sample,64,64,3]),\
                        size = [sample_num_rows,sample_num_columns], image_path = train_sample_directory+'/'+str(epoch)+'.png')
            
        if epoch in [0,500,1000,2000,4000,6000,8000,10000,15000]:
            # make a directory for trained models
            if not os.path.exists(model_directory):
                os.makedirs(model_directory)
            
            # save the trained model at different epoch
            saver.save(sess, save_path = model_directory + '/' + str(epoch) + '.cptk')
    print "Done"

SyntaxError: Missing parentheses in call to 'print' (<ipython-input-12-1c02b0afa811>, line 29)

In [13]:
# continue the model from the last check point

saver = tf.train.Saver(max_to_keep=20)

In [14]:
ckpt = tf.train.get_checkpoint_state(model_directory)
ckpt.model_checkpoint_path

AttributeError: 'NoneType' object has no attribute 'model_checkpoint_path'

In [15]:
print ('Loading models...might take a minute')
saver = tf.train.Saver(max_to_keep=20)

# create a log folder and save the graph structure, do this before training
g_writer = tf.train.SummaryWriter(logs_path + '/generator', graph=tf.get_default_graph())
d_writer = tf.train.SummaryWriter(logs_path + '/discriminator')

config = tf.ConfigProto()
config.gpu_options.allow_growth=True

with tf.Session(config=config) as sess:  
    start = time.time()
    
    ckpt = tf.train.get_checkpoint_state(model_directory)
    model = ckpt.model_checkpoint_path

    # z noise vector that will be used to generate image to check the training progress
    z_sample = np.random.uniform(-1.0,1.0,size=[batch_size,z_size]).astype(np.float32)
    
    saver.restore(sess, save_path=model)
    
    for epoch in range(14500,training_epochs):
        # get a batch of real images, with range [-1 ,1]
        x = get_x(batch_size) 
        # mini-batch of noise data from [-1, 1]
        z = np.random.uniform(-1.0,1.0,size=[batch_size,z_size]).astype(np.float32)
        
        # Update the discriminator
        d_summary_merge = tf.merge_summary([summary_d_loss, summary_d_x_hist,summary_d_z_hist])
        summary_d,_ = sess.run([d_summary_merge,optimizer_op_d],feed_dict={z_vector:z, x_vector:x}) 
        
        # Update the generator
        summary_g,_ = sess.run([summary_g_loss,optimizer_op_g],feed_dict={z_vector:z})
        
        # add loss summary to tensorboard
        if epoch % 20 == 0:
            d_writer.add_summary(summary_d, epoch) 
            g_writer.add_summary(summary_g, epoch)
        
        # output generated image
        if epoch % 200 == 0:
            time_lapse = time.time()-start
            start = time.time()
            
            print "DCGAN epoch: ", epoch,", time spent: %.2fs" % time_lapse
            g_images = sess.run(g_output,feed_dict={z_vector:z_sample}) # get a generated image, with range [-1, 1]
            
            # make a directory for generated images
            if not os.path.exists(train_sample_directory):
                os.makedirs(train_sample_directory)
            
            #Save sample generator images for viewing training progress.
            save_generated_images(images = np.reshape(g_images[0:batch_size_sample],[batch_size_sample,64,64,3]),\
                        size = [sample_num_rows,sample_num_columns], image_path = train_sample_directory+'/'+str(epoch)+'.png')
            
        if epoch in [16500,17000]:
            # make a directory for trained models
            if not os.path.exists(model_directory):
                os.makedirs(model_directory)
            
            # save the trained model at different epoch
            saver.save(sess, save_path = model_directory + '/' + str(epoch) + '.cptk')
    print ("Done")


SyntaxError: Missing parentheses in call to 'print' (<ipython-input-15-2741f1a182f3>, line 1)