## Setup

In [45]:
# Mount Google Drive under /content/drive; later cells read the dataset archive
# and the saved latent points / checkpoints from paths below that mount point.
from google.colab import drive
drive.mount('/content/drive')

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).


In [46]:

!pip --quiet install -U tensorboard-plugin-profile
!pip --quiet install wandb


In [47]:

from numpy import zeros, ones, asarray
from numpy.random import randn, randint
import tensorflow as tf

from tensorflow.keras.datasets.cifar10 import load_data
from tensorflow.keras.optimizers import Adam
from tensorflow.keras.models import Sequential, save_model, load_model
from tensorflow.keras.layers import Dense, Reshape, Flatten, Conv2D, Conv2DTranspose, LeakyReLU, Dropout
import tensorflow_datasets as tfds
from matplotlib import pyplot as plt
from tqdm.notebook import tqdm
from tensorflow.keras.models import load_model
from numpy.random import randn
from time import time
from tensorflow.keras import backend as K
import numpy as np
from itertools import islice

import wandb
from wandb.keras import WandbCallback

import pathlib
from google.colab import files


import imageio
import shutil
from os import mkdir, listdir, path, environ
import re

# Fail fast when the runtime has no GPU attached.
device_name = tf.test.gpu_device_name()
if device_name:
  print('Found GPU at: {}'.format(device_name))
else:
  raise SystemError('GPU device not found')

Found GPU at: /device:GPU:0


In [48]:
# Paths, image geometry and run bookkeeping shared by the cells below.
dataset_directory = "/content/img_align_celeba/"
INPUT_SHAPE = (112,128,3)
DATE = "09-05"
DATASET_SIZE = 202599
model_directory = "/content/drive/MyDrive/models/10epochs"

# Credentials must never be stored in the notebook. The key that used to be
# hard-coded here is compromised and should be revoked; it was also written to
# the variable 'WANDB_API_KEY ' (trailing space), which wandb does not read.
# Provide the key through the WANDB_API_KEY environment variable or `wandb login`.
if not environ.get('WANDB_API_KEY'):
  print("WANDB_API_KEY is not set; run `wandb login` or export the key before calling wandb.init().")
#!wandb login

In [49]:
# Latent vectors stored on Drive; train() feeds them to the generator after
# every epoch to render the logged sample images.
LP_PATH = "/content/drive/MyDrive/models/latent_points_dim100"
# One-off creation of the file (kept for reference):
#   LATENT_POINTS = tf.random.normal([5, 100])
#   np.save(LP_PATH+".npy",LATENT_POINTS.numpy())
LATENT_POINTS = np.load("{}.npy".format(LP_PATH))

In [50]:


# Search space of the W&B sweep: a fixed epoch count plus the candidate values
# for batch size, latent dimension and the two architecture selectors.
parameters_dic = dict(
    epochs={'value': 5},
    batch_size={'values': [64, 128]},
    latent_dim={'values': [50, 100, 200]},
    gen_struc={'values': [0, 1, 2]},
    disc_struc={'values': [0, 1, 2]},
)
sweep_config = dict(name='sweep_1', method='random', parameters=parameters_dic)
#sweep_id = wandb.sweep(sweep_config, project="CelebA-GAN-sweep")


In [51]:
# Extract the CelebA archive from Drive into the current working directory.
# -n never overwrites files that already exist, -q suppresses the file listing.
!unzip -n -q "/content/drive/MyDrive/datasets/img_align_celeba.zip" 

## Main code

### Import data

In [52]:
def make_dataset(batch_size = 64 ):
  """Build the unlabeled image pipeline used for training.

  Images are read from ``dataset_directory`` (no labels), resized to 112x128,
  batched, divided by 255 and then mapped through ``x*2 - 1`` before being
  prefetched.

  Args:
    batch_size: number of images per batch.

  Returns:
    The prefetched ``tf.data`` dataset of image batches.
  """
  autotune = tf.data.experimental.AUTOTUNE

  def to_unit_range(images):
    return images / 255.0

  def to_signed_range(images):
    return (images*2)-1

  # Scanning the image directory is the slow part of this call (a few minutes).
  images = tf.keras.preprocessing.image_dataset_from_directory(
      dataset_directory,
      label_mode=None,
      image_size=(112, 128),
      batch_size=batch_size,
  )
  images = images.map(to_unit_range, num_parallel_calls=autotune)
  images = images.map(to_signed_range, num_parallel_calls=autotune)
  return images.prefetch(buffer_size=autotune)

### Define models

In [53]:

def define_discriminator(config,in_shape=INPUT_SHAPE) :
  """Build and compile the discriminator selected by ``config['disc_struc']``.

  Every variant is a stack of 3x3, stride-2 convolutions followed by
  LeakyReLU(0.2), then Flatten, Dropout(0.4) and a single sigmoid unit.
  The variants differ only in the number of convolutions and their filters.

  Args:
    config: mapping providing the key 'disc_struc' (0, 1 or 2).
    in_shape: shape of one input image.

  Returns:
    The compiled Sequential model (binary cross-entropy, Adam with
    learning_rate=0.0002 and beta_1=0.5, accuracy metric).

  Raises:
    ValueError: if 'disc_struc' is not a supported structure id. Previously
      such a value silently produced a model without any layer.
  """
  # Filters of the successive stride-2 convolutions for each structure id.
  conv_filters_by_struc = {
      0: (128, 128),
      1: (256, 256),
      2: (128, 128, 128),
  }
  struc = config['disc_struc']
  if struc not in conv_filters_by_struc:
    raise ValueError(
        f"Unsupported disc_struc {struc!r}; expected one of {sorted(conv_filters_by_struc)}")

  model = Sequential()
  for index, filters in enumerate(conv_filters_by_struc[struc]):
    # Only the first layer declares the input shape.
    first_layer_kwargs = {'input_shape': in_shape} if index == 0 else {}
    model.add(Conv2D(filters, (3,3), strides=(2,2), padding='same', **first_layer_kwargs))
    model.add(LeakyReLU(alpha=0.2))

  model.add(Flatten())
  model.add(Dropout(0.4))
  model.add(Dense(1,activation="sigmoid"))

  opt = Adam(learning_rate=0.0002, beta_1=0.5)
  model.compile(loss="binary_crossentropy", optimizer=opt, metrics=["accuracy"])
  return model


In [54]:
def define_generator(config):
  """Build the generator selected by ``config['gen_struc']``.

  A Dense layer expands the latent vector into a small feature map, which is
  doubled in size by each 4x4, stride-2 transposed convolution until it reaches
  112x128; a final 8x8 convolution with tanh produces the 3 output channels.

  The model is returned uncompiled: it is not trained directly but through the
  combined model built by define_gan.

  Args:
    config: mapping providing 'gen_struc' (0, 1 or 2) and 'latent_dim'
      (dimension of the latent vector, e.g. 100).

  Returns:
    The uncompiled Sequential generator.

  Raises:
    ValueError: if 'gen_struc' is not a supported structure id. Previously such
      a value silently produced a model made of the output convolution only.
  """
  # structure id -> ((height, width, channels) of the seed feature map,
  #                  filters of the successive 2x upsampling layers)
  layouts = {
      0: ((28, 32, 64), (64, 64)),
      1: ((28, 32, 512), (256, 128)),
      2: ((14, 16, 128), (128, 64, 64)),
  }
  struc = config['gen_struc']
  if struc not in layouts:
    raise ValueError(
        f"Unsupported gen_struc {struc!r}; expected one of {sorted(layouts)}")
  (height, width, channels), upsampling_filters = layouts[struc]

  model = Sequential()
  # Dense layer so we can work with a 1D latent vector.
  model.add(Dense(channels * height * width, input_dim=config['latent_dim']))
  model.add(LeakyReLU(alpha=0.2))
  model.add(Reshape((height, width, channels)))
  for filters in upsampling_filters:
    model.add(Conv2DTranspose(filters, (4,4), strides=(2,2), padding='same'))
    model.add(LeakyReLU(alpha=0.2))
  # generate
  model.add(Conv2D(3, (8,8), activation='tanh', padding='same'))
  return model

#test_gen = define_generator(config)
#print(test_gen.summary())


In [55]:
def define_gan(generator, discriminator, config):
  """Chain generator and discriminator into the model that updates the generator.

  The discriminator is flagged as non-trainable before the combined model is
  compiled, because it is trained separately through its own train_on_batch
  calls.

  Args:
    generator: uncompiled generator model.
    discriminator: compiled discriminator model; its ``trainable`` flag is set
      to False as a side effect.
    config: accepted for signature symmetry; not used here.

  Returns:
    The compiled combined model (binary cross-entropy, Adam with
    learning_rate=0.0002 and beta_1=0.5).
  """
  discriminator.trainable = False
  gan_optimizer = Adam(learning_rate=0.0002, beta_1=0.5)
  combined = Sequential([generator, discriminator])
  combined.compile(loss='binary_crossentropy', optimizer=gan_optimizer)
  return combined

def make_model(config) :
  """Create a fresh set of models for ``config``.

  Returns:
    Tuple ``(gan_model, g_model, d_model)``: the combined model, the generator
    and the discriminator.
  """
  discriminator = define_discriminator(config)
  generator = define_generator(config)
  combined = define_gan(generator, discriminator, config)
  return combined, generator, discriminator

#def save(path,gan, generator, discriminator):
    #discriminator.trainable = False
#    save_model(gan, path+'gan')
#    #discriminator.trainable = True
#    save_model(generator, path+'generator')
#    save_model(discriminator, path+'discriminator')


#def load(path):
#    discriminator = load_model(path+'discriminator')
#    generator = load_model(path+'generator')
#    gan = load_model(path+'gan')
#    gan.summary()
#    discriminator.summary()
#    generator.summary()

#    return gan, generator, discriminator

### Functions

In [56]:
def custom_lr_scheduler(epoch, starting_lr) :
  """Learning rate for ``epoch``: linear warm-up, plateau, then linear decay.

  * epochs 0-9:   ``starting_lr * (epoch + 1)`` (warm-up up to 10x)
  * epochs 10-29: ``starting_lr * 10`` (plateau)
  * epochs 30+:   linear decay from ``starting_lr * 10`` down to 0 at epoch 49

  Args:
    epoch: zero-based epoch index.
    starting_lr: learning rate of the very first epoch.

  Returns:
    The learning rate to use, never negative: past epoch 49 the decay formula
    would go below zero, so the result is clamped at 0.
  """
  if epoch < 10 :
    return starting_lr * epoch + starting_lr
  if epoch < 30 :
    return starting_lr * 10
  decayed = starting_lr * 10 * ( 1 - ((epoch-30)/19 ))
  return max(0.0, decayed)

In [57]:
# Helpers producing the generator inputs and the fake samples.
# Real images are not sampled here: the training loop takes each batch of real
# images directly from the tf.data pipeline and pairs it with a full batch of
# generated images.


# generate n_samples latent vectors as input for the generator

@tf.function
def generate_latent_points(latent_dim, n_samples):
  """Draw ``n_samples`` latent vectors from a standard normal distribution.

  Returns:
    A tensor of shape ``[n_samples, latent_dim]``.
  """
  latent_shape = [n_samples, latent_dim]
  return tf.random.normal(latent_shape)

@tf.function
def generate_fake_samples(generator, latent_dim, n_samples):
  """Generate ``n_samples`` fake images together with their class labels.

  Args:
    generator: model called on the latent vectors.
    latent_dim: dimension of each latent vector.
    n_samples: number of samples to generate.

  Returns:
    Tuple ``(X, y)``: the generator output and an ``(n_samples, 1)`` tensor of
    zeros (label 0 marks the samples as fake).
  """
  latent_batch = generate_latent_points(latent_dim, n_samples)
  fake_images = generator(latent_batch)
  fake_labels = tf.zeros((n_samples, 1))
  return fake_images, fake_labels

In [58]:
def model_save_directory(path, epoch) : 
  """Return the checkpoint folder for ``epoch``: ``<path><epoch>epochs/``.

  ``path`` is used as a plain string prefix, so it must already end with a
  separator.
  """
  return "".join((path, str(epoch), "epochs/"))

In [59]:
def atoi(text):
    '''
    Return text as an int when it consists only of decimal digits, else unchanged.

    str.isdecimal is used instead of str.isdigit: isdigit also accepts
    characters such as superscript digits, which int() rejects with ValueError.
    '''
    return int(text) if text.isdecimal() else text

def natural_keys(text):
    '''
    alist.sort(key=natural_keys) sorts in human order
    http://nedbatchelder.com/blog/200712/human_sorting.html
    (See Toothy's implementation in the comments)

    The text is split into digit and non-digit runs; digit runs become ints so
    that e.g. "img_2" sorts before "img_10".
    '''
    return [ atoi(c) for c in re.split(r'(\d+)', text) ]

def dl_gif(images_path):
  """Assemble one GIF per logged sample (1 to 5) and download it.

  For each sample index ``i``, every file of ``images_path`` whose name starts
  with ``samples_{i}_`` is moved to ``run_sample/{i}/``; all files of that
  folder are then read in natural order, written to ``gif_{i}.gif`` at 10 fps
  and handed to ``files.download``.

  Files are moved on every call. Previously the move only happened when the
  target folder had just been created, so frames produced after the first call
  were never added to the GIF. A sample without any frame is skipped.

  Args:
    images_path: directory holding the logged sample images.
  """
  from os import makedirs  # local import: only mkdir is imported at file level

  for i in range(1,6) :
    sample_dir = f'run_sample/{i}/'
    makedirs(sample_dir, exist_ok=True)

    r = re.compile(f'samples_{i}_')
    for image in filter(r.match, listdir(images_path)):
      shutil.move(path.join(images_path, image), path.join(sample_dir, image))

    filenames = listdir(sample_dir)
    filenames.sort(key=natural_keys)
    images = [imageio.imread(path.join(sample_dir, filename)) for filename in filenames]
    if not images:
      # Nothing logged for this sample: there is no GIF to build.
      continue

    imageio.mimsave(f'gif_{i}.gif', images, fps=10)
    files.download(f'gif_{i}.gif')

In [60]:
def load_gan(config, model_directory) :
  """Rebuild the models for ``config`` and restore them from a checkpoint.

  Args:
    config: model configuration passed to make_model.
    model_directory: checkpoint path to restore, e.g. the value returned by
      ``tf.train.latest_checkpoint``.

  Returns:
    Tuple ``(gan_model, g_model, d_model)``.

  Raises:
    FileNotFoundError: if ``model_directory`` is None, which is what
      ``tf.train.latest_checkpoint`` returns when no checkpoint exists.
      Restoring from None does nothing, so training would otherwise silently
      restart from random weights.
  """
  if model_directory is None:
    raise FileNotFoundError("No checkpoint found to restore the GAN from")

  gan_model, g_model, d_model = make_model(config)
  checkpoint = tf.train.Checkpoint(gan_optimizer=gan_model.optimizer,
                                    discriminator_optimizer=d_model.optimizer,
                                    generator=g_model,
                                    discriminator=d_model,
                                    gan_model = gan_model,
                                    config = config)
  checkpoint.restore(model_directory)
  return gan_model, g_model, d_model

def save_gan(gan_model, g_model, d_model, model_directory, config) :
  """Write a checkpoint of the three models, both optimizers and ``config``.

  The checkpoint is saved under ``model_directory`` through a
  CheckpointManager that keeps only the most recent checkpoint.
  """
  tracked_objects = dict(
      gan_optimizer=gan_model.optimizer,
      discriminator_optimizer=d_model.optimizer,
      generator=g_model,
      discriminator=d_model,
      gan_model=gan_model,
      config=config,
  )
  snapshot = tf.train.Checkpoint(**tracked_objects)
  manager = tf.train.CheckpointManager(snapshot, model_directory, max_to_keep=1)
  manager.save()

### Train epoch

In [61]:
def train_epoch(g_model, d_model, gan_model, dataset, model_config, training_config, summary_writer):
  """Run one pass over ``dataset``, alternating discriminator and generator updates.

  For every batch the discriminator is trained on the real images (label 1) and
  on a batch of generated images (label 0); the generator is then updated twice
  through ``gan_model`` with label 1, i.e. it is rewarded when the
  discriminator takes its output for real images.

  Args:
    g_model: generator, used to produce the fake samples.
    d_model: compiled discriminator.
    gan_model: compiled combined model used to update the generator.
    dataset: iterable of batches of real images.
    model_config: mapping providing 'latent_dim' and 'batch_size'.
    training_config: mapping providing 'number_epochs', 'sanity_check' and
      'verbose' (1: progress bar and one summary line, 2: one line per batch).
    summary_writer: writer receiving the per-batch loss scalars, or None to
      disable them.
  """
  latent_dim = model_config['latent_dim']
  batch_size = model_config['batch_size']
  # NOTE(review): the messages below print number_epochs + 1, not the index of
  # the epoch being run; the current epoch is not passed to this function.
  num_epoch = training_config['number_epochs']
  verbose = training_config['verbose']

  if training_config['sanity_check'] :
    # Stop after 10 batches without reading the rest of the dataset.
    ds_iterator = enumerate(tqdm(islice(dataset, 10)))
  elif verbose == 1 :
    ds_iterator = enumerate(tqdm(dataset))
  else :
    ds_iterator = enumerate(dataset)

  # The generator wants the discriminator to label its samples as real (ones).
  # These targets do not change between batches, so they are built once.
  y_gan = ones((batch_size, 1))

  d_loss_real = d_loss_fake = g_loss = None
  for j, batch in ds_iterator:
    # Discriminator update on real images; only the loss is kept, the accuracy
    # returned as second value is ignored.
    X_real, y_real = batch, tf.ones([len(batch),1])
    d_loss_real, _ = d_model.train_on_batch(X_real, y_real)

    # Discriminator update on generated images.
    X_fake, y_fake = generate_fake_samples(g_model, latent_dim, batch_size)
    d_loss_fake, _ = d_model.train_on_batch(X_fake, y_fake)

    # Two generator updates through the discriminator's error.
    X_gan = generate_latent_points(latent_dim, batch_size)
    g_loss1 = gan_model.train_on_batch(X_gan, y_gan)
    X_gan2 = generate_latent_points(latent_dim, batch_size)
    g_loss2 = gan_model.train_on_batch(X_gan2, y_gan)
    g_loss = (g_loss1+g_loss2)/2

    if summary_writer is not None :
      with summary_writer.as_default():
        tf.summary.scalar('d_loss_real', d_loss_real, step=j)
        tf.summary.scalar('d_loss_fake', d_loss_fake, step=j)
        tf.summary.scalar('g_loss', g_loss, step=j)
    if verbose == 2 :
      print(f'Epoch>{num_epoch+1}, batch={j}, d1={d_loss_real}, d2={d_loss_fake} g={g_loss}')

  # Summary of the last batch; skipped when the dataset yielded no batch at all.
  if verbose == 1 and g_loss is not None :
    print(f'Epoch>{num_epoch+1}, d1={d_loss_real}, d2={d_loss_fake} g={g_loss}')


def train(model_config, training_config) :
  """Train the GAN for ``number_epochs`` epochs, logging to W&B, then save it.

  Models are created from scratch when 'starting_epoch' is 0; otherwise they
  are restored from the latest checkpoint of the folder belonging to that
  epoch. After every epoch, the generator output for the fixed latent points is
  logged as images 'samples_1' to 'samples_5'. A checkpoint is written at the
  end into the folder of epoch ``starting_epoch + number_epochs``.

  Args:
    model_config: mapping providing 'batch_size', 'latent_dim', 'gen_struc'
      and 'disc_struc'.
    training_config: mapping providing 'latent_points', 'starting_epoch',
      'number_epochs', 'model_directory', 'learning_rate_disc',
      'learning_rate_gan', 'dl_gif', plus the keys read by train_epoch
      ('sanity_check', 'verbose').
  """
  #wandb.tensorboard.patch(root_logdir="content/log")
  latent_points = training_config['latent_points']
  starting_epoch = training_config['starting_epoch']
  number_epochs = training_config['number_epochs']

  dataset = make_dataset(model_config['batch_size'])
  if starting_epoch == 0 :
    gan_model, g_model, d_model = make_model(model_config)
  else :
    model_dir = model_save_directory(training_config['model_directory'], starting_epoch)
    model_dir = tf.train.latest_checkpoint(model_dir)
    gan_model, g_model, d_model = load_gan(model_config,model_dir )
  avg_time = 0

  summary_writer = tf.summary.create_file_writer('content/log/losses')

  # Plain Python range: with tf.range the epoch is a tensor, whose repr ends up
  # in the image captions and in the logged 'epoch' value.
  for epoch in range(starting_epoch, starting_epoch + number_epochs) :
    starting_time = time()

    K.set_value(d_model.optimizer.learning_rate, training_config["learning_rate_disc"])
    K.set_value(gan_model.optimizer.learning_rate, training_config["learning_rate_gan"])

    train_epoch(g_model, d_model, gan_model, dataset,
                model_config, training_config,
                summary_writer = summary_writer)
    delta_time = time() - starting_time
    avg_time += delta_time

    # Map the generator output from [-1, 1] to 8-bit pixel values.
    X = g_model.predict(latent_points)
    X = (X+1)/2
    X = (X*255).astype(np.uint8)

    # The keys 'samples_1' .. 'samples_5' are the file prefixes dl_gif looks for.
    log_entry = {
        f'samples_{i}': wandb.Image(X[i-1], caption=f'sample {i}, epoch {epoch}')
        for i in range(1, 6)
    }
    log_entry.update({
        'duration':delta_time,'epoch':epoch+1,
        'learning rate discriminator':training_config["learning_rate_disc"],
        'learning rate gan':training_config["learning_rate_gan"]
    })
    wandb.log(log_entry)

  wandb.summary['avg_time'] = avg_time/number_epochs
  save_gan(gan_model, g_model, d_model, model_save_directory(training_config['model_directory'], starting_epoch+number_epochs), model_config)
  if training_config['dl_gif'] :
    dl_gif('/content/wandb/latest-run/files/media/images/')


## Sweep

In [62]:
def show_plot(examples, n):
  """Display the first ``n * n`` images of ``examples`` on an n-by-n grid, axes hidden."""
  cell_count = n * n
  for cell in range(cell_count):
    plt.subplot(n, n, cell + 1)
    plt.axis('off')
    plt.imshow(examples[cell, :, :, :])
  plt.show()

In [63]:


def train_sweep() :
  """Entry point of one W&B sweep run: train a GAN with the sweep's parameters.

  The hyper-parameters ('latent_dim', 'batch_size', 'epochs', 'gen_struc',
  'disc_struc') are read from ``wandb.config``. After every epoch the generator
  output for 5 fixed latent points is logged together with the epoch duration;
  the average epoch duration is stored in the run summary.
  """
  with wandb.init() as run:
    config = wandb.config
    latent_points = generate_latent_points(config['latent_dim'],5)
    dataset = make_dataset(config['batch_size'])
    gan_model, g_model, d_model = make_model(config)

    # train_epoch reads these keys from its training_config argument.
    training_config = {
        'number_epochs': config['epochs'],
        'sanity_check': False,
        'verbose': 0,
    }

    avg_time = 0
    # Plain Python range: only `tf` is imported (the name `tensorflow` is not
    # defined), and an integer epoch keeps captions and logged values readable.
    for epoch in range(config['epochs']) :
      starting_time = time()
      train_epoch(g_model, d_model, gan_model, dataset, config, training_config, summary_writer=None)
      delta_time = time() - starting_time
      avg_time += delta_time

      # Map the generator output from [-1, 1] to 8-bit pixel values.
      X = g_model.predict(latent_points)
      X = (X+1)/2
      X = (X*255).astype(np.uint8)
      wandb.log({"samples":[wandb.Image(img, caption=f'sample {index}, epoch {epoch}') for index,img in enumerate(X)],
                 'duration':delta_time,'epoch':epoch+1})
    wandb.summary['avg_time'] = avg_time/config['epochs']

## Run sweep

In [64]:
#wandb.agent("36gxyv90", train_sweep, count = 10, project="CelebA-GAN-sweep")

## Long run

In [65]:
# Settings of the long training run: architecture selectors for make_model and
# the schedule/paths read by train(). starting_epoch=90 makes train() restore
# the checkpoint stored in the "90epochs/" folder of model_directory.
model_config = dict(
    batch_size=64,
    latent_dim=100,
    gen_struc=2,
    disc_struc=2,
)
training_config = dict(
    starting_epoch=90,
    number_epochs=10,
    sanity_check=False,
    verbose=1,
    dl_gif=False,
    learning_rate_disc=0.000005,
    learning_rate_gan=0.00004,
    model_directory="/content/drive/MyDrive/models/",
    latent_points=LATENT_POINTS,
)
#wandb.tensorboard.patch(root_logdir="content/log")
#wandb.init(config = model_config, project = "CelebA-GAN-sweep",sync_tensorboard=True, resume = "must", name="AXI V1.1", id="2epv65u7")

#train(model_config, training_config)

## Unit tests

In [66]:
# Smoke-test settings: with sanity_check=True train_epoch stops after 10
# batches, so a one-epoch run from scratch followed by a one-epoch resume
# exercises the training, saving and loading paths quickly.
model_config = {
    'batch_size':64,
    'latent_dim':100,
    'gen_struc':2,
    'disc_struc':2,
    'sanity_check':False
}
training_config = {
    'starting_epoch':0,
    'number_epochs':1,
    'sanity_check':True,
    'verbose':1,
    'dl_gif':False,
    'learning_rate':0.0001,
    # train() reads the three keys below; without them the smoke test stops on
    # a KeyError before training anything.
    'learning_rate_disc':0.0001,
    'learning_rate_gan':0.0001,
    'latent_points':LATENT_POINTS,
    'model_directory':"/content/drive/MyDrive/models/"
}
#wandb.tensorboard.patch(root_logdir="content/log")
wandb.init(config = model_config, project = "CelebA-GAN-sweep",sync_tensorboard=True)

#train(model_config, training_config)

# Second pass: same settings, resuming from the checkpoint written after epoch 1.
training_config = {**training_config, 'starting_epoch':1}
#train(model_config, training_config)

VBox(children=(Label(value='0.001 MB of 0.001 MB uploaded (0.000 MB deduped)\r'), FloatProgress(value=1.0, max…