<a href="https://colab.research.google.com/github/ShendreAk/DeepLearningCNN/blob/main/stackGAN.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

##The model architecture of StackGAN consists of mainly the following components:

Embedding: Converts the input variable length text into a fixed length vector. we will be using a pre-trained character level embedding.

Conditioning Augmentation (CA)

Stage I Generator: Generates low resolution (64*64) images.

Stage I Discriminator

Residual Blocks

Stage II Generator: Generates high resolution (256*256) images.

Stage II Discriminator

In [9]:
from google.colab import drive
drive.mount('/content/drive')

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).


In [10]:
%cd "/content/drive/MyDrive/StackGAN/"

/content/drive/MyDrive/StackGAN


In [11]:
!pwd

/content/drive/MyDrive/StackGAN


In [12]:
import os
import time
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import random
import PIL
from PIL import Image
import pickle
import tensorflow as tf
from tensorflow.keras.models import  Model
from tensorflow.keras import Input
from tensorflow.keras.layers import  Dense, LeakyReLU, BatchNormalization, ReLU, Reshape, UpSampling2D, Conv2D, Activation, \
 Flatten, Lambda, Concatenate, concatenate
from tensorflow.keras import backend as K
from tensorflow.keras.callbacks import TensorBoard
from tensorflow.keras.optimizers import Adam
%matplotlib inline


# Data loading

In [13]:
def load_filenames(fname_path):
  with open(fname_path, 'rb') as f:
    filenames = pickle.load(f, encoding='latin1' )
    filenames = filenames[:500]
  return filenames

In [14]:
def load_class_ids(classids_path):
  with open(classids_path, 'rb') as f:
    class_ids = pickle.load(f, encoding='latin1' )
    class_ids = class_ids[:500]
  return class_ids

In [15]:
def load_embeddings(embed_path):
  with open(embed_path, 'rb') as f:
    embeddings = pickle.load(f, encoding='latin1' )
    embeddings = np.array(embeddings[:500])
  return embeddings

In [16]:
cub_dir = "./content/drive/MyDrive/StackGAN/CUB_200_2011/CUB_200_2011/"
fname_path = "/content/drive/MyDrive/StackGAN/birds/train/filenames.pickle"
classids_path = '/content/drive/MyDrive/StackGAN/birds/train/class_info.pickle'
embed_path = "/content/drive/MyDrive/StackGAN/birds/train/char-CNN-RNN-embeddings.pickle"


In [17]:
def load_bboxes(dir):
  bbox_path = os.path.join(dir, 'bounding_boxes.txt')
  images_file_path = os.path.join(dir, 'images.txt')

  df_bboxes = pd.read_csv(bbox_path,delim_whitespace=True, header=None).astype(int)
  df_filenames = pd.read_csv(images_file_path, delim_whitespace=True, header=None)

  # list of filenames
  file_names = df_filenames[1].tolist()

  # create dictionary
  fname_bbox_dict = {}
  for i in range(len(file_names)):
    key =  file_names[i][:-4]
    bbox = df_bboxes.iloc[i][1:].to_list()
    fname_bbox_dict[key] = bbox

  return fname_bbox_dict




In [18]:
def get_img(img_path, bbox, image_size):
    """
    Load and resize image
    """
    img = Image.open(img_path).convert('RGB')
    width, height = img.size
    if bbox is not None:
        R = int(np.maximum(bbox[2], bbox[3]) * 0.75)
        center_x = int((2 * bbox[0] + bbox[2]) / 2)
        center_y = int((2 * bbox[1] + bbox[3]) / 2)
        y1 = np.maximum(0, center_y - R)
        y2 = np.minimum(height, center_y + R)
        x1 = np.maximum(0, center_x - R)
        x2 = np.minimum(width, center_x + R)
        img = img.crop([x1, y1, x2, y2])
    img = img.resize(image_size, PIL.Image.BILINEAR)
    return img

In [19]:
embeddings = load_embeddings(embed_path)

In [20]:
embeddings[1].shape

(10, 1024)

In [21]:
def load_dataset(fname_path, classids_path, embed_path, cub_dir, image_size):
  filenames = load_filenames(fname_path)
  class_ids = load_class_ids(classids_path)
  all_embeddings = load_embeddings(embed_path)
  bounding_boxes = load_bboxes(cub_dir)

  X, y, embeddings = [], [], []

  for index, filename in enumerate(filenames):
    bbox = bounding_boxes[filename]

    # Load images
    img_name = f'{cub_dir}/images/{filename}.jpg'
    img = get_img(img_name, bbox, image_size)
    X.append(np.array(img))

    y.append(class_ids[index])


    embeddings1 = all_embeddings[index, :, :]
    iloc = random.randint(0, embeddings1.shape[0] - 1)
    embedding = embeddings1[iloc, :]
    embeddings.append(embedding)

  return np.array(X), np.array(y), np.array(embeddings)



##Stage I architecture

In [22]:
def generate_c(input):
  mean = input[:,:128]
  # print("mean shape",mean.shape[1])
  logsigma = input[:,128:]
  stddev = K.exp(logsigma)
  epsilon = K.random_normal(shape=K.constant((mean.shape[1],), dtype='int32'))
  c = stddev * epsilon + mean
  # print("c shape: ",c.shape)
  return c


In [23]:
def ca_model():
    input_layer = Input(shape=(1024,))
    x = Dense(256)(input_layer)
    x = LeakyReLU(alpha=0.2)(x)
    model = Model(inputs=[input_layer], outputs=[x])
    return model

In [57]:
def embedding_compressor_model():
    input_layer = Input(shape=(1024,))
    x = Dense(128)(input_layer)
    x = ReLU()(x)

    model = Model(inputs=[input_layer], outputs=[x])
    return model

In [25]:
def stage1_generator():
  input_layer = Input(shape=(1024,))
  x = Dense(256)(input_layer)
  mean_logsigma = LeakyReLU(alpha=0.2)(x)

  c = Lambda(generate_c)(mean_logsigma)

  input_layer2 = Input(shape=(100,))

  gen_input = Concatenate(axis=1)([c, input_layer2])

  x = Dense(128 * 4 * 4, use_bias=False)(gen_input)
  x = ReLU()(x)

  x = Reshape((4, 4, 128), input_shape=(128 * 4 * 4,))(x)

  x = UpSampling2D(size=(2, 2))(x)
  x = Conv2D(512, kernel_size=3, padding="same", strides=1, use_bias=False)(x)
  x = BatchNormalization()(x)
  x = ReLU()(x)

  x = UpSampling2D(size=(2, 2))(x)
  x = Conv2D(256, kernel_size=3, padding="same", strides=1, use_bias=False)(x)
  x = BatchNormalization()(x)
  x = ReLU()(x)

  x = UpSampling2D(size=(2, 2))(x)
  x = Conv2D(128, kernel_size=3, padding="same", strides=1, use_bias=False)(x)
  x = BatchNormalization()(x)
  x = ReLU()(x)

  x = UpSampling2D(size=(2, 2))(x)
  x = Conv2D(64, kernel_size=3, padding="same", strides=1, use_bias=False)(x)
  x = BatchNormalization()(x)
  x = ReLU()(x)

  x = Conv2D(3, kernel_size=3, padding="same", strides=1, use_bias=False)(x)
  x = Activation(activation='tanh')(x)


  model = Model(inputs=[input_layer, input_layer2], outputs=[x, mean_logsigma])
  return model


In [26]:
model = stage1_generator()
model.summary()

  super().__init__(**kwargs)


In [27]:
def stage1_discriminator():

    input_layer = Input(shape=(64, 64, 3))

    x = Conv2D(64, (4, 4),padding='same', strides=2, input_shape=(64, 64, 3), use_bias=False)(input_layer)
    x = LeakyReLU(alpha=0.2)(x)

    x = Conv2D(128, (4, 4), padding='same', strides=2, use_bias=False)(x)
    x = BatchNormalization()(x)
    x = LeakyReLU(alpha=0.2)(x)

    x = Conv2D(256, (4, 4), padding='same', strides=2, use_bias=False)(x)
    x = BatchNormalization()(x)
    x = LeakyReLU(alpha=0.2)(x)

    x = Conv2D(512, (4, 4), padding='same', strides=2, use_bias=False)(x)
    x = BatchNormalization()(x)
    x = LeakyReLU(alpha=0.2)(x)

    input_layer2 = Input(shape=(4, 4, 128))

    merged_input = concatenate([x, input_layer2])

    x2 = Conv2D(64 * 8, kernel_size=1,padding="same", strides=1)(merged_input)
    x2 = BatchNormalization()(x2)
    x2 = LeakyReLU(alpha=0.2)(x2)
    x2 = Flatten()(x2)
    x2 = Dense(1)(x2)
    x2 = Activation('sigmoid')(x2)

    model = Model(inputs=[input_layer, input_layer2], outputs=[x2])
    return model

In [28]:
model = stage1_discriminator()
model.summary()

  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


In [29]:
# adversial model to calculate stage1 generative model losses
def adversarial_model(gen_model, dis_model):
    input_layer = Input(shape=(1024,))
    input_layer2 = Input(shape=(100,))
    input_layer3 = Input(shape=(4, 4, 128))

    x, mean_logsigma = gen_model([input_layer, input_layer2])

    dis_model.trainable = False
    preds = dis_model([x, input_layer3])

    model = Model(inputs=[input_layer, input_layer2, input_layer3], outputs=[preds, mean_logsigma])
    return model

In [30]:
def KL_loss(y_true, y_pred):
    mean = y_pred[:, :128]
    logsigma = y_pred[:, :128]
    loss = -logsigma + .5 * (-1 + K.exp(2. * logsigma) + K.square(mean))
    loss = K.mean(loss)
    return loss


def custom_generator_loss(y_true, y_pred):
    # Calculate binary cross entropy loss
    return K.binary_crossentropy(y_true, y_pred)

In [31]:
def save_img(img, path):
    plt.figure()
    plt.subplot(1, 1, 1)
    plt.imshow(img)
    plt.axis("off")
    plt.savefig(path)
    plt.close()

In [32]:
data_dir = "/content/drive/MyDrive/StackGAN/birds"
cub_dataset_dir = "/content/drive/MyDrive/StackGAN/CUB_200_2011/CUB_200_2011"

train_dir = data_dir + "/train"
test_dir = data_dir + "/test"

embed_path_train = train_dir + "/char-CNN-RNN-embeddings.pickle"
embed_path_test = test_dir + "/char-CNN-RNN-embeddings.pickle"

filenames_path_train = train_dir + "/filenames.pickle"
filenames_path_test = test_dir + "/filenames.pickle"

class_ids_path_train = train_dir + "/class_info.pickle"
class_ids_path_test = test_dir + "/class_info.pickle"

results_path = "/content/drive/MyDrive/StackGAN/results"
image_size = 64
batch_size = 32
z_dim = 100
stage1_g_lr = 0.0002
stage1_d_lr = 0.0002
stage1_lr_decay_step = 600
epochs = 25
condition_dim = 128

# optimizers
d_optimizer = Adam(learning_rate=stage1_d_lr, beta_1=0.5, beta_2=0.999)
g_optimizer = Adam(learning_rate=stage1_g_lr, beta_1=0.5, beta_2=0.999)

In [25]:
# load datasets
X_train, y_train, embeddings_train = load_dataset(fname_path=filenames_path_train,
                                                      classids_path=class_ids_path_train,
                                                      embed_path=embed_path_train,
                                                      cub_dir=cub_dataset_dir,
                                                      image_size=(64, 64))

X_test, y_test, embeddings_test = load_dataset(fname_path=filenames_path_test,
                                                classids_path=class_ids_path_test,
                                                embed_path=embed_path_test,
                                                cub_dir=cub_dataset_dir,
                                                image_size=(64, 64))

  df_bboxes = pd.read_csv(bbox_path,delim_whitespace=True, header=None).astype(int)
  df_filenames = pd.read_csv(images_file_path, delim_whitespace=True, header=None)
  df_bboxes = pd.read_csv(bbox_path,delim_whitespace=True, header=None).astype(int)
  df_filenames = pd.read_csv(images_file_path, delim_whitespace=True, header=None)


In [33]:
# model building and compiling
ca_model = ca_model()
ca_model.compile(loss="binary_crossentropy", optimizer="adam")

embedding_compressor_model = embedding_compressor_model()
embedding_compressor_model.compile(loss="binary_crossentropy", optimizer="adam")

stage1_dis = stage1_discriminator()
stage1_dis.compile(loss='binary_crossentropy', optimizer=d_optimizer)

stage1_gen = stage1_generator()
stage1_gen.compile(loss="mse", optimizer=g_optimizer)

adversarial_model = adversarial_model(gen_model=stage1_gen, dis_model=stage1_dis)
adversarial_model.compile(loss=['binary_crossentropy',KL_loss], loss_weights=[1, 2.0],
                          optimizer=g_optimizer, metrics=None)

In [34]:
# array of real and fake values for loss calculation
real_labels = np.ones((batch_size, 1), dtype=float)
fake_labels = np.zeros((batch_size, 1), dtype=float)

In [None]:
# For stage1 architecture

for epoch in range(epochs):

    gen_losses = []
    dis_losses = []

    # Load data and train model
    num_batches = int(X_train.shape[0] / batch_size)
    for index in range(num_batches):
        print(f"Batch no:{index+1}")

        # Sample a batch of data
        z_noise = np.random.normal(0, 1, size=(batch_size, z_dim))
        image_batch = X_train[index * batch_size:(index + 1) * batch_size]
        embedding_batch = embeddings_train[index * batch_size:(index + 1) * batch_size]
        image_batch = (image_batch - 127.5) / 127.5

        # Generate fake images
        fake_images, _ = stage1_gen.predict([embedding_batch, z_noise], verbose=3)

        # Generate compressed embeddings
        compressed_embedding = embedding_compressor_model.predict_on_batch(embedding_batch)
        compressed_embedding = np.reshape(compressed_embedding, (-1, 1, 1, condition_dim))
        compressed_embedding = np.tile(compressed_embedding, (1, 4, 4, 1))

        dis_loss_real = stage1_dis.train_on_batch([image_batch, compressed_embedding],
                                                  np.reshape(real_labels, (batch_size, 1)))
        dis_loss_fake = stage1_dis.train_on_batch([fake_images, compressed_embedding],
                                                  np.reshape(fake_labels, (batch_size, 1)))

        d_loss = np.add(dis_loss_real,  dis_loss_fake)
        dis_losses.append(d_loss)
        print(f"d_loss:{d_loss}")


        g_loss = adversarial_model.train_on_batch([embedding_batch, z_noise, compressed_embedding],[K.ones((batch_size, 1)), K.ones((batch_size, 256))])
        print(f"g_loss:{g_loss}")
        gen_losses.append(g_loss)


    # Generate and save images every 2nd epoch
    if epoch % 2 == 0:
        z_noise2 = np.random.normal(0, 1, size=(batch_size, z_dim))
        embedding_batch = embeddings_test[0:batch_size]
        fake_images, _ = stage1_gen.predict_on_batch([embedding_batch, z_noise2])
        # save_img
        for i, img in enumerate(fake_images[:10]):
            save_img(img, f"/content/drive/MyDrive/StackGAN/results/gen_{epoch}_{i}.png")

    # Save models
    stage1_gen.save_weights("./weights/G1.weights.h5")
    stage1_dis.save_weights("./weights/D1.weights.h5")

Batch no:1
d_loss:1.4236767292022705
g_loss:[array(0.7120035, dtype=float32), array(0.7120035, dtype=float32), array(0.7120035, dtype=float32)]
Batch no:2
d_loss:1.4240310192108154
g_loss:[array(0.7121684, dtype=float32), array(0.7121684, dtype=float32), array(0.7121684, dtype=float32)]
Batch no:3
d_loss:1.4243066310882568
g_loss:[array(0.7123002, dtype=float32), array(0.7123002, dtype=float32), array(0.7123002, dtype=float32)]
Batch no:4
d_loss:1.42460036277771
g_loss:[array(0.7124531, dtype=float32), array(0.7124531, dtype=float32), array(0.7124531, dtype=float32)]
Batch no:5
d_loss:1.4248775243759155
g_loss:[array(0.71260077, dtype=float32), array(0.71260077, dtype=float32), array(0.71260077, dtype=float32)]
Batch no:6
d_loss:1.4251903295516968
g_loss:[array(0.71276027, dtype=float32), array(0.71276027, dtype=float32), array(0.71276027, dtype=float32)]
Batch no:7
d_loss:1.4255361557006836
g_loss:[array(0.712922, dtype=float32), array(0.712922, dtype=float32), array(0.712922, dtype=f



g_loss:[array(0.71410465, dtype=float32), array(0.71410465, dtype=float32), array(0.71410465, dtype=float32)]




Batch no:1
d_loss:1.4282448291778564
g_loss:[array(0.714272, dtype=float32), array(0.714272, dtype=float32), array(0.714272, dtype=float32)]
Batch no:2
d_loss:1.4285762310028076
g_loss:[array(0.71443856, dtype=float32), array(0.71443856, dtype=float32), array(0.71443856, dtype=float32)]
Batch no:3
d_loss:1.428867220878601
g_loss:[array(0.7145821, dtype=float32), array(0.7145821, dtype=float32), array(0.7145821, dtype=float32)]
Batch no:4
d_loss:1.4292020797729492
g_loss:[array(0.7147655, dtype=float32), array(0.7147655, dtype=float32), array(0.7147655, dtype=float32)]
Batch no:5
d_loss:1.4295122623443604
g_loss:[array(0.71491355, dtype=float32), array(0.71491355, dtype=float32), array(0.71491355, dtype=float32)]
Batch no:6
d_loss:1.4298088550567627
g_loss:[array(0.715057, dtype=float32), array(0.715057, dtype=float32), array(0.715057, dtype=float32)]
Batch no:7
d_loss:1.430174469947815
g_loss:[array(0.7152568, dtype=float32), array(0.7152568, dtype=float32), array(0.7152568, dtype=floa



g_loss:[array(0.71893907, dtype=float32), array(0.71893907, dtype=float32), array(0.71893907, dtype=float32)]




Batch no:1
d_loss:1.437933087348938
g_loss:[array(0.7191174, dtype=float32), array(0.7191174, dtype=float32), array(0.7191174, dtype=float32)]
Batch no:2
d_loss:1.4382708072662354
g_loss:[array(0.719279, dtype=float32), array(0.719279, dtype=float32), array(0.719279, dtype=float32)]
Batch no:3
d_loss:1.4385838508605957
g_loss:[array(0.71944636, dtype=float32), array(0.71944636, dtype=float32), array(0.71944636, dtype=float32)]
Batch no:4
d_loss:1.4389288425445557
g_loss:[array(0.71961737, dtype=float32), array(0.71961737, dtype=float32), array(0.71961737, dtype=float32)]
Batch no:5
d_loss:1.439279317855835
g_loss:[array(0.7198148, dtype=float32), array(0.7198148, dtype=float32), array(0.7198148, dtype=float32)]
Batch no:6
d_loss:1.4396857023239136
g_loss:[array(0.7200197, dtype=float32), array(0.7200197, dtype=float32), array(0.7200197, dtype=float32)]
Batch no:7
d_loss:1.4401252269744873
g_loss:[array(0.7202358, dtype=float32), array(0.7202358, dtype=float32), array(0.7202358, dtype=f



g_loss:[array(0.7243702, dtype=float32), array(0.7243702, dtype=float32), array(0.7243702, dtype=float32)]




Batch no:1
d_loss:1.4488329887390137
g_loss:[array(0.7245815, dtype=float32), array(0.7245815, dtype=float32), array(0.7245815, dtype=float32)]
Batch no:2
d_loss:1.4492528438568115
g_loss:[array(0.72479206, dtype=float32), array(0.72479206, dtype=float32), array(0.72479206, dtype=float32)]
Batch no:3
d_loss:1.449637770652771
g_loss:[array(0.7249802, dtype=float32), array(0.7249802, dtype=float32), array(0.7249802, dtype=float32)]
Batch no:4
d_loss:1.4500024318695068
g_loss:[array(0.72515124, dtype=float32), array(0.72515124, dtype=float32), array(0.72515124, dtype=float32)]
Batch no:5
d_loss:1.4503569602966309
g_loss:[array(0.7253499, dtype=float32), array(0.7253499, dtype=float32), array(0.7253499, dtype=float32)]
Batch no:6
d_loss:1.4507887363433838
g_loss:[array(0.7255797, dtype=float32), array(0.7255797, dtype=float32), array(0.7255797, dtype=float32)]
Batch no:7
d_loss:1.4512475728988647
g_loss:[array(0.725793, dtype=float32), array(0.725793, dtype=float32), array(0.725793, dtype=

##Stage2 architecture

###Most of the architecture in stage2 can be borrowed from stage1 so we just need to code for the additional network that is required for stage2 architecture

In [1]:
from tensorflow.keras.layers import Add

In [2]:
def residual_block(input):

    x = Conv2D(128 * 4, kernel_size=(3, 3), padding='same', strides=1)(input)
    x = BatchNormalization()(x)
    x = ReLU()(x)

    x = Conv2D(128 * 4, kernel_size=(3, 3), strides=1, padding='same')(x)
    x = BatchNormalization()(x)

    x = Add([x, input])
    x = ReLU()(x)

    return x

def joint_block(inputs):
    c = inputs[0]
    x = inputs[1]

    c = K.expand_dims(c, axis=1)
    c = K.expand_dims(c, axis=1)
    c = K.tile(c, [1, 16, 16, 1])
    return K.concatenate([c, x], axis=3)

In [3]:
from tensorflow.keras.layers import ZeroPadding2D

In [45]:
def stage2_generator():

    input_layer = Input(shape=(1024,))
    x = Dense(256)(input_layer)
    mean_logsigma = LeakyReLU(alpha=0.2)(x)
    c = Lambda(generate_c)(mean_logsigma)


    input_lr_images = Input(shape=(64, 64, 3))
    x = ZeroPadding2D(padding=(1, 1))(input_lr_images)
    x = Conv2D(128, kernel_size=(3, 3), strides=1, use_bias=False)(x)
    x = ReLU()(x)

    x = ZeroPadding2D(padding=(1, 1))(x)
    x = Conv2D(256, kernel_size=(4, 4), strides=2, use_bias=False)(x)
    x = BatchNormalization()(x)
    x = ReLU()(x)

    x = ZeroPadding2D(padding=(1, 1))(x)
    x = Conv2D(512, kernel_size=(4, 4), strides=2, use_bias=False)(x)
    x = BatchNormalization()(x)
    x = ReLU()(x)

    # concatenate using joint block
    c_cat = Lambda(joint_block)([c, x])

    x = ZeroPadding2D(padding=(1, 1))(c_cat)
    x = Conv2D(512, kernel_size=(3, 3), strides=1, use_bias=False)(x)
    x = BatchNormalization()(x)
    x = ReLU()(x)

    # # 4. Residual blocks
    # x = residual_block(x)
    # x = residual_block(x)
    # x = residual_block(x)
    # x = residual_block(x)

    # 5. Upsampling blocks
    x = UpSampling2D(size=(2, 2))(x)
    x = Conv2D(512, kernel_size=3, padding="same", strides=1, use_bias=False)(x)
    x = BatchNormalization()(x)
    x = ReLU()(x)

    x = UpSampling2D(size=(2, 2))(x)
    x = Conv2D(256, kernel_size=3, padding="same", strides=1, use_bias=False)(x)
    x = BatchNormalization()(x)
    x = ReLU()(x)

    x = UpSampling2D(size=(2, 2))(x)
    x = Conv2D(128, kernel_size=3, padding="same", strides=1, use_bias=False)(x)
    x = BatchNormalization()(x)
    x = ReLU()(x)

    x = UpSampling2D(size=(2, 2))(x)
    x = Conv2D(64, kernel_size=3, padding="same", strides=1, use_bias=False)(x)
    x = BatchNormalization()(x)
    x = ReLU()(x)

    x = Conv2D(3, kernel_size=3, padding="same", strides=1, use_bias=False)(x)
    x = Activation('tanh')(x)

    model = Model(inputs=[input_layer, input_lr_images], outputs=[x, mean_logsigma])
    return model

In [46]:
model = stage2_generator()
model.summary()

In [36]:
def stage2_discriminator():

    input_layer = Input(shape=(256, 256, 3))

    x = Conv2D(64, (4, 4), padding='same', strides=2, input_shape=(256, 256, 3), use_bias=False)(input_layer)
    x = LeakyReLU(alpha=0.2)(x)

    x = Conv2D(128, (4, 4), padding='same', strides=2, use_bias=False)(x)
    x = BatchNormalization()(x)
    x = LeakyReLU(alpha=0.2)(x)

    x = Conv2D(256, (4, 4), padding='same', strides=2, use_bias=False)(x)
    x = BatchNormalization()(x)
    x = LeakyReLU(alpha=0.2)(x)

    x = Conv2D(512, (4, 4), padding='same', strides=2, use_bias=False)(x)
    x = BatchNormalization()(x)
    x = LeakyReLU(alpha=0.2)(x)

    x = Conv2D(1024, (4, 4), padding='same', strides=2, use_bias=False)(x)
    x = BatchNormalization()(x)
    x = LeakyReLU(alpha=0.2)(x)

    x = Conv2D(2048, (4, 4), padding='same', strides=2, use_bias=False)(x)
    x = BatchNormalization()(x)
    x = LeakyReLU(alpha=0.2)(x)

    x = Conv2D(1024, (1, 1), padding='same', strides=1, use_bias=False)(x)
    x = BatchNormalization()(x)
    x = LeakyReLU(alpha=0.2)(x)

    x = Conv2D(512, (1, 1), padding='same', strides=1, use_bias=False)(x)
    x = BatchNormalization()(x)



    input_layer2 = Input(shape=(4, 4, 128))

    merged_input = concatenate([x, input_layer2])

    x2 = Conv2D(1024, kernel_size=1, padding="same", strides=1)(merged_input)
    x2 = BatchNormalization()(x2)
    x2 = LeakyReLU(alpha=0.2)(x2)
    x2 = Flatten()(x2)
    x2 = Dense(1)(x2)
    x2 = Activation('sigmoid')(x2)

    model = Model(inputs=[input_layer, input_layer2], outputs=[x2])
    return model

In [49]:
model = stage2_discriminator()
model.summary()

  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


In [55]:
def adversarial_model(gen_model2, dis_model, gen_model1):

    input_layer = Input(shape=(1024, ))
    noise_input_layer = Input(shape=(100, ))
    compressed_embedding_input_layer = Input(shape=(4, 4, 128))

    gen_model1.trainable = False
    dis_model.trainable = False

    lr_images, mean_logsigma1 = gen_model1([input_layer, noise_input_layer])
    hr_images, mean_logsigma2 = gen_model2([input_layer, lr_images])
    preds = dis_model([hr_images, compressed_embedding_input_layer])

    model = Model(inputs=[input_layer, noise_input_layer, compressed_embedding_input_layer], outputs=[preds, mean_logsigma2])
    return model

In [38]:
# Load high resolution images, labels and embeddings
X_hr_train, y_hr_train, embeddings_train = load_dataset(fname_path=filenames_path_train,
                                                            classids_path=class_ids_path_train,
                                                            embed_path=embed_path_train,
                                                            cub_dir=cub_dataset_dir,
                                                            image_size=(256, 256))

X_hr_test, y_hr_test, embeddings_test = load_dataset(fname_path=filenames_path_test,
                                                      classids_path=class_ids_path_test,
                                                      embed_path=embed_path_test,
                                                      cub_dir=cub_dataset_dir,
                                                      image_size=(256, 256))

  df_bboxes = pd.read_csv(bbox_path,delim_whitespace=True, header=None).astype(int)
  df_filenames = pd.read_csv(images_file_path, delim_whitespace=True, header=None)
  df_bboxes = pd.read_csv(bbox_path,delim_whitespace=True, header=None).astype(int)
  df_filenames = pd.read_csv(images_file_path, delim_whitespace=True, header=None)


In [None]:
# Train stage 2 architecture

stage2_dis = stage2_discriminator()
stage2_dis.compile(loss='binary_crossentropy', optimizer=d_optimizer)

stage1_gen = stage1_generator()
stage1_gen.compile(loss="binary_crossentropy", optimizer=g_optimizer)

stage1_gen.load_weights("/content/drive/MyDrive/StackGAN/weights/G1.weights.h5")

stage2_gen = stage2_generator()
stage2_gen.compile(loss="binary_crossentropy", optimizer=g_optimizer)

embedding_compressor_model = embedding_compressor_model()
embedding_compressor_model.compile(loss='binary_crossentropy', optimizer='adam')

adversarial_model = adversarial_model(stage2_gen, stage2_dis, stage1_gen)
adversarial_model.compile(loss=['binary_crossentropy', KL_loss], loss_weights=[1.0, 2.0],
                          optimizer=g_optimizer, metrics=None)



#  real and fake values
real_labels = np.ones((batch_size, 1), dtype=float)
fake_labels = np.zeros((batch_size, 1), dtype=float)

for epoch in range(epochs):

    gen_losses_2 = []
    dis_losses_2 = []

    # Load data and train model
    numbatches = int(X_hr_train.shape[0] / batch_size)

    for index in range(numbatches):
        print(f"Batch:{index+1}")

        # Create a noise vector
        z_noise = np.random.normal(0, 1, size=(batch_size, z_dim))
        X_hr_train_batch = X_hr_train[index * batch_size:(index + 1) * batch_size]
        embedding_batch = embeddings_train[index * batch_size:(index + 1) * batch_size]
        X_hr_train_batch = (X_hr_train_batch - 127.5) / 127.5

        # Generate fake images
        lr_fake_images, _ = stage1_gen.predict([embedding_batch, z_noise], verbose=3)
        hr_fake_images, _ = stage2_gen.predict([embedding_batch, lr_fake_images], verbose=3)


        compressed_embedding = embedding_compressor_model.predict_on_batch(embedding_batch)
        compressed_embedding = np.reshape(compressed_embedding, (-1, 1, 1, condition_dim))
        compressed_embedding = np.tile(compressed_embedding, (1, 4, 4, 1))

        # Losses
        dis_loss_real = stage2_dis.train_on_batch([X_hr_train_batch, compressed_embedding],
                                                  np.reshape(real_labels, (batch_size, 1)))
        dis_loss_fake = stage2_dis.train_on_batch([hr_fake_images, compressed_embedding],
                                                  np.reshape(fake_labels, (batch_size, 1)))

        d_loss = np.add(dis_loss_real,  dis_loss_fake)

        dis_losses_2.append(d_loss)
        print(f"d_loss:{d_loss}")


        g_loss = adversarial_model.train_on_batch([embedding_batch, z_noise, compressed_embedding],[K.ones((batch_size, 1)), K.ones((batch_size, 256))])
        print(f"g_loss:{g_loss}")
        gen_losses_2.append(g_loss)

    # Generate and save images after every 2nd epoch
    if epoch % 2 == 0:
        # z_noise2 = np.random.uniform(-1, 1, size=(batch_size, z_dim))
        z_noise2 = np.random.normal(0, 1, size=(batch_size, z_dim))
        embedding_batch = embeddings_test[0:batch_size]

        lr_fake_images, _ = stage1_gen.predict([embedding_batch, z_noise2], verbose=3)
        hr_fake_images, _ = stage2_gen.predict([embedding_batch, lr_fake_images], verbose=3)

        # Save images
        for i, img in enumerate(hr_fake_images[:10]):
            save_img(img, f"/content/drive/MyDrive/StackGAN/hr_results/gen_{epoch}_{i}.png")

# Saving the models
stage2_gen.save_weights("/content/drive/MyDrive/StackGAN/weights/stage2_gen.weights.h5")
stage2_dis.save_weights("/content/drive/MyDrive/StackGAN/weights/stage2_dis.weights.h5")

Batch:1




d_loss:1.3719773292541504
g_loss:[array(0.6928994, dtype=float32), array(0.6928994, dtype=float32), array(0.6928994, dtype=float32), array(0.6928994, dtype=float32)]
Batch:2
