In [3]:

# Necessary imports
from glob import glob
import pandas as pd
import urllib.request
import imageio
import os
import numpy as np
from utils import loadGloveModel, discriminator_loss, generator_loss, hms_string, test_image

from urllib.request import urlopen
import tensorflow as tf

from sklearn.metrics import mean_squared_error

import numpy as np
from PIL import Image
from tqdm import tqdm
import os 
import time
import matplotlib.pyplot as plt
from tqdm import tqdm
import pandas as pd
from models import build_discriminator_func, build_generator_func

In [117]:
glove_embeddings = loadGloveModel("../data/glove6b300dtxt/glove.6B.300d.txt")

Loading Glove Model
30-fold
Done. 399999  words loaded!


In [202]:
# Generation resolution - Must be square 
# Training data is also scaled to this.
GENERATE_RES = 2 # Generation resolution factor 
# (1=32, 2=64, 3=96, 4=128, etc.)
GENERATE_SQUARE = 32 * GENERATE_RES # rows/cols (should be square)
IMAGE_CHANNELS = 3

# Preview image 
PREVIEW_ROWS = 4
PREVIEW_COLS = 7
PREVIEW_MARGIN = 16

# Size vector to generate images from
SEED_SIZE = 100
EMBEDDING_SIZE = 300

# Configuration
DATA_PATH = "../data/flowers"
MODEL_PATH = "saved_models/flowers"
EPOCHS = 500
BATCH_SIZE = 64
BUFFER_SIZE = 4000

print(f"Will generate {GENERATE_SQUARE}px square images.")

Will generate 64px square images.


In [126]:
training_binary_path = os.path.join("DATA_PATH", "image_npy",
        f'training_data_{GENERATE_SQUARE}_{GENERATE_SQUARE}_')

start = time.time()
print("Loading training images...")

training_data = []
flower_paths = sorted(glob(DATA_PATH + "/*.jpg"))

for filename, file_path in tqdm(enumerate(flower_paths)):
    try:
        image = Image.open(file_path).resize((GENERATE_SQUARE,
            GENERATE_SQUARE),Image.LANCZOS)
        channel = np.asarray(image).shape[2]
        if channel == 3:
            training_data.append(np.asarray(image))
    except KeyboardInterrupt:
        print("Keyboard Interrup by me...")
        break
    except:
        pass
    if len(training_data) == 100:
        training_data = np.reshape(training_data,(-1,GENERATE_SQUARE,
                GENERATE_SQUARE,IMAGE_CHANNELS))
        training_data = training_data.astype(np.float32)
        training_data = training_data / 127.5 - 1.
        np.save(training_binary_path + str(100000 + filename) + ".npy",training_data)
        elapsed = time.time()-start
        training_data = []
print("Complete")
     

Loading training images...


8189it [01:00, 136.23it/s]

Complete





In [127]:

caption_df = pd.read_csv(os.path.join(DATA_PATH, "metadata.csv"))

In [128]:
captions = []
caption_embeddings = np.zeros((len(caption_df),300),dtype=np.float32)
for i, row in tqdm(caption_df.iterrows()):
    filename = row['file_name'].rstrip('jpg')
    x = row['caption']
    x = x.replace(" ","")
    captions.append(x)
    count = 0
    for t in x:
        try:
            caption_embeddings[i] += glove_embeddings[t]
            count += 1
        except:
            pass
    caption_embeddings[i] /= count

8189it [00:02, 3055.38it/s]


In [130]:
embedding_binary_path = os.path.join(DATA_PATH,
        f'embedding_data.npy')
np.save(embedding_binary_path,caption_embeddings)

In [131]:
embedding_binary_path = os.path.join(DATA_PATH,
        f'embedding_data.npy')
caption_embeddings = np.load(embedding_binary_path)
caption_embeddings.shape

(8189, 300)

In [132]:
image_binary_path = os.path.join(DATA_PATH, 'image_npy')
images = os.listdir(image_binary_path)

In [133]:
final_images = np.load(image_binary_path + images[0])
for i in images[1:]:
    try:
        final_images = np.concatenate([final_images,np.load(image_binary_path + i)],axis = 0)
    except:
        pass

In [173]:
save_images_captions = captions[-28:].copy()
save_images_embeddings = np.copy(caption_embeddings[-28:])
save_images_npy = np.copy(final_images[-28:])
save_images = final_images[-28:]

In [174]:
train_images_captions = captions[:-28].copy()
train_images_embeddings = np.copy(caption_embeddings[:-28])
train_images_npy = np.copy(final_images[:-28])
train_images = final_images[:-28]

In [175]:
p = np.random.permutation(len(train_images))

In [176]:
final_images_shuffled = train_images[p]
final_embeddings_shuffled = train_images_embeddings[p]

In [177]:
final_images_shuffled.shape

(8072, 64, 64, 3)

In [178]:
final_embeddings_shuffled.shape

(8072, 300)

In [225]:
train_dataset = tf.data.Dataset.from_tensor_slices({'images': final_images_shuffled,
                                                    'embeddings': final_embeddings_shuffled}).shuffle(BUFFER_SIZE).batch(BATCH_SIZE)

In [245]:
cross_entropy = tf.keras.losses.BinaryCrossentropy()

In [246]:
generator = build_generator_func(SEED_SIZE,EMBEDDING_SIZE, IMAGE_CHANNELS)

image_shape = (GENERATE_SQUARE,GENERATE_SQUARE,IMAGE_CHANNELS)
discriminator = build_discriminator_func(image_shape,EMBEDDING_SIZE)

In [247]:
generator_optimizer = tf.keras.optimizers.Adam(learning_rate=2.0e-4,beta_1 = 0.5)
discriminator_optimizer = tf.keras.optimizers.Adam(learning_rate=2.0e-4,beta_1 = 0.5)

In [248]:
tf.random.normal([BATCH_SIZE, SEED_SIZE],dtype=tf.float32).shape

TensorShape([64, 100])

In [249]:

@tf.function
def train_step(images,captions,fake_captions):
  seed = tf.random.normal([images.shape[0], SEED_SIZE],dtype=tf.float32)

  with tf.GradientTape() as gen_tape, tf.GradientTape() as disc_tape:
    generated_images = generator((seed,captions), training=True)
    real_image_real_text = discriminator((images,captions), training=True)
    real_image_fake_text = discriminator((images,fake_captions), training=True)
    fake_image_real_text = discriminator((generated_images,captions), training=True)

    gen_loss = generator_loss(fake_image_real_text)
    disc_loss = discriminator_loss(real_image_real_text, fake_image_real_text, real_image_fake_text)


    gradients_of_generator = gen_tape.gradient(\
        gen_loss, generator.trainable_variables)
    gradients_of_discriminator = disc_tape.gradient(\
        disc_loss, discriminator.trainable_variables)

    generator_optimizer.apply_gradients(zip(
        gradients_of_generator, generator.trainable_variables))
    discriminator_optimizer.apply_gradients(zip(
        gradients_of_discriminator, 
        discriminator.trainable_variables))
  return gen_loss,disc_loss

In [None]:
def train(train_dataset, epochs):
  epochs = 500

  fixed_seed = np.random.normal(0, 1, (PREVIEW_ROWS * PREVIEW_COLS, 
                                    SEED_SIZE))
  fixed_embed = save_images_embeddings

  start = time.time()

  for epoch in range(epochs):
      print("epoch start...")
      epoch_start = time.time()

      gen_loss_list = []
      disc_loss_list = []

      for batch in train_dataset:
        train_batch = batch['images']
        caption_batch = batch['embeddings']

        fake_caption_batch = np.copy(caption_batch)
        np.random.shuffle(fake_caption_batch)

        t = train_step(train_batch,caption_batch,fake_caption_batch)
        gen_loss_list.append(t[0])
        disc_loss_list.append(t[1])
        
      print("now")
      g_loss = sum(gen_loss_list) / len(gen_loss_list)
      d_loss = sum(disc_loss_list) / len(disc_loss_list)

      epoch_elapsed = time.time()-epoch_start
      print(f'Epoch {epoch+1}, gen loss={g_loss},disc loss={d_loss}, {hms_string(epoch_elapsed)}')
      save_images(epoch,fixed_seed,fixed_embed)

      generator.save(os.path.join(MODEL_PATH,"text_to_image_generator_cub_character.h5"))
      discriminator.save(os.path.join(MODEL_PATH,"text_to_image_disc_cub_character.h5"))
      print("model saved")

      elapsed = time.time()-start
      print ('Training time:', hms_string(elapsed))

epoch start...


2024-04-30 19:31:46.014592: E tensorflow/core/grappler/optimizers/meta_optimizer.cc:961] layout failed: INVALID_ARGUMENT: Size of values 0 does not match size of permutation 4 @ fanin shape infunctional_25_1/dropout_20_1/stateless_dropout/SelectV2-2-TransposeNHWCToNCHW-LayoutOptimizer


now
Epoch 1, gen loss=0.7572728991508484,disc loss=1.5628712177276611, 0:00:29.94
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 629ms/step
model saved
Training time: 0:00:30.78
epoch start...


In [243]:
test_image("this flower is purple in color with oval shaped petals",1)

[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 19ms/step
