In [1]:
import numpy as np

import matplotlib.pyplot as plt
from tqdm.notebook import tqdm

import tensorflow as tf
from tensorflow.keras.models import Model
from tensorflow.keras.layers import Input, Concatenate, Dense, Conv2D, LeakyReLU, Conv2DTranspose, Flatten, Reshape
from tensorflow.keras.optimizers import Adam
from tensorflow.keras.datasets import mnist
import tensorflow_datasets as tfds

In [2]:
def build_generator(img_size, latent_dim):
    """Build the generator: a convolutional decoder from latent tensor to image.

    The network is four (stride-2 ``Conv2DTranspose``, ``Conv2D``) pairs
    followed by one more stride-2 ``Conv2DTranspose``, i.e. five upsampling
    stages in total (a 2x2 latent grid becomes a 64x64 image).

    Args:
        img_size: Image shape tuple. Only its last entry (the channel count)
            is used; the output height/width follow from the latent grid size.
        latent_dim: Shape of the latent input (without the batch axis).

    Returns:
        A Keras ``Model`` mapping a latent tensor to an image tensor.
    """
    z = Input(latent_dim)

    x = z
    for filters in (512, 256, 128, 128):
        # Upsample by 2, then refine at the new resolution.
        x = Conv2DTranspose(filters, (3, 3), strides=(2, 2), padding="same",
                            activation=tf.nn.leaky_relu)(x)
        x = Conv2D(filters, (3, 3), padding="same",
                   activation=tf.nn.leaky_relu)(x)

    # tanh bounds the output to (-1, 1), the same range apply_norm gives the
    # training images. An unbounded output activation would let the
    # discriminator tell real from fake purely by value range.
    x = Conv2DTranspose(img_size[-1], (3, 3), strides=(2, 2), padding="same",
                        activation="tanh")(x)
    return Model(z, x)

def build_encoder(img_size, latent_dim):
    """Build the encoder: a convolutional network from image to latent tensor.

    Four (stride-2 ``Conv2D``, ``Conv2D``) pairs are followed by a final
    stride-2 ``Conv2D`` without activation, giving five downsampling stages.

    Args:
        img_size: Shape of the input image (without the batch axis).
        latent_dim: Latent shape tuple. Only its last entry is used, as the
            channel count of the output layer.

    Returns:
        A Keras ``Model`` mapping an image tensor to a latent tensor.
    """
    image_in = Input(img_size)

    features = image_in
    for width in (128, 256, 512, 512):
        # Halve the resolution, then refine at the new resolution.
        features = Conv2D(width, (3, 3), strides=(2, 2), padding="same",
                          activation=tf.nn.leaky_relu)(features)
        features = Conv2D(width, (3, 3), padding="same",
                          activation=tf.nn.leaky_relu)(features)

    # Linear (no activation) projection to the latent channel count.
    latent_out = Conv2D(latent_dim[-1], (3, 3), strides=(2, 2),
                        padding="same")(features)
    return Model(image_in, latent_out)

def build_discriminator(img_size, latent_dim):
    """Build the joint discriminator over (image, latent) pairs.

    The latent tensor is flattened, projected by a ``Dense`` layer to as many
    values as the image has, reshaped to the image shape and concatenated
    with the image. Four (stride-2 ``Conv2D``, ``Conv2D``) pairs and a final
    ``Dense(1)`` without activation produce one score per pair.

    Args:
        img_size: Image shape ``(height, width, channels)``.
        latent_dim: Shape of the latent input (without the batch axis).

    Returns:
        A Keras ``Model`` taking ``[image, latent]`` and returning the score.
    """
    image_in = Input(img_size)
    latent_in = Input(latent_dim)

    # Bring the latent code to the image's shape so both can be stacked.
    n_image_values = img_size[0] * img_size[1] * img_size[2]
    latent_map = Flatten()(latent_in)
    latent_map = Dense(n_image_values)(latent_map)
    latent_map = Reshape(img_size)(latent_map)

    features = Concatenate()([image_in, latent_map])
    for width in (128, 256, 512, 1024):
        features = Conv2D(width, (3, 3), strides=(2, 2), padding="same",
                          activation=tf.nn.leaky_relu)(features)
        features = Conv2D(width, (3, 3), padding="same",
                          activation=tf.nn.leaky_relu)(features)

    features = Flatten()(features)
    score = Dense(1)(features)
    return Model([image_in, latent_in], [score])



In [3]:
def load_dataset(data_dir, img_size = 64, batch_size = 1):
    """Create a label-free image dataset from the files under ``data_dir``.

    Args:
        data_dir: Directory handed to ``image_dataset_from_directory``.
        img_size: Side length; images are resized to ``(img_size, img_size)``.
        batch_size: Accepted but not forwarded. The loader is always called
            with ``batch_size=None``, so this argument currently has no
            effect.

    Returns:
        The dataset object produced by
        ``tf.keras.utils.image_dataset_from_directory`` (seed fixed to 42).
    """
    loader_options = dict(
        directory=data_dir,
        labels=None,
        batch_size=None,
        image_size=(img_size, img_size),
        seed=42,
    )
    return tf.keras.utils.image_dataset_from_directory(**loader_options)

In [4]:
# Square resolution the images are resized to, and the root data folder.
img_dim = 64
data_folder = "sample_data/"

# Images are read from the "img" sub-folder of the data folder.
train_ds = load_dataset(data_folder + "img", img_dim, 1)

Found 2560 files belonging to 1 classes.


2023-03-24 03:14:54.518088: I tensorflow/stream_executor/cuda/cuda_gpu_executor.cc:975] successful NUMA node read from SysFS had negative value (-1), but there must be at least one NUMA node, so returning NUMA node zero
2023-03-24 03:14:54.535670: I tensorflow/stream_executor/cuda/cuda_gpu_executor.cc:975] successful NUMA node read from SysFS had negative value (-1), but there must be at least one NUMA node, so returning NUMA node zero
2023-03-24 03:14:54.538655: I tensorflow/stream_executor/cuda/cuda_gpu_executor.cc:975] successful NUMA node read from SysFS had negative value (-1), but there must be at least one NUMA node, so returning NUMA node zero
2023-03-24 03:14:54.543507: I tensorflow/core/platform/cpu_feature_guard.cc:193] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations:  AVX2 FMA
To enable them in other operations, rebuild TensorFlow with the appropriate compiler flags

In [5]:
def apply_norm(x):
    """Linearly rescale ``x`` so that 0 maps to -1 and 255 maps to 1."""
    doubled = x * 2
    return doubled / 255.0 - 1

In [6]:
# Tensor shapes shared by the three networks (batch axis excluded).
img_size = (img_dim, img_dim, 3)
latent_dim = (2, 2, 128)

# Networks are created in the order encoder, generator, discriminator.
enc, gen, disc = (
    build(img_size, latent_dim)
    for build in (build_encoder, build_generator, build_discriminator)
)

# One independent Adam optimizer per network, all with the same step size.
g_opt, e_opt, d_opt = (Adam(learning_rate=1e-4) for _ in range(3))


In [7]:
# Number of dataset elements, read before train_ds is rebound below.
num = len(train_ds)

# Rescale every element with apply_norm and expose the result as numpy.
train_ds = tfds.as_numpy(train_ds.map(apply_norm))

# One uniform(-1, 1) latent tensor per dataset element.
z_train = np.random.uniform(
    low=-1.0, high=1.0, size=(num, *latent_dim)
).astype("float32")

In [8]:
import random

# Shuffle images and latent tensors together so each pair stays aligned.
paired = list(zip(train_ds, z_train))
random.shuffle(paired)
images, codes = zip(*paired)

# Materialise both sequences as numpy arrays.
train_ds, z_train = np.array(images), np.array(codes)

In [9]:
# Training schedule: number of passes over the data and samples per step.
epochs, batch_size = 100, 16

In [10]:
# Group consecutive samples into fixed-size batches:
# the leading axis indexes the batch, the second axis the sample within it.
X_train = train_ds.reshape((-1, batch_size, img_dim, img_dim, 3))
z_train = z_train.reshape((-1, batch_size, *latent_dim[:3]))

In [40]:
def train_step(x_batch, z_batch):
    """Run one simultaneous update of generator, encoder and discriminator.

    Args:
        x_batch: Batch of real images.
        z_batch: Batch of sampled latent tensors, same batch size as x_batch.

    Returns:
        Tuple ``(disc_loss, gen_loss, enc_loss)`` of scalar tensors, computed
        before the parameter update.
    """
    # Persistent tape: three separate gradients are taken from one forward pass.
    with tf.GradientTape(persistent=True) as tape:
        fake_img = gen(z_batch)
        fake_z = enc(x_batch)

        # Score (G(z), z) and (x, E(x)) pairs in a single discriminator pass;
        # the first half of the output belongs to the generator pairs.
        preds = disc([tf.concat([fake_img, x_batch], axis=0),
                      tf.concat([z_batch, fake_z], axis=0)])
        pred_gen, pred_enc = tf.split(preds, 2, axis=0)

        # Logistic losses in softplus form: the discriminator is pushed to
        # score encoder pairs high and generator pairs low, while generator
        # and encoder are pushed in the opposite directions.
        gen_loss = tf.reduce_mean(tf.nn.softplus(-pred_gen))
        enc_loss = tf.reduce_mean(tf.nn.softplus(pred_enc))
        disc_loss = (tf.reduce_mean(tf.nn.softplus(pred_gen))
                     + tf.reduce_mean(tf.nn.softplus(-pred_enc)))

    # All gradients are taken before any variable is modified.
    gen_grad = tape.gradient(gen_loss, gen.trainable_variables)
    disc_grad = tape.gradient(disc_loss, disc.trainable_variables)
    enc_grad = tape.gradient(enc_loss, enc.trainable_variables)
    del tape  # release the resources held by the persistent tape

    g_opt.apply_gradients(zip(gen_grad, gen.trainable_variables))
    e_opt.apply_gradients(zip(enc_grad, enc.trainable_variables))
    d_opt.apply_gradients(zip(disc_grad, disc.trainable_variables))

    return disc_loss, gen_loss, enc_loss


# Sizes come from the array that is actually iterated, so they cannot drift
# from it if another cell's variables change.
num_steps = X_train.shape[0]

for epoch in tqdm(range(epochs), desc='Total'):

    # Per-epoch sums (not means) of [discriminator, generator, encoder] loss.
    running_loss = [0, 0, 0]

    # Visit the pre-built batches in random order by indexing through the
    # permutation. This avoids copying the whole dataset every epoch and
    # leaves X_train untouched, so the cell can be re-run safely.
    batch_order = np.random.permutation(num_steps)

    # Fresh latent samples for every epoch, one tensor per training image.
    z_epoch = np.random.uniform(
        -1.0, 1.0, X_train.shape[:2] + tuple(latent_dim)
    ).astype("float32")

    for step in range(num_steps):
        disc_loss, gen_loss, enc_loss = train_step(
            X_train[batch_order[step]], z_epoch[step]
        )

        running_loss[0] += disc_loss.numpy()
        running_loss[1] += gen_loss.numpy()
        running_loss[2] += enc_loss.numpy()

    print('[{}] Desc loss : {:.10f}\t Gen Loss : {:.10f}\t Enc Loss : {:.10f}'.format(epoch,running_loss[0],running_loss[1], running_loss[2] ))

Total:   0%|          | 0/100 [00:00<?, ?it/s]

[0] Desc loss : 13.1535528820	 Gen Loss : 1373.4470348358	 Enc Loss : 2590.7647881508
[1] Desc loss : 14.4569167111	 Gen Loss : 1659.8978767395	 Enc Loss : 3480.3543143272
[2] Desc loss : 14.4211983825	 Gen Loss : 1314.9784069061	 Enc Loss : 2616.6749918461
[3] Desc loss : 13.5996029071	 Gen Loss : 1399.2598161697	 Enc Loss : 3255.1986703873
[4] Desc loss : 13.1622095953	 Gen Loss : 1690.7522020340	 Enc Loss : 2733.8005280495
[5] Desc loss : 13.8165076920	 Gen Loss : 1625.9779486656	 Enc Loss : 2706.8531537056
[6] Desc loss : 11.1726483789	 Gen Loss : 1636.6813797951	 Enc Loss : 2678.0524311066
[7] Desc loss : 17.4227727035	 Gen Loss : 1696.0082964897	 Enc Loss : 4110.1270351410
[8] Desc loss : 15.4284977469	 Gen Loss : 1781.7902662754	 Enc Loss : 2869.8817496300
[9] Desc loss : 16.2349460035	 Gen Loss : 1335.8426141739	 Enc Loss : 3584.7620216608
[10] Desc loss : 15.2249684765	 Gen Loss : 1341.9857816696	 Enc Loss : 2998.4582509995
[11] Desc loss : 16.4540030561	 Gen Loss : 1405.32788

KeyboardInterrupt: 

In [None]:
def denormalize(img):
    """Map an image from the [-1, 1] training range to displayable uint8 (0-255)."""
    return np.clip((img + 1) * (255.0 / 2), 0, 255).astype("uint8")


num_sample = 10

# Reconstruct the first training batch: encode the images, then decode them.
originals = X_train[0]
pred = gen.predict(enc(originals)).reshape(-1, img_dim, img_dim, 3)

fig, axs = plt.subplots(2, num_sample, figsize=(24, 12))
for i in range(num_sample):
    # Both rows go through the same de-normalisation. The training images are
    # stored in [-1, 1], which imshow would otherwise clip to [0, 1].
    axs[0][i].imshow(denormalize(pred[i]))
    axs[1][i].imshow(denormalize(originals[i]))
    axs[0][i].tick_params(length=0, labelsize=0)
    axs[1][i].tick_params(length=0, labelsize=0)

axs[0][0].set_ylabel("reconstruction")
axs[1][0].set_ylabel("original")









In [None]:
# Persist the three networks, each under its own directory in "model/".
for _net, _target in (
    (gen, 'model/generator'),
    (enc, 'model/enc'),
    (disc, 'model/desc'),
):
    _net.save(_target)