In [61]:
import keras
import tensorflow as tf
import numpy as np
from keras import layers
from keras import ops
import matplotlib.pyplot as plt
from sklearn.model_selection import train_test_split


In [62]:
X = np.load('x_letters.npy')
Y = np.load('y_letters.npy')

X_train, X_test, Y_train, Y_test = train_test_split(X, Y, test_size=0.2, random_state=42)
X_train = X_train.reshape([-1, 28, 28, 1])
X_test = X_test.reshape([-1, 28, 28, 1])

In [63]:
def build_discriminator(img_dim, num_classes):
    # image input 28x28x1 for grayscale images
    image_input= layers.Input(shape=(img_dim, img_dim,1))
    n_nodes = img_dim * img_dim
    # embedding the label input
    in_label = layers.Input(shape=(1,))
    label_input = layers.Embedding(num_classes, 50)(in_label)
    label_input = layers.Dense(n_nodes)(label_input)
    label_input = layers.Reshape((img_dim, img_dim, 1))(label_input)

    concat= layers.Concatenate()([image_input, label_input])
    fe = layers.Conv2D(128, (3,3), strides=(2,2), padding='same')(concat)
    fe = layers.LeakyReLU(alpha=0.2)(fe)
    fe = layers.Conv2D(128, (3,3), strides=(2,2), padding='same')(fe)
    fe = layers.LeakyReLU(alpha=0.2)(fe)
    fe = layers.Flatten()(fe)
    fe = layers.Dropout(0.4)(fe)

    real_or_fake= layers.Dense(1, activation='sigmoid')(fe)
    return tf.keras.Model([image_input, in_label], real_or_fake)

In [64]:

def build_generator(img_dim, num_classes, latent_dim):
    # label input
    in_label = layers.Input(shape=(1,))
    # embedding for categorical input
    li = layers.Embedding(num_classes, 12)(in_label)

    # Flatten the embedding output
    li = layers.Flatten()(li)
    # linear multiplication
    n_nodes = 7 * 7
    li = layers.Dense(n_nodes)(li)
 
    # reshape to additional channel
    li = layers.Reshape((7, 7, 1))(li)
    
    # Fully connected layers
    in_latent = layers.Input(shape=(latent_dim,))

    latent_input = layers.Dense(128*7*7, activation="relu", kernel_initializer="he_normal")(in_latent)
    latent_input = layers.LeakyReLU(negative_slope=0.2)(latent_input)  # LeakyReLU is often used in GANs
    latent_input = layers.Reshape((7, 7, 128))(latent_input)  # Reshape for Conv2DTranspose

    # Concatenate latent and label inputs
    concat = layers.Concatenate()([latent_input, li])

   # Transposed convolutional layers to upscale the image
    x = layers.Conv2DTranspose(128, (4,4), strides=(2,2), padding='same')(concat)  # 7x7 -> 14x14
    x = layers.LeakyReLU(negative_slope=0.2)(x)
    
    x = layers.Conv2DTranspose(128, (4,4), strides=(2,2), padding='same')(x)  # 14x14 -> 28x28
    x = layers.LeakyReLU(negative_slope=0.2)(x)
    
    # Final layer to produce the output image
    generated_image = layers.Conv2DTranspose(1, kernel_size=4, strides=1, padding='same', activation='tanh')(x)  # 28x28x1
    
    return tf.keras.Model([in_latent, in_label], generated_image)



In [66]:
class GAN(keras.Model):
    def __init__(self, discriminator, generator, latent_dim, num_classes):
        super().__init__()
        self.discriminator = discriminator
        self.generator = generator
        self.latent_dim = latent_dim
        self.num_classes = num_classes
        self.seed_generator = keras.random.SeedGenerator(1337)

    def compile(self, d_optimizer, g_optimizer, loss_fn):
        super().compile()
        self.d_optimizer = d_optimizer
        self.g_optimizer = g_optimizer
        self.loss_fn = loss_fn
        self.d_loss_metric = keras.metrics.Mean(name="d_loss")
        self.g_loss_metric = keras.metrics.Mean(name="g_loss")

    @property
    def metrics(self):
        return [self.d_loss_metric, self.g_loss_metric]

    def train_step(self, data):
        real_images, real_labels = data  # Unpack the data (images and their labels)

        # Sample random points in the latent space
        batch_size = ops.shape(real_images)[0]
        random_latent_vectors = keras.random.normal(
            shape=(batch_size, self.latent_dim),
            seed = self.seed_generator
        )

        # 1. Train the Discriminator:
        # --------------------------------


        # Generate random labels for the generated images
        random_labels = tf.random.uniform(
            shape=(batch_size,), minval=0, maxval=self.num_classes, dtype=tf.float32
        )

        # Generate fake images conditioned on these random labels
        generated_images = self.generator([random_latent_vectors, random_labels])

        # Combine real and fake images and their labels
        combined_images = tf.concat([generated_images, real_images], axis=0)
        combined_labels = tf.concat([random_labels, real_labels], axis=0)

        real_fake_labels = tf.concat(
            [tf.ones((batch_size, 1)) * 0.9,  # Real labels smoothed (0.9 instead of 1)
            tf.zeros((batch_size, 1)) + 0.1],  # Fake labels smoothed (0.1 instead of 0)
            axis=0
        )
        
        # Train the discriminator
        with tf.GradientTape() as tape:
            predictions = self.discriminator([combined_images, combined_labels])
            d_loss = self.loss_fn(real_fake_labels, predictions)
        grads = tape.gradient(d_loss, self.discriminator.trainable_weights)
        self.d_optimizer.apply_gradients(zip(grads, self.discriminator.trainable_weights))

        # 2. Train the Generator:
        # --------------------------------
        # Sample random points in the latent space again
        random_latent_vectors = keras.random.normal(
            shape=(batch_size, self.latent_dim), seed=self.seed_generator
        )

        # Generate random labels again
        random_labels = tf.random.uniform(
            shape=(batch_size,), minval=0, maxval=self.num_classes, dtype=tf.int32
        )

        # Assemble labels that say "all real images" (i.e., we want the generator to fool the discriminator)
        misleading_labels = tf.ones((batch_size, 1))

        # Train the generator
        with tf.GradientTape() as tape:
            fake_images = self.generator([random_latent_vectors, random_labels])
            predictions = self.discriminator([fake_images, random_labels])
            g_loss = self.loss_fn(misleading_labels, predictions)
        grads = tape.gradient(g_loss, self.generator.trainable_weights)
        self.g_optimizer.apply_gradients(zip(grads, self.generator.trainable_weights))

        # Update metrics
        self.d_loss_metric.update_state(d_loss)
        self.g_loss_metric.update_state(g_loss)
        return {
            "d_loss": self.d_loss_metric.result(),
            "g_loss": self.g_loss_metric.result(),
        }


In [67]:
def generate_and_compare_image(generator, latent_dim, label):
    # Step 1: Display the actual image from the dataset
    plt.figure(figsize=(3, 1))
    plt.subplot(1, 2, 1)
    plt.imshow(X[np.where(Y == label)[0][0]], cmap='gray', interpolation='nearest')
    plt.title("Real")
    plt.axis('off')  # Hide the axes
    
    # Step 2: Generate a random latent vector (noise)
    random_latent_vector = np.random.normal(size=(1, latent_dim))
    
    # Step 3: Pass the numeric label directly (no need for one-hot encoding)
    random_label = np.array([label], dtype=np.float32)  # Assuming labels are float32 like in the train_step
    
    # Step 4: Generate an image using the generator model
    generated_image = generator([random_latent_vector, random_label])
    
    # Step 5: Post-process the image (Rescale from [-1, 1] to [0, 1] for visualization)
    generated_image = (generated_image[0, :, :, 0] + 1) / 2.0  # Assuming it's grayscale
    
    # Step 6: Display the generated image
    plt.subplot(1, 2, 2)
    plt.imshow(generated_image, cmap='gray')
    plt.title("Generated")
    plt.axis('off')  # Turn off axis labels
    
    # Step 7: Show the comparison of actual vs generated images side by side
    plt.show()


In [68]:
img_dim = 28
num_classes = 26
latent_dim = 72

dis = build_discriminator(img_dim=img_dim, num_classes= num_classes)
gen = build_generator(img_dim=img_dim, num_classes=num_classes, latent_dim=latent_dim)



In [74]:
# Instantiate GAN model

gan = GAN(discriminator=dis, generator=gen, latent_dim=latent_dim, num_classes=num_classes)

# Compile the GAN model
gan.compile(
    d_optimizer=keras.optimizers.RMSprop(learning_rate=0.001,),
    g_optimizer=keras.optimizers.RMSprop(learning_rate=0.01),
    loss_fn=keras.losses.BinaryCrossentropy(),
)

# Train the model
epochs = 5
# gen.summary(), dis.summary()
gan.fit(X_train, Y_train.astype('float64'), epochs=epochs)

generate_and_compare_image(gan.generator, latent_dim=latent_dim, label=2, num_classes=num_classes)

Epoch 1/5
[1m2220/2220[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m231s[0m 103ms/step - d_loss: 0.3298 - g_loss: 0.1091
Epoch 2/5
[1m 730/2220[0m [32m━━━━━━[0m[37m━━━━━━━━━━━━━━[0m [1m3:31[0m 142ms/step - d_loss: 0.3253 - g_loss: 0.1056

KeyboardInterrupt: 