In [1]:
import tensorflow as tf
from tensorflow.keras import layers
from tensorflow.keras.datasets import mnist

# Load and preprocess the MNIST dataset
(x_train, _), (_, _) = mnist.load_data()
x_train = x_train.astype("float32") / 255.0
x_train = tf.expand_dims(x_train, axis=-1)

Downloading data from https://storage.googleapis.com/tensorflow/tf-keras-datasets/mnist.npz


In [2]:
# Define the generator model using a transformer architecture
def generator_model():
    inputs = layers.Input(shape=(28, 28, 1))
    x = layers.Rescaling(scale=2.0)(inputs)  # Scale the pixel values to [-1, 1]
    x = layers.Conv2D(32, 3, strides=2, padding="same")(x)
    x = layers.Conv2D(64, 3, strides=2, padding="same")(x)
    x = layers.GlobalAveragePooling2D()(x)
    x = layers.Dense(256)(x)
    x = layers.Reshape((1, 1, 256))(x)
    x = layers.Conv2DTranspose(128, 7, strides=1, padding="valid")(x)
    x = layers.Conv2DTranspose(64, 5, strides=2, padding="same")(x)
    x = layers.Conv2DTranspose(1, 3, strides=2, padding="same")(x)
    outputs = layers.Activation("sigmoid")(x)
    model = tf.keras.Model(inputs=inputs, outputs=outputs)
    model.summary()
    return model

# Instantiate the generator model
generator = generator_model()

Model: "model"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 input_1 (InputLayer)        [(None, 28, 28, 1)]       0         
                                                                 
 rescaling (Rescaling)       (None, 28, 28, 1)         0         
                                                                 
 conv2d (Conv2D)             (None, 14, 14, 32)        320       
                                                                 
 conv2d_1 (Conv2D)           (None, 7, 7, 64)          18496     
                                                                 
 global_average_pooling2d (G  (None, 64)               0         
 lobalAveragePooling2D)                                          
                                                                 
 dense (Dense)               (None, 256)               16640     
                                                             

In [3]:
# Compile the model
generator.compile(loss="binary_crossentropy", optimizer="adam")

# Train the generator on the MNIST dataset
generator.fit(x_train, x_train, batch_size=64, epochs=10)

Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10


<keras.callbacks.History at 0x1fd9205f520>

In [5]:
# Generate new images
random_input = tf.random.normal(shape=(10, 28, 28, 1))
generated_images = generator.predict(random_input)



In [6]:
# Display the generated images
for i in range(10):
    generated_image = generated_images[i]
    tf.keras.preprocessing.image.array_to_img(generated_image).show()

### Vision Trannsformer

In [11]:
import tensorflow as tf
from tensorflow.keras import layers
from tensorflow.keras.datasets import cifar10

# Load and preprocess the CIFAR-10 dataset
(x_train, y_train), (x_test, y_test) = cifar10.load_data()
x_train = x_train.astype("float32") / 255.0
x_test = x_test.astype("float32") / 255.0

# Define the Vision Transformer model
def vision_transformer_model(image_size, patch_size, num_classes):
    inputs = layers.Input(shape=image_size + (3,))
    x = layers.Conv2D(filters=64, kernel_size=3, strides=1, padding="same")(inputs)
    x = layers.BatchNormalization()(x)

    # Reshape the input into patches
    num_patches = (image_size[0] // patch_size) * (image_size[1] // patch_size)
    patch_dim = 64  # Number of filters in the Conv2D layer
    x = layers.Reshape((num_patches, patch_dim))(x)

    # Tokenization and Position Embedding
    positions = tf.range(start=0, limit=num_patches, delta=1)
    position_embeddings = layers.Embedding(input_dim=num_patches, output_dim=patch_dim)(positions)
    x = layers.Add()([x, position_embeddings])

    # Transformer Encoder layers
    num_transformer_layers = 6
    num_heads = 8
    transformer_units = [
        patch_dim * 2,
        patch_dim
    ]
    for _ in range(num_transformer_layers):
        x = layers.LayerNormalization(epsilon=1e-6)(x)
        attention_output = layers.MultiHeadAttention(
            num_heads=num_heads, key_dim=patch_dim, dropout=0.1
        )(x, x)
        x = layers.Add()([x, attention_output])
        x = layers.LayerNormalization(epsilon=1e-6)(x)
        x = layers.Dense(units=transformer_units[0], activation="gelu")(x)
        x = layers.Dense(units=transformer_units[1])(x)
        x = layers.Add()([x, attention_output])

    # Classification head
    x = layers.LayerNormalization(epsilon=1e-6)(x)
    x = layers.GlobalAveragePooling1D()(x)
    x = layers.Dropout(0.5)(x)
    outputs = layers.Dense(num_classes, activation="softmax")(x)

    # Create the model
    model = tf.keras.Model(inputs=inputs, outputs=outputs)
    return model

# Set hyperparameters
image_size = (32, 32)
patch_size = 4
num_classes = 10

# Instantiate the Vision Transformer model
vit_model = vision_transformer_model(image_size, patch_size, num_classes)

# Compile the model
vit_model.compile(
    optimizer=tf.keras.optimizers.Adam(learning_rate=0.001),
    loss=tf.keras.losses.SparseCategoricalCrossentropy(from_logits=True),
    metrics=["accuracy"]
)

# Train the Vision Transformer model
vit_model.fit(x_train, y_train, batch_size=128, epochs=10, validation_data=(x_test, y_test))


ValueError: Exception encountered when calling layer "reshape_5" (type Reshape).

total size of new array must be unchanged, input_shape = [32, 32, 64], output_shape = [64, 64]

Call arguments received by layer "reshape_5" (type Reshape):
  • inputs=tf.Tensor(shape=(None, 32, 32, 64), dtype=float32)

### DCGAN

In [25]:
import os
import tensorflow as tf
from tensorflow.keras import layers
from tensorflow.keras.datasets import cifar10
import numpy as np
import matplotlib.pyplot as plt


# Load and preprocess the CIFAR-10 dataset
(x_train, _), (_, _) = cifar10.load_data()
x_train = (x_train.astype("float32") - 127.5) / 127.5  # Normalize to [-1, 1]
batch_size = 128

In [28]:
# Create a TensorFlow Dataset
dataset = tf.data.Dataset.from_tensor_slices(x_train).shuffle(len(x_train)).batch(batch_size)

# Define the Generator network
def make_generator_model():
    model = tf.keras.Sequential()
    model.add(layers.Dense(4 * 4 * 256, use_bias=False, input_shape=(100,)))
    model.add(layers.BatchNormalization())
    model.add(layers.LeakyReLU())

    model.add(layers.Reshape((4, 4, 256)))
    assert model.output_shape == (None, 4, 4, 256)

    model.add(layers.Conv2DTranspose(128, (3, 3), strides=(1, 1), padding="same", use_bias=False))
    model.add(layers.BatchNormalization())
    model.add(layers.LeakyReLU())
    model.add(layers.Conv2DTranspose(128, (3, 3), strides=(2, 2), padding="same", use_bias=False))
    model.add(layers.BatchNormalization())
    model.add(layers.LeakyReLU())

    model.add(layers.Conv2DTranspose(64, (3, 3), strides=(1, 1), padding="same", use_bias=False))
    model.add(layers.BatchNormalization())
    model.add(layers.LeakyReLU())
    model.add(layers.Conv2DTranspose(64, (3, 3), strides=(2, 2), padding="same", use_bias=False))
    model.add(layers.BatchNormalization())
    model.add(layers.LeakyReLU())

    model.add(layers.Conv2DTranspose(3, (3, 3), strides=(2, 2), padding="same", use_bias=False, activation="tanh"))
    assert model.output_shape == (None, 32, 32, 3)
    model.summary()
    return model

# Define the Discriminator network
def make_discriminator_model():
    model = tf.keras.Sequential()
    model.add(layers.Conv2D(64, (3, 3), strides=(2, 2), padding="same", input_shape=[32, 32, 3]))
    model.add(layers.LeakyReLU())
#     model.add(layers.Dropout(0.3))

    model.add(layers.Conv2D(128, (3, 3), strides=(2, 2), padding="same"))
    model.add(layers.LeakyReLU())
#     model.add(layers.Dropout(0.3))

    model.add(layers.Conv2D(256, (3, 3), strides=(2, 2), padding="same"))
    model.add(layers.LeakyReLU())

    model.add(layers.Flatten())
    model.add(layers.Dense(1))
    model.summary()
    return model

# Create the Generator and Discriminator models
generator = make_generator_model()
discriminator = make_discriminator_model()

Model: "sequential_9"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 dense_10 (Dense)            (None, 4096)              409600    
                                                                 
 batch_normalization_18 (Bat  (None, 4096)             16384     
 chNormalization)                                                
                                                                 
 leaky_re_lu_22 (LeakyReLU)  (None, 4096)              0         
                                                                 
 reshape_11 (Reshape)        (None, 4, 4, 256)         0         
                                                                 
 conv2d_transpose_16 (Conv2D  (None, 4, 4, 128)        294912    
 Transpose)                                                      
                                                                 
 batch_normalization_19 (Bat  (None, 4, 4, 128)       

In [29]:
# Define the loss functions
cross_entropy = tf.keras.losses.BinaryCrossentropy(from_logits=True)

# Define the generator loss
def generator_loss(fake_output):
    return cross_entropy(tf.ones_like(fake_output), fake_output)

# Define the discriminator loss
def discriminator_loss(real_output, fake_output):
    real_loss = cross_entropy(tf.ones_like(real_output), real_output)
    fake_loss = cross_entropy(tf.zeros_like(fake_output), fake_output)
    total_loss = real_loss + fake_loss
    return total_loss

# Define the optimizer for both networks
generator_optimizer = tf.keras.optimizers.Adam(1e-4)
discriminator_optimizer = tf.keras.optimizers.Adam(1e-4)

# Define the training loop
@tf.function
def train_step(images):
    noise = tf.random.normal([batch_size, 100])

    with tf.GradientTape() as gen_tape, tf.GradientTape() as disc_tape:
        generated_images = generator(noise, training=True)

        real_output = discriminator(images, training=True)
        fake_output = discriminator(generated_images, training=True)

        gen_loss = generator_loss(fake_output)
        disc_loss = discriminator_loss(real_output, fake_output)

    gradients_of_generator = gen_tape.gradient(gen_loss, generator.trainable_variables)
    gradients_of_discriminator = disc_tape.gradient(disc_loss, discriminator.trainable_variables)

    generator_optimizer.apply_gradients(zip(gradients_of_generator, generator.trainable_variables))
    discriminator_optimizer.apply_gradients(zip(gradients_of_discriminator, discriminator.trainable_variables))

# Define the training function
def train(dataset, epochs):
    for epoch in range(epochs):
        for image_batch in dataset:
            train_step(image_batch)

        # Generate images after each epoch to visualize progress
        generate_and_save_images(generator, epoch + 1)

# Generate and save images
def generate_and_save_images(model, epoch):
    noise = tf.random.normal([16, 100])
    generated_images = model(noise, training=False)
    generated_images = 0.5 * generated_images + 0.5  # Rescale images to [0, 1]

    # Create the directory if it doesn't exist
    os.makedirs("generated_images", exist_ok=True)

    fig, axs = plt.subplots(4, 4, figsize=(8, 8))
    count = 0
    for i in range(4):
        for j in range(4):
            axs[i, j].imshow(generated_images[count, :, :, :])
            axs[i, j].axis("off")
            count += 1
    if epoch % 100 == 0:
        plt.savefig("generated_images/image_at_epoch_{:04d}.png".format(epoch))
        plt.close()

In [None]:
# Train the DCGAN
epochs = 1000
train(dataset, epochs)

  fig, axs = plt.subplots(4, 4, figsize=(8, 8))
