In [7]:
# Import necessary libraries
import tensorflow as tf
from tensorflow.keras import layers, Model
import matplotlib.pyplot as plt

# Step 1: Define the Deformable Convolution Layer
# This layer applies a deformable convolution operation with learnable offsets to input images
import tensorflow as tf
from tensorflow.keras import layers

class DeformableConv2D(layers.Layer):
    def __init__(self, filters, kernel_size, strides=1, padding='same', **kwargs):
        super(DeformableConv2D, self).__init__(**kwargs)
        self.filters = filters
        self.kernel_size = kernel_size[0]
        self.strides = strides
        self.padding = padding

        # Convolution layer to generate offset values (how the kernel moves)
        # Set dilation_rate to 1 when strides > 1
        dilation_rate = 1 if strides > 1 else strides
        self.offset_conv = layers.Conv2D(2 * self.kernel_size * self.kernel_size, (self.kernel_size, self.kernel_size), padding=padding, strides=strides, dilation_rate=dilation_rate)

        # Convolution layer to generate modulation values (how much we scale the pixels)
        # Set dilation_rate to 1 when strides > 1
        self.modulation_conv = layers.Conv2D(self.kernel_size * self.kernel_size, (self.kernel_size, self.kernel_size), padding=padding, strides=strides, dilation_rate=dilation_rate)

        # Final convolutional layer to generate the output features
        self.main_conv = layers.Conv2D(filters, (self.kernel_size, self.kernel_size), padding=padding, strides=strides)

    # ... (rest of the code remains the same) ...
    # ... (rest of the code remains the same) ...
    def build(self, input_shape):
        # This method is used to initialize the layer's state
        self.input_shape = input_shape  # Save the input shape for future use

    def call(self, inputs):
        # Generate offsets for the convolution grid
        offset = self.offset_conv(inputs)

        # Generate modulation weights
        modulation = tf.sigmoid(self.modulation_conv(inputs))  # Sigmoid to get values between 0 and 1

        # Create a grid of indices based on the image dimensions
        grid = tf.meshgrid(
            tf.range(tf.shape(inputs)[1]), tf.range(tf.shape(inputs)[2]), indexing='ij'
        )
        grid = tf.stack(grid, axis=-1)  # Shape becomes [height, width, 2]
        grid = tf.cast(grid, tf.float32)  # Cast grid indices to float32

        # Expand grid and offset to match the batch size and input dimensions
        batch_size = tf.shape(inputs)[0]
        grid = tf.tile(tf.expand_dims(grid, 0), [batch_size, 1, 1, 1])  # Shape becomes [batch_size, height, width, 2]

        # Apply offsets to the grid, ensuring the dimensions match
        offset_grid = grid + offset  # Shape becomes [batch_size, height, width, 2]

        # Sample the input image based on the adjusted grid (deformable sampling)
        sampled_inputs = self._deformable_sample(inputs, offset_grid)

        # Scale the sampled inputs based on the modulation values
        sampled_inputs = sampled_inputs * modulation

        # Apply the final convolution layer
        return self.main_conv(sampled_inputs)

    def _deformable_sample(self, inputs, offset_grid):
        """Function to sample the inputs based on offset grid (not implemented here)."""
        # This is a placeholder function for the deformable sampling process.
        # In practice, you would need to implement or use a function to sample the input image
        # based on the offset_grid. This is typically done using bilinear interpolation.
        # For now, we'll assume that you have a working method for sampling.
        return inputs  # Placeholder (in practice, you will sample using the offset_grid)





# Step 2: Build the Generator Model
# The generator creates fake images from random noise input
def build_generator():
    inputs = layers.Input(shape=(128, 128, 3))  # Input is an image of size 128x128 with 3 channels (RGB)

    # Convolutional layer to downsample the input
    x = layers.Conv2D(64, (3, 3), strides=2, padding='same', activation='relu')(inputs)
    x = layers.BatchNormalization()(x)  # Apply batch normalization for stable training

    # Deformable Convolution layer to further downsample with deformable convolution
    x = DeformableConv2D(128, (3, 3), strides=2)(x)

    # Upsampling via transposed convolution (deconvolution) to generate larger feature maps
    x = layers.Conv2DTranspose(64, (3, 3), strides=2, padding='same', activation='relu')(x)

    # Final convolution layer to generate the output image
    x = layers.Conv2D(3, (3, 3), padding='same', activation='tanh')(x)  # Output image with 3 channels (RGB)

    return Model(inputs, x, name="Generator")


# Step 3: Build the Discriminator Model
# The discriminator distinguishes between real and fake images
def build_discriminator():
    inputs = layers.Input(shape=(128, 128, 3))  # Input is an image of size 128x128 with 3 channels (RGB)

    # Convolutional layers to extract features from the image
    x = layers.Conv2D(64, (3, 3), strides=2, padding='same', activation='relu')(inputs)
    x = layers.BatchNormalization()(x)  # Batch normalization to stabilize training

    x = layers.Conv2D(128, (3, 3), strides=2, padding='same', activation='relu')(x)

    # Flatten the output and apply a fully connected layer for classification
    x = layers.Flatten()(x)
    x = layers.Dense(1, activation='sigmoid')(x)  # Sigmoid activation to classify between real (1) and fake (0)

    return Model(inputs, x, name="Discriminator")


# Step 4: Compile the Models
generator = build_generator()  # Create the generator
discriminator = build_discriminator()  # Create the discriminator

# Compile the discriminator with Adam optimizer and binary crossentropy loss
discriminator.compile(optimizer=tf.keras.optimizers.Adam(1e-4), loss='binary_crossentropy')
discriminator.trainable = False  # Freeze discriminator during generator training

# Create the GAN model by combining the generator and discriminator
gan_input = layers.Input(shape=(128, 128, 3))  # Input is a random noise vector of size 128x128x3
generated_image = generator(gan_input)  # Pass input noise through the generator
gan_output = discriminator(generated_image)  # Classify the generated image using the discriminator
gan = Model(gan_input, gan_output)

# Compile the GAN model
gan.compile(optimizer=tf.keras.optimizers.Adam(1e-4), loss='binary_crossentropy')


# Step 5: CIFAR-10 Dataset Preparation
# Load CIFAR-10 dataset, preprocess and normalize the images
def preprocess_images(image):
    # Resize the image to 128x128
    image = tf.image.resize(image, [128, 128])
    return (image - 127.5) / 127.5  # Normalize the pixel values to [-1, 1]

# Load the CIFAR-10 dataset from TensorFlow's built-in datasets
(x_train, _), (_, _) = tf.keras.datasets.cifar10.load_data()

# Preprocess and normalize the images
x_train = preprocess_images(x_train)

# Convert the images into a TensorFlow dataset and batch them
dataset = tf.data.Dataset.from_tensor_slices(x_train)
dataset = dataset.shuffle(1000).batch(32)  # Shuffle and batch the dataset


# Step 6: Training Loop
# Function to train the GAN
def train(generator, discriminator, gan, dataset, epochs=50):
    for epoch in range(epochs):
        for real_images in dataset:
            batch_size = real_images.shape[0]  # Get the batch size

            # Generate fake images using the generator
            noise = tf.random.normal((batch_size, 128, 128, 3))  # Random noise for generator input
            fake_images = generator(noise)  # Generate fake images from the noise

            # Create labels for real and fake images (1 for real, 0 for fake)
            real_labels = tf.ones((batch_size, 1))  # Label for real images
            fake_labels = tf.zeros((batch_size, 1))  # Label for fake images

            # Train the discriminator with real images
            discriminator_loss_real = discriminator.train_on_batch(real_images, real_labels)

            # Train the discriminator with fake images
            discriminator_loss_fake = discriminator.train_on_batch(fake_images, fake_labels)

            # Average discriminator loss
            discriminator_loss = 0.5 * (discriminator_loss_real + discriminator_loss_fake)

            # Train the generator via the GAN (freeze discriminator)
            generator_loss = gan.train_on_batch(noise, real_labels)

        # Print the loss values at each epoch
        print(f"Epoch {epoch + 1}/{epochs}, Discriminator Loss: {discriminator_loss:.4f}, Generator Loss: {generator_loss:.4f}")


# Step 7: Generate and Visualize Results
# After training, this function visualizes generated images
def generate_images(generator, num_images=5):
    noise = tf.random.normal((num_images, 128, 128, 3))  # Generate random noise
    generated_images = generator(noise)  # Generate images from noise

    # Plot the generated images
    plt.figure(figsize=(10, 10))
    for i in range(num_images):
        plt.subplot(1, num_images, i+1)
        plt.imshow((generated_images[i] + 1) / 2)  # Rescale image to [0, 1] for display
        plt.axis('off')
    plt.show()


# Step 8: Run Training and Visualization
train(generator, discriminator, gan, dataset, epochs=50)  # Train the GAN for 50 epochs
generate_images(generator, num_images=5)  # Generate and display 5 images after training


ValueError: Exception encountered when calling DeformableConv2D.call().

[1mCould not automatically infer the output shape / dtype of 'deformable_conv2d_6' (of type DeformableConv2D). Either the `DeformableConv2D.call()` method is incorrect, or you need to implement the `DeformableConv2D.compute_output_spec() / compute_output_shape()` method. Error encountered:

Dimensions must be equal, but are 64 and 32 for '{{node add}} = AddV2[T=DT_FLOAT](Tile, conv2d_16_1/add)' with input shapes: [?,64,64,2], [?,32,32,18].[0m

Arguments received by DeformableConv2D.call():
  • args=('<KerasTensor shape=(None, 64, 64, 64), dtype=float32, sparse=False, name=keras_tensor_20>',)
  • kwargs=<class 'inspect._empty'>

In [9]:
import tensorflow as tf
from tensorflow.keras import layers, Model
import matplotlib.pyplot as plt

# Step 1: Define the Deformable Convolution Layer
class DeformableConv2D(layers.Layer):
    def __init__(self, filters, kernel_size, strides=1, padding='same', **kwargs):
        super(DeformableConv2D, self).__init__(**kwargs)
        self.filters = filters
        self.kernel_size = kernel_size[0]
        self.strides = strides
        self.padding = padding

        # Convolution layer to generate offset values (how the kernel moves)
        self.offset_conv = layers.Conv2D(2 * self.kernel_size * self.kernel_size, (self.kernel_size, self.kernel_size), padding=padding, strides=strides)

        # Convolution layer to generate modulation values (how much we scale the pixels)
        self.modulation_conv = layers.Conv2D(self.kernel_size * self.kernel_size, (self.kernel_size, self.kernel_size), padding=padding, strides=strides)

        # Final convolutional layer to generate the output features
        self.main_conv = layers.Conv2D(filters, (self.kernel_size, self.kernel_size), padding=padding, strides=strides)

    def build(self, input_shape):
        self.input_shape = input_shape  # Save the input shape for future use

    def call(self, inputs):
        # Generate offsets for the convolution grid
        offset = self.offset_conv(inputs)  # Shape: [batch_size, height, width, 2 * kernel_size^2]

        # Generate modulation weights
        modulation = tf.sigmoid(self.modulation_conv(inputs))  # Shape: [batch_size, height, width, kernel_size^2]

        # Create a grid of indices based on the image dimensions
        batch_size = tf.shape(inputs)[0]
        height, width = inputs.shape[1], inputs.shape[2]
        grid = tf.meshgrid(
            tf.range(height), tf.range(width), indexing='ij'
        )
        grid = tf.stack(grid, axis=-1)  # Shape becomes [height, width, 2]
        grid = tf.cast(grid, tf.float32)  # Cast grid indices to float32

        # Expand grid to match batch size: Shape becomes [batch_size, height, width, 2]
        grid = tf.tile(tf.expand_dims(grid, 0), [batch_size, 1, 1, 1])

        # Reshape and apply offsets to the grid, ensuring the dimensions match
        offset = tf.reshape(offset, [batch_size, height, width, 2, self.kernel_size, self.kernel_size])  # Shape: [batch_size, height, width, 2, kernel_size, kernel_size]
        offset_grid = grid[..., None, :] + offset  # Shape becomes [batch_size, height, width, kernel_size, kernel_size, 2]

        # Sample the input image based on the adjusted grid (deformable sampling)
        sampled_inputs = self._deformable_sample(inputs, offset_grid)

        # Scale the sampled inputs based on the modulation values
        sampled_inputs = sampled_inputs * modulation

        # Apply the final convolution layer
        return self.main_conv(sampled_inputs)

    def _deformable_sample(self, inputs, offset_grid):
        """Function to sample the inputs based on offset grid (not implemented here)."""
        # This is a placeholder function for the deformable sampling process.
        # In practice, you would need to implement or use a function to sample the input image
        # based on the offset_grid. This is typically done using bilinear interpolation.
        # For now, we'll assume that you have a working method for sampling.
        return inputs  # Placeholder (in practice, you will sample using the offset_grid)


# Step 2: Build the Generator Model
def build_generator():
    inputs = layers.Input(shape=(128, 128, 3))  # Input is an image of size 128x128 with 3 channels (RGB)

    # Convolutional layer to downsample the input
    x = layers.Conv2D(64, (3, 3), strides=2, padding='same', activation='relu')(inputs)
    x = layers.BatchNormalization()(x)  # Apply batch normalization for stable training

    # Deformable Convolution layer to further downsample with deformable convolution
    x = DeformableConv2D(128, (3, 3), strides=2)(x)

    # Upsampling via transposed convolution (deconvolution) to generate larger feature maps
    x = layers.Conv2DTranspose(64, (3, 3), strides=2, padding='same', activation='relu')(x)

    # Final convolution layer to generate the output image
    x = layers.Conv2D(3, (3, 3), padding='same', activation='tanh')(x)  # Output image with 3 channels (RGB)

    return Model(inputs, x, name="Generator")


# Step 3: Build the Discriminator Model
def build_discriminator():
    inputs = layers.Input(shape=(128, 128, 3))  # Input is an image of size 128x128 with 3 channels (RGB)

    # Convolutional layers to extract features from the image
    x = layers.Conv2D(64, (3, 3), strides=2, padding='same', activation='relu')(inputs)
    x = layers.BatchNormalization()(x)  # Batch normalization to stabilize training

    x = layers.Conv2D(128, (3, 3), strides=2, padding='same', activation='relu')(x)

    # Flatten the output and apply a fully connected layer for classification
    x = layers.Flatten()(x)
    x = layers.Dense(1, activation='sigmoid')(x)  # Sigmoid activation to classify between real (1) and fake (0)

    return Model(inputs, x, name="Discriminator")


# Step 4: Compile the Models
generator = build_generator()  # Create the generator
discriminator = build_discriminator()  # Create the discriminator

# Compile the discriminator with Adam optimizer and binary crossentropy loss
discriminator.compile(optimizer=tf.keras.optimizers.Adam(1e-4), loss='binary_crossentropy')
discriminator.trainable = False  # Freeze discriminator during generator training

# Create the GAN model by combining the generator and discriminator
gan_input = layers.Input(shape=(128, 128, 3))  # Input is a random noise vector of size 128x128x3
generated_image = generator(gan_input)  # Pass input noise through the generator
gan_output = discriminator(generated_image)  # Classify the generated image using the discriminator
gan = Model(gan_input, gan_output)

# Compile the GAN model
gan.compile(optimizer=tf.keras.optimizers.Adam(1e-4), loss='binary_crossentropy')


# Step 5: CIFAR-10 Dataset Preparation
def preprocess_images(image):
    # Resize the image to 128x128
    image = tf.image.resize(image, [128, 128])
    return (image - 127.5) / 127.5  # Normalize the pixel values to [-1, 1]

# Load the CIFAR-10 dataset from TensorFlow's built-in datasets
(x_train, _), (_, _) = tf.keras.datasets.cifar10.load_data()

# Preprocess and normalize the images
x_train = preprocess_images(x_train)

# Convert the images into a TensorFlow dataset and batch them
dataset = tf.data.Dataset.from_tensor_slices(x_train)
dataset = dataset.shuffle(1000).batch(32)  # Shuffle and batch the dataset


# Step 6: Training Loop
def train(generator, discriminator, gan, dataset, epochs=50):
    for epoch in range(epochs):
        for real_images in dataset:
            batch_size = real_images.shape[0]  # Get the batch size

            # Generate fake images using the generator
            noise = tf.random.normal((batch_size, 128, 128, 3))  # Random noise for generator input
            fake_images = generator(noise)  # Generate fake images from the noise

            # Create labels for real and fake images (1 for real, 0 for fake)
            real_labels = tf.ones((batch_size, 1))  # Label for real images
            fake_labels = tf.zeros((batch_size, 1))  # Label for fake images

            # Train the discriminator
            with tf.GradientTape() as tape:
                real_output = discriminator(real_images)  # Get discriminator output for real images
                fake_output = discriminator(fake_images)  # Get discriminator output for fake images
                d_loss_real = tf.keras.losses.binary_crossentropy(real_labels, real_output)
                d_loss_fake = tf.keras.losses.binary_crossentropy(fake_labels, fake_output)
                d_loss = d_loss_real + d_loss_fake  # Total discriminator loss
            grads = tape.gradient(d_loss, discriminator.trainable_variables)  # Compute gradients
            discriminator.optimizer.apply_gradients(zip(grads, discriminator.trainable_variables))  # Apply gradients

            # Train the generator (via the GAN model)
            with tf.GradientTape() as tape:
                fake_images = generator(noise)  # Generate fake images
                fake_output = discriminator(fake_images)  # Get discriminator output for fake images
                g_loss = tf.keras.losses.binary_crossentropy(real_labels, fake_output)  # Generator loss
            grads = tape.gradient(g_loss, generator.trainable_variables)  # Compute gradients
            gan.optimizer.apply_gradients(zip(grads, generator.trainable_variables))  # Apply gradients

        print(f'Epoch {epoch + 1}/{epochs} | D Loss: {d_loss.numpy()} | G Loss: {g_loss.numpy()}')

        if epoch % 10 == 0:
            generate_images(generator)

# Generate images function for visualization
def generate_images(generator, num_images=10):
    noise = tf.random.normal((num_images, 128, 128, 3))  # Random noise
    generated_images = generator(noise)  # Generate images from noise

    # Rescale the images to [0, 255] range for visualization
    generated_images = (generated_images + 1) * 127.5  # Rescale from [-1, 1] to [0, 255]

    # Display the generated images
    plt.figure(figsize=(10, 10))
    for i in range(num_images):
        plt.subplot(1, num_images, i+1)
        plt.imshow(generated_images[i].numpy().astype("uint8"))
        plt.axis('off')
    plt.show()

# Step 7: Start training the GAN
train(generator, discriminator, gan, dataset, epochs=50)

# Generate and visualize some fake images after training
generate_images(generator)


ValueError: Exception encountered when calling DeformableConv2D.call().

[1mCould not automatically infer the output shape / dtype of 'deformable_conv2d_8' (of type DeformableConv2D). Either the `DeformableConv2D.call()` method is incorrect, or you need to implement the `DeformableConv2D.compute_output_spec() / compute_output_shape()` method. Error encountered:

Dimensions must be equal, but are 64 and 2 for '{{node add}} = AddV2[T=DT_FLOAT](strided_slice_1, Reshape)' with input shapes: [?,64,64,1,2], [?,64,64,2,3,3].[0m

Arguments received by DeformableConv2D.call():
  • args=('<KerasTensor shape=(None, 64, 64, 64), dtype=float32, sparse=False, name=keras_tensor_26>',)
  • kwargs=<class 'inspect._empty'>

In [10]:
import tensorflow as tf
from tensorflow.keras import layers, Model
import matplotlib.pyplot as plt

# Step 1: Define the Deformable Convolution Layer
class DeformableConv2D(layers.Layer):
    def __init__(self, filters, kernel_size, strides=1, padding='same', **kwargs):
        super(DeformableConv2D, self).__init__(**kwargs)
        self.filters = filters
        self.kernel_size = kernel_size[0]
        self.strides = strides
        self.padding = padding

        # Convolution layer to generate offset values (how the kernel moves)
        self.offset_conv = layers.Conv2D(2 * self.kernel_size * self.kernel_size, (self.kernel_size, self.kernel_size), padding=padding, strides=strides)

        # Convolution layer to generate modulation values (how much we scale the pixels)
        self.modulation_conv = layers.Conv2D(self.kernel_size * self.kernel_size, (self.kernel_size, self.kernel_size), padding=padding, strides=strides)

        # Final convolutional layer to generate the output features
        self.main_conv = layers.Conv2D(filters, (self.kernel_size, self.kernel_size), padding=padding, strides=strides)

    def build(self, input_shape):
        self.input_shape = input_shape  # Save the input shape for future use

    def call(self, inputs):
        # Generate offsets for the convolution grid
        offset = self.offset_conv(inputs)  # Shape: [batch_size, height, width, 2 * kernel_size^2]

        # Generate modulation weights
        modulation = tf.sigmoid(self.modulation_conv(inputs))  # Shape: [batch_size, height, width, kernel_size^2]

        # Create a grid of indices based on the image dimensions
        batch_size = tf.shape(inputs)[0]
        height, width = inputs.shape[1], inputs.shape[2]
        grid = tf.meshgrid(
            tf.range(height), tf.range(width), indexing='ij'
        )
        grid = tf.stack(grid, axis=-1)  # Shape becomes [height, width, 2]
        grid = tf.cast(grid, tf.float32)  # Cast grid indices to float32

        # Expand grid to match batch size: Shape becomes [batch_size, height, width, 2]
        grid = tf.tile(tf.expand_dims(grid, 0), [batch_size, 1, 1, 1])

        # Reshape and apply offsets to the grid, ensuring the dimensions match
        offset = tf.reshape(offset, [batch_size, height, width, self.kernel_size, self.kernel_size, 2])  # Shape: [batch_size, height, width, kernel_size, kernel_size, 2]
        grid_expanded = grid[..., None, None, :]  # Shape: [batch_size, height, width, 1, 1, 2]

        offset_grid = grid_expanded + offset  # Shape becomes [batch_size, height, width, kernel_size, kernel_size, 2]

        # Sample the input image based on the adjusted grid (deformable sampling)
        sampled_inputs = self._deformable_sample(inputs, offset_grid)

        # Scale the sampled inputs based on the modulation values
        sampled_inputs = sampled_inputs * modulation

        # Apply the final convolution layer
        return self.main_conv(sampled_inputs)

    def _deformable_sample(self, inputs, offset_grid):
        """Function to sample the inputs based on offset grid (not implemented here)."""
        # This is a placeholder function for the deformable sampling process.
        # In practice, you would need to implement or use a function to sample the input image
        # based on the offset_grid. This is typically done using bilinear interpolation.
        # For now, we'll assume that you have a working method for sampling.
        return inputs  # Placeholder (in practice, you will sample using the offset_grid)


# Step 2: Build the Generator Model
def build_generator():
    inputs = layers.Input(shape=(128, 128, 3))  # Input is an image of size 128x128 with 3 channels (RGB)

    # Convolutional layer to downsample the input
    x = layers.Conv2D(64, (3, 3), strides=2, padding='same', activation='relu')(inputs)
    x = layers.BatchNormalization()(x)  # Apply batch normalization for stable training

    # Deformable Convolution layer to further downsample with deformable convolution
    x = DeformableConv2D(128, (3, 3), strides=2)(x)

    # Upsampling via transposed convolution (deconvolution) to generate larger feature maps
    x = layers.Conv2DTranspose(64, (3, 3), strides=2, padding='same', activation='relu')(x)

    # Final convolution layer to generate the output image
    x = layers.Conv2D(3, (3, 3), padding='same', activation='tanh')(x)  # Output image with 3 channels (RGB)

    return Model(inputs, x, name="Generator")


# Step 3: Build the Discriminator Model
def build_discriminator():
    inputs = layers.Input(shape=(128, 128, 3))  # Input is an image of size 128x128 with 3 channels (RGB)

    # Convolutional layers to extract features from the image
    x = layers.Conv2D(64, (3, 3), strides=2, padding='same', activation='relu')(inputs)
    x = layers.BatchNormalization()(x)  # Batch normalization to stabilize training

    x = layers.Conv2D(128, (3, 3), strides=2, padding='same', activation='relu')(x)

    # Flatten the output and apply a fully connected layer for classification
    x = layers.Flatten()(x)
    x = layers.Dense(1, activation='sigmoid')(x)  # Sigmoid activation to classify between real (1) and fake (0)

    return Model(inputs, x, name="Discriminator")


# Step 4: Compile the Models
generator = build_generator()  # Create the generator
discriminator = build_discriminator()  # Create the discriminator

# Compile the discriminator with Adam optimizer and binary crossentropy loss
discriminator.compile(optimizer=tf.keras.optimizers.Adam(1e-4), loss='binary_crossentropy')
discriminator.trainable = False  # Freeze discriminator during generator training

# Create the GAN model by combining the generator and discriminator
gan_input = layers.Input(shape=(128, 128, 3))  # Input is a random noise vector of size 128x128x3
generated_image = generator(gan_input)  # Pass input noise through the generator
gan_output = discriminator(generated_image)  # Classify the generated image using the discriminator
gan = Model(gan_input, gan_output)

# Compile the GAN model
gan.compile(optimizer=tf.keras.optimizers.Adam(1e-4), loss='binary_crossentropy')


# Step 5: CIFAR-10 Dataset Preparation
def preprocess_images(image):
    # Resize the image to 128x128
    image = tf.image.resize(image, [128, 128])
    return (image - 127.5) / 127.5  # Normalize the pixel values to [-1, 1]

# Load the CIFAR-10 dataset from TensorFlow's built-in datasets
(x_train, _), (_, _) = tf.keras.datasets.cifar10.load_data()

# Preprocess and normalize the images
x_train = preprocess_images(x_train)

# Convert the images into a TensorFlow dataset and batch them
dataset = tf.data.Dataset.from_tensor_slices(x_train)
dataset = dataset.shuffle(1000).batch(32)  # Shuffle and batch the dataset


# Step 6: Training Loop
def train(generator, discriminator, gan, dataset, epochs=50):
    for epoch in range(epochs):
        for real_images in dataset:
            batch_size = real_images.shape[0]  # Get the batch size

            # Generate fake images using the generator
            noise = tf.random.normal((batch_size, 128, 128, 3))  # Random noise for generator input
            fake_images = generator(noise)  # Generate fake images from the noise

            # Create labels for real and fake images (1 for real, 0 for fake)
            real_labels = tf.ones((batch_size, 1))  # Label for real images
            fake_labels = tf.zeros((batch_size, 1))  # Label for fake images

            # Train the discriminator
            with tf.GradientTape() as tape:
                real_output = discriminator(real_images)  # Get discriminator output for real images
                fake_output = discriminator(fake_images)  # Get discriminator output for fake images
                d_loss_real = tf.keras.losses.binary_crossentropy(real_labels, real_output)
                d_loss_fake = tf.keras.losses.binary_crossentropy(fake_labels, fake_output)
                d_loss = d_loss_real + d_loss_fake  # Total discriminator loss
            grads = tape.gradient(d_loss, discriminator.trainable_variables)  # Compute gradients
            discriminator.optimizer.apply_gradients(zip(grads, discriminator.trainable_variables))  # Apply gradients

            # Train the generator (via the GAN model)
            with tf.GradientTape() as tape:
                fake_images = generator(noise)  # Generate fake images
                fake_output = discriminator(fake_images)  # Get discriminator output for fake images
                g_loss = tf.keras.losses.binary_crossentropy(real_labels, fake_output)  # Generator loss
            grads = tape.gradient(g_loss, generator.trainable_variables)  # Compute gradients
            gan.optimizer.apply_gradients(zip(grads, generator.trainable_variables))  # Apply gradients

        print(f'Epoch {epoch + 1}/{epochs} | D Loss: {d_loss.numpy()} | G Loss: {g_loss.numpy()}')

        if epoch % 10 == 0:
            generate_images(generator)


# Generate images function for visualization
def generate_images(generator, num_images=10):
    noise = tf.random.normal((num_images, 128, 128, 3))  # Random noise
    generated_images = generator(noise)  # Generate images from noise

    # Rescale the images to [0, 255] range for visualization
    generated_images = (generated_images + 1) * 127.5  # Rescale from [-1, 1] to [0, 255]

    # Display the generated images
    plt.figure(figsize=(10, 10))
    for i in range(num_images):
        plt.subplot(1, num_images, i+1)
        plt.imshow(generated_images[i].numpy().astype("uint8"))
        plt.axis('off')
    plt.show()

# Step 7: Start training the GAN
train(generator, discriminator, gan, dataset, epochs=50)

# Generate and visualize some fake images after training
generate_images(generator)


ValueError: Exception encountered when calling DeformableConv2D.call().

[1mCould not automatically infer the output shape / dtype of 'deformable_conv2d_9' (of type DeformableConv2D). Either the `DeformableConv2D.call()` method is incorrect, or you need to implement the `DeformableConv2D.compute_output_spec() / compute_output_shape()` method. Error encountered:

Dimensions must be equal, but are 64 and 32 for '{{node mul}} = Mul[T=DT_FLOAT](Placeholder, Sigmoid)' with input shapes: [?,64,64,64], [?,32,32,9].[0m

Arguments received by DeformableConv2D.call():
  • args=('<KerasTensor shape=(None, 64, 64, 64), dtype=float32, sparse=False, name=keras_tensor_29>',)
  • kwargs=<class 'inspect._empty'>

In [11]:
import tensorflow as tf
from tensorflow.keras import layers, Model
import matplotlib.pyplot as plt

# Step 1: Define the Deformable Convolution Layer
class DeformableConv2D(layers.Layer):
    def __init__(self, filters, kernel_size, strides=1, padding='same', **kwargs):
        super(DeformableConv2D, self).__init__(**kwargs)
        self.filters = filters
        self.kernel_size = kernel_size[0]
        self.strides = strides
        self.padding = padding

        # Convolution layer to generate offset values (how the kernel moves)
        self.offset_conv = layers.Conv2D(2 * self.kernel_size * self.kernel_size, (self.kernel_size, self.kernel_size), padding=padding, strides=strides)

        # Convolution layer to generate modulation values (how much we scale the pixels)
        self.modulation_conv = layers.Conv2D(self.kernel_size * self.kernel_size, (self.kernel_size, self.kernel_size), padding=padding, strides=strides)

        # Final convolutional layer to generate the output features
        self.main_conv = layers.Conv2D(filters, (self.kernel_size, self.kernel_size), padding=padding, strides=strides)

    def build(self, input_shape):
        self.input_shape = input_shape  # Save the input shape for future use

    def call(self, inputs):
        # Generate offsets for the convolution grid
        offset = self.offset_conv(inputs)  # Shape: [batch_size, height, width, 2 * kernel_size^2]

        # Generate modulation weights
        modulation = tf.sigmoid(self.modulation_conv(inputs))  # Shape: [batch_size, height, width, kernel_size^2]

        # Create a grid of indices based on the image dimensions
        batch_size = tf.shape(inputs)[0]
        height, width = inputs.shape[1], inputs.shape[2]
        grid = tf.meshgrid(
            tf.range(height), tf.range(width), indexing='ij'
        )
        grid = tf.stack(grid, axis=-1)  # Shape becomes [height, width, 2]
        grid = tf.cast(grid, tf.float32)  # Cast grid indices to float32

        # Expand grid to match batch size: Shape becomes [batch_size, height, width, 2]
        grid = tf.tile(tf.expand_dims(grid, 0), [batch_size, 1, 1, 1])

        # Reshape and apply offsets to the grid, ensuring the dimensions match
        offset = tf.reshape(offset, [batch_size, height, width, self.kernel_size, self.kernel_size, 2])  # Shape: [batch_size, height, width, kernel_size, kernel_size, 2]
        grid_expanded = grid[..., None, None, :]  # Shape: [batch_size, height, width, 1, 1, 2]

        offset_grid = grid_expanded + offset  # Shape becomes [batch_size, height, width, kernel_size, kernel_size, 2]

        # Sample the input image based on the adjusted grid (deformable sampling)
        sampled_inputs = self._deformable_sample(inputs, offset_grid)

        # Ensure that modulation has the same spatial dimensions as sampled inputs
        modulation = tf.image.resize(modulation, [sampled_inputs.shape[1], sampled_inputs.shape[2]])  # Resize modulation to match the height/width

        # Scale the sampled inputs based on the modulation values
        sampled_inputs = sampled_inputs * modulation

        # Apply the final convolution layer
        return self.main_conv(sampled_inputs)

    def _deformable_sample(self, inputs, offset_grid):
        """Function to sample the inputs based on offset grid (not implemented here)."""
        # This is a placeholder function for the deformable sampling process.
        # In practice, you would need to implement or use a function to sample the input image
        # based on the offset_grid. This is typically done using bilinear interpolation.
        # For now, we'll assume that you have a working method for sampling.
        return inputs  # Placeholder (in practice, you will sample using the offset_grid)


# Step 2: Build the Generator Model
def build_generator():
    inputs = layers.Input(shape=(128, 128, 3))  # Input is an image of size 128x128 with 3 channels (RGB)

    # Convolutional layer to downsample the input
    x = layers.Conv2D(64, (3, 3), strides=2, padding='same', activation='relu')(inputs)
    x = layers.BatchNormalization()(x)  # Apply batch normalization for stable training

    # Deformable Convolution layer to further downsample with deformable convolution
    x = DeformableConv2D(128, (3, 3), strides=2)(x)

    # Upsampling via transposed convolution (deconvolution) to generate larger feature maps
    x = layers.Conv2DTranspose(64, (3, 3), strides=2, padding='same', activation='relu')(x)

    # Final convolution layer to generate the output image
    x = layers.Conv2D(3, (3, 3), padding='same', activation='tanh')(x)  # Output image with 3 channels (RGB)

    return Model(inputs, x, name="Generator")


# Step 3: Build the Discriminator Model
def build_discriminator():
    inputs = layers.Input(shape=(128, 128, 3))  # Input is an image of size 128x128 with 3 channels (RGB)

    # Convolutional layers to extract features from the image
    x = layers.Conv2D(64, (3, 3), strides=2, padding='same', activation='relu')(inputs)
    x = layers.BatchNormalization()(x)  # Batch normalization to stabilize training

    x = layers.Conv2D(128, (3, 3), strides=2, padding='same', activation='relu')(x)

    # Flatten the output and apply a fully connected layer for classification
    x = layers.Flatten()(x)
    x = layers.Dense(1, activation='sigmoid')(x)  # Sigmoid activation to classify between real (1) and fake (0)

    return Model(inputs, x, name="Discriminator")


# Step 4: Compile the Models
generator = build_generator()  # Create the generator
discriminator = build_discriminator()  # Create the discriminator

# Compile the discriminator with Adam optimizer and binary crossentropy loss
discriminator.compile(optimizer=tf.keras.optimizers.Adam(1e-4), loss='binary_crossentropy')
discriminator.trainable = False  # Freeze discriminator during generator training

# Create the GAN model by combining the generator and discriminator
gan_input = layers.Input(shape=(128, 128, 3))  # Input is a random noise vector of size 128x128x3
generated_image = generator(gan_input)  # Pass input noise through the generator
gan_output = discriminator(generated_image)  # Classify the generated image using the discriminator
gan = Model(gan_input, gan_output)

# Compile the GAN model
gan.compile(optimizer=tf.keras.optimizers.Adam(1e-4), loss='binary_crossentropy')


# Step 5: CIFAR-10 Dataset Preparation
def preprocess_images(image):
    # Resize the image to 128x128
    image = tf.image.resize(image, [128, 128])
    return (image - 127.5) / 127.5  # Normalize the pixel values to [-1, 1]

# Load the CIFAR-10 dataset from TensorFlow's built-in datasets
(x_train, _), (_, _) = tf.keras.datasets.cifar10.load_data()

# Preprocess and normalize the images
x_train = preprocess_images(x_train)

# Convert the images into a TensorFlow dataset and batch them
dataset = tf.data.Dataset.from_tensor_slices(x_train)
dataset = dataset.shuffle(1000).batch(32)  # Shuffle and batch the dataset


# Step 6: Training Loop
def train(generator, discriminator, gan, dataset, epochs=50):
    for epoch in range(epochs):
        for real_images in dataset:
            batch_size = real_images.shape[0]
            noise = tf.random.normal([batch_size, 128, 128, 3])  # Random noise to generate fake images
            fake_images = generator(noise)  # Generate fake images from noise

            # Labels for real and fake images
            real_labels = tf.ones((batch_size, 1))  # Label for real images
            fake_labels = tf.zeros((batch_size, 1))  # Label for fake images

            # Train the discriminator
            with tf.GradientTape() as tape:
                real_output = discriminator(real_images)  # Get discriminator output for real images
                fake_output = discriminator(fake_images)  # Get discriminator output for fake images
                d_loss_real = tf.keras.losses.binary_crossentropy(real_labels, real_output)
                d_loss_fake = tf.keras.losses.binary_crossentropy(fake_labels, fake_output)
                d_loss = d_loss_real + d_loss_fake  # Total discriminator loss
            grads = tape.gradient(d_loss, discriminator.trainable_variables)  # Compute gradients
            discriminator.optimizer.apply_gradients(zip(grads, discriminator.trainable_variables))  # Apply gradients

            # Train the generator (via the GAN model)
            with tf.GradientTape() as tape:
                fake_images = generator(noise)  # Generate fake images
                fake_output = discriminator(fake_images)  # Get discriminator output for fake images
                g_loss = tf.keras.losses.binary_crossentropy(real_labels, fake_output)  # Generator loss
            grads = tape.gradient(g_loss, generator.trainable_variables)  # Compute gradients
            gan.optimizer.apply_gradients(zip(grads, generator.trainable_variables))  # Apply gradients

        print(f'Epoch {epoch + 1}/{epochs} | D Loss: {d_loss.numpy()} | G Loss: {g_loss.numpy()}')

        if epoch % 10 == 0:
            generate_images(generator)


# Generate images function for visualization
def generate_images(generator, num_images=10):
    noise = tf.random.normal((num_images, 128, 128, 3))  # Random noise
    generated_images = generator(noise)  # Generate images from noise

    # Rescale the images to [0, 255] range for visualization
    generated_images = (generated_images + 1) * 127.5  # Rescale from [-1, 1] to [0, 255]

    # Display the generated images
    plt.figure(figsize=(10, 10))
    for i in range(num_images):
        plt.subplot(1, num_images, i+1)
        plt.imshow(generated_images[i].numpy().astype("uint8"))
        plt.axis('off')
    plt.show()

# Step 7: Start training the GAN
train(generator, discriminator, gan, dataset, epochs=50)

# Generate and visualize some fake images after training
generate_images(generator)


ValueError: Exception encountered when calling DeformableConv2D.call().

[1mCould not automatically infer the output shape / dtype of 'deformable_conv2d_10' (of type DeformableConv2D). Either the `DeformableConv2D.call()` method is incorrect, or you need to implement the `DeformableConv2D.compute_output_spec() / compute_output_shape()` method. Error encountered:

Dimensions must be equal, but are 64 and 9 for '{{node mul}} = Mul[T=DT_FLOAT](Placeholder, resize/ResizeBilinear)' with input shapes: [?,64,64,64], [?,64,64,9].[0m

Arguments received by DeformableConv2D.call():
  • args=('<KerasTensor shape=(None, 64, 64, 64), dtype=float32, sparse=False, name=keras_tensor_32>',)
  • kwargs=<class 'inspect._empty'>