### Installing Packages


In [1]:
import tensorflow as tf
from tensorflow.keras import layers, models
import numpy as np
import matplotlib.pyplot as plt
import cv2 as cv2

# Check TensorFlow version

print(tf.__version__)
print (cv2.__version__)

2.16.1
4.10.0


In [2]:
try:
    print("Num GPUs Available: ", len(tf.config.experimental.list_physical_devices('GPU')))
except:
    print("TensorFlow setup not working correctly.")

Num GPUs Available:  0


### Load Array of data

In [3]:
import os as os
# Directory paths
person_images_dir = '/kaggle/input/high-resolution-viton-zalando-dataset/test/agnostic-v3.2'
cloth_images_dir = '/kaggle/input/high-resolution-viton-zalando-dataset/test/cloth'
mask_images_dir = '/kaggle/input/high-resolution-viton-zalando-dataset/test/image-parse-v3'
output_images_dir= '/kaggle/input/high-resolution-viton-zalando-dataset/test/image'

# Load all images in the directory
person_images = []
cloth_images = []
mask_images = []
output_images = []

for person_filename, cloth_filename , mask_filename, output_filename in zip(sorted(os.listdir(person_images_dir)), sorted(os.listdir(cloth_images_dir)), sorted (os.listdir(mask_images_dir)), sorted(os.listdir(output_images_dir))):
    person_img_path = os.path.join(person_images_dir, person_filename)
    cloth_img_path = os.path.join(cloth_images_dir, cloth_filename)
    mask_img_path = os.path.join(mask_images_dir, mask_filename)
    output_img_path = os.path.join(output_images_dir, output_filename)
    
    # Load, resize, and normalize the images
    person_image = cv2.imread(person_img_path)
    person_image = cv2.cvtColor(person_image, cv2.COLOR_BGR2RGB)
    person_image = cv2.resize(person_image, (128,128)) / 255.0
    
    cloth_image = cv2.imread(cloth_img_path)
    cloth_image = cv2.cvtColor(cloth_image, cv2.COLOR_BGR2RGB)
    cloth_image = cv2.resize(cloth_image, (128,128)) / 255.0
    
    mask_image = cv2.imread(mask_img_path)
    mask_image = cv2.cvtColor(mask_image, cv2.COLOR_BGR2RGB)
    mask_image = cv2.resize(mask_image, (128,128)) / 255.0
    mask_image = np.mean(mask_image, axis=-1, keepdims=True)
    
    output_image = cv2.imread(output_img_path)
    output_image = cv2.cvtColor(output_image, cv2.COLOR_BGR2RGB)
    output_image = cv2.resize(output_image, (128,128)) / 255.0
    
    person_images.append(person_image)
    cloth_images.append(cloth_image)
    mask_images.append(mask_image)
    output_images.append(output_image)

# Convert to numpy arrays
person_images = np.array(person_images)
cloth_images = np.array(cloth_images)
mask_images = np.array(mask_images)
output_images = np.array(output_images)

# Print the shape of the arrays to verify
print(f"Loaded {person_images.shape[0]} person images.")
print(f"Loaded {cloth_images.shape[0]} cloth images.")
print(f"Loaded {mask_images.shape[0]} mask images.")
print(f"Loaded {output_images.shape[0]} output images.")


Loaded 2032 person images.
Loaded 2032 cloth images.
Loaded 2032 mask images.
Loaded 2032 output images.


### Build the deepUnet model

In [4]:
from tensorflow.keras import layers, models, losses, applications
import tensorflow as tf


# Using VGG19 for perceptual loss
# vgg = applications.VGG19(include_top=False, weights='imagenet', input_shape=(128, 128, 3))
# def perceptual_loss(y_true, y_pred):
#     vgg.trainable = False
#     feature_extractor = models.Model(inputs=vgg.input, outputs=[vgg.get_layer('block5_conv4').output])

#     y_true_features = feature_extractor(y_true)
#     y_pred_features = feature_extractor(y_pred)

#     return tf.reduce_mean(tf.square(y_true_features - y_pred_features))

def build_unet_virtual_tryon_model():
    inputs_person = layers.Input(shape=(128, 128, 3))
    inputs_cloth = layers.Input(shape=(128, 128, 3))
    inputs_mask = layers.Input(shape=(128, 128, 1))

    # Encoder for person image
    x_person_og = layers.Conv2D(64, (3, 3), activation='relu', padding='same')(inputs_person)
    x_person_skip = layers.MaxPooling2D((2, 2))(x_person_og)
    x_person = layers.Conv2D(128, (3, 3), activation='relu', padding='same')(x_person_skip)
    x_person = layers.MaxPooling2D((2, 2))(x_person)
    x_person = layers.Conv2D(256, (3, 3), activation='relu', padding='same')(x_person)
    x_person = layers.MaxPooling2D((2, 2))(x_person)

    # Encoder for cloth image
    x_cloth_og = layers.Conv2D(64, (3, 3), activation='relu', padding='same')(inputs_cloth)
    x_cloth_skip = layers.MaxPooling2D((2, 2))(x_cloth_og)
    x_cloth = layers.Conv2D(128, (3, 3), activation='relu', padding='same')(x_cloth_skip)
    x_cloth_2 = layers.MaxPooling2D((2, 2))(x_cloth)
    x_cloth = layers.Conv2D(256, (3, 3), activation='relu', padding='same')(x_cloth_2)
    x_cloth = layers.MaxPooling2D((2, 2))(x_cloth)

    # Encoder for segmentation mask
    x_mask_og = layers.Conv2D(64, (3, 3), activation='relu', padding='same')(inputs_mask)
    x_mask_skip = layers.MaxPooling2D((2, 2))(x_mask_og)
    x_mask = layers.Conv2D(128, (3, 3), activation='relu', padding='same')(x_mask_skip)
    x_mask = layers.MaxPooling2D((2, 2))(x_mask)
    x_mask = layers.Conv2D(256, (3, 3), activation='relu', padding='same')(x_mask)
    x_mask = layers.MaxPooling2D((2, 2))(x_mask)

    # Concatenate features from all branches
    concatenated = layers.concatenate([x_person, x_cloth, x_mask], axis=-1)

    # Decoder part with skip connections
    x = layers.Conv2DTranspose(256, (3, 3), strides=(2, 2), padding='same')(concatenated)
    x = layers.concatenate([x, x_cloth_2], axis=-1)
    x = layers.Conv2D(256, (3, 3), activation='relu', padding='same')(x)

    x = layers.Conv2DTranspose(128, (3, 3), strides=(2, 2), padding='same')(x)
    print(x.shape)
    x = layers.concatenate([x, x_cloth_skip], axis=-1)
    x = layers.Conv2D(128, (3, 3), activation='relu', padding='same')(x)

    x = layers.Conv2DTranspose(64, (3, 3), strides=(2, 2), padding='same')(x)
    x = layers.concatenate([x, x_person_og], axis=-1)
    x = layers.Conv2D(64, (3, 3), activation='relu', padding='same')(x)

    # Output layer
    output_image = layers.Conv2D(3, (1, 1), activation='sigmoid')(x)

    # Build and compile the model
    model = models.Model(inputs=[inputs_person, inputs_cloth, inputs_mask], outputs=output_image)
    model.compile(optimizer='adam', loss='mse')

    return model

model_deepunet = build_unet_virtual_tryon_model()
model_deepunet.summary()


Downloading data from https://storage.googleapis.com/tensorflow/keras-applications/vgg19/vgg19_weights_tf_dim_ordering_tf_kernels_notop.h5
[1m80134624/80134624[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 0us/step
(None, 64, 64, 128)


## Train the model

In [5]:
X_person = np.array(person_images)
X_cloth = np.array(cloth_images)
Y_output = np.array(output_images)

# Now the shape of segmentation_image will be (256, 256, 1)
X_segmentation = np.array(mask_images)

model_deepunet.fit([X_person, X_cloth, X_segmentation], Y_output, epochs=15)

Epoch 1/15
[1m64/64[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m850s[0m 13s/step - loss: 0.0630
Epoch 2/15
[1m64/64[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m821s[0m 13s/step - loss: 0.0199
Epoch 3/15
[1m64/64[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m823s[0m 13s/step - loss: 0.0150
Epoch 4/15
[1m64/64[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m827s[0m 13s/step - loss: 0.0121
Epoch 5/15
[1m64/64[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m855s[0m 13s/step - loss: 0.0110
Epoch 6/15
[1m64/64[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m861s[0m 13s/step - loss: 0.0096
Epoch 7/15
[1m64/64[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m818s[0m 13s/step - loss: 0.0091
Epoch 8/15
[1m64/64[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m864s[0m 13s/step - loss: 0.0082
Epoch 9/15
[1m64/64[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m859s[0m 13s/step - loss: 0.0085
Epoch 10/15
[1m64/64[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m818s[0m 13s/step - lo

<keras.src.callbacks.history.History at 0x7880918777f0>

In [6]:
model_deepunet.save('deepunet_model.h5')  # Save the model in HDF5 format

In [None]:
# Verify that the model can run one epoch of training
test_loss = model_unet.evaluate([X_person, X_cloth, X_segmentation], Y_output)
print(f"Test Loss after one epoch: {test_loss}")


## Visualize output

In [3]:
# Test the model on the same input
model_deep_unet = deepunet_model.h5
predicted_image = model_deepunet.predict([X_person, X_cloth, X_segmentation])

np.save('predicted_image.npy', predicted_image)

# Display the original and predicted images
plt.figure(figsize=(10, 5))

plt.subplot(1, 3, 1)
plt.title("Cloth Image")
plt.imshow(X_person[1])

plt.subplot(1, 3, 2)
plt.title("Output Image")
plt.imshow(Y_output[1])

plt.subplot(1, 3, 3)
plt.title("Predicted Try-On Image")
plt.imshow(predicted_image[1])

plt.show()

NameError: name 'deepunet_model' is not defined

In [8]:
# from tensorflow.keras.losses import MeanSquaredError

# mse = MeanSquaredError()
# mse_value = mse(Y_output_test, predictions).numpy()
# print(f'MSE: {mse_value}')

def psnr(target, prediction):
    mse = np.mean((target - prediction) ** 2)
    return 10 * np.log10(1.0 / mse)

psnr = psnr(Y_output, predicted_image)
print(psnr)

22.09979957786572


# Genertating agnostic from output image (testing purpose)
## Can be ignored as GANs are computationally Expensive and might crash the session.

In [None]:
from tensorflow.keras.layers import Input, Concatenate

def build_generator(input_shape=(128, 128, 3), mask_shape=(128, 128, 1)):
    person_input = Input(shape=input_shape, name='person_input')
    mask_input = Input(shape=mask_shape, name='mask_input')
    
    # Combine person image and mask as input
    combined_input = Concatenate()([person_input, mask_input])

    x = layers.Conv2D(64, (4, 4), strides=(2, 2), padding='same')(combined_input)
    x = layers.LeakyReLU()(x)
    
    x = layers.Conv2D(128, (4, 4), strides=(2, 2), padding='same')(x)
    x = layers.BatchNormalization()(x)
    x = layers.LeakyReLU()(x)
    
    x = layers.Conv2D(256, (4, 4), strides=(2, 2), padding='same')(x)
    x = layers.BatchNormalization()(x)
    x = layers.LeakyReLU()(x)
    
    x = layers.Conv2DTranspose(128, (4, 4), strides=(2, 2), padding='same')(x)
    x = layers.BatchNormalization()(x)
    x = layers.LeakyReLU()(x)
    
    x = layers.Conv2DTranspose(64, (4, 4), strides=(2, 2), padding='same')(x)
    x = layers.BatchNormalization()(x)
    x = layers.LeakyReLU()(x)
    
    output = layers.Conv2DTranspose(3, (4, 4), strides=(2, 2), padding='same', activation='tanh')(x)
    
    model = tf.keras.Model(inputs=[person_input, mask_input], outputs=output)
    return model

generator = build_generator()
generator.summary()


In [None]:
def build_discriminator(input_shape=(128, 128, 3), agnostic_shape=(128, 128, 3)):
    person_input = Input(shape=input_shape, name='person_input')
    agnostic_input = Input(shape=agnostic_shape, name='agnostic_input')

    combined_input = Concatenate()([person_input, agnostic_input])

    x = layers.Conv2D(64, (4, 4), strides=(2, 2), padding='same')(combined_input)
    x = layers.LeakyReLU()(x)

    x = layers.Conv2D(128, (4, 4), strides=(2, 2), padding='same')(x)
    x = layers.BatchNormalization()(x)
    x = layers.LeakyReLU()(x)

    x = layers.Conv2D(256, (4, 4), strides=(2, 2), padding='same')(x)
    x = layers.BatchNormalization()(x)
    x = layers.LeakyReLU()(x)

    x = layers.Flatten()(x)
    output = layers.Dense(1, activation='sigmoid')(x)

    model = tf.keras.Model(inputs=[person_input, agnostic_input], outputs=output)
    return model

discriminator = build_discriminator()
discriminator.summary()


In [None]:
import tensorflow as tf

# Define constants
BATCH_SIZE = 64
EPOCHS = 10
NOISE_DIM = 100  # Define if using noise

# Define the loss functions
def generator_loss(fake_output):
    return tf.keras.losses.BinaryCrossentropy(from_logits=True)(tf.ones_like(fake_output), fake_output)

def discriminator_loss(real_output, fake_output):
    real_loss = tf.keras.losses.BinaryCrossentropy(from_logits=True)(tf.ones_like(real_output), real_output)
    fake_loss = tf.keras.losses.BinaryCrossentropy(from_logits=True)(tf.zeros_like(fake_output), fake_output)
    return real_loss + fake_loss

# Define optimizers
generator_optimizer = tf.keras.optimizers.Adam(1e-4)
discriminator_optimizer = tf.keras.optimizers.Adam(1e-4)

@tf.function
def train_step(predicted_image, cloth_images, mask_images):
    with tf.GradientTape() as gen_tape, tf.GradientTape() as disc_tape:
        # Generate agnostic clothing image from person and mask input
        generated_cloths = generator([predicted_image, mask_images], training=True)

        # Discriminator outputs
        real_output = discriminator([predicted_image, cloth_images], training=True)
        fake_output = discriminator([predicted_image, generated_cloths], training=True)

        # Calculate losses
        gen_loss = generator_loss(fake_output)
        disc_loss = discriminator_loss(real_output, fake_output)

    # Apply gradients
    gradients_of_generator = gen_tape.gradient(gen_loss, generator.trainable_variables)
    gradients_of_discriminator = disc_tape.gradient(disc_loss, discriminator.trainable_variables)

    generator_optimizer.apply_gradients(zip(gradients_of_generator, generator.trainable_variables))
    discriminator_optimizer.apply_gradients(zip(gradients_of_discriminator, discriminator.trainable_variables))

def train(predicted_image, cloth_images, mask_images, epochs):
    dataset_size = len(predicted_image)  # Fixed the variable name
    for epoch in range(epochs):
        for i in range(0, dataset_size, BATCH_SIZE):
            predicted_batch = predicted_image[i:i+BATCH_SIZE]
            cloth_batch = cloth_images[i:i+BATCH_SIZE]
            mask_batch = mask_images[i:i+BATCH_SIZE]
            train_step(predicted_batch, cloth_batch, mask_batch)
        print(f'Epoch {epoch + 1} completed')

# Start training
train(predicted_image, cloth_images, mask_images, EPOCHS)


In [None]:
import tensorflow as tf
import numpy as np
import matplotlib.pyplot as plt
import os

# Define constants
BATCH_SIZE = 64
EPOCHS = 5
NOISE_DIM = 100  # Define if using noise
OUTPUT_DIR = './generated_images'  # Directory to save generated images

# Ensure output directory exists
os.makedirs(OUTPUT_DIR, exist_ok=True)

# Define the loss functions
def generator_loss(fake_output):
    return tf.keras.losses.BinaryCrossentropy(from_logits=True)(tf.ones_like(fake_output), fake_output)

def discriminator_loss(real_output, fake_output):
    real_loss = tf.keras.losses.BinaryCrossentropy(from_logits=True)(tf.ones_like(real_output), real_output)
    fake_loss = tf.keras.losses.BinaryCrossentropy(from_logits=True)(tf.zeros_like(fake_output), fake_output)
    return real_loss + fake_loss

# Define optimizers
generator_optimizer = tf.keras.optimizers.Adam(1e-4)
discriminator_optimizer = tf.keras.optimizers.Adam(1e-4)

@tf.function
def train_step(predicted_image, cloth_images, mask_images):
    with tf.GradientTape() as gen_tape, tf.GradientTape() as disc_tape:
        # Generate agnostic clothing image from person and mask input
        generated_cloths = generator([predicted_image, mask_images], training=True)

        # Discriminator outputs
        real_output = discriminator([predicted_image, cloth_images], training=True)
        fake_output = discriminator([predicted_image, generated_cloths], training=True)

        # Calculate losses
        gen_loss = generator_loss(fake_output)
        disc_loss = discriminator_loss(real_output, fake_output)

    # Apply gradients
    gradients_of_generator = gen_tape.gradient(gen_loss, generator.trainable_variables)
    gradients_of_discriminator = disc_tape.gradient(disc_loss, discriminator.trainable_variables)

    generator_optimizer.apply_gradients(zip(gradients_of_generator, generator.trainable_variables))
    discriminator_optimizer.apply_gradients(zip(gradients_of_discriminator, discriminator.trainable_variables))

def save_images(images, epoch, step):
    """
    Saves a batch of images to disk.
    
    Args:
        images (numpy.ndarray): Batch of images to save.
        epoch (int): Current epoch number.
        step (int): Current step number in the epoch.
    """
    images = (images * 127.5 + 127.5).astype(np.uint8)  # Denormalize images to 0-255 range
    for i, img in enumerate(images):
        plt.imsave(f"{OUTPUT_DIR}/image_epoch_{epoch:03d}_step_{step:04d}_{i:03d}.png", img)

def display_image(image, epoch):
    """
    Displays the first image in the batch after every 5 epochs.
    
    Args:
        image (numpy.ndarray): The image array to display.
        epoch (int): The current epoch number.
    """
    plt.figure(figsize=(4, 4))
    plt.imshow((image * 127.5 + 127.5).astype(np.uint8))
    plt.title(f"Generated Image at Epoch {epoch}")
    plt.axis('off')
    plt.show()

def train(predicted_image, cloth_images, mask_images, epochs):
    dataset_size = len(predicted_image)  # Fixed the variable name
    for epoch in range(epochs):
        for i in range(0, dataset_size, BATCH_SIZE):
            predicted_batch = predicted_image[i:i+BATCH_SIZE]
            cloth_batch = cloth_images[i:i+BATCH_SIZE]
            mask_batch = mask_images[i:i+BATCH_SIZE]
            train_step(predicted_batch, cloth_batch, mask_batch)

            # Generate images to visualize progress
            if (i // BATCH_SIZE) % 10 == 0:  # Save every 10 steps
                generated_cloths = generator([predicted_batch, mask_batch], training=False)
                save_images(generated_cloths.numpy(), epoch, i // BATCH_SIZE)
        
        # Visualize the first image after every 5 epochs
        if (epoch + 1) % 5 == 0:
            generated_cloths = generator([predicted_image[:BATCH_SIZE], mask_images[:BATCH_SIZE]], training=False)
            display_image(generated_cloths[0].numpy(), epoch + 1)
                
        print(f'Epoch {epoch + 1} completed')

# Start training
train(predicted_image, cloth_images, mask_images, EPOCHS)


# Fine tuning with deeper model and gan approach

## Can be ignored as GANs are computationally Expensive and might crash the session.

In [None]:
from tensorflow.keras import layers, models

def build_unet_virtual_tryon_model():
    inputs_person = layers.Input(shape=(128, 128, 3))
    inputs_cloth = layers.Input(shape=(128, 128, 3))
    inputs_mask = layers.Input(shape=(128, 128, 1))

    # Encoder for person image
    x_person_og = layers.Conv2D(64, (3, 3), activation='relu', padding='same')(inputs_person)
    x_person_skip = layers.MaxPooling2D((2, 2))(x_person_og)
    x_person = layers.Conv2D(128, (3, 3), activation='relu', padding='same')(x_person_skip)
    x_person = layers.MaxPooling2D((2, 2))(x_person)
    x_person = layers.Conv2D(256, (3, 3), activation='relu', padding='same')(x_person)
    x_person = layers.MaxPooling2D((2, 2))(x_person)

    # Encoder for cloth image
    x_cloth_og = layers.Conv2D(64, (3, 3), activation='relu', padding='same')(inputs_cloth)
    x_cloth_skip = layers.MaxPooling2D((2, 2))(x_cloth_og)
    x_cloth = layers.Conv2D(128, (3, 3), activation='relu', padding='same')(x_cloth_skip)
    x_cloth_2 = layers.MaxPooling2D((2, 2))(x_cloth)
    x_cloth = layers.Conv2D(256, (3, 3), activation='relu', padding='same')(x_cloth_2)
    x_cloth = layers.MaxPooling2D((2, 2))(x_cloth)

    # Encoder for segmentation mask
    x_mask_og = layers.Conv2D(64, (3, 3), activation='relu', padding='same')(inputs_mask)
    x_mask_skip = layers.MaxPooling2D((2, 2))(x_mask_og)
    x_mask = layers.Conv2D(128, (3, 3), activation='relu', padding='same')(x_mask_skip)
    x_mask = layers.MaxPooling2D((2, 2))(x_mask)
    x_mask = layers.Conv2D(256, (3, 3), activation='relu', padding='same')(x_mask)
    x_mask = layers.MaxPooling2D((2, 2))(x_mask)
    
    print(x_person.shape)
    print(x_mask.shape)
    print(x_cloth.shape)

    # Concatenate features from all branches
    concatenated = layers.concatenate([x_person, x_cloth, x_mask], axis=-1)
    print(concatenated.shape)

    # Decoder part with skip connections
    x = layers.Conv2DTranspose(256, (3, 3), strides=(2, 2), padding='same')(concatenated)
    x = layers.concatenate([x, x_cloth_2], axis=-1)
    x = layers.Conv2D(256, (3, 3), activation='relu', padding='same')(x)

    x = layers.Conv2DTranspose(128, (3, 3), strides=(2, 2), padding='same')(x)
    print(x.shape)
    x = layers.concatenate([x, x_cloth_skip], axis=-1)
    x = layers.Conv2D(128, (3, 3), activation='relu', padding='same')(x)

    x = layers.Conv2DTranspose(64, (3, 3), strides=(2, 2), padding='same')(x)
    x = layers.concatenate([x, x_person_og], axis=-1)
    x = layers.Conv2D(64, (3, 3), activation='relu', padding='same')(x)

    # Output layer
    output_image = layers.Conv2D(3, (1, 1), activation='sigmoid')(x)

    # Build and compile the model
    model = models.Model(inputs=[inputs_person, inputs_cloth, inputs_mask], outputs=output_image)
    model.compile(optimizer='adam', loss='mse')  # Using MSE or perceptual loss

    return model

# Create and verify the generator
generator = build_unet_virtual_tryon_model()
generator.summary()

# Verification Block: Checking the input and output shapes
sample_person = tf.random.normal((1, 128, 128, 3))
sample_cloth = tf.random.normal((1, 128, 128, 3))
sample_mask = tf.random.normal((1, 128, 128, 1))

sample_output = generator([sample_person, sample_cloth, sample_mask])
print(f"Generator Input Shapes: {sample_person.shape}, {sample_cloth.shape}, {sample_mask.shape}")
print(f"Generator Output Shape: {sample_output.shape}")


In [None]:
print(f"Model has been built with {len(combined.layers)} layers.")

In [None]:
from tensorflow.keras import layers, models, optimizers

def build_discriminator():
    inputs = layers.Input(shape=(128, 128, 3))

    # Discriminator architecture
    x = layers.Conv2D(64, (4, 4), strides=(2, 2), padding='same')(inputs)
    x = layers.LeakyReLU(alpha=0.2)(x)
    x = layers.Dropout(0.3)(x)

    x = layers.Conv2D(128, (4, 4), strides=(2, 2), padding='same')(x)
    x = layers.LeakyReLU(alpha=0.2)(x)
    x = layers.Dropout(0.3)(x)

    x = layers.Conv2D(256, (4, 4), strides=(2, 2), padding='same')(x)
    x = layers.LeakyReLU(alpha=0.2)(x)
    x = layers.Dropout(0.3)(x)

    x = layers.Conv2D(512, (4, 4), strides=(2, 2), padding='same')(x)
    x = layers.LeakyReLU(alpha=0.2)(x)
    x = layers.Dropout(0.3)(x)

    x = layers.Flatten()(x)
    x = layers.Dense(1, activation='sigmoid')(x)

    # Build and compile the discriminator
    discriminator = models.Model(inputs, x)
    discriminator.compile(optimizer=optimizers.Adam(learning_rate=0.0002, beta_1=0.5), loss='binary_crossentropy', metrics=['accuracy'])

    return discriminator

# Create and verify the discriminator
discriminator = build_discriminator()
discriminator.summary()

# Verification Block: Checking the input and output shapes
sample_input = tf.random.normal((1, 128, 128, 3))  # A sample input
sample_output = discriminator(sample_input)
print(f"Discriminator Input Shape: {sample_input.shape}, Output Shape: {sample_output.shape}")


In [None]:
from tensorflow.keras.models import Model
from tensorflow.keras import layers, optimizers

# Freeze the discriminator when training the generator
discriminator.trainable = False

# Inputs for GAN
inputs_person = layers.Input(shape=(128, 128, 3))
inputs_cloth = layers.Input(shape=(128, 128, 3))
inputs_mask = layers.Input(shape=(128, 128, 1))

# Generator output
generated_image = generator([inputs_person, inputs_cloth, inputs_mask])

# Discriminator output on the generated image
validity = discriminator(generated_image)

# Combined model: Generator tries to fool the discriminator
combined = Model([inputs_person, inputs_cloth, inputs_mask], validity)
combined.compile(optimizer=optimizers.Adam(learning_rate=0.0002, beta_1=0.5), loss='binary_crossentropy')

combined.summary()

# Verification Block: Checking if the combined model's output shape matches expected validity output
combined_output = combined([sample_person, sample_cloth, sample_mask])
print(f"Combined Model Output Shape: {combined_output.shape}")  # Expected to be (batch_size, 1)


In [None]:
import numpy as np
import tensorflow as tf
import matplotlib.pyplot as plt

def visualize_results(generator, input_data, real_images, num_samples=5, epoch=None):
    """
    Visualize real and generated images side by side to evaluate quality.

    Args:
        generator: Trained generator model.
        input_data: Tuple containing (input_person, input_cloth, input_mask).
        real_images: Ground truth images for comparison.
        num_samples: Number of samples to visualize.
        epoch: Current epoch number for labeling (optional).
    """
    # Generate images using the generator
    generated_images = generator.predict(input_data)

    # Select random samples for visualization
    indices = np.random.choice(range(len(real_images)), num_samples, replace=False)

    # Plot real vs generated images
    plt.figure(figsize=(12, num_samples * 2))

    for i, idx in enumerate(indices):
        # Real image
        plt.subplot(num_samples, 2, i * 2 + 1)
        plt.imshow(real_images[idx])
        plt.title("Real Image")
        plt.axis('off')

        # Generated image
        plt.subplot(num_samples, 2, i * 2 + 2)
        plt.imshow(generated_images[idx])
        plt.title("Generated Image")
        plt.axis('off')

    plt.tight_layout()
    if epoch is not None:
        plt.suptitle(f'Epoch {epoch}', fontsize=16)
    plt.show()

    # Optionally, save images
    plt.savefig(f'generated_images_epoch_{epoch}.png')

def train_gan(generator, discriminator, combined, epochs, batch_size, X_person, X_cloth, X_mask, Y_output):
    num_samples = X_person.shape[0]
    steps_per_epoch = num_samples // batch_size

    for epoch in range(epochs):
        for batch_i in range(steps_per_epoch):
            # Prepare a batch of data
            start_idx = batch_i * batch_size
            end_idx = start_idx + batch_size
            
            # Slice the arrays to get the batch data
            batch_person = X_person[start_idx:end_idx]
            batch_cloth = X_cloth[start_idx:end_idx]
            batch_mask = X_mask[start_idx:end_idx]
            real_images = Y_output[start_idx:end_idx]

            # Combine inputs for generator
            input_data = [batch_person, batch_cloth, batch_mask]

            # Generate fake images using the generator
            fake_images = generator.predict(input_data)

            # Create labels: 1 for real images, 0 for fake images
            real_labels = np.ones((batch_size, 1))  # Label real images as 1
            fake_labels = np.zeros((batch_size, 1))  # Label generated images as 0

            # Train discriminator on real images
            d_loss_real = discriminator.train_on_batch(real_images, real_labels)

            # Train discriminator on generated (fake) images
            d_loss_fake = discriminator.train_on_batch(fake_images, fake_labels)

            # Calculate the average loss for the discriminator
            d_loss = 0.5 * np.add(d_loss_real, d_loss_fake)

            # Verification Block: Check discriminator accuracy after each batch
            print(f"Discriminator Accuracy: {d_loss[1] * 100:.2f}% on batch {batch_i+1}")

            # Prepare labels for generator training: all labels should be real (1)
            misleading_labels = np.ones((batch_size, 1))

            # Train the generator via the combined model
            g_loss = combined.train_on_batch(input_data, misleading_labels)

            # Extract the primary loss value if g_loss is a list or array
            if isinstance(g_loss, (list, np.ndarray)):
                primary_g_loss = g_loss[0]  # Access the primary loss value
            else:
                primary_g_loss = g_loss

            # Verification Block: Log generator loss after each batch
            print(f"Generator Loss: {primary_g_loss:.4f} on batch {batch_i+1}")

        # Epoch summary
        print(f"Epoch {epoch+1}/{epochs} | D Loss: {d_loss[0]:.4f} | D Acc: {100 * d_loss[1]:.2f}% | G Loss: {primary_g_loss:.4f}")

        # Visualize results after each epoch
        visualize_results(generator, input_data, real_images, num_samples=5, epoch=epoch + 1)

    # Save final generated images for evaluation
    print("Training complete. Generating final images for evaluation...")
    visualize_results(generator, [X_person, X_cloth, X_mask], Y_output, num_samples=10, epoch="final")


# Define the number of epochs and batch size
epochs = 10
batch_size = 32

# Call the training function
train_gan(generator, discriminator, combined, epochs, batch_size, X_person, X_cloth, X_segmentation, Y_output)


In [None]:
# Define the number of epochs and batch size
epochs = 10
batch_size = 32

# Example data initialization (replace with actual data)
# X_person = np.random.rand(500, 128, 128, 3)
# X_cloth = np.random.rand(500, 128, 128, 3)
# X_mask = np.random.rand(500, 128, 128, 1)
# Y_output = np.random.rand(500, 128, 128, 3)

# Call the training function
train_gan(generator, discriminator, combined, epochs, batch_size, X_person, X_cloth, X_segmentation, Y_output)
