In [None]:
import os

import numpy as np
from keras.layers import (
    Activation,
    BatchNormalization,
    Concatenate,
    Conv2D,
    Conv2DTranspose,
    Dense,
    Embedding,
    Flatten,
    Input,
    LeakyReLU,
    Reshape,
)
from keras.models import Model, Sequential
from keras.optimizers import Adam
from keras.preprocessing.image import ImageDataGenerator
from keras.preprocessing.sequence import pad_sequences
from keras.preprocessing.text import Tokenizer
from PIL import Image

In [None]:
# Toy captions used to exercise the text pipeline.
text_data = [
    "The cat is on the mat.",
    "The dog is chasing the cat.",
]

# Fit the vocabulary on the captions; the +1 leaves room for id 0, which
# pad_sequences uses as its fill value.
tokenizer = Tokenizer()
tokenizer.fit_on_texts(text_data)
vocab_size = 1 + len(tokenizer.word_index)

# Map every caption to its list of integer token ids.
sequences = tokenizer.texts_to_sequences(text_data)

# Pad all captions to the length of the longest one.
max_sequence_length = max(map(len, sequences))
padded_sequences = pad_sequences(sequences, maxlen=max_sequence_length)

# Index the identity matrix with the ids to get one one-hot row per token.
identity = np.eye(vocab_size)
one_hot_sequences = identity[padded_sequences]

In [None]:
# Root folder of the training images (placeholder: point this at real data).
# It was previously commented out, which made the flow_from_directory call
# below fail with a NameError.
image_directory = "/path/to/image/directory"
target_image_size = (64, 64)

# Defined here because the directory iterator below needs it; without it this
# cell fails with a NameError on a fresh kernel.
batch_size = 32


def scale_to_tanh_range(image):
    """Map pixel values from [0, 255] to [-1, 1].

    The generator ends in a tanh activation, so real images must live in the
    same [-1, 1] range; otherwise the discriminator can separate real from
    fake by value range alone.
    """
    return image / 127.5 - 1.0


# ImageDataGenerator for loading and preprocessing images
image_datagen = ImageDataGenerator(preprocessing_function=scale_to_tanh_range)

# Create a flow_from_directory generator
image_generator = image_datagen.flow_from_directory(
    image_directory,
    target_size=target_image_size,
    batch_size=batch_size,
    class_mode=None  # We don't need class labels for text-to-image GAN
)

In [None]:
def get_data_batch(text_sequences, image_generator, batch_size):
    """Endlessly yield ``(batch_text, batch_images)`` pairs of equal length.

    Args:
        text_sequences: Array-like of text vectors, one row per sample. Rows
            are drawn uniformly at random with replacement.
        image_generator: Iterator whose ``next()`` returns one batch of images.
        batch_size: Upper bound on the number of samples per yielded batch.

    Yields:
        Tuple ``(batch_text, batch_images)`` whose first dimensions match.

    Raises:
        ValueError: If ``text_sequences`` is empty.
    """
    # Accept plain lists as well as arrays; fancy indexing below needs an array.
    text_sequences = np.asarray(text_sequences)
    if len(text_sequences) == 0:
        raise ValueError("text_sequences must contain at least one sample")

    while True:
        # Pull the images first: the iterator may return a short batch (e.g.
        # the last batch of a pass), and the text batch has to match it.
        batch_images = next(image_generator)
        current_size = min(batch_size, len(batch_images))
        batch_images = batch_images[:current_size]

        # Sample exactly as many text rows as there are images.
        text_indices = np.random.randint(0, len(text_sequences), size=current_size)
        batch_text = text_sequences[text_indices]

        yield batch_text, batch_images

In [None]:
# Length of the input vector the generator consumes: it is the input_dim of
# the generator's first Dense layer and the Input shape of the combined GAN.
embedding_dim = 100

# Define the image size
# Side length in pixels of the square, 3-channel image the discriminator accepts.
image_size = 64

# Define the generator model
def build_generator():
    """Assemble the generator as a Sequential stack.

    A vector of length ``embedding_dim`` is projected by a Dense layer,
    reshaped to an 8x8x256 feature map, and passed through three stride-2
    transposed convolutions. The last one has 3 filters and is followed by a
    tanh activation.

    Returns:
        The uncompiled Sequential model.
    """
    # Settings shared by every upsampling layer.
    upsample = dict(kernel_size=4, strides=2, padding='same')

    layers = [
        # Project the input vector and fold it into a small feature map.
        Dense(256 * 8 * 8, input_dim=embedding_dim),
        Reshape((8, 8, 256)),
        # First upsampling stage.
        Conv2DTranspose(128, **upsample),
        BatchNormalization(),
        Activation('relu'),
        # Second upsampling stage.
        Conv2DTranspose(64, **upsample),
        BatchNormalization(),
        Activation('relu'),
        # Final stage: 3 output channels squashed by tanh.
        Conv2DTranspose(3, **upsample),
        Activation('tanh'),
    ]
    return Sequential(layers)

# Define the discriminator model
def build_discriminator():
    """Assemble the discriminator with the functional API.

    An ``image_size`` x ``image_size`` x 3 input goes through three stride-2
    convolutions (64, 128, 256 filters), each followed by LeakyReLU; the
    second and third are batch-normalized first. The result is flattened into
    a single sigmoid unit.

    Returns:
        The uncompiled Model mapping an image to one validity score.
    """
    image_input = Input(shape=(image_size, image_size, 3))

    features = image_input
    # (filters, apply batch norm) per stage; the first stage is not normalized.
    for filters, normalize in ((64, False), (128, True), (256, True)):
        features = Conv2D(filters, kernel_size=4, strides=2, padding='same')(features)
        if normalize:
            features = BatchNormalization()(features)
        features = LeakyReLU()(features)

    flattened = Flatten()(features)
    validity = Dense(1, activation='sigmoid')(flattened)

    return Model(image_input, validity)

# Build the text-to-image GAN
def build_text2image_gan(generator, discriminator):
    """Chain generator and discriminator into one model for generator training.

    Side effect: sets ``discriminator.trainable = False`` on the object that
    is passed in.

    Args:
        generator: Model mapping a vector of length ``embedding_dim`` to an image.
        discriminator: Model mapping an image to a validity score.

    Returns:
        The uncompiled Model mapping the input vector to the discriminator's
        score for the generated image.
    """
    # Freeze the discriminator for use inside the combined model.
    discriminator.trainable = False

    conditioning = Input(shape=(embedding_dim,))
    score = discriminator(generator(conditioning))

    return Model(inputs=conditioning, outputs=score)

# Instantiate the models
generator = build_generator()
discriminator = build_discriminator()

# Compile the discriminator BEFORE building the combined model.
# build_text2image_gan sets discriminator.trainable = False, and Keras takes
# the trainable flag into account when a model is compiled. Compiling
# afterwards would give a frozen discriminator that never learns.
# Each model also gets its own Adam instance (learning rate 0.0002,
# beta_1 0.5), because an optimizer tracks state per variable and the two
# models train different variable sets.
discriminator_optimizer = Adam(0.0002, 0.5)
discriminator.compile(loss='binary_crossentropy', optimizer=discriminator_optimizer, metrics=['accuracy'])

# Build the combined model (this freezes the discriminator inside it) and
# compile it so that only the generator's weights are updated through it.
text2image_gan = build_text2image_gan(generator, discriminator)
optimizer = Adam(0.0002, 0.5)
text2image_gan.compile(loss='binary_crossentropy', optimizer=optimizer)


In [None]:
# Set the number of epochs and batch size
epochs = 100
batch_size = 32

# Text vectors fed to the generator. The generator's input width is
# `embedding_dim`, so the token-id sequences are padded (or truncated) to that
# length and cast to float.
# NOTE(review): raw token ids are only a stand-in for a learned text encoder;
# swap in real embeddings of length `embedding_dim` when available.
text_sequences = pad_sequences(sequences, maxlen=embedding_dim).astype("float32")

# Set the number of steps per epoch (at least one, so that small datasets
# still train and the per-epoch averages below never divide by zero)
steps_per_epoch = max(1, len(text_sequences) // batch_size)

# Set the path to save generated images
output_path = "/path/to/save/generated/images/"


def save_generated_images(images, output_dir, epoch):
    """Write a batch of generated images to ``output_dir`` as PNG files.

    Args:
        images: Array of shape (n, height, width, 3) in the generator's tanh
            range [-1, 1].
        output_dir: Target directory; created if it does not exist.
        epoch: Epoch number embedded in each file name.
    """
    os.makedirs(output_dir, exist_ok=True)
    # Map [-1, 1] to [0, 255]; clip so rounding can never wrap around in uint8.
    pixels = np.clip(images * 127.5 + 127.5, 0, 255).astype(np.uint8)
    for index, image in enumerate(pixels):
        file_name = "epoch_{:04d}_{:03d}.png".format(epoch, index)
        Image.fromarray(image).save(os.path.join(output_dir, file_name))


# One batch iterator for the whole run instead of a new one every step.
data_batches = get_data_batch(text_sequences, image_generator, batch_size)

# Training loop
for epoch in range(epochs):
    print("Epoch {}/{}".format(epoch + 1, epochs))

    # Initialize the loss and accuracy for each epoch
    disc_loss_epoch = 0
    disc_acc_epoch = 0
    gan_loss_epoch = 0

    # Iterate over the batches of data
    for step in range(steps_per_epoch):
        # Get a batch of text and image samples
        batch_text, batch_images = next(data_batches)

        # The image iterator can return a short batch; keep the text batch
        # and the labels the same length as the images actually received.
        current_batch = len(batch_images)
        batch_text = batch_text[:current_batch]

        # ---------------------
        #  Train Discriminator
        # ---------------------

        # Generate fake images from text
        fake_images = generator.predict(batch_text, verbose=0)

        # Train the discriminator
        real_labels = np.ones((current_batch, 1))
        fake_labels = np.zeros((current_batch, 1))

        d_loss_real = discriminator.train_on_batch(batch_images, real_labels)
        d_loss_fake = discriminator.train_on_batch(fake_images, fake_labels)
        # Each result is [loss, accuracy]; average the real and fake halves.
        disc_loss = 0.5 * np.add(d_loss_real, d_loss_fake)
        disc_loss_epoch += disc_loss[0]
        disc_acc_epoch += disc_loss[1]

        # -----------------
        #  Train Generator
        # -----------------

        # Train the generator (text-to-image GAN): it is rewarded when the
        # discriminator labels its images as real.
        gan_loss = text2image_gan.train_on_batch(batch_text, real_labels)
        gan_loss_epoch += gan_loss

    # Calculate and display the average losses and accuracies for the epoch
    avg_disc_loss = disc_loss_epoch / steps_per_epoch
    avg_disc_acc = disc_acc_epoch / steps_per_epoch
    avg_gan_loss = gan_loss_epoch / steps_per_epoch

    print("Discriminator Loss: {:.4f}, Accuracy: {:.2f}%".format(avg_disc_loss, avg_disc_acc * 100))
    print("Generator Loss: {:.4f}".format(avg_gan_loss))

    # Save generated images for visualization
    if (epoch + 1) % 10 == 0:
        # Preview with real text vectors, the kind of input the generator is
        # trained on, rather than Gaussian noise it has never seen.
        sample_indices = np.random.randint(0, len(text_sequences), size=batch_size)
        generated_images = generator.predict(text_sequences[sample_indices], verbose=0)
        save_generated_images(generated_images, output_path, epoch + 1)


In [None]:
# Save the generator model
# Only the generator is persisted; this is the file the serving cell reads
# back with load_model.
generator.save('path/to/generator_model.h5')

# Save the tokenizer
# Only the word -> index dict is stored, not the Tokenizer object. Because the
# value is a dict, the serving cell reads it back with
# np.load(..., allow_pickle=True).item().
np.save('path/to/word_index.npy', tokenizer.word_index)

# Using the model with FastAPI (the cell below is meant to run as a standalone `app.py`, started with `uvicorn app:app`)

In [None]:
# pip install fastapi uvicorn


from io import BytesIO

import numpy as np
import tensorflow as tf
import uvicorn
from fastapi import FastAPI
from fastapi.responses import Response
from keras.preprocessing.sequence import pad_sequences
from keras.preprocessing.text import Tokenizer
from PIL import Image

# Create the FastAPI instance
app = FastAPI()

# Load the text-to-image GAN model
generator = tf.keras.models.load_model('path/to/generator_model.h5')

# Rebuild a tokenizer from the saved vocabulary.
# NOTE: allow_pickle=True unpickles the file, which can execute arbitrary
# code. Only load a word_index.npy that this project produced itself.
tokenizer = Tokenizer()
tokenizer.word_index = np.load('path/to/word_index.npy', allow_pickle=True).item()

# Pad requests to the width the loaded generator actually expects, instead of
# a hard-coded length that can drift from the trained model.
max_sequence_length = generator.input_shape[-1]


# Define the API endpoint for generating images
@app.post("/generate_image")
def generate_image(text: str):
    """Generate an image for ``text`` and return it as a JPEG response.

    Declared with plain ``def`` so FastAPI runs the blocking ``predict`` call
    in its thread pool rather than on the event loop.

    Args:
        text: Caption to condition the generator on.

    Returns:
        A ``Response`` carrying the JPEG bytes with media type ``image/jpeg``.
    """
    # Preprocess the text into one fixed-length vector.
    sequence = tokenizer.texts_to_sequences([text])
    sequence = pad_sequences(sequence, maxlen=max_sequence_length).astype("float32")

    # Generate image. The generator has a single input, so only the text
    # vector is passed.
    generated_image = generator.predict(sequence, verbose=0)

    # Map the tanh output from [-1, 1] to [0, 255]; clip so rounding cannot
    # wrap around when casting to uint8.
    pixels = np.clip(generated_image[0] * 127.5 + 127.5, 0, 255).astype(np.uint8)
    pil_image = Image.fromarray(pixels)

    # Encode as JPEG in memory.
    image_bytes = BytesIO()
    pil_image.save(image_bytes, format='JPEG')

    # A BytesIO inside a dict cannot be JSON-encoded; send the raw bytes as an
    # image response instead.
    return Response(content=image_bytes.getvalue(), media_type="image/jpeg")


# Run the FastAPI application using Uvicorn server
if __name__ == "__main__":
    # Pass the app object directly so this works whatever the file is named.
    uvicorn.run(app, host="0.0.0.0", port=8000)

# uvicorn app:app --reload