# Conditional GAN
<!-- source of following section: https://machinelearningmastery.com/how-to-develop-a-conditional-generative-adversarial-network-from-scratch/ -->
Based on: https://keras.io/examples/generative/conditional_gan/ (the original example works on 2D data; it was converted to 1D data here)

Note: the dimensions of the encoder and the decoder always have to be adjusted when the number of features (ingredients) changes, because the layer sizes depend on it.

In [30]:
import os
import pandas as pd
import ast

import tensorflow as tf
from tensorflow import keras
from keras import layers

In [21]:
# Load the feature and label tables from the `data` folder below the current
# working directory. The paths are assembled with os.path.join so that the
# separator matches the host OS; the previous hard-coded backslashes only
# resolved on Windows.
# NOTE: `dir` shadows the builtin of the same name; the name is kept so that
# any other cell relying on it keeps working.
dir = os.getcwd()
df_features = pd.read_csv(os.path.join(dir, 'data', 'df_features.csv'))
df_labels = pd.read_csv(os.path.join(dir, 'data', 'df_labels.csv'))

## constants and hyperparameters

In [None]:
batch_size = 4    # number of samples per training batch
num_channels = 1  # recipes are reshaped to (feature_size, 1) during training
num_classes = 3   # width of the one-hot label vectors
# Number of features (ingredients) per recipe. It is read from the loaded
# feature table, which is also what sizes the discriminator input; the former
# expression relied on a dataframe `df` that is not defined in this notebook.
feature_size = df_features.shape[1]
latent_dim = 300  # length of the random noise vector fed to the generator

## preprocessing

In [None]:
# Prepare the list of attribute names used to transform the values of fake
# recipes back to real ingredient names.
# The names are taken from the loaded feature table (the former source `df` is
# not defined in this notebook). A 'label' column or an unnamed '' column is
# skipped if present instead of raising when it is missing.
# NOTE(review): assumes every remaining column of df_features is an
# ingredient -- confirm against the CSV schema.
attributes = [
    column for column in df_features.columns if column not in ('label', '')
]

In [None]:
# convert features dataframe to numpy array
# df_features = pd.DataFrame(df_features).to_numpy()

In [None]:
# Convert the feature and label tables to tensors.
feature_tensor = tf.convert_to_tensor(df_features)
label_tensor = tf.convert_to_tensor(df_labels)

In [None]:
# Pair every feature row with its label row, then shuffle (buffer of 1024
# elements) and group the pairs into batches of `batch_size`.
dataset = (
    tf.data.Dataset.from_tensor_slices((df_features, df_labels))
    .shuffle(buffer_size=1024)
    .batch(batch_size)
)

## calculating the number of input channels

In [None]:
# The discriminator sees the recipe channel(s) stacked with the label
# channels; the generator consumes the noise vector extended by the label.
discriminator_in_channels = num_classes + num_channels
generator_in_channels = num_classes + latent_dim
print(generator_in_channels, discriminator_in_channels)

In [None]:
# Build the discriminator: two stride-2 1D convolutions (each followed by a
# LeakyReLU), global max pooling over the sequence axis and a single-unit
# dense output without activation.
discriminator_layers = [
    keras.layers.InputLayer((df_features.shape[1], discriminator_in_channels)),
    layers.Conv1D(df_features.shape[1] * 2, 3, strides=2, padding='same'),
    layers.LeakyReLU(alpha=0.2),
    layers.Conv1D(df_features.shape[1] * 4, 3, strides=2, padding='same'),
    layers.LeakyReLU(alpha=0.2),
    layers.GlobalMaxPooling1D(),
    layers.Dense(1),
]
discriminator = keras.Sequential(discriminator_layers, name='discriminator')

In [None]:
# Build the generator.
generator_layers = [
    keras.layers.InputLayer((generator_in_channels,)),
    # Project the input to 7 * generator_in_channels values and view them as a
    # sequence of 7 steps with generator_in_channels channels each.
    layers.Dense(7 * generator_in_channels),
    layers.LeakyReLU(alpha=0.2),
    layers.Reshape((7, generator_in_channels)),
]
# Seven stride-2 transposed convolutions, each followed by a LeakyReLU. With
# "valid" padding a layer maps a length n to 2 * (n - 1) + kernel_size, so the
# sequence grows 7 -> 22 -> 54 -> 116 -> 240 -> 490 -> 990 -> 1990.
for kernel_size in (10, 12, 10, 10, 12, 12, 12):
    generator_layers.append(
        layers.Conv1DTranspose(128, kernel_size, strides=2, padding="valid")
    )
    generator_layers.append(layers.LeakyReLU(alpha=0.2))
# The closing width-11 "valid" convolution shortens the sequence to 1980 steps
# and maps it to one sigmoid-activated channel.
generator_layers.append(
    layers.Conv1D(1, 11, padding="valid", activation="sigmoid")
)
generator = keras.Sequential(generator_layers, name="generator")

## Conditional GAN model

In [None]:
class ConditionalGAN(keras.Model):
    """Conditional GAN that trains a generator against a discriminator.

    Both networks are conditioned on one-hot labels: the generator receives
    the labels appended to its noise vector, the discriminator receives them
    as extra channels next to each recipe.

    The training step reads the module-level ``feature_size`` to reshape the
    incoming recipes and to broadcast the labels along the feature axis.
    """

    def __init__(self, discriminator, generator, latent_dim):
        """Store the two networks and set up the loss trackers.

        Args:
            discriminator: Model scoring recipes concatenated with labels.
            generator: Model mapping noise concatenated with labels to recipes.
            latent_dim: Length of the noise vector sampled per recipe.
        """
        super().__init__()
        self.discriminator = discriminator
        self.generator = generator
        self.latent_dim = latent_dim
        self.gen_loss_tracker = keras.metrics.Mean(name="generator_loss")
        self.disc_loss_tracker = keras.metrics.Mean(name="discriminator_loss")

    @property
    def metrics(self):
        """Return the generator and discriminator loss trackers."""
        return [self.gen_loss_tracker, self.disc_loss_tracker]

    def compile(self, d_optimizer, g_optimizer, loss_fn):
        """Configure the optimizers and the loss used by ``train_step``.

        Args:
            d_optimizer: Optimizer applied to the discriminator weights.
            g_optimizer: Optimizer applied to the generator weights.
            loss_fn: Callable ``loss_fn(labels, predictions)`` used for both
                networks.
        """
        super().compile()
        self.d_optimizer = d_optimizer
        self.g_optimizer = g_optimizer
        self.loss_fn = loss_fn

    def train_step(self, data):
        """Run one discriminator update followed by one generator update.

        Args:
            data: Tuple ``(real_recipes, one_hot_labels)`` for one batch.

        Returns:
            Dict with the running means of the generator loss (``"g_loss"``)
            and the discriminator loss (``"d_loss"``).
        """
        # Unpack the data.
        real_recipes, one_hot_labels = data
        real_recipes = tf.cast(real_recipes, tf.float32)
        real_recipes = tf.reshape(real_recipes, [-1, feature_size, 1])
        # The labels are concatenated with float32 tensors below and tf.concat
        # requires matching dtypes, so they are cast like the recipes.
        one_hot_labels = tf.cast(one_hot_labels, tf.float32)

        # Broadcast the labels along the feature axis so that they can be
        # concatenated with the recipes. This is for the discriminator.
        # Repeating along axis 1 keeps the full one-hot vector at every
        # feature position; a flat repeat followed by a reshape would mix the
        # class entries across positions.
        recipe_one_hot_labels = tf.repeat(
            one_hot_labels[:, None, :], repeats=feature_size, axis=1
        )

        # Sample random points in the latent space and concatenate the labels.
        # This is for the generator.
        batch_size = tf.shape(real_recipes)[0]
        random_latent_vectors = tf.random.normal(
            shape=(batch_size, self.latent_dim)
        )
        random_vector_labels = tf.concat(
            [random_latent_vectors, one_hot_labels], axis=1
        )

        # Decode the noise (guided by labels) to fake recipes.
        generated_recipes = self.generator(random_vector_labels)

        # Combine them with real recipes. Note that we are concatenating the
        # labels with these recipes here.
        fake_recipe_and_labels = tf.concat(
            [generated_recipes, recipe_one_hot_labels], -1
        )
        real_recipe_and_labels = tf.concat(
            [real_recipes, recipe_one_hot_labels], -1
        )
        combined_recipes = tf.concat(
            [fake_recipe_and_labels, real_recipe_and_labels], axis=0
        )

        # Assemble labels discriminating real from fake recipes: the fake
        # half (first) is marked 1, the real half (second) is marked 0.
        labels = tf.concat(
            [tf.ones((batch_size, 1)), tf.zeros((batch_size, 1))], axis=0
        )

        # Train the discriminator.
        with tf.GradientTape() as tape:
            predictions = self.discriminator(combined_recipes)
            d_loss = self.loss_fn(labels, predictions)
        grads = tape.gradient(d_loss, self.discriminator.trainable_weights)
        self.d_optimizer.apply_gradients(
            zip(grads, self.discriminator.trainable_weights)
        )

        # Sample random points in the latent space.
        random_latent_vectors = tf.random.normal(
            shape=(batch_size, self.latent_dim)
        )
        random_vector_labels = tf.concat(
            [random_latent_vectors, one_hot_labels], axis=1
        )

        # Assemble labels that say "all real recipes" (0 marks real above).
        misleading_labels = tf.zeros((batch_size, 1))

        # Train the generator (note that we should *not* update the weights
        # of the discriminator)!
        with tf.GradientTape() as tape:
            fake_recipes = self.generator(random_vector_labels)
            fake_recipe_and_labels = tf.concat(
                [fake_recipes, recipe_one_hot_labels], -1
            )
            predictions = self.discriminator(fake_recipe_and_labels)
            g_loss = self.loss_fn(misleading_labels, predictions)
        grads = tape.gradient(g_loss, self.generator.trainable_weights)
        self.g_optimizer.apply_gradients(
            zip(grads, self.generator.trainable_weights)
        )

        # Monitor loss.
        self.gen_loss_tracker.update_state(g_loss)
        self.disc_loss_tracker.update_state(d_loss)
        return {
            "g_loss": self.gen_loss_tracker.result(),
            "d_loss": self.disc_loss_tracker.result(),
        }

## Training Conditional GAN

In [None]:
# Wire the two networks into the conditional GAN, give each network its own
# Adam optimizer and train with binary cross-entropy on logits for 5 epochs.
cond_gan = ConditionalGAN(discriminator, generator, latent_dim)
cond_gan.compile(
    d_optimizer=keras.optimizers.Adam(learning_rate=0.0003),
    g_optimizer=keras.optimizers.Adam(learning_rate=0.0003),
    loss_fn=keras.losses.BinaryCrossentropy(from_logits=True),
)
cond_gan.fit(dataset, epochs=5)

## Interpolating between classes with the trained generator

In [None]:
# We first extract the trained generator from our Conditional GAN.
trained_gen = cond_gan.generator

# Choose the number of intermediate recipes that would be generated in
# between the interpolation + 2 (start and last recipes).
num_interpolation = 9  # @param {type:"integer"}

# Sample one noise vector and reuse it for every interpolation step, so that
# only the label changes between the generated recipes. Repeating along
# axis 0 stacks identical copies of the vector; a flat repeat followed by a
# reshape would yield rows that differ from each other.
interpolation_noise = tf.random.normal(shape=(1, latent_dim))
interpolation_noise = tf.repeat(
    interpolation_noise, repeats=num_interpolation, axis=0
)


def interpolate_class(first_number, second_number):
    """Generate recipes while blending from one class label to another.

    Reads the module-level ``num_classes``, ``num_interpolation``,
    ``interpolation_noise`` and ``trained_gen``.

    Args:
        first_number: Class index of the starting label.
        second_number: Class index of the final label.

    Returns:
        The output of ``trained_gen.predict`` for ``num_interpolation``
        inputs whose labels move linearly from the first to the second class.
    """
    # One-hot encode both end points as float32 row vectors.
    start_label = tf.cast(
        keras.utils.to_categorical([first_number], num_classes), tf.float32
    )
    end_label = tf.cast(
        keras.utils.to_categorical([second_number], num_classes), tf.float32
    )

    # Column of mixing weights running from 0 to 1; each weight is the share
    # of the second label in the corresponding step.
    weight = tf.cast(tf.linspace(0, 1, num_interpolation)[:, None], tf.float32)
    blended_labels = start_label * (1 - weight) + end_label * weight

    # Append the blended labels to the noise and run the generator.
    generator_input = tf.concat([interpolation_noise, blended_labels], 1)
    return trained_gen.predict(generator_input)


# Pick the two class indices to interpolate between and generate the recipes.
start_class = 0  # @param {type:"slider", min:0, max:2, step:1}
end_class = 2  # @param {type:"slider", min:0, max:2, step:1}
fake_recipies = interpolate_class(
    first_number=start_class, second_number=end_class
)

In [None]:
# Show how many attribute names are available (displayed as the cell output).
len(attributes)

In [None]:
# Put the fake recipes into a dataframe: one row per generated recipe, one
# column per attribute.
# Every recipe is flattened into a single row in one reshape. The row width
# is inferred from the data (-1) instead of being hard-coded, so it follows
# the length of the generator output; it must still equal len(attributes).
flat_fake_recipies = fake_recipies.reshape(len(fake_recipies), -1)
converted_fake_recipies = pd.DataFrame(flat_fake_recipies, columns=attributes)

In [None]:
# For every generated recipe, collect the names of all columns whose value is
# not zero.
# NOTE(review): the generator ends in a sigmoid, so exact zeros are probably
# rare and most columns are likely kept -- confirm whether a threshold is
# intended here.
recipes = [
    [
        column
        for column, value in zip(converted_fake_recipies.columns, row_values)
        if value != 0
    ]
    for row_values in converted_fake_recipies.values
]
# recipe = []
# for i, value in enumerate(converted_fake_recipies.values[0]):
#     if value != 0:
#         recipe.append(converted_fake_recipies.columns[i])

In [None]:
# Print the ingredient list of each generated recipe, one recipe per line.
for recipe in recipes:
    print(recipe)

# sources 

Description of convolution and transposed convolution layers: https://towardsdatascience.com/understand-transposed-convolutions-and-build-your-own-transposed-convolution-layer-from-scratch-4f5d97b2967

example code: https://keras.io/examples/generative/conditional_gan/