In [None]:
%pip install tensorflow
%pip install transformers
%pip install datasets
%pip install pillow
%pip install numpy
%pip install matplotlib
%pip install datasets

In [None]:
import sys

# True when the `google.colab` module is already loaded in this interpreter,
# which is used here as the signal that the notebook is running on Colab.
IS_COLAB = "google.colab" in sys.modules
if IS_COLAB:
    import os
    # This must happen before `import tensorflow` below so the variable is
    # already set when TensorFlow is first imported.
    # NOTE(review): presumably this makes `tf.keras` resolve to the legacy
    # Keras 2 package (`tf_keras`) — confirm against the installed TF version.
    os.environ["TF_USE_LEGACY_KERAS"] = "1"
    import tf_keras

import tensorflow as tf

# Fix TensorFlow's global random seed so runs are repeatable.
tf.random.set_seed(42)

In [None]:
from datasets import load_dataset

# NOTE(review): presumably the expected number of images in the full
# cats-vs-dogs dataset; nothing else in the visible notebook reads it —
# confirm it is still needed.
DATASET_SIZE = 23298
# The dataset is loaded further down in this cell, immediately before it is
# filtered, so it is not loaded here.

def to_rgb(example):
    """Ensure the example's image is in RGB mode.

    Args:
        example: mapping with an "image" entry exposing `.mode` and
            `.convert(mode)` (e.g. a PIL image).

    Returns:
        The same `example` object; its "image" entry is replaced by an RGB
        conversion only when the image was not already RGB.
    """
    image = example["image"]
    if image.mode == "RGB":
        # Already in the target mode: leave the example untouched.
        return example
    example["image"] = image.convert("RGB")
    return example

# Load the cats-vs-dogs dataset from the Hugging Face Hub.
dataset = load_dataset("microsoft/cats_vs_dogs")
# Keep only the examples whose label is 0, then force every kept image to RGB.
# NOTE(review): label 0 is presumably "cat" — confirm against the dataset card.
# `input_columns` hands the predicate just the label value, so the filter does
# not need to read and decode each image merely to inspect its label.
dataset = dataset.filter(
    lambda label: label == 0,
    input_columns=["labels"],
).map(to_rgb)

# Expose the train split as a shuffled tf.data.Dataset that yields one
# unbatched example at a time (batching is done later in the notebook).
tf_dataset = dataset["train"].to_tf_dataset(
  shuffle=True,
  batch_size=None
)

def dict_to_tuple(data):
    """Turn a dataset example dict into an `(image, label)` pair of float32 tensors.

    Args:
        data: mapping with an 'image' entry (pixel values, presumably in
            [0, 255] — TODO confirm) and a 'labels' entry.

    Returns:
        Tuple `(image, label)`: the image cast to float32 and rescaled with
        `x / 127.5 - 1.0` (so 0 maps to -1 and 255 maps to 1), and the label
        cast to float32.
    """
    pixels = tf.cast(data['image'], tf.float32)
    label = tf.cast(data['labels'], tf.float32)
    scaled = (pixels / 127.5) - 1.0  # map [0,255] → [-1,1]
    return scaled, label

def resize(image, label):
    """Resize `image` to size [48, 64] and pass `label` through unchanged.

    The target size matches the (48, 64, 3) input declared by the
    discriminator later in the notebook.
    """
    return tf.image.resize(image, [48, 64]), label
# Per-example preprocessing: dict -> (scaled image, label), then resize.
tf_dataset = (
    tf_dataset
    .map(dict_to_tuple)
    .map(resize)
)

In [None]:
import matplotlib.pyplot as plt

def show_image(numpy_image):
  """Display a single image with matplotlib, with the axes hidden.

  Args:
    numpy_image: image array accepted by `plt.imshow`. The caller in this
      notebook passes float pixels rescaled from [-1, 1] to [0, 1].
  """
  plt.imshow(numpy_image)
  plt.axis("off")
  plt.show()

# Preview five preprocessed samples (after skipping the first 20). Pixels are
# mapped from [-1, 1] back to [0, 1] so matplotlib can display them.
for sample_image, _sample_label in tf_dataset.skip(20).take(5):
  show_image((sample_image.numpy() + 1) / 2)

In [None]:
from tensorflow.keras import layers
import math

# Number of images per training batch.
batch_size = 64

# Training pipeline: re-shuffle with a buffer of one batch, keep roughly
# 10,000 images (rounded up to a whole number of batches), batch them and
# prefetch one batch ahead.
# `drop_remainder=True` guarantees every batch holds exactly `batch_size`
# images. The training loop builds label tensors for exactly `batch_size`
# real images, so a short final batch (possible if the source dataset has
# fewer images than requested) would otherwise break training.
final_dataset = (
    tf_dataset
    .shuffle(batch_size)
    .take(batch_size * math.ceil(10000 / batch_size))
    .batch(batch_size, drop_remainder=True)
    .prefetch(1)
)

# Length of each random noise vector fed to the generator.
codings_size = 256

# Generator: noise vector -> image with tanh-scaled pixels in [-1, 1].
# Layers are appended one by one, in the order data flows through them.
generator = tf.keras.Sequential()

# Project the noise and fold it into a 6x8 feature map with 512 channels.
generator.add(layers.Dense(6 * 8 * 512))
generator.add(layers.Reshape([6, 8, 512]))

# Three stride-2 transposed convolutions, each preceded by batch
# normalization. With "same" padding each one doubles height and width
# (6x8 -> 12x16 -> 24x32 -> 48x64), ending with 3 channels to match the
# discriminator's (48, 64, 3) input.
generator.add(layers.BatchNormalization())
generator.add(layers.Conv2DTranspose(128, kernel_size=7, strides=2,
                                     padding="same", activation="relu"))
generator.add(layers.BatchNormalization())
generator.add(layers.Conv2DTranspose(64, kernel_size=5, strides=2,
                                     padding="same", activation="relu"))
generator.add(layers.BatchNormalization())
generator.add(layers.Conv2DTranspose(3, kernel_size=5, strides=2,
                                     padding="same", activation="tanh"))

# Discriminator: (48, 64, 3) image -> single sigmoid score, trained against
# targets near 1 for real images and near 0 for generated ones.
discriminator = tf.keras.Sequential()
discriminator.add(layers.Input(shape=(48, 64, 3)))

# Two stride-2 convolutions with leaky-ReLU activations, each followed by
# dropout.
discriminator.add(layers.Conv2D(64, kernel_size=5, strides=2, padding="same",
                                activation=layers.LeakyReLU(0.2)))
discriminator.add(layers.Dropout(0.3))
discriminator.add(layers.Conv2D(128, kernel_size=5, strides=2, padding="same",
                                activation=layers.LeakyReLU(0.2)))
discriminator.add(layers.Dropout(0.3))

# Classification head.
discriminator.add(layers.Flatten())
discriminator.add(layers.Dense(256, activation="relu"))
discriminator.add(layers.Dense(1, activation="sigmoid"))

# Chain generator and discriminator into one model: noise in, real/fake
# score out. This combined model is what the generator phase trains.
gan = tf.keras.Sequential([generator, discriminator])

# DEV-NOTE
# RMSprop seems to give better results up to around 50 epochs.
# It would be worthwhile to test the Adam optimizer at a 0.002 learning rate;
# unfortunately I don't have access to enough resources to validate this theory.

# Order matters: the discriminator is compiled first, while still trainable,
# for its own training step. It is then frozen before the combined model is
# compiled, so that training `gan` is meant to update only the generator.
# NOTE(review): this relies on `trainable` being captured at compile time —
# confirm that holds for the Keras version in use.
discriminator.compile(loss="binary_crossentropy", optimizer="rmsprop")
discriminator.trainable = False
gan.compile(loss="binary_crossentropy", optimizer="rmsprop")

In [None]:
import matplotlib.pyplot as plt
import tensorflow as tf
import time

# Print the current Unix timestamp (seconds since the epoch) as a crude
# marker of when this cell started running.
print(time.time())

def benchmark_discriminator(take_number=1):
    """Print discriminator scores for fake and real images and plot some fakes.

    For each of the first `take_number` batches of the notebook-level
    `final_dataset`, this generates `batch_size` images from random noise
    with the notebook-level `generator`, scores them together with the real
    batch using the notebook-level `discriminator`, prints the first five
    scores of each group, and shows the first five generated images.

    Args:
        take_number: number of batches from `final_dataset` to evaluate.
    """
    n_images_to_show = 5
    for real_images, _ in final_dataset.take(take_number):
        latent_vectors = tf.random.normal(shape=[batch_size, codings_size])
        fake_images = generator(latent_vectors)

        # Fakes are concatenated first, so prediction rows [0, batch_size)
        # belong to generated images and the rows after that to real ones.
        mixed_batch = tf.concat([fake_images, real_images], axis=0)
        scores = discriminator.predict(mixed_batch)

        fake_scores = scores[:5].flatten()
        real_scores = scores[batch_size:batch_size+5].flatten()
        print("Discriminator predictions on fake images (first 5):", fake_scores)
        print("Discriminator predictions on real images (first 5):", real_scores)

        # One row of generated samples, rescaled from [-1, 1] to [0, 1].
        plt.figure(figsize=(15, 3))
        for column in range(n_images_to_show):
            plt.subplot(1, n_images_to_show, column + 1)
            plt.imshow((fake_images[column].numpy() + 1) / 2)
            plt.axis("off")
        plt.show()


# Baseline check on a single batch, run in the cell that precedes training.
benchmark_discriminator()

In [None]:
import time

# Print the current Unix timestamp (seconds since the epoch) as a crude
# marker of when this cell started running.
print(time.time())

# Number of full passes over the training dataset.
epochs = 20

# DEV-NOTE
# this should later be rewritten as a better training loop
# based on gradient tape

# this training loop is heavily inspired by the example in:
# https://github.com/ageron/handson-ml3
def train_gan(gan, dataset, batch_size, codings_size, n_epochs):
    """Train a GAN by alternating discriminator and generator updates.

    Each step first trains the discriminator on `batch_size` generated images
    plus one batch of real images, then trains the generator through the
    combined model. After every epoch the elapsed time is printed and
    `benchmark_discriminator()` is called; note that this helper reads the
    notebook-level `generator` / `discriminator`, not the ones unpacked here.

    Args:
        gan: model whose two layers are, in order, the generator and the
            discriminator.
        dataset: iterable yielding `(image_batch, label)` pairs; the label is
            ignored.
        batch_size: number of noise vectors (hence generated images) per step.
        codings_size: length of each noise vector.
        n_epochs: number of passes over `dataset`.
    """
    generator, discriminator = gan.layers
    print("training started.")
    # The generator's targets never change: it is rewarded when its fakes are
    # scored as real, so build them once instead of on every step.
    y2 = tf.constant([[1.]] * batch_size)
    for epoch in range(n_epochs):
        start = time.time()
        for X_batch, _ in dataset:
            # The real batch may be shorter than `batch_size` (e.g. the last
            # batch of a dataset built without drop_remainder), so size the
            # real-image labels from the batch actually received.
            n_real = X_batch.shape[0]

            # phase 1 - training the discriminator
            noise = tf.random.normal(shape=[batch_size, codings_size])
            generated_images = generator(noise)
            X_fake_and_real = tf.concat([generated_images, X_batch], axis=0)
            # Smoothed targets: 0.1 for generated images, 0.9 for real ones.
            y1 = tf.constant([[0.1]] * batch_size + [[0.9]] * n_real)

            discriminator.train_on_batch(X_fake_and_real, y1)

            # phase 2 - training the generator
            noise = tf.random.normal(shape=[batch_size, codings_size])
            gan.train_on_batch(noise, y2)

        print(f"Epoch {epoch + 1}/{n_epochs} - time: {time.time()-start}")  # extra code
        # extra code — plot images during training
        benchmark_discriminator()

# Run the training loop with the notebook-level models and settings.
train_gan(gan, final_dataset, batch_size, codings_size, epochs)

In [None]:
# Persist both networks to .keras files in the current working directory.
# NOTE(review): the discriminator has had `trainable = False` set since the
# GAN was compiled, so it is presumably saved in that frozen state — confirm
# before resuming training from these files.
generator.save("generator.gencat.base-mk1.keras")
discriminator.save("discriminator.gencat.base-mk1.keras")

In [None]:
# Reload previously saved models from .keras files and rebuild the combined GAN.
# NOTE(review): these file names ("catgen.dev-mk1") differ from the ones the
# save cell writes ("gencat.base-mk1") — confirm which checkpoint is intended.

generator = tf.keras.models.load_model('generator.catgen.dev-mk1.keras')
discriminator = tf.keras.models.load_model('discriminator.catgen.dev-mk1.keras')

# Mirror the model-definition cell: compile the discriminator while it is
# trainable, then freeze it before compiling the combined model. The
# discriminator is frozen before it is saved in this notebook, so without
# this step a reloaded discriminator may come back frozen and stop learning
# when training resumes.
# Trade-off: recompiling starts the discriminator from a fresh optimizer.
discriminator.trainable = True
discriminator.compile(loss="binary_crossentropy", optimizer="rmsprop")
discriminator.trainable = False

gan = tf.keras.Sequential([generator, discriminator])
gan.compile(loss="binary_crossentropy", optimizer="rmsprop")

In [None]:
import pygame

# Play a notification sound; placed last so it signals that the notebook has
# finished running.
pygame.mixer.init()  # Initialize mixer
pygame.mixer.music.load("finished.mp3")  # Can be mp3 or wav
pygame.mixer.music.play()

# Keep the cell alive until playback ends, polling about 10 times per second.
# A single Clock is reused rather than allocating a new one on every poll.
playback_clock = pygame.time.Clock()
while pygame.mixer.music.get_busy():
    playback_clock.tick(10)