<a href="https://colab.research.google.com/github/HaikChaang/AudioGAN/blob/main/AudioGAN.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
# # 1. Import Dependencies and Data
# Bringing in tensorflow
import tensorflow as tf
# Bringing in matplotlib for viz stuff
from matplotlib import pyplot as plt
# Do some data transformation
import numpy as np

In [None]:
# Mount Google Drive at /content/drive; the dataset and saved-model paths used
# later in this notebook all live under that mount point.
from google.colab import drive
drive.mount('/content/drive')

Mounted at /content/drive


In [None]:
# Turn on memory growth for every GPU TensorFlow can see.
# If no GPU is present the list is empty and the loop is a no-op.
gpus = tf.config.experimental.list_physical_devices('GPU')
for gpu in gpus:
    tf.config.experimental.set_memory_growth(gpu, True)

In [None]:
# Folder of spectrogram images on the mounted Drive; passed to
# image_dataset_from_directory when the dataset is built.
data_dir = '/content/drive/MyDrive/AudioGAN/Spectrogram'

In [None]:
# # 2. Viz Data and Build Dataset
# Use the tensorflow datasets api to bring in the data source
#ds = tf.keras.utils.image_dataset_from_directory(data_dir,image_size=(1868, 1025),label_mode = None, color_mode = 'grayscale', batch_size=4)
#plt.imshow(ds.as_numpy_iterator().next()[0])
#plt.show()

In [None]:
# Setup connection aka iterator
#dataiterator = ds.as_numpy_iterator()

# Getting data out of the pipeline
#dataiterator.next()

# Setup the subplot formatting
#fig, ax = plt.subplots(ncols=4, figsize=(20,20))
# Loop four times and get images
#for idx in range(4):
    # Grab an image and label
    #sample = dataiterator.next()
    # Plot the image using a specific subplot
    #ax[idx].imshow(sample[idx])
#plt.show()


In [None]:
# Scale and return images only
def scale_images(data):
    """Divide pixel values by 255, mapping the 0-255 range onto 0-1.

    Args:
        data: A batch of images (anything that supports division by a scalar).

    Returns:
        The same data with every value divided by 255.
    """
    return data / 255


In [None]:
# Build the training pipeline: read every image under data_dir as a single
# grayscale channel resized to 600x513, one image per batch and no labels,
# then rescale the pixel values with scale_images.
ds = tf.keras.utils.image_dataset_from_directory(
    data_dir,
    label_mode=None,
    color_mode='grayscale',
    batch_size=1,
    image_size=(600, 513),
).map(scale_images)
#ds.as_numpy_iterator().next().shape

Found 733 files belonging to 1 classes.


In [None]:
# # 3. Build Neural Network

# ### 3.1 Import Modelling Components

# Bring in the sequential api for the generator and discriminator
from tensorflow.keras.models import Sequential
# Bring in the layers for the neural network
from tensorflow.keras.layers import Conv2D, Dense, Flatten, Reshape, LeakyReLU, Dropout, UpSampling2D

In [None]:
# ### 3.2 Build Generator

def build_generator():
    """Assemble the generator network.

    Maps a 64-value latent vector to a single-channel 600x513 image whose
    pixels lie in [0, 1] (sigmoid output), matching the discriminator's
    (600, 513, 1) input.

    Returns:
        An uncompiled ``Sequential`` model named ``'Generator'``.
    """
    layer_stack = [
        # Project the latent vector and reshape it into a 50x57 map with 64 channels
        Dense(50*57*64, input_dim=64),
        LeakyReLU(0.2),
        Reshape((50, 57, 64)),

        # Upsampling block 1: 50x57 -> 300x171
        UpSampling2D((6, 3)),
        Conv2D(32, 5, padding='same'),
        LeakyReLU(0.2),

        # Upsampling block 2: 300x171 -> 600x513
        UpSampling2D((2, 3)),
        Conv2D(64, 5, padding='same'),
        LeakyReLU(0.2),

        # Convolutional block 1 (spatial size preserved by 'same' padding)
        Conv2D(128, 5, padding='same'),
        LeakyReLU(0.2),

        # Convolutional block 2
        Conv2D(128, 5, padding='same'),
        LeakyReLU(0.2),

        # Collapse to one channel; sigmoid keeps pixel values in [0, 1]
        Conv2D(1, 5, activation='sigmoid', padding='same'),
    ]

    model = Sequential(name='Generator')
    for layer in layer_stack:
        model.add(layer)
    return model

# Instantiate the generator and print its layer-by-layer summary
generator = build_generator()
generator.summary()

#img = generator.predict(np.random.randn(4,16,1))
# Generate new spectrograms
#images = generator.predict(np.random.randn(4,64,1))
# Setup the subplot formatting
#fig, ax = plt.subplots(ncols=4, figsize=(20,20))
# Loop four times and get images
#for i,img in enumerate(images):
    #Plot the image using a specific subplot
    #ax[i].imshow(img)
#plt.show()

Model: "Generator"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 dense_2 (Dense)             (None, 182400)            11856000  
                                                                 
 leaky_re_lu_9 (LeakyReLU)   (None, 182400)            0         
                                                                 
 reshape_1 (Reshape)         (None, 50, 57, 64)        0         
                                                                 
 up_sampling2d_2 (UpSampling  (None, 300, 171, 64)     0         
 2D)                                                             
                                                                 
 conv2d_9 (Conv2D)           (None, 300, 171, 32)      51232     
                                                                 
 leaky_re_lu_10 (LeakyReLU)  (None, 300, 171, 32)      0         
                                                         

In [None]:
# ### 3.3 Build Discriminator
def build_discriminator():
    """Assemble the discriminator network.

    Takes a (600, 513, 1) image and returns a single sigmoid score. Four
    unpadded 5x5 convolution blocks (8, 16, 32, 32 filters), each followed
    by LeakyReLU(0.2) and Dropout(0.4), feed a flattened dense output.

    Returns:
        An uncompiled ``Sequential`` model named ``'Discriminator'``.
    """
    model = Sequential(name='Discriminator')

    conv_filters = (8, 16, 32, 32)
    for block_index, filters in enumerate(conv_filters):
        if block_index == 0:
            # Only the first layer declares the input shape
            model.add(Conv2D(filters, 5, input_shape=(600, 513, 1)))
        else:
            model.add(Conv2D(filters, 5))
        model.add(LeakyReLU(0.2))
        model.add(Dropout(0.4))

    # Classification head: flatten, regularise, squash to one probability
    model.add(Flatten())
    model.add(Dropout(0.4))
    model.add(Dense(1, activation='sigmoid'))

    return model

In [None]:
# Instantiate the discriminator and print its layer-by-layer summary
discriminator = build_discriminator()
discriminator.summary()
#img = images
#print(img.shape)
#print(discriminator.predict(img))

Model: "Discriminator"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 conv2d_14 (Conv2D)          (None, 596, 509, 8)       208       
                                                                 
 leaky_re_lu_14 (LeakyReLU)  (None, 596, 509, 8)       0         
                                                                 
 dropout_5 (Dropout)         (None, 596, 509, 8)       0         
                                                                 
 conv2d_15 (Conv2D)          (None, 592, 505, 16)      3216      
                                                                 
 leaky_re_lu_15 (LeakyReLU)  (None, 592, 505, 16)      0         
                                                                 
 dropout_6 (Dropout)         (None, 592, 505, 16)      0         
                                                                 
 conv2d_16 (Conv2D)          (None, 588, 501, 32)    

In [None]:
# Resume from a previous run when both saved models exist on Drive.
# On a first run (or a fresh Drive) nothing has been saved yet, so keep the
# freshly built networks instead of crashing in load_model.
import os

_generator_path = '/content/drive/MyDrive/AudioGAN/Generator'
_discriminator_path = '/content/drive/MyDrive/AudioGAN/Discriminator'

# Load both or neither so the two networks always come from the same run.
if os.path.exists(_generator_path) and os.path.exists(_discriminator_path):
    generator = tf.keras.saving.load_model(_generator_path)
    discriminator = tf.keras.saving.load_model(_discriminator_path)
else:
    print('No saved models found; training will start from the freshly built networks.')



In [None]:
# # 4. Construct Training Loop

# ### 4.1 Setup Losses and Optimizers
# Adam is going to be the optimizer for both
from tensorflow.keras.optimizers import Adam
# Binary cross entropy is going to be the loss for both
from tensorflow.keras.losses import BinaryCrossentropy

# Separate Adam optimizers; the generator's learning rate is 10x the discriminator's
g_opt = Adam(learning_rate=0.001)
d_opt = Adam(learning_rate=0.0001)
# Both networks are scored with binary cross-entropy
g_loss = BinaryCrossentropy()
d_loss = BinaryCrossentropy()

In [None]:
# ### 4.2 Build Subclassed Model

# Importing the base model class to subclass our training step
from tensorflow.keras.models import Model

class AudioGAN(Model):
    """Keras model that trains a generator/discriminator pair with a custom step.

    Label convention used by ``train_step``: real images are labelled 0 and
    generated images are labelled 1, so the generator is rewarded when the
    discriminator outputs 0 for its images.
    """

    def __init__(self, generator, discriminator, *args, latent_dim=64, **kwargs):
        """Store the two sub-networks.

        Args:
            generator: Model mapping latent vectors to images.
            discriminator: Model mapping images to a sigmoid score.
            latent_dim: Length of the latent vector fed to the generator
                (keyword-only; defaults to 64).
            *args, **kwargs: Forwarded to ``Model.__init__``.
        """
        # Pass through args and kwargs to base class
        super().__init__(*args, **kwargs)

        # Create attributes for gen and disc
        self.generator = generator
        self.discriminator = discriminator
        self.latent_dim = latent_dim

    def compile(self, g_opt, d_opt, g_loss, d_loss, *args, **kwargs):
        """Attach one optimizer and one loss per sub-network.

        Args:
            g_opt: Optimizer applied to the generator's variables.
            d_opt: Optimizer applied to the discriminator's variables.
            g_loss: Loss callable ``(y_true, y_pred)`` for the generator.
            d_loss: Loss callable ``(y_true, y_pred)`` for the discriminator.
            *args, **kwargs: Forwarded to ``Model.compile``.
        """
        # Compile with base class
        super().compile(*args, **kwargs)

        # Create attributes for losses and optimizers
        self.g_opt = g_opt
        self.d_opt = d_opt
        self.g_loss = g_loss
        self.d_loss = d_loss

    def _sample_latent(self, batch_size):
        """Draw ``batch_size`` latent vectors from a standard normal."""
        return tf.random.normal((batch_size, self.latent_dim))

    def train_step(self, batch):
        """Run one discriminator update followed by one generator update.

        Args:
            batch: A batch of real images.

        Returns:
            Dict with the scalar ``"d_loss"`` and ``"g_loss"`` for this step.
        """
        # Get the data; generate as many fakes as there are real images so the
        # discriminator sees a balanced batch whatever the dataset batch size is
        real_images = batch
        batch_size = tf.shape(real_images)[0]
        fake_images = self.generator(self._sample_latent(batch_size), training=False)

        # Train the discriminator
        with tf.GradientTape() as d_tape:
            # Pass the real and fake images to the discriminator model
            yhat_real = self.discriminator(real_images, training=True)
            yhat_fake = self.discriminator(fake_images, training=True)
            yhat_realfake = tf.concat([yhat_real, yhat_fake], axis=0)

            # Create labels for real (0) and fake (1) images
            y_realfake = tf.concat([tf.zeros_like(yhat_real), tf.ones_like(yhat_fake)], axis=0)

            # Soften the targets: real labels move up into [0, 0.15),
            # fake labels move down into (0.85, 1]
            noise_real = 0.15*tf.random.uniform(tf.shape(yhat_real))
            noise_fake = -0.15*tf.random.uniform(tf.shape(yhat_fake))
            y_realfake += tf.concat([noise_real, noise_fake], axis=0)

            # Calculate loss - BINARYCROSS
            total_d_loss = self.d_loss(y_realfake, yhat_realfake)

        # Apply backpropagation - nn learn
        dgrad = d_tape.gradient(total_d_loss, self.discriminator.trainable_variables)
        self.d_opt.apply_gradients(zip(dgrad, self.discriminator.trainable_variables))

        # Train the generator
        with tf.GradientTape() as g_tape:
            # Generate some new images
            gen_images = self.generator(self._sample_latent(batch_size), training=True)

            # Create the predicted labels
            predicted_labels = self.discriminator(gen_images, training=False)

            # Target is 0 ("real"): the generator improves when the
            # discriminator mistakes its images for real ones
            total_g_loss = self.g_loss(tf.zeros_like(predicted_labels), predicted_labels)

        # Apply backprop
        ggrad = g_tape.gradient(total_g_loss, self.generator.trainable_variables)
        self.g_opt.apply_gradients(zip(ggrad, self.generator.trainable_variables))

        return {"d_loss":total_d_loss, "g_loss":total_g_loss}

In [None]:
# Wrap both networks in the custom training model, then attach the
# per-network optimizers and losses.
gan = AudioGAN(generator=generator, discriminator=discriminator)
gan.compile(g_opt=g_opt, d_opt=d_opt, g_loss=g_loss, d_loss=d_loss)

In [None]:
# ### 4.3 Build Callback

import os
from tensorflow.keras.preprocessing.image import array_to_img
from tensorflow.keras.callbacks import Callback


# In[185]:


class ModelMonitor(Callback):
    """Callback that writes sample generator images at the end of each epoch.

    Args:
        num_img: Number of images to generate per epoch.
        latent_dim: Length of the latent vector fed to the generator.
        save_dir: Directory for the PNG files, e.g.
            '/content/drive/MyDrive/AudioGAN/images'. When ``None`` (the
            default) the callback does nothing, so no files are written.
    """

    def __init__(self, num_img=3, latent_dim=64, save_dir=None):
        # Let the Keras base class initialise its own bookkeeping attributes
        super().__init__()
        self.num_img = num_img
        self.latent_dim = latent_dim
        self.save_dir = save_dir

    def on_epoch_end(self, epoch, logs=None):
        """Generate ``num_img`` samples and save them as PNGs under ``save_dir``."""
        if self.save_dir is None:
            # Nothing to write: skip the (otherwise discarded) forward pass
            return

        # Sample from the same standard-normal distribution used in training
        random_latent_vectors = tf.random.normal((self.num_img, self.latent_dim))
        generated_images = self.model.generator(random_latent_vectors, training=False)
        # Generator output is in [0, 1] (sigmoid); rescale to 0-255 for image export
        generated_images = (generated_images * 255).numpy()

        os.makedirs(self.save_dir, exist_ok=True)
        for i in range(self.num_img):
            img = array_to_img(generated_images[i])
            img.save(os.path.join(self.save_dir, f'generated_img_{epoch}_{i}.png'))


In [None]:
# ### 4.4 Train
# Train one epoch at a time so both networks are checkpointed to Drive after
# every epoch (a disconnected Colab session loses at most one epoch).
# Recommend 2000 epochs
EPOCHS = 2000

monitor = ModelMonitor()
# Per-epoch losses accumulated across the separate fit() calls
loss_history = {'d_loss': [], 'g_loss': []}

for epoch in range(EPOCHS):
    # initial_epoch/epochs make this call run exactly one epoch while giving
    # callbacks the true epoch index instead of 0 every time
    hist = gan.fit(ds, initial_epoch=epoch, epochs=epoch + 1, callbacks=[monitor], verbose=1)
    for key in loss_history:
        loss_history[key].extend(hist.history[key])

    # Checkpoint to the same location the reload cell reads from
    generator.save('/content/drive/MyDrive/AudioGAN/Generator')
    discriminator.save('/content/drive/MyDrive/AudioGAN/Discriminator')

# ### 4.5 Review Performance
#plt.suptitle('Loss')
#plt.plot(loss_history['d_loss'], label='d_loss')
#plt.plot(loss_history['g_loss'], label='g_loss')
#plt.legend()
#plt.show()

  6/733 [..............................] - ETA: 7:11 - d_loss: 0.2667 - g_loss: 2.5175







  6/733 [..............................] - ETA: 7:26 - d_loss: 0.2526 - g_loss: 2.5658







  6/733 [..............................] - ETA: 7:24 - d_loss: 0.2687 - g_loss: 1.7556







  6/733 [..............................] - ETA: 7:25 - d_loss: 0.2557 - g_loss: 2.3006







  6/733 [..............................] - ETA: 7:17 - d_loss: 0.2539 - g_loss: 2.3889







  6/733 [..............................] - ETA: 7:25 - d_loss: 0.3083 - g_loss: 2.2497







  6/733 [..............................] - ETA: 7:17 - d_loss: 0.2985 - g_loss: 2.4305







  6/733 [..............................] - ETA: 7:24 - d_loss: 0.2587 - g_loss: 2.2940







  6/733 [..............................] - ETA: 7:24 - d_loss: 0.3119 - g_loss: 2.5188







  6/733 [..............................] - ETA: 7:25 - d_loss: 0.2433 - g_loss: 2.2425







  6/733 [..............................] - ETA: 7:25 - d_loss: 0.3041 - g_loss: 2.5691







  6/733 [..............................] - ETA: 7:26 - d_loss: 0.2769 - g_loss: 2.6845







  6/733 [..............................] - ETA: 7:18 - d_loss: 0.2553 - g_loss: 2.2194







  6/733 [..............................] - ETA: 7:18 - d_loss: 0.2490 - g_loss: 2.6363







  6/733 [..............................] - ETA: 7:25 - d_loss: 0.3136 - g_loss: 2.5745







  6/733 [..............................] - ETA: 7:27 - d_loss: 0.2603 - g_loss: 2.4644







  6/733 [..............................] - ETA: 7:20 - d_loss: 0.2563 - g_loss: 2.5555







  6/733 [..............................] - ETA: 7:25 - d_loss: 0.2700 - g_loss: 2.5683







  6/733 [..............................] - ETA: 7:25 - d_loss: 0.3089 - g_loss: 2.3984







  6/733 [..............................] - ETA: 7:24 - d_loss: 0.3112 - g_loss: 2.5111







  6/733 [..............................] - ETA: 7:25 - d_loss: 0.2463 - g_loss: 2.5474







  6/733 [..............................] - ETA: 7:31 - d_loss: 0.2669 - g_loss: 2.5532







  6/733 [..............................] - ETA: 7:32 - d_loss: 0.2364 - g_loss: 2.6905







  6/733 [..............................] - ETA: 7:27 - d_loss: 0.2712 - g_loss: 2.6461







  6/733 [..............................] - ETA: 7:26 - d_loss: 0.3045 - g_loss: 2.3737







  6/733 [..............................] - ETA: 7:25 - d_loss: 0.2570 - g_loss: 2.6569





In [None]:
"""
# # 5. Test Out the Generator

# ### 5.1 Generate Images

# In[211]:


generator.load_weights(os.path.join('archive', 'generatormodel.h5'))


# In[228]:


imgs = generator.predict(tf.random.normal((16, 128, 1)))


# In[229]:


fig, ax = plt.subplots(ncols=4, nrows=4, figsize=(10,10))
for r in range(4):
    for c in range(4):
        ax[r][c].imshow(imgs[(r+1)*(c+1)-1])


# ### 5.2 Save the Model

# In[ ]:


generator.save('generator.h5')
discriminator.save('discriminator.h5')

"""



