<a href="https://colab.research.google.com/github/Thesis-g7/autoencoder_speech_augmentation/blob/main/ae.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
!pip install tensorflow



In [None]:
from tensorflow.keras import Model
from tensorflow.keras.layers import Input, Conv2D, ReLU, BatchNormalization, Flatten, Dense, Reshape, Conv2DTranspose, Activation
from tensorflow.keras import backend as K
from tensorflow.keras.optimizers import Adam
from tensorflow.keras.losses import MeanSquaredError
from tensorflow.keras.datasets import mnist
import numpy as np
import os
import pickle

In [None]:
class AutoEncoder:
  """
  Deep convolutional autoencoder with mirrored encoder and decoder components.

  The encoder is a stack of Conv2D + ReLU + BatchNormalization blocks followed
  by a Dense bottleneck. The decoder maps the bottleneck back through a Dense
  layer, a Reshape, Conv2DTranspose blocks (in reverse order of the encoder
  blocks) and a final sigmoid-activated Conv2DTranspose.

  Args:
    input_shape: shape of one input sample, channels last (e.g. (28, 28, 1)).
    conv_filters: number of filters for each encoder conv block.
    conv_kernals: kernel size for each encoder conv block.
    conv_strides: stride for each encoder conv block.
    latent_space_dis: number of units in the bottleneck (latent dimension).

  `conv_filters`, `conv_kernals` and `conv_strides` are indexed in lockstep,
  one entry per conv block; the number of blocks is `len(conv_filters)`.
  """

  def __init__(self,
               input_shape,
               conv_filters,
               conv_kernals,
               conv_strides,
               latent_space_dis):
    self.input_shape = input_shape
    self.conv_filters = conv_filters
    self.conv_kernals = conv_kernals
    self.conv_strides = conv_strides
    self.latent_space_dis = latent_space_dis

    # Sub-models; populated by _build().
    self.encoder = None
    self.decoder = None
    self.model = None
    self._model_input = None

    # Shape of the last conv feature map (without the batch axis); the decoder
    # needs it to undo the Flatten performed before the bottleneck.
    self._shape_before_bottleneck = None

    self._num_conv_layers = len(conv_filters)
    self._build()

  def summary(self):
    """Print the Keras summaries of the encoder, decoder and full model."""
    self.encoder.summary()
    self.decoder.summary()
    self.model.summary()

  def compile(self, learning_rate=0.0001):
    """Compile the full model with an Adam optimizer and mean squared error loss."""
    optimizer = Adam(learning_rate=learning_rate)
    mse_loss = MeanSquaredError()
    self.model.compile(optimizer=optimizer, loss=mse_loss)

  def train(self, x_train, batch_size, num_epochs):
    """Fit the model to reconstruct `x_train` (used as both input and target).

    Returns:
      Whatever `Model.fit` returns (the Keras training history).
    """
    return self.model.fit(x_train,
                          x_train,
                          batch_size=batch_size,
                          epochs=num_epochs,
                          shuffle=True)

  def save(self, save_folder="."):
    """Write constructor parameters and model weights into `save_folder`."""
    self._create_folder_if_it_doesnt_exist(save_folder)
    self._save_parameters(save_folder)
    self._save_weights(save_folder)

  def load_weights(self, weights_path):
    """Load weights for the full model from `weights_path`."""
    self.model.load_weights(weights_path)

  @classmethod
  def load(cls, save_folder="."):
    """Rebuild an instance from a folder previously written by `save`.

    Reads `parameters.pkl` to re-create the architecture, then loads
    `weights.h5` into it. Instantiates `cls`, so subclasses get an instance
    of their own type.
    """
    parameters_path = os.path.join(save_folder, "parameters.pkl")
    # SECURITY: pickle.load can execute arbitrary code. Only load folders
    # that come from a trusted source.
    with open(parameters_path, "rb") as f:
      parameters = pickle.load(f)

    weights_path = os.path.join(save_folder, "weights.h5")
    autoencoder = cls(*parameters)
    autoencoder.load_weights(weights_path)
    return autoencoder

  def _create_folder_if_it_doesnt_exist(self, folder):
    """Create `folder` (and missing parents); do nothing if it already exists."""
    # exist_ok avoids the check-then-create race of a separate exists() test.
    os.makedirs(folder, exist_ok=True)

  def _save_parameters(self, save_folder):
    """Pickle the constructor arguments, in positional order, to parameters.pkl."""
    params = [
        self.input_shape,
        self.conv_filters,
        self.conv_kernals,
        self.conv_strides,
        self.latent_space_dis,
    ]

    save_path = os.path.join(save_folder, "parameters.pkl")
    with open(save_path, "wb") as f:
      pickle.dump(params, f)

  def _save_weights(self, save_folder):
    """Save the full model's weights to weights.h5 inside `save_folder`."""
    # NOTE(review): newer Keras releases (Keras 3) appear to require weight
    # files to end in ".weights.h5"; the name is kept so previously saved
    # folders still load - confirm against the installed Keras version.
    save_path = os.path.join(save_folder, "weights.h5")
    self.model.save_weights(save_path)

  def _build(self):
    """Build the encoder, the decoder, and the model chaining the two."""
    self._build_encoder()
    self._build_decoder()
    self._build_autoencoder()

  def _build_autoencoder(self):
    """Chain encoder and decoder into a single end-to-end model."""
    model_input = self._model_input
    model_output = self.decoder(self.encoder(model_input))
    self.model = Model(model_input, model_output, name="autoencoder")

  def _build_encoder(self):
    """Assemble the encoder: input -> conv blocks -> bottleneck."""
    encoder_input = self._add_encoder_input()
    self._model_input = encoder_input
    conv_layers = self._add_conv_layers(encoder_input)
    bottle_neck = self._add_bottleneck(conv_layers)

    self.encoder = Model(encoder_input, bottle_neck, name="encoder")

  def _add_encoder_input(self):
    """Return the encoder's Input layer, shaped like one input sample."""
    return Input(shape=self.input_shape, name="encoder_input")

  def _add_conv_layers(self, encoder_input):
    """Apply all encoder conv blocks, in order, to `encoder_input`."""
    x = encoder_input
    for layer_index in range(self._num_conv_layers):
      x = self._add_conv_layer(layer_index, x)
    return x

  def _add_conv_layer(self, layer_index, x):
    """Add one encoder block to the graph: Conv2D + ReLU + BatchNormalization.

    Layer names are 1-based (`layer_index + 1`).
    """
    layer_number = layer_index + 1
    conv_layer = Conv2D(
        filters=self.conv_filters[layer_index],
        kernel_size=self.conv_kernals[layer_index],
        strides=self.conv_strides[layer_index],
        padding="same",
        name=f"encoder_conv_layer_{layer_number}"
    )
    x = conv_layer(x)
    x = ReLU(name=f"encoder_relu_{layer_number}")(x)
    x = BatchNormalization(name=f"encoder_BN_{layer_number}")(x)
    return x

  def _add_bottleneck(self, x):
    """Flatten the feature map and add the bottleneck Dense layer."""
    # Remember the feature-map shape (batch axis dropped) for the decoder.
    self._shape_before_bottleneck = K.int_shape(x)[1:]
    x = Flatten()(x)
    x = Dense(self.latent_space_dis, name="encoder_output")(x)
    return x

  def _build_decoder(self):
    """Assemble the decoder: input -> dense -> reshape -> transposed convs -> output."""
    decoder_input = self._add_decoder_input()
    dense_layer = self._add_dense_layer(decoder_input)
    reshape_layer = self._add_reshape_layer(dense_layer)
    conv_transpose_layers = self._add_conv_transpose_layers(reshape_layer)
    decoder_output = self._add_decoder_output(conv_transpose_layers)

    self.decoder = Model(decoder_input, decoder_output, name="decoder")

  def _add_decoder_input(self):
    """Return the decoder's Input layer, sized to the latent dimension."""
    # Pass a 1-tuple rather than a bare int: a tuple is accepted by every
    # Keras version, whereas newer releases reject a scalar shape.
    return Input(shape=(self.latent_space_dis,), name="decoder_input")

  def _add_dense_layer(self, decoder_input):
    """Expand the latent vector to as many units as the flattened feature map."""
    # e.g. (4, 3, 2) -> 24; cast so Dense receives a plain Python int.
    num_neurons = int(np.prod(self._shape_before_bottleneck))
    dense_layer = Dense(num_neurons, name="decoder_dense")(decoder_input)
    return dense_layer

  def _add_reshape_layer(self, dense_layer):
    """Reshape the dense output back to the pre-bottleneck feature-map shape."""
    return Reshape(self._shape_before_bottleneck)(dense_layer)

  def _add_conv_transpose_layers(self, x):
    """Add the decoder's Conv2DTranspose blocks.

    Walks the encoder layer indices in reverse and stops before index 0;
    the block mirroring the first encoder layer is added separately by
    `_add_decoder_output`.
    """
    for layer_index in reversed(range(1, self._num_conv_layers)):
      x = self._add_conv_transpose_layer(layer_index, x)

    return x

  def _add_conv_transpose_layer(self, layer_index, x):
    """Add one decoder block: Conv2DTranspose + ReLU + BatchNormalization.

    Hyperparameters come from encoder layer `layer_index`; the layer name
    counts upward from 1 in decoder order.
    """
    layer_num = self._num_conv_layers - layer_index
    conv_transpose_layer = Conv2DTranspose(
        filters=self.conv_filters[layer_index],
        kernel_size=self.conv_kernals[layer_index],
        strides=self.conv_strides[layer_index],
        padding="same",
        name=f"decoder_conv_trans_layer_{layer_num}"
    )

    x = conv_transpose_layer(x)
    x = ReLU(name=f"decoder_relu_{layer_num}")(x)
    x = BatchNormalization(name=f"decoder_BN_{layer_num}")(x)
    return x

  def _add_decoder_output(self, x):
    """Add the final Conv2DTranspose and a sigmoid activation.

    The number of output channels follows the last entry of `input_shape`
    so the reconstruction matches the input (1 for single-channel input).
    """
    conv_transpose_layer = Conv2DTranspose(
        filters=self.input_shape[-1],
        kernel_size=self.conv_kernals[0],
        strides=self.conv_strides[0],
        padding="same",
        name=f"decoder_conv_trans_layer_{self._num_conv_layers}"
    )
    x = conv_transpose_layer(x)

    output_layer = Activation("sigmoid", name="output_sigmoid_layer")(x)
    return output_layer

In [None]:
# Training hyperparameters passed to train() in the run cell.
LEARNING_RATE = 5e-4  # optimizer learning rate
BATCH_SIZE = 32       # samples per batch
EPOCHS = 20           # number of training epochs

In [None]:
def load_mnist():
  """Load MNIST and prepare the images for a convolutional model.

  Images of both splits are cast to float32, divided by 255 and given a
  trailing channel axis of size 1. Labels are returned unchanged.

  Returns:
    Tuple `(x_train, y_train, x_test, y_test)`.
  """
  def _prepare(images):
    # Scale first, then append the channel dimension.
    scaled = images.astype("float32") / 255
    return scaled.reshape(scaled.shape + (1,))

  (train_images, train_labels), (test_images, test_labels) = mnist.load_data()
  return _prepare(train_images), train_labels, _prepare(test_images), test_labels

In [None]:
def train(x_train, learning_rate, batch_size, epochs):
  """Build, summarize, compile and fit an AutoEncoder for 28x28x1 inputs.

  The architecture is fixed: four conv blocks and a 2-unit bottleneck.

  Args:
    x_train: training samples, used as both input and reconstruction target.
    learning_rate: learning rate handed to `AutoEncoder.compile`.
    batch_size: batch size handed to `AutoEncoder.train`.
    epochs: number of epochs handed to `AutoEncoder.train`.

  Returns:
    The trained AutoEncoder instance.
  """
  architecture = dict(
    input_shape=(28, 28, 1),
    conv_filters=(32, 64, 64, 64),
    conv_kernals=(3, 3, 3, 3),
    conv_strides=(1, 2, 2, 1),
    latent_space_dis=2,
  )
  model = AutoEncoder(**architecture)
  model.summary()
  model.compile(learning_rate)
  model.train(x_train, batch_size, epochs)
  return model

In [None]:
# Load MNIST and keep only the training images; the labels and the test
# split are discarded.
x_train, _, _, _ = load_mnist()
# Train on just the first 500 training images, using the hyperparameter
# constants defined above.
autoencoder = train(x_train[:500], LEARNING_RATE, BATCH_SIZE, EPOCHS)

Model: "encoder"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 encoder_input (InputLayer)  [(None, 28, 28, 1)]       0         
                                                                 
 encoder_conv_layer_1 (Conv  (None, 28, 28, 32)        320       
 2D)                                                             
                                                                 
 encoder_relu_1 (ReLU)       (None, 28, 28, 32)        0         
                                                                 
 encoder_BN_1 (BatchNormali  (None, 28, 28, 32)        128       
 zation)                                                         
                                                                 
 encoder_conv_layer_2 (Conv  (None, 14, 14, 64)        18496     
 2D)                                                             
                                                           

In [None]:
# Save the trained model's parameters and weights into the "model" folder,
# rebuild a second instance from that folder, and print its summaries.
autoencoder.save("model")
autoencoder2 = AutoEncoder.load("model")
autoencoder2.summary()

Model: "encoder"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 encoder_input (InputLayer)  [(None, 28, 28, 1)]       0         
                                                                 
 encoder_conv_layer_1 (Conv  (None, 28, 28, 32)        320       
 2D)                                                             
                                                                 
 encoder_relu_1 (ReLU)       (None, 28, 28, 32)        0         
                                                                 
 encoder_BN_1 (BatchNormali  (None, 28, 28, 32)        128       
 zation)                                                         
                                                                 
 encoder_conv_layer_2 (Conv  (None, 14, 14, 64)        18496     
 2D)                                                             
                                                           