# The Encoder

Reads the file `input.flac` and compresses it.
The result is two files: `weights.h5`, which contains the weights of the decoder model, and `compressed.npy`, which contains the compressed representation.

In [17]:
import tensorflow.keras as keras
import tensorflow.keras.losses as klosses
import tensorflow.keras.layers as layers
import soundfile as sf
import numpy as np
import numpy.random
import matplotlib.pyplot as plt
import random
import itertools
import os

import tensorflow as tf
# Only let TensorFlow's logger report errors.
tf.get_logger().setLevel('ERROR')

# Seed NumPy's global generator and Python's `random` module.
# NOTE(review): TensorFlow's own random generator is not seeded in this cell, so
# weight initialisation may still differ between runs -- confirm whether that is intended.
numpy.random.seed(4)
random.seed(9)

In [11]:
def create_encoder_decoder(data_shape, compression_factor, activation_en='elu', activation_de='elu', optimizer='adam', kernel_width_en=256, kernel_width_de=64, channel_count_en=16, channel_count_de=16, regularizer=None):
  """Builds the encoder and decoder halves of a convolutional autoencoder.

  The encoder starts with an unstrided linear convolution, shrinks the first
  axis with strided convolutions and ends in a linear convolution with 4
  output channels. The decoder applies transposed convolutions with the same
  strides in reverse order and ends in a linear convolution with 1 channel.

  Parameters:
    data_shape: input shape handed to the first encoder layer; data_shape[1]
      is also used as the kernel extent along the second axis.
    compression_factor: passed to compression_exponent() to derive the number
      and the strides of the resampling layers.
    activation_en / activation_de: activation of the strided encoder layers /
      the transposed decoder layers.
    optimizer: accepted for signature compatibility, not used here.
    kernel_width_en / kernel_width_de: kernel extent along the first axis.
    channel_count_en / channel_count_de: filters per hidden layer.
    regularizer: kernel regularizer applied to the hidden encoder layers.

  Returns:
    The tuple (encoder, decoder) of keras.Sequential models.
  """
  layer_count = compression_exponent(compression_factor)
  plain_layers = layer_count // 2
  # An odd exponent is absorbed by one stride-8 layer, an even one by an extra stride-4 layer.
  closing_stride = 8 if layer_count % 2 == 1 else 4
  kernel_en = (kernel_width_en, data_shape[1])
  kernel_de = (kernel_width_de, data_shape[1])

  encoder = keras.Sequential(name = "encoder")
  encoder.add(layers.Conv2D(channel_count_en, kernel_en, activation='linear', padding='same', kernel_regularizer=regularizer, input_shape=data_shape))
  for step in [4 for _ in range(plain_layers)] + [closing_stride]:
    encoder.add(layers.Conv2D(channel_count_en, kernel_en, strides = (step, 1), activation=activation_en, padding='same', kernel_regularizer=regularizer))
  encoder.add(layers.Conv2D(4, kernel_en, activation = 'linear', padding='same'))

  decoder = keras.Sequential(name = "decoder")
  # Mirror image of the encoder: the special stride comes first, the stride-4 layers follow.
  for step in [closing_stride] + [4 for _ in range(plain_layers)]:
    decoder.add(layers.Conv2DTranspose(channel_count_de, kernel_de, strides=(step, 1), activation=activation_de, padding='same'))
  decoder.add(layers.Conv2D(1, kernel_de, activation='linear', padding='same'))

  return (encoder, decoder)

def create_model(data_shape, activation_en='elu', activation_de='elu', optimizer='adam', kernel_width_en=256, kernel_width_de=64, channel_count_en=16, channel_count_de=16, compression_factor=8, loss='mse', regularizer = None):
  """Assembles and compiles the full autoencoder.

  The encoder/decoder pair comes from create_encoder_decoder(); both halves
  are chained in one Sequential model that is compiled with the given loss
  and optimizer and tracks the mean squared error as a metric.

  Returns:
    The tuple (model, (encoder, decoder)).
  """
  halves = create_encoder_decoder(
      data_shape,
      compression_factor,
      activation_en,
      activation_de,
      optimizer,
      kernel_width_en,
      kernel_width_de,
      channel_count_en,
      channel_count_de,
      regularizer)
  encoder, decoder = halves[0], halves[1]

  autoencoder = keras.Sequential(name = "autoencoder")
  for half in (encoder, decoder):
    autoencoder.add(half)

  tracked_metrics = [keras.metrics.MeanSquaredError()]
  autoencoder.compile(loss=loss, optimizer=optimizer, metrics=tracked_metrics)
  return (autoencoder, halves)

In [12]:
def normalize(data):
  """Rescales the data linearly to the range [0, 1].

  Parameters:
    data: array of samples (anything offering .min() / .max() and arithmetic).

  Returns:
    The tuple (normalized, MIN, DIV) where normalized == (data - MIN) / DIV.
    Passing this tuple to denormalize() restores the original values.

  A constant input has a value range of zero; dividing by it would fill the
  result with NaN. In that case the divisor falls back to 1, so the
  normalized data is all zeros and denormalize() still restores the input.
  """
  MIN = data.min()
  data = data - MIN
  DIV = data.max()
  if DIV == 0:
    # DIV is exactly zero here, so adding one yields 1 in DIV's own numeric type.
    DIV = DIV + 1
  data = data / DIV
  return (data, MIN, DIV)

def denormalize(data_tuple):
  """Undoes normalize().

  Parameters:
    data_tuple: the triple (data, MIN, DIV) in the layout returned by normalize().

  Returns:
    data * DIV + MIN
  """
  scaled, offset, span = data_tuple
  stretched = scaled * span
  return stretched + offset

def compression_exponent(factor):
  """Returns the smallest integer n with 2^n >= factor.

  Parameters:
    factor: positive, finite number (or array of such numbers).

  Returns:
    ceil(log2(factor)) converted with np.int_.

  Raises:
    ValueError: if factor is zero, negative, infinite or NaN. The logarithm
      of such a value is infinite or undefined and cannot be converted to a
      meaningful integer.
  """
  value = np.float64(factor)
  if not np.all(np.isfinite(value) & (value > 0)):
    raise ValueError("compression factor must be a positive finite number, got %r" % (factor,))
  return np.int_(np.ceil(np.log2(value)))


In [13]:

def generate_data_for_training(song_data, compression_factor, window_width, stride, batch_size=8):
  """Creates an endless generator of autoencoder training batches.

  window_width is first rounded up to a multiple of
  2 ** (compression_exponent(compression_factor) + 2). Windows of that width
  start every `stride` rows of song_data; a window that runs past the end of
  the data is filled up with zero rows. Each batch stacks up to batch_size
  windows into an array of shape (batch, window_width, channels, 1) and is
  yielded as (batch, batch) because the input is also the training target.
  After the last window the generator starts over with the first one.

  Returns:
    The tuple (generator, steps_per_epoch), where steps_per_epoch is the
    number of batches needed to see every window once.
  """
  alignment = 2 ** (compression_exponent(compression_factor) + 2)
  # Round the window width up to the next multiple of the alignment.
  window_width = window_width + (-window_width) % alignment
  window_starts = list(range(0, max(1, song_data.shape[0] - window_width), stride))

  def padded_window(start):
    # Cut one window and append zero rows if the data ends early.
    segment = song_data[start : start + window_width]
    filler = np.zeros((window_width - segment.shape[0], segment.shape[1]))
    return np.concatenate([segment, filler])

  def generator():
    while True:
      for first in range(0, len(window_starts), batch_size):
        chosen = window_starts[first : first + batch_size]
        stacked = np.concatenate([
            padded_window(start).reshape((1, window_width, song_data.shape[1], 1))
            for start in chosen])
        yield (stacked, stacked)

  steps_per_epoch = int(np.ceil(len(window_starts) / batch_size))
  return (generator(), steps_per_epoch)
  
def transform_data_for_model(song_data, compression_factor):
  """Brings the data into a shape the model accepts.

  Zero rows are appended until the row count is a multiple of
  2 ** (compression_exponent(compression_factor) + 2); the result is then
  reshaped to (1, rows, columns, 1).

  Returns:
    The tuple (model_input, padding_size) where padding_size is the number
    of zero rows that were appended.
  """
  alignment = 2 ** (compression_exponent(compression_factor) + 2)
  padding_size = -song_data.shape[0] % alignment
  filler = np.zeros((padding_size, song_data.shape[1]))
  padded = np.concatenate([song_data, filler])
  model_input = padded.reshape((1,) + padded.shape + (1,))
  return (model_input, padding_size)

def transform_data_from_model(model_data, padding):
  """Undoes transform_data_for_model().

  Takes the first entry of the batch, drops the last `padding` rows and
  returns the remainder as a 2-D array of shape (rows, columns).
  """
  kept_rows = model_data.shape[1] - padding
  trimmed = model_data[0][:kept_rows]
  return trimmed.reshape(trimmed.shape[:2])

def predict(data, model, compression_factor, overlap = 2**16, segment_size = 2**18):
  """Runs the data through the model segment by segment and stitches the results.

  The data is cut into segments of segment_size rows. Every segment is fed to
  the model together with up to `overlap` rows of context on each side; the
  context rows are removed from the model output again before the pieces are
  concatenated, so the result has exactly as many rows as `data`.

  Parameters:
    data: 2-D array (rows, columns).
    model: object with a predict() method that maps the array produced by
      transform_data_for_model() to an array of the same layout.
    compression_factor: forwarded to transform_data_for_model().
    overlap: number of context rows added on each side of a segment.
    segment_size: number of rows contributed by each segment.

  Returns:
    2-D array with the model output for every row of data.

  Raises:
    ValueError: from np.concatenate when data has no rows.
  """
  row_count = data.shape[0]
  pieces = []
  for start in range(0, row_count, segment_size):
    context_start = max(0, start - overlap)
    context_end = min(start + overlap + segment_size, row_count)
    model_input, padding = transform_data_for_model(data[context_start:context_end], compression_factor)
    restored = transform_data_from_model(model.predict(model_input), padding)
    # Number of leading context rows actually present in this segment. Slicing
    # relative to it (instead of using negative indices) stays correct for a
    # single segment, for overlap == 0 and when the trailing context is cut
    # short by the end of the data.
    lead = start - context_start
    pieces.append(restored[lead : lead + segment_size])
  return np.concatenate(pieces)

def evaluate(data, model, compression_factor, overlap = 2**16, segment_size = 2**18):
  """Scores the model with the mean squared error between its prediction and the data.

  The prediction comes from predict() with the same arguments; the summed
  squared difference is divided by the number of elements in data.
  """
  reconstruction = predict(data, model, compression_factor, overlap, segment_size)
  squared_error = np.square(reconstruction - data)
  return squared_error.sum() / data.size

def compress(data, compressor, compression_factor, overlap = 2 ** 16, segment_size = 2 ** 18):
  """Encodes the data segment by segment.

  The data is cut into segments of segment_size rows, each extended by up to
  `overlap` rows of context on both sides. Every segment is prepared with
  transform_data_for_model() and passed to compressor.predict().

  Returns:
    A list with one [code, padding] entry per segment, where padding is the
    np.int32 number of zero rows transform_data_for_model() appended.
  """
  row_count = data.shape[0]
  prepared = []
  for start in range(0, row_count, segment_size):
    lower = max(0, start - overlap)
    upper = min(start + overlap + segment_size, row_count)
    prepared.append(transform_data_for_model(data[lower:upper], compression_factor))

  encoded = []
  for model_input, padding in prepared:
    encoded.append([compressor.predict(model_input), np.int32(padding)])
  return encoded

def decompress(data, decompressor, compression_factor, overlap = 2 ** 16, segment_size = 2 ** 18):
  """Decodes the output of compress() and stitches the segments back together.

  Parameters:
    data: sequence of [code, padding] entries in the order compress() produced them.
    decompressor: object with a predict() method that turns a code into an
      array laid out like the output of transform_data_for_model().
    compression_factor: accepted for signature symmetry with compress(), not used here.
    overlap, segment_size: must match the values used for compress(); they
      determine which rows of every decoded segment are context rows.

  Returns:
    2-D array with the decoded rows of all segments.

  Raises:
    ValueError: from np.concatenate when data contains no segments.
  """
  pieces = []
  for index, entry in enumerate(data):
    restored = transform_data_from_model(decompressor.predict(entry[0]), entry[1])
    # compress() starts segment `index` at row index * segment_size and puts at
    # most `overlap` context rows in front of it. Slicing relative to that
    # lead (instead of using negative indices) stays correct for a single
    # segment, for overlap == 0 and when the trailing context was cut short
    # by the end of the data.
    lead = min(index * segment_size, overlap)
    pieces.append(restored[lead : lead + segment_size])
  return np.concatenate(pieces)

In [14]:
# Load the audio to compress. always_2d=True keeps the (frames, channels)
# layout even for mono files; the helper functions index song_data.shape[1]
# and would fail on the 1-D array that a mono file yields otherwise.
song_data, samplerate = sf.read("input.flac", always_2d=True)
# MIN and DIV are stored with the compressed data so the scaling can be undone later.
(normalized_data, MIN, DIV) = normalize(song_data)


NameError: name 'PATH_PREFIX' is not defined

In [15]:
# Compression factor handed to create_model() and reused by the training,
# compression and padding helpers in the cells below.
compression_factor = 4

# Hyper-parameters of the autoencoder. They replace the defaults of
# create_model() (e.g. 'elu' activations, encoder kernel width 256, 'mse' loss).
activation_en = 'selu'
activation_de = 'selu'
channel_size_en = 16
channel_size_de = 16
kernel_size_en = 64
kernel_size_de = 64
optimizer = 'adam'
loss = 'mae'


# Input shape: None leaves the length of the first axis open, the second axis
# is the number of columns of the normalized audio, the last axis has size 1.
# enc_dec holds the (encoder, decoder) pair that `model` is built from.
(model, enc_dec) = create_model((None, normalized_data.shape[1], 1),
                            activation_en = activation_en,
                            activation_de = activation_de,
                            optimizer = optimizer,
                            kernel_width_en = kernel_size_en,
                            kernel_width_de = kernel_size_de,
                            channel_count_en = channel_size_en,
                            channel_count_de = channel_size_de,
                            compression_factor = compression_factor,
                            loss = loss)
model.summary()

NameError: name 'normalized_data' is not defined

In [16]:
# Two training stages on the same model: first on windows of 2 ** 18 rows
# (stride 2 ** 16), then on windows of 2 ** 20 rows (stride 2 ** 18), both with
# batches of 4 windows.
data_generator_small, steps_per_epoch_small = generate_data_for_training(normalized_data, compression_factor, 2 ** 18, 2 ** 16, 4)
data_generator_big, steps_per_epoch_big = generate_data_for_training(normalized_data, compression_factor, 2 ** 20, 2 ** 18, 4)

# Model.fit accepts Python generators directly; Model.fit_generator is
# deprecated and no longer available in current Keras releases. The generators
# never terminate, so steps_per_epoch defines the length of an epoch.
history_small = model.fit(data_generator_small, steps_per_epoch = steps_per_epoch_small, epochs = 16)
history_big = model.fit(data_generator_big, steps_per_epoch = steps_per_epoch_big, epochs = 48)

NameError: name 'normalized_data' is not defined

In [None]:
# enc_dec is the (encoder, decoder) pair; only the decoder's weights are written to disk.
enc_dec[1].save_weights('weights.h5')

In [None]:
# Encode the normalized audio with the encoder half (enc_dec[0]);
# the result is a list of [code, padding] entries, one per segment.
compressed = compress(normalized_data, enc_dec[0], compression_factor)

In [None]:
# Bundle the codes with the values needed to undo the normalization and to
# write the audio back out. The entries are of different kinds (a nested list
# and three scalars), so the array must be created with dtype=object: without
# it NumPy >= 1.24 raises a ValueError for such ragged input.
saveable_compressed = np.asarray([compressed, MIN, DIV, samplerate], dtype=object)
# Object arrays are stored via pickle; reading the file back requires
# np.load(..., allow_pickle=True).
np.save('compressed.npy', saveable_compressed)