# Autoencoders

In [None]:
# Autoreloading makes development easier
%load_ext autoreload
%autoreload 2

In [None]:
#!pip install tensorflow[and-cuda]==2.11
#!pip install tensorflow_probability==0.19

In [None]:
# Import libraries
import os
import sys
import numpy as np
import matplotlib.pyplot as plt
import tensorflow as tf
import tensorflow.keras as krs
import tensorflow_probability as tfp
import json
from sklearn.model_selection import train_test_split
from tools.audio_tools import read_audio, write_audio, play_audio
from tools.feature_tools import compute_mels, compute_imels, compute_mfcc, compute_imfcc, load_data, normalize_features, denormalize_features
from tools.constants import npy_classical_path, npy_jazz_path, models_path
from tools.plot_tools import make_figax, plot_history, plot_audio, plot_spectral_feature
from tools.tensorflow_tools import tune_hyperparameters, load_optimal_params

In [None]:
import os
import sys
import numpy as np
import matplotlib.pyplot as plt
from tools.constants import npy_classical_path, npy_jazz_path, models_path
from tools.feature_tools import compute_mels, compute_imels, compute_mfcc, compute_imfcc, load_data, normalize_features, denormalize_features
from sklearn.model_selection import train_test_split
from tools.plot_tools import make_figax, plot_history, plot_audio, plot_spectral_feature

## Data Loading

### Load Audio & Extract Features

In [None]:
# Report how many entries each pre-computed feature directory contains
print(f"{len(os.listdir(npy_classical_path)) = }")
print(f"{len(os.listdir(npy_jazz_path)) = }")

In [None]:
# Load samples from both styles
n_samples = 1000
# Cap the request at the size of the smaller directory so the same count is requested for both styles
n_samples = min(n_samples, len(os.listdir(npy_classical_path)), len(os.listdir(npy_jazz_path)))
print(f"{n_samples = }")
X_classical_raw = load_data(npy_classical_path, n_samples=n_samples)
X_jazz_raw = load_data(npy_jazz_path, n_samples=n_samples)

# Concatenate (train on all samples); classical samples come first, jazz samples second
X_raw = np.concatenate((X_classical_raw, X_jazz_raw))

# Print feature shape
print(f"{X_raw.shape = }")

### Train - Validation - Test Split

In [None]:
# Number of samples (10% of the data) to hold out for testing; validation gets the same count
test_size = round(0.1 * len(X_raw))
val_size = test_size
# Perform split: carve the test set off first, then take the validation set from the remainder
X_raw_train, X_raw_test = train_test_split(X_raw, test_size=test_size, random_state=1234)
X_raw_train, X_raw_val = train_test_split(X_raw_train, test_size=val_size, random_state=1234)
# Verify split shapes
print(f"{X_raw_train.shape = }")
print(f"{X_raw_test.shape = }")
print(f"{X_raw_val.shape = }")

In [None]:
# Shape of a single sample (leading sample axis dropped); used as "feature_shape" in the model parameter dicts
input_shape = X_raw_train.shape[1:]
print(f"{input_shape = }")

### Normalize Features

In [None]:
# Perform normalization
# NOTE(review): presumably the statistics are fitted on the training split only and then
# applied to the validation/test splits -- confirm in tools.feature_tools.normalize_features
X_train, X_val, X_test = normalize_features(X_raw_train, X_raw_val=X_raw_val, X_raw_test=X_raw_test)

# Normalization should leave the split shapes unchanged; print them to check
print(f"{X_train.shape = }")
print(f"{X_val.shape = }")
print(f"{X_test.shape = }")

In [None]:
# See the effect of normalization
# Figure.savefig does not create missing directories, so make sure the output
# folder exists before either figure is written.
os.makedirs(os.path.join("./results", "DataExploration"), exist_ok=True)

# Raw features: statistics are reduced over axes 0, 1 and 3, leaving one value
# per index along axis 2.
fig, ax = make_figax()
ax.plot(np.std(X_raw_train, axis=(0,1,3)), label="std")
ax.plot(np.mean(X_raw_train, axis=(0,1,3)), label="mean")
#ax.plot(np.max(X_raw_train, axis=(0,1,3)), label="max")
ax.grid()
ax.legend()
#ax.set_title("Raw")
fig.tight_layout()
fig.savefig(os.path.join("./results", "DataExploration", "distribution_raw.png"), dpi=300, facecolor="white")
plt.show()


# Normalized features: same reduction, for direct comparison with the raw plot
fig, ax = make_figax()
ax.plot(np.std(X_train, axis=(0,1,3)), label="std")
ax.plot(np.mean(X_train, axis=(0,1,3)), label="mean")
#ax.plot(np.max(X_train, axis=(0,1,3)), label="max")
ax.grid()
ax.legend()
#ax.set_title("Normalized")
fig.tight_layout()
fig.savefig(os.path.join("./results", "DataExploration", "distribution_preprocessed.png"), dpi=300, facecolor="white")
plt.show()

## Autoencoders

### Test Script

In [None]:
# Identity transform, passed to plot_spectral_feature as `fn` for already-normalized data
nop = lambda x: x

def test_autoencoder(autoencoder, test_set = X_test, plot_transforms = True, plot_raw_transforms = True, plot_waveforms = True):
    """Push one randomly chosen sample through `autoencoder` and inspect the result.

    Args:
        autoencoder: Callable that maps a batch of normalized features to its reconstruction.
        test_set: Normalized samples to draw from (defaults to the `X_test` that
            existed when this function was defined).
        plot_transforms: Plot the normalized input and its reconstruction.
        plot_raw_transforms: Plot the denormalized input and reconstruction.
        plot_waveforms: Plot the waveforms recovered from both spectra.
    """
    def show_pair(plot_fn, first, first_title, second, second_title, **plot_kwargs):
        # Draw original and reconstruction with the same plotting routine, then display both
        for data, title in ((first, first_title), (second, second_title)):
            _, axis = plot_fn(data, **plot_kwargs)
            axis.set_title(title)
        plt.show()

    # Draw a random sample from the test set
    print("Picking Sample")
    x = test_set[np.random.choice(len(test_set))]

    # Run the sample through the autoencoder as a batch of one
    print("Autencoding")
    batch_shape = (1, *x.shape)
    x_hat = np.array(autoencoder(x.reshape(batch_shape)))

    if plot_transforms:
        print("Plotting transforms")
        show_pair(
            plot_spectral_feature,
            x, "Original Preprocessed Spectrum",
            x_hat, "Reconstructed Preprocessed Spectrum",
            fn=nop,
        )

    # Undo the normalization (denormalize_features is called on batches, hence the reshape)
    print("Postprocessing")
    x_raw = denormalize_features(x.reshape(batch_shape))[0]
    x_raw_hat = denormalize_features(x_hat.reshape(batch_shape))[0]

    if plot_raw_transforms:
        print("Plotting transforms")
        show_pair(
            plot_spectral_feature,
            x_raw, "Original Raw Spectrum",
            x_raw_hat, "Reconstructed Raw Spectrum",
        )

    # Invert the spectral features back to audio
    print("Reconstructing Audio")
    s = np.squeeze(compute_imels(x_raw))
    s_hat = np.squeeze(compute_imels(x_raw_hat))

    if plot_waveforms:
        show_pair(
            plot_audio,
            s, "Original Waveform",
            s_hat, "Reconstructed Waveform",
        )

    # Play the original first, then the reconstruction
    for label, waveform in (("Original Audio", s), ("Reconstructed Audio", s_hat)):
        print(label)
        player = play_audio(waveform)
    print()

In [None]:
def free_memory(autoencoder):
  """Release as much of the memory held by `autoencoder` as possible.

  Clears the global Keras state, detaches the `encoder` / `decoder` sub-models
  when the model has them, and runs the garbage collector.

  Note: `del autoencoder` below only drops this function's local reference.
  The caller must also drop or rebind its own reference before the model
  itself can be reclaimed.

  Args:
      autoencoder: Model to release. Models without `encoder` / `decoder`
          attributes are accepted; the missing attributes are skipped.
  """
  import gc

  krs.backend.clear_session()
  # Not every model exposes both sub-models, so only detach what is there
  for part in ("encoder", "decoder"):
    if hasattr(autoencoder, part):
      delattr(autoencoder, part)
  del autoencoder
  # Collect reference cycles right away instead of waiting for the next automatic GC run
  gc.collect()
  krs.backend.clear_session()

In [None]:
# Make sure the root directory for saved models exists
os.makedirs(models_path, exist_ok=True)
def save_autoencoder(autoencoder, params, history, name):
    """Persist a trained autoencoder under `models_path/name`.

    Writes three artifacts: `params.json` (constructor parameters),
    `history.json` (training history) and `model.keras` (the model itself).

    Args:
        autoencoder: Model exposing a `save(path)` method.
        params: JSON-serializable parameters.
        history: Either a dict, or an object exposing the dict as `.history`.
        name: Sub-directory name identifying this model.
    """
    target_dir = os.path.join(models_path, name)
    os.makedirs(target_dir, exist_ok=True)

    # Constructor parameters
    params_path = os.path.join(target_dir, "params.json")
    with open(params_path, "w") as handle:
        json.dump(params, handle)

    # Training history
    history_path = os.path.join(target_dir, "history.json")
    with open(history_path, "w") as handle:
        record = history if isinstance(history, dict) else history.history
        json.dump(record, handle)

    # Model
    autoencoder.save(os.path.join(target_dir, "model.keras"))

def load_autoencoder(name, load_model = True):
    """Read back the artifacts stored under `models_path/name`.

    Args:
        name: Sub-directory name identifying the model.
        load_model: When False, skip loading the model and only read the JSON files.

    Returns:
        Tuple `(autoencoder, params, history)`; `autoencoder` is None when
        `load_model` is False.
    """
    source_dir = os.path.join(models_path, name)

    def read_json(filename):
        # Parse one JSON artifact from the model directory
        with open(os.path.join(source_dir, filename), "r") as handle:
            return json.load(handle)

    params = read_json("params.json")
    history = read_json("history.json")

    model = krs.models.load_model(os.path.join(source_dir, "model.keras")) if load_model else None
    return model, params, history

### Convolutional Autoencoder

In [None]:
from models.autoencoders import Conv2DAutoencoder

In [None]:
# Baseline constructor arguments for Conv2DAutoencoder; also the starting point for tuning
default_params = {
    "feature_shape": input_shape,
    "compression": 4,
    "kernel_size": 3,
    "conv_depth": 1,
    "input_chans_multiplier": 1,
    "skip_connection": False,
    "pooling_type": "average",
}

# Keyword arguments forwarded to model.compile()
compile_kwargs = {
    "optimizer": "adam",
    "loss": "mae",
}

# Directory that receives tuning results and saved weights for this model
results_path = os.path.join("./results/Conv2DAutoencoder")
os.makedirs(results_path, exist_ok=True)

In [None]:
# Test training: a single epoch to check that the model builds and trains end to end
autoencoder = Conv2DAutoencoder(**default_params)
autoencoder.compile(**compile_kwargs)
# Input and target are the same array because the model learns to reconstruct its input
history = autoencoder.fit(X_train, X_train, epochs=1, shuffle=True, validation_data=(X_val, X_val), verbose=1)

In [None]:
# Hyperparameter tuning
# keys[i] names the parameter whose candidate values are listed in vals[i]
keys = np.array([
    "input_chans_multiplier",
    "conv_depth",
    "kernel_size",
    "pooling_type",
])
# The rows have different lengths and mixed types, hence the object dtype
vals = np.array([
    [1, 2, 4],
    [1, 2, 3],
    [3, 5],
    ["max", "average"],
], dtype="object")
# History key used to compare configurations
loss_key = "val_loss"
epochs = 250
# Number of sequential optimization passes
passes = 1

In [None]:
# Random optimization
# NOTE(review): presumably samples `random_attempts` configurations from the grid and
# keeps the one with the lowest `loss_key` -- confirm in tools.tensorflow_tools
optimal_params, optimal_loss = tune_hyperparameters(
    X_train,
    X_train,
    Conv2DAutoencoder,
    default_params,
    keys,
    vals,
    loss_key,
    X_val=X_val,
    y_val=X_val,
    results_path=results_path,
    epochs=epochs,
    compile_kwargs=compile_kwargs,
    verbose=False,
    do_random=True,
    random_attempts=25,
)

In [None]:
# Show the current best configuration and its loss
print(f"{optimal_params = }")
print(f"{optimal_loss = }")

In [None]:
# Sequential optimization, starting from the current optimal_params
for i in range(passes):
    # Shuffle the order in which the hyperparameters are visited on this pass;
    # keys and vals are indexed with the same permutation so they stay aligned
    permutation = np.random.permutation(len(keys))
    optimal_params, optimal_loss = tune_hyperparameters(
        X_train,
        X_train,
        Conv2DAutoencoder,
        optimal_params,
        keys[permutation],
        vals[permutation],
        loss_key,
        X_val=X_val,
        y_val=X_val,
        results_path=results_path,
        epochs=epochs,
        compile_kwargs=compile_kwargs,
        verbose=False,
        do_random=False,
    )

In [None]:
# Show the current best configuration and its loss
print(f"{optimal_params = }")
print(f"{optimal_loss = }")

In [None]:
# Load optimal hyperparameters from tuning process
# (reads from results_path, so the tuning cells need not be re-run in this session)
optimal_params, optimal_loss = load_optimal_params(results_path, loss_key)

In [None]:
# Show the current best configuration and its loss
print(f"{optimal_params = }")
print(f"{optimal_loss = }")

In [None]:
# Manual tuning
# This assignment replaces whatever optimal_params currently holds with hand-picked values
optimal_params = {
    "feature_shape": input_shape,
    "compression": 4,
    "kernel_size": 5,
    "conv_depth": 4,
    "input_chans_multiplier": 1,
    "skip_connection": True,
    "pooling_type": "average",
}

In [None]:
# Retrain with optimal params
autoencoder = Conv2DAutoencoder(**optimal_params)
autoencoder.compile(**compile_kwargs)
# Stop once `loss_key` has not improved by at least 1e-6 for 5 consecutive epochs.
# NOTE(review): restore_best_weights is left at its Keras default (False), so the model
# keeps the weights of the last epoch rather than the best one -- confirm this is intended.
earlystopping = krs.callbacks.EarlyStopping(monitor=loss_key, patience=5, min_delta=1e-6)

In [None]:
# Train to reconstruct the input; early stopping may end the run before 250 epochs
history = autoencoder.fit(X_train, X_train, epochs=250, shuffle=True, validation_data=(X_val, X_val), callbacks=[earlystopping], verbose=1)

In [None]:
# Save model (weights only; the architecture is rebuilt from optimal_params when loading)
autoencoder.save_weights(os.path.join(results_path, "model"))

In [None]:
# Make sure loading works: rebuild the model from the same parameters, then restore the weights
autoencoder = Conv2DAutoencoder(**optimal_params)
autoencoder.load_weights(os.path.join(results_path, "model"))

In [None]:
# Test autoencoder
# Positional flags: plot_transforms=False, plot_raw_transforms=True, plot_waveforms=False
test_autoencoder(autoencoder, X_test, False, True, False)

### Variational Autoencoder

In [None]:
from models.autoencoders import VariationalAutoencoder

In [None]:
# Baseline constructor arguments for VariationalAutoencoder
default_params = {
    "feature_shape": input_shape,
    "compression": 8,
    "kernel_size": 3,
    "conv_depth": 1,
    "input_chans_multiplier": 1,
    "skip_connection": False,
    "pooling_type": "average",
    # NOTE(review): presumably weights of the KL and volume regularization terms -- confirm in models.autoencoders
    "kl_reg": 1,
    "vol_reg": 1,
}

# Directory that receives the saved weights for this model
results_path = os.path.join("./results/VariationalAutoencoder")
os.makedirs(results_path, exist_ok=True)

In [None]:
# Test Training: a single epoch to check that the model builds and trains end to end
autoencoder = VariationalAutoencoder(**default_params)
# No loss is passed to compile(); presumably the model computes its own losses -- TODO confirm
autoencoder.compile(optimizer="adam")
history = autoencoder.fit(X_train, X_train, epochs=1, shuffle=True, validation_data=(X_val, X_val), verbose=1)

In [None]:
# Manual parameter tuning
# Hand-picked values; compared with default_params, kl_reg is reduced from 1 to 1e-10
optimal_params = {
    "feature_shape": input_shape,
    "compression": 4,
    "kernel_size": 5,
    "conv_depth": 4,
    "input_chans_multiplier": 1,
    "skip_connection": True,
    "pooling_type": "average",
    "kl_reg": 1e-10,
    "vol_reg": 1,
}

In [None]:
# Rebuild the model with the optimal parameters for the final training run
autoencoder = VariationalAutoencoder(**optimal_params)
autoencoder.compile(optimizer="adam")
# Stop once "val_r_loss" has not improved by at least 1e-6 for 5 consecutive epochs
# NOTE(review): "val_r_loss" is presumably the validation reconstruction loss reported by the model -- confirm
earlystopping = krs.callbacks.EarlyStopping(monitor="val_r_loss", patience=5, min_delta=1e-6)

In [None]:
# Train on the training split only (validation on X_val), for at most 10 epochs
history = autoencoder.fit(X_train, X_train, epochs=10, shuffle=True, validation_data=(X_val, X_val), callbacks=[earlystopping], verbose=1)

In [None]:
# Save model (weights only; the architecture is rebuilt from optimal_params when loading)
autoencoder.save_weights(os.path.join(results_path, "model"))

In [None]:
# Load autoencoder: rebuild the model from the same parameters, then restore the weights
autoencoder = VariationalAutoencoder(**optimal_params)
autoencoder.load_weights(os.path.join(results_path, "model"))

In [None]:
# Positional flags: plot_transforms=False, plot_raw_transforms=True, plot_waveforms=False
test_autoencoder(autoencoder, X_test, False, True, False)

In [None]:
# TODO: Test creating multiple variations of the same input data


### GAN Autoencoder

In [None]:
from models.autoencoders import GANGenerator, GANDiscriminator
from models.layers import GAN

In [None]:
def create_gan_model(**params):
    """Build a GAN from one flat parameter dict.

    Keys prefixed with "g_" go to GANGenerator and keys prefixed with "d_" go to
    GANDiscriminator, in both cases with the prefix stripped. Every other key
    is passed unchanged to both.

    Returns:
        A GAN wrapping the freshly built generator and discriminator.
    """
    generator_kwargs, discriminator_kwargs = {}, {}
    for name, setting in params.items():
        prefix, stripped = name[:2], name[2:]
        if prefix == "g_":
            generator_kwargs[stripped] = setting
        elif prefix == "d_":
            discriminator_kwargs[stripped] = setting
        else:
            # No prefix: the setting is shared by both networks
            generator_kwargs[name] = setting
            discriminator_kwargs[name] = setting

    return GAN(GANGenerator(**generator_kwargs), GANDiscriminator(**discriminator_kwargs))

In [None]:
# Baseline parameters for create_gan_model: "g_" keys go to the generator,
# "d_" keys to the discriminator, unprefixed keys to both
default_params = {
    "feature_shape": input_shape,
    "g_compression": 8,
    "g_kernel_size": 3,
    "g_conv_depth": 1,
    "g_input_chans_multiplier": 1,
    "g_skip_connection": False,
    "g_pooling_type": "average",
    "g_gan_reg": 0.1,
    "g_c_reg": 0.1,
    "g_s_reg": 0.1,
    "g_mode": "adain",
    "g_hidden_activation": "relu",
    "d_mlp_layers": 2,
    "d_conv_layers": 2,
    "d_conv_kernel_size": 3,
    "d_conv_pooling_size": 4,
    "d_conv_pooling_type": "max",
}

# Keyword arguments forwarded to gan.compile(): one optimizer per network
compile_kwargs={
    "g_optimizer": "adam",
    "d_optimizer": "adam",
}

# Directory that receives the saved weights for this model
results_path = os.path.join("./results/GANAutoencoder")
os.makedirs(results_path, exist_ok=True)

In [None]:
# Test training: a single epoch to check that the model builds and trains end to end
gan = create_gan_model(**default_params)
gan.compile(**compile_kwargs)
history = gan.fit(X_train, X_train, epochs=1, shuffle=True, verbose=1, validation_data=(X_val, X_val))

In [None]:
# Manual parameter tuning
# Hand-picked values; same key convention as default_params ("g_" / "d_" / shared)
optimal_params = {
    "feature_shape": input_shape,
    "g_compression": 4,
    "g_kernel_size": 5,
    "g_conv_depth": 3,
    "g_input_chans_multiplier": 1,
    "g_skip_connection": True,
    "g_pooling_type": "average",
    "g_gan_reg": 0.025,
    "g_c_reg": 0.01,
    "g_s_reg": 0.01,
    "g_mode": "adain",
    "g_hidden_activation": "relu",
    "d_mlp_layers": 2,
    "d_conv_layers": 2,
    "d_conv_kernel_size": 3,
    "d_conv_pooling_size": 4,
    "d_conv_pooling_type": "max",
}

In [None]:
# Retrain with optimal params
gan = create_gan_model(**optimal_params)
gan.compile(**compile_kwargs)
# Stop once "val_r_loss" has not improved by at least 1e-6 for 5 consecutive epochs
# NOTE(review): "val_r_loss" is presumably the validation reconstruction loss reported by the GAN -- confirm
earlystopping = krs.callbacks.EarlyStopping(monitor="val_r_loss", patience=5, min_delta=1e-6)

In [None]:
# Train for at most 250 epochs; early stopping may end the run sooner
history = gan.fit(X_train, X_train, epochs=250, shuffle=True, verbose=1, callbacks=[earlystopping], validation_data=(X_val, X_val))

In [None]:
# Save model: generator and discriminator weights go to separate checkpoints
gan.generator.save_weights(os.path.join(results_path, "generator"))
gan.discriminator.save_weights(os.path.join(results_path, "discriminator"))

In [None]:
# Load model: rebuild both networks from the same parameters, then restore their weights
gan = create_gan_model(**optimal_params)
gan.generator.load_weights(os.path.join(results_path, "generator"))
gan.discriminator.load_weights(os.path.join(results_path, "discriminator"))

In [None]:
# Test model
def autoencoder_real(X):
    """Return the first of the two outputs of `gan.generator(X)` as a NumPy array."""
    first_output, _ = gan.generator(X)
    return np.array(first_output)

def autoencoder_fake(X):
    """Return the second of the two outputs of `gan.generator(X)` as a NumPy array."""
    _, second_output = gan.generator(X)
    return np.array(second_output)

In [None]:
# Inspect the generator's first output; flags: plot_transforms=False, plot_raw_transforms=True, plot_waveforms=False
test_autoencoder(autoencoder_real, X_test, False, True, False)

In [None]:
# Inspect the generator's second output; flags: plot_transforms=False, plot_raw_transforms=True, plot_waveforms=False
test_autoencoder(autoencoder_fake, X_test, False, True, False)

### MUNIT Autoencoder

In [None]:
from models.autoencoders import MUNITGenerator, GANDiscriminator
from models.layers import GAN

In [None]:
def create_gan_model(**params):
    """Build a MUNIT-based GAN from one flat parameter dict.

    Keys prefixed with "g_" go to MUNITGenerator and keys prefixed with "d_" go
    to GANDiscriminator, in both cases with the prefix stripped. Every other
    key is passed unchanged to both.

    Returns:
        A GAN wrapping the freshly built generator and discriminator.
    """
    generator_kwargs, discriminator_kwargs = {}, {}
    for name, setting in params.items():
        prefix, stripped = name[:2], name[2:]
        if prefix == "g_":
            generator_kwargs[stripped] = setting
        elif prefix == "d_":
            discriminator_kwargs[stripped] = setting
        else:
            # No prefix: the setting is shared by both networks
            generator_kwargs[name] = setting
            discriminator_kwargs[name] = setting

    return GAN(MUNITGenerator(**generator_kwargs), GANDiscriminator(**discriminator_kwargs))

In [None]:
# Baseline parameters for create_gan_model: "g_" keys go to the generator,
# "d_" keys to the discriminator, unprefixed keys to both
default_params = {
    "feature_shape": input_shape,
    "g_compression": 8,
    "g_style_dim": 8,
    "g_kernel_size": 3,
    "g_conv_depth": 1,
    "g_input_chans_multiplier": 1,
    "g_skip_connection": False,
    "g_pooling_type": "average",
    "g_gan_reg": 0.1,
    "g_c_reg": 0.1,
    "g_s_reg": 0.1,
    "g_adain_momentum": 0.1,
    "g_adain_epsilon": 1e-5,
    "d_mlp_layers": 2,
    "d_conv_layers": 2,
    "d_conv_kernel_size": 3,
    "d_conv_pooling_size": 4,
    "d_conv_pooling_type": "max",
}

# Keyword arguments forwarded to gan.compile(): one optimizer per network
compile_kwargs={
    "g_optimizer": "adam",
    "d_optimizer": "adam",
}

In [None]:
# Test training: build and compile the model with the default parameters
gan = create_gan_model(**default_params)
gan.compile(**compile_kwargs)

In [None]:
# A single epoch to check that the model trains end to end
history = gan.fit(X_train, X_train, epochs=1, shuffle=True, verbose=1, validation_data=(X_val, X_val))

In [None]:
# Manual tuning
# results_path is re-pointed to the MUNIT results directory here
results_path = os.path.join("./results/MUNITAutoencoder")
os.makedirs(results_path, exist_ok=True)

# Hand-picked values; same key convention as default_params ("g_" / "d_" / shared)
optimal_params = {
    "feature_shape": input_shape,
    "g_compression": 4,
    "g_style_dim": 8,
    "g_kernel_size": 5,
    "g_conv_depth": 4,
    "g_input_chans_multiplier": 1,
    "g_skip_connection": False,
    "g_pooling_type": "average",
    "g_gan_reg": 0.01,
    "g_c_reg": 0.01,
    "g_s_reg": 0.01,
    "g_adain_momentum": 0.1,
    "g_adain_epsilon": 1e-5,
    "d_mlp_layers": 2,
    "d_conv_layers": 2,
    "d_conv_kernel_size": 3,
    "d_conv_pooling_size": 4,
    "d_conv_pooling_type": "max",
}

In [None]:
# Retrain with optimal params
gan = create_gan_model(**optimal_params)
gan.compile(**compile_kwargs)
# Stop once "val_r_loss" has not improved by at least 1e-6 for 5 consecutive epochs
# NOTE(review): "val_r_loss" is presumably the validation reconstruction loss reported by the GAN -- confirm
earlystopping = krs.callbacks.EarlyStopping(monitor="val_r_loss", patience=5, min_delta=1e-6)

In [None]:
# Train for at most 250 epochs; early stopping may end the run sooner
history = gan.fit(X_train, X_train, epochs=250, shuffle=True, verbose=1, callbacks=[earlystopping], validation_data=(X_val, X_val))

In [None]:
# Save model: only the generator is saved (as a full model); the discriminator is not persisted
gan.generator.save(os.path.join(results_path, "generator.keras"))

In [None]:
# Load model
generator = krs.models.load_model(os.path.join(results_path, "generator.keras"))
# No discriminator is passed (None); this wrapper only provides `gan.generator`
gan = GAN(generator, None)

In [None]:
# Test model
def autoencoder_real(X):
    """Return the first of the two outputs of `gan.generator(X)` as a NumPy array."""
    first_output, _ = gan.generator(X)
    return np.array(first_output)

def autoencoder_fake(X):
    """Return the second of the two outputs of `gan.generator(X)` as a NumPy array."""
    _, second_output = gan.generator(X)
    return np.array(second_output)

In [None]:
# Inspect the generator's first output; flags: plot_transforms=False, plot_raw_transforms=True, plot_waveforms=False
test_autoencoder(autoencoder_real, X_test, False, True, False)

In [None]:
# Inspect the generator's second output; flags: plot_transforms=False, plot_raw_transforms=True, plot_waveforms=False
test_autoencoder(autoencoder_fake, X_test, False, True, False)