# Style Transcoders

In [2]:
# Autoreloading makes development easier: with mode 2, IPython reloads imported
# modules before executing each cell, so edits to the local `tools` / `models`
# packages are picked up without restarting the kernel.
%load_ext autoreload
%autoreload 2

In [3]:
# Import libraries
import os
import sys
import numpy as np
import matplotlib.pyplot as plt
import tensorflow as tf
import tensorflow.keras as krs
import tensorflow_probability as tfp
import json
from sklearn.model_selection import train_test_split
from tools.audio_tools import read_audio, write_audio, play_audio
from tools.feature_tools import compute_mels, compute_imels, compute_mfcc, compute_imfcc, load_data, normalize_features, denormalize_features
from tools.constants import cut_classical_path, cut_jazz_path, models_path
from tools.plot_tools import make_figax, plot_history, plot_audio, plot_spectral_feature
from tools.tensorflow_tools import tune_hyperparameters, load_optimal_params

2023-11-18 15:38:11.724019: I tensorflow/tsl/cuda/cudart_stub.cc:28] Could not find cuda drivers on your machine, GPU will not be used.
2023-11-18 15:38:11.762260: E tensorflow/compiler/xla/stream_executor/cuda/cuda_dnn.cc:9342] Unable to register cuDNN factory: Attempting to register factory for plugin cuDNN when one has already been registered
2023-11-18 15:38:11.762291: E tensorflow/compiler/xla/stream_executor/cuda/cuda_fft.cc:609] Unable to register cuFFT factory: Attempting to register factory for plugin cuFFT when one has already been registered
2023-11-18 15:38:11.762325: E tensorflow/compiler/xla/stream_executor/cuda/cuda_blas.cc:1518] Unable to register cuBLAS factory: Attempting to register factory for plugin cuBLAS when one has already been registered
2023-11-18 15:38:11.769791: I tensorflow/tsl/cuda/cudart_stub.cc:28] Could not find cuda drivers on your machine, GPU will not be used.
2023-11-18 15:38:11.770691: I tensorflow/core/platform/cpu_feature_guard.cc:182] This Tens

## Data Loading

### Load Audio & Extract Features

In [4]:
# Request at most 5000 clips per genre, capped by the number of entries in the
# smaller of the two genre folders so the same count is asked for from both
genre_paths = (cut_classical_path, cut_jazz_path)
available_per_genre = [len(os.listdir(path)) for path in genre_paths]
n_samples = min(5000, *available_per_genre)

# Load classical first, then jazz
X_c_raw, X_j_raw = (load_data(path, n_samples=n_samples) for path in genre_paths)

print(f"{X_c_raw.shape = }", f"{X_j_raw.shape = }", sep="\n")

100 %
100 %
X_c_raw.shape = (418, 67, 128, 1)
X_j_raw.shape = (418, 67, 128, 1)


### Train - Validation - Test Split

In [5]:
# Number of samples (an absolute count, 10 % of n_samples) held out for the test
# set; the validation set gets the same count
test_size = round(0.1 * n_samples)
val_size = test_size
# Fixed seed so the splits are identical after every Restart & Run All
split_seed = 42
# Perform splits: carve out the test set first, then take the validation set
# from what remains of the training data
X_c_raw_train, X_c_raw_test = train_test_split(X_c_raw, test_size=test_size, random_state=split_seed)
X_c_raw_train, X_c_raw_val = train_test_split(X_c_raw_train, test_size=val_size, random_state=split_seed)
X_j_raw_train, X_j_raw_test = train_test_split(X_j_raw, test_size=test_size, random_state=split_seed)
X_j_raw_train, X_j_raw_val = train_test_split(X_j_raw_train, test_size=val_size, random_state=split_seed)
# Sanity check: the three splits must partition each genre's data
assert len(X_c_raw_train) + len(X_c_raw_val) + len(X_c_raw_test) == len(X_c_raw)
assert len(X_j_raw_train) + len(X_j_raw_val) + len(X_j_raw_test) == len(X_j_raw)
# Verify split shapes
print(f"{X_c_raw_train.shape = }")
print(f"{X_c_raw_test.shape = }")
print(f"{X_c_raw_val.shape = }")
print(f"{X_j_raw_train.shape = }")
print(f"{X_j_raw_test.shape = }")
print(f"{X_j_raw_val.shape = }")

X_c_raw_train.shape = (334, 67, 128, 1)
X_c_raw_test.shape = (42, 67, 128, 1)
X_c_raw_val.shape = (42, 67, 128, 1)
X_j_raw_train.shape = (334, 67, 128, 1)
X_j_raw_test.shape = (42, 67, 128, 1)
X_j_raw_val.shape = (42, 67, 128, 1)


### Normalize Features

In [6]:
# Perform normalization, one call per genre: the training split is the positional
# argument and the validation / test splits are passed alongside it
# NOTE(review): presumably the statistics are fitted on the training split only
# and stored under `name` — confirm in tools.feature_tools
X_c_train, X_c_val, X_c_test = normalize_features(X_c_raw_train, X_raw_val=X_c_raw_val, X_raw_test=X_c_raw_test, name="classical")
X_j_train, X_j_val, X_j_test = normalize_features(X_j_raw_train, X_raw_val=X_j_raw_val, X_raw_test=X_j_raw_test, name="jazz")

# Verify normalized shapes; the `=` specifier labels each line with its
# expression, like the shape printouts of the raw data
print(f"{X_c_train.shape = }")
print(f"{X_c_val.shape = }")
print(f"{X_c_test.shape = }")
print(f"{X_j_train.shape = }")
print(f"{X_j_val.shape = }")
print(f"{X_j_test.shape = }")

(334, 67, 128, 1)
(42, 67, 128, 1)
(42, 67, 128, 1)
(334, 67, 128, 1)
(42, 67, 128, 1)
(42, 67, 128, 1)


In [7]:
# The models below are fitted with classical features as input and jazz features
# as target while being configured with a single feature shape, so both genres
# must agree on the per-sample shape
assert X_c_train.shape[1:] == X_j_train.shape[1:], "classical and jazz feature shapes differ"
# Per-sample feature shape (everything but the leading sample axis)
input_shape = X_c_train.shape[1:]

print(f"{input_shape = }")

input_shape = (67, 128, 1)


## Style Transfer Models

### Test Script

In [None]:
# TODO: Develop style transfer test script

### Convolutional Style Transfer

In [9]:
from models.transcoders import Conv2DTranscoder

In [42]:
# Baseline Conv2DTranscoder hyperparameters; tuning starts from these values
default_params = dict(
    feature_shape=input_shape,
    compression=8,
    kernel_size=3,
    conv_depth=1,
    input_chans_multiplier=1,
    skip_connection=False,
    pooling_type="average",
    h_reg=1e-3,
    kl_reg=1e-6,
)

In [44]:
# Test training: build the model from the baseline parameters and fit it for a
# single epoch (classical features as input, jazz features as target)
transcoder = Conv2DTranscoder(**default_params)
transcoder.compile(optimizer="adam")
history = transcoder.fit(
    X_c_train,
    X_j_train,
    validation_data=(X_c_val, X_j_val),
    epochs=1,
    shuffle=True,
    verbose=1,
)



In [45]:
# Hyperparameter tuning
# Directory in which the tuning results are stored
results_path = "./results/Conv2DTranscoder"
os.makedirs(results_path, exist_ok=True)
# Candidate values per tunable hyperparameter; split into the parallel
# `keys` / `vals` arrays below
search_space = {
    "input_chans_multiplier": [1, 2, 4],
    "conv_depth": [1, 2, 3],
    "kernel_size": [3, 5],
    "pooling_type": ["max", "average"],
    "h_reg": [1, 1e-1, 1e-2, 1e-3],
    "kl_reg": [1e-4, 1e-5, 1e-6, 1e-7, 1e-8],
}
keys = np.array(list(search_space))
# dtype="object" keeps the ragged candidate lists as one list per entry
vals = np.array(list(search_space.values()), dtype="object")
# Metric used to compare configurations, and the training budget
loss_key = "val_r_loss"
epochs = 250
passes = 1

In [49]:
# Random optimization (do_random=True) with 25 attempts
random_search_options = dict(
    X_val=X_c_val,
    y_val=X_j_val,
    results_path=results_path,
    epochs=epochs,
    verbose=False,
    do_random=True,
    random_attempts=25,
)
optimal_params, optimal_loss = tune_hyperparameters(
    X_c_train, X_j_train, Conv2DTranscoder, default_params, keys, vals, loss_key,
    **random_search_options,
)

Optimizing for default parameters


KeyboardInterrupt: 

In [None]:
# Report the best configuration found so far and its loss
print(f"{optimal_params = }", f"{optimal_loss = }", sep="\n")

In [None]:
# Sequential optimization
for i in range(passes):
    # Draw a fresh random order for the hyperparameters on every pass and apply
    # it to both parallel arrays so keys and candidate values stay aligned
    # NOTE(review): assumes tune_hyperparameters visits `keys` in the given
    # order when do_random=False — confirm in tools.tensorflow_tools
    permutation = np.random.permutation(len(keys))
    optimal_params, optimal_loss = tune_hyperparameters(
        X_c_train,
        X_j_train,
        Conv2DTranscoder,
        default_params,
        keys[permutation],
        vals[permutation],
        loss_key,
        X_val=X_c_val,
        y_val=X_j_val,
        results_path=results_path,
        epochs=epochs,
        verbose=False,
        do_random=False,
    )

In [None]:
# Report the best configuration found so far and its loss
print(f"{optimal_params = }", f"{optimal_loss = }", sep="\n")

In [None]:
# Load optimal hyperparameters from tuning process
# Reads from results_path, so the tuning cells need not be re-run in this session
# NOTE(review): presumably selects the stored configuration with the best
# `loss_key` value — confirm in tools.tensorflow_tools
optimal_params, optimal_loss = load_optimal_params(results_path, loss_key)

In [None]:
# Report the loaded configuration and its loss
print(f"{optimal_params = }", f"{optimal_loss = }", sep="\n")

In [None]:
# Retrain with optimal params
# Uses the model class, optimizer and classical -> jazz data of this section's
# test-training cell
transcoder = Conv2DTranscoder(**optimal_params)
transcoder.compile(optimizer="adam")
# Stop once the monitored loss has not improved by at least min_delta for 5 epochs
earlystopping = krs.callbacks.EarlyStopping(monitor=loss_key, patience=5, min_delta=1e-6)
history = transcoder.fit(
    X_c_train,
    X_j_train,
    epochs=epochs,
    shuffle=True,
    validation_data=(X_c_val, X_j_val),
    callbacks=[earlystopping],
    verbose=1,
)

In [None]:
# Test transcoder

### Variational Style Transfer

In [50]:
from models.transcoders import VariationalTranscoder

In [62]:
# Baseline VariationalTranscoder hyperparameters; tuning starts from these values
default_params = dict(
    feature_shape=input_shape,
    compression=8,
    kernel_size=3,
    conv_depth=1,
    input_chans_multiplier=1,
    skip_connection=False,
    pooling_type="average",
    h_reg=1e-3,
    kl_reg=1e-6,
)

In [63]:
# Test training: build the model from the baseline parameters and fit it for
# five epochs (classical features as input, jazz features as target)
transcoder = VariationalTranscoder(**default_params)
transcoder.compile(optimizer="adam")
history = transcoder.fit(
    X_c_train,
    X_j_train,
    validation_data=(X_c_val, X_j_val),
    epochs=5,
    shuffle=True,
    verbose=1,
)

Epoch 1/5
Epoch 2/5
Epoch 3/5
Epoch 4/5
Epoch 5/5


In [None]:
# Hyperparameter tuning
# Directory in which the tuning results are stored
results_path = "./results/VariationalTranscoder"
os.makedirs(results_path, exist_ok=True)
# Candidate values per tunable hyperparameter; split into the parallel
# `keys` / `vals` arrays below
search_space = {
    "input_chans_multiplier": [1, 2, 4],
    "conv_depth": [1, 2, 3],
    "kernel_size": [3, 5],
    "pooling_type": ["max", "average"],
    "h_reg": [1, 1e-1, 1e-2, 1e-3],
    "kl_reg": [1e-4, 1e-5, 1e-6, 1e-7, 1e-8],
}
keys = np.array(list(search_space))
# dtype="object" keeps the ragged candidate lists as one list per entry
vals = np.array(list(search_space.values()), dtype="object")
# Metric used to compare configurations, and the training budget
loss_key = "val_r_loss"
epochs = 250
passes = 1

In [None]:
# Random optimization (do_random=True) with 25 attempts
random_search_options = dict(
    X_val=X_c_val,
    y_val=X_j_val,
    results_path=results_path,
    epochs=epochs,
    verbose=False,
    do_random=True,
    random_attempts=25,
)
optimal_params, optimal_loss = tune_hyperparameters(
    X_c_train, X_j_train, VariationalTranscoder, default_params, keys, vals, loss_key,
    **random_search_options,
)

In [None]:
# Report the best configuration found so far and its loss
print(f"{optimal_params = }", f"{optimal_loss = }", sep="\n")

In [None]:
# Sequential optimization
for i in range(passes):
    # Draw a fresh random order for the hyperparameters on every pass and apply
    # it to both parallel arrays so keys and candidate values stay aligned
    # NOTE(review): assumes tune_hyperparameters visits `keys` in the given
    # order when do_random=False — confirm in tools.tensorflow_tools
    permutation = np.random.permutation(len(keys))
    optimal_params, optimal_loss = tune_hyperparameters(
        X_c_train,
        X_j_train,
        VariationalTranscoder,
        default_params,
        keys[permutation],
        vals[permutation],
        loss_key,
        X_val=X_c_val,
        y_val=X_j_val,
        results_path=results_path,
        epochs=epochs,
        verbose=False,
        do_random=False,
    )

In [None]:
# Report the best configuration found so far and its loss
print(f"{optimal_params = }", f"{optimal_loss = }", sep="\n")

In [None]:
# Load optimal hyperparameters from tuning process
# Reads from results_path, so the tuning cells need not be re-run in this session
# NOTE(review): presumably selects the stored configuration with the best
# `loss_key` value — confirm in tools.tensorflow_tools
optimal_params, optimal_loss = load_optimal_params(results_path, loss_key)

In [None]:
# Report the loaded configuration and its loss
print(f"{optimal_params = }", f"{optimal_loss = }", sep="\n")

In [None]:
# Retrain with optimal params
# Uses the model class, optimizer and classical -> jazz data of this section's
# test-training cell
transcoder = VariationalTranscoder(**optimal_params)
transcoder.compile(optimizer="adam")
# Stop once the monitored loss has not improved by at least min_delta for 5 epochs
earlystopping = krs.callbacks.EarlyStopping(monitor=loss_key, patience=5, min_delta=1e-6)
history = transcoder.fit(
    X_c_train,
    X_j_train,
    epochs=epochs,
    shuffle=True,
    validation_data=(X_c_val, X_j_val),
    callbacks=[earlystopping],
    verbose=1,
)

In [None]:
# Test transcoder

### GAN Style Transfer

In [64]:
from models.transcoders import GANTranscoder, GANDiscriminators
from models.layers import GAN

In [65]:
def create_gan_model(**params):
    """Build a GAN from one flat dictionary of hyperparameters.

    Keyword arguments are routed by their two-character prefix:

    - ``g_<name>`` is passed to ``GANTranscoder`` as ``<name>``
    - ``d_<name>`` is passed to ``GANDiscriminators`` as ``<name>``
    - any other key is shared and passed unchanged to both

    Returns:
        ``GAN(generator, discriminator)`` built from the two routed
        parameter sets.
    """
    # One keyword dictionary per sub-model, indexed by the prefix that selects it
    routed = {"g_": {}, "d_": {}}
    for name, setting in params.items():
        prefix = name[:2]
        if prefix in routed:
            # Sub-model specific parameter: strip the prefix before storing
            routed[prefix][name[2:]] = setting
        else:
            # Shared parameter: hand it to both sub-models under its own name
            for sub_model_params in routed.values():
                sub_model_params[name] = setting

    generator = GANTranscoder(**routed["g_"])
    discriminator = GANDiscriminators(**routed["d_"])
    return GAN(generator, discriminator)

In [66]:
# Baseline GAN hyperparameters in the flat form create_gan_model expects:
# "g_" keys go to the generator, "d_" keys to the discriminators, and
# unprefixed keys to both
default_params = dict(
    feature_shape=input_shape,
    g_compression=8,
    g_kernel_size=3,
    g_conv_depth=1,
    g_input_chans_multiplier=1,
    g_skip_connection=False,
    g_pooling_type="average",
    g_gan_reg=0.1,
    g_c_reg=0.1,
    g_s_reg=0.1,
    d_mlp_layers=2,
    d_conv_compression=8,
    d_conv_kernel_size=3,
    d_conv_pooling_type="max",
)

# Keyword arguments for GAN.compile: one optimizer each for the generator and
# the discriminators
compile_kwargs = dict(g_optimizer="adam", d_optimizer="adam")

In [71]:
# Test training: build the GAN from the baseline parameters and fit it for a
# single epoch (classical features as input, jazz features as target)
gan = create_gan_model(**default_params)
gan.compile(**compile_kwargs)
history = gan.fit(
    X_c_train,
    X_j_train,
    validation_data=(X_c_val, X_j_val),
    epochs=1,
    shuffle=True,
    verbose=1,
)



In [72]:
# Hyperparameter tuning
# Directory in which the tuning results are stored
results_path = "./results/GANTranscoder"
os.makedirs(results_path, exist_ok=True)
# Candidate values per tunable generator hyperparameter; split into the
# parallel `keys` / `vals` arrays below
reg_candidates = [1, 1e-1, 1e-2, 1e-3, 1e-4, 1e-5]
search_space = {
    "g_gan_reg": list(reg_candidates),
    "g_c_reg": list(reg_candidates),
    "g_s_reg": list(reg_candidates),
    "g_input_chans_multiplier": [1, 2, 4],
    "g_conv_depth": [1, 2, 3],
    "g_kernel_size": [3, 5],
    "g_pooling_type": ["max", "average"],
}
keys = np.array(list(search_space))
# dtype="object" keeps the ragged candidate lists as one list per entry
vals = np.array(list(search_space.values()), dtype="object")
# Metric used to compare configurations, and the training budget
loss_key = "val_r_loss"
epochs = 250
passes = 1
# Start from the baseline until a tuning run provides something better
optimal_params = default_params.copy()

In [None]:
# Random optimization (do_random=True) with 25 attempts
random_search_options = dict(
    X_val=X_c_val,
    y_val=X_j_val,
    results_path=results_path,
    epochs=epochs,
    verbose=False,
    do_random=True,
    random_attempts=25,
    compile_kwargs=compile_kwargs,
)
optimal_params, optimal_loss = tune_hyperparameters(
    X_c_train, X_j_train, create_gan_model, default_params, keys, vals, loss_key,
    **random_search_options,
)

In [None]:
# Report the best configuration found so far and its loss
print(f"{optimal_params = }", f"{optimal_loss = }", sep="\n")

In [None]:
# Sequential optimization
for i in range(passes):
    # Draw a fresh random order for the hyperparameters on every pass and apply
    # it to both parallel arrays so keys and candidate values stay aligned
    # NOTE(review): assumes tune_hyperparameters visits `keys` in the given
    # order when do_random=False — confirm in tools.tensorflow_tools
    permutation = np.random.permutation(len(keys))
    optimal_params, optimal_loss = tune_hyperparameters(
        X_c_train,
        X_j_train,
        create_gan_model,
        default_params,
        keys[permutation],
        vals[permutation],
        loss_key,
        X_val=X_c_val,
        y_val=X_j_val,
        results_path=results_path,
        epochs=epochs,
        verbose=False,
        do_random=False,
        compile_kwargs=compile_kwargs,
    )

In [None]:
# Report the best configuration found so far and its loss
print(f"{optimal_params = }", f"{optimal_loss = }", sep="\n")

In [None]:
# Load optimal hyperparameters from tuning process
# Reads from results_path, so the tuning cells need not be re-run in this session
# NOTE(review): presumably selects the stored configuration with the best
# `loss_key` value — confirm in tools.tensorflow_tools
optimal_params, optimal_loss = load_optimal_params(results_path, loss_key)

In [None]:
# Report the loaded configuration and its loss
print(f"{optimal_params = }", f"{optimal_loss = }", sep="\n")

In [None]:
# Retrain with optimal params
# Trains classical -> jazz with the same data as this section's test-training cell
gan = create_gan_model(**optimal_params)
gan.compile(**compile_kwargs)
# Stop once the monitored loss has not improved by at least min_delta for 5 epochs
earlystopping = krs.callbacks.EarlyStopping(monitor=loss_key, patience=5, min_delta=1e-6)
history = gan.fit(
    X_c_train,
    X_j_train,
    epochs=epochs,
    shuffle=True,
    validation_data=(X_c_val, X_j_val),
    callbacks=[earlystopping],
    verbose=1,
)